14 #include <cuda_runtime.h>
17 #include <helper_functions.h>
18 #include <helper_cuda.h>
21 #include <cublas_v2.h>
23 #include <thrust/device_vector.h>
24 #include <thrust/device_ptr.h>
25 #include <thrust/host_vector.h>
26 #include <thrust/transform.h>
37 void vector_axpy(
const double a,
const double *X,
const double *Y,
double *Z,
int N){
46 double errorinf(
const double *X,
const double *Y,
int N){
47 double norm=fabs(X[0]-Y[0]);
48 for (
int i=1;i<N;i++){
49 double s=fabs(X[i]-Y[i]);
59 struct trans :
public thrust::unary_function<T,T>
64 trans(T _a, T _b) : a(_a), b(_b) {}
88 int N,
double a,
double b,
unsigned int mainSeed
90 thrust::device_vector<double> V(N);
91 curandGenerator_t m_prng;
93 curandCreateGenerator(&m_prng, CURAND_RNG_PSEUDO_DEFAULT);
95 curandSetPseudoRandomGeneratorSeed(m_prng, (
unsigned long) mainSeed);
96 curandGenerateUniformDouble(m_prng, thrust::raw_pointer_cast(V.data()), N);
97 thrust::transform(V.begin(),V.end(),V.begin(),
trans<double>(a,b));
98 curandDestroyGenerator(m_prng);
109 void print(
const thrust::device_vector<double> &V,
int n){
116 std::cout <<
" <device_vector>["<< V.size() <<
"]\n";
118 std::cout.precision(16);
119 for (
int i=0;i<np;i++)
120 std::cout <<
" [" << i <<
"]: " << V[i] << std::endl;
122 std::cout <<
" ...\n";
123 for (
int i=nd;i<N;i++)
124 std::cout <<
" [" << i <<
"]: " << V[i] << std::endl;
135 void print(
const thrust::host_vector<double> &V,
int n){
142 std::cout <<
" <host_vector>["<< V.size() <<
"]\n";
144 std::cout.precision(16);
145 for (
int i=0;i<np;i++)
146 std::cout <<
" [" << i <<
"]: " << V[i] << std::endl;
148 std::cout <<
" ...\n";
149 for (
int i=nd;i<N;i++)
150 std::cout <<
" [" << i <<
"]: " << V[i] << std::endl;
154 #define CURAND_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \
155 printf("Error at %s:%d\n",__FILE__,__LINE__);\
156 return EXIT_FAILURE;}} while(0)
170 int main(
int argc,
char** argv){
172 thrust::host_vector<double> h_X(N),h_Y(N),h_Z(N),h_Zgpu(N);
173 thrust::device_vector<double> d_X,d_Y,d_Z;
175 cublasHandle_t handle;
176 cublasStatus_t stat = cublasCreate(&handle);
177 if (stat != CUBLAS_STATUS_SUCCESS) {
178 printf (
"CUBLAS initialization failed\n");
185 std::cout<<
"A) Generate uniform random [-2,2] on <device> : \nd_X =\n";
189 std::cout<<
"B) Generate uniform random ]0,1] on <host> : \nh_X =\n";
190 thrust::generate(h_X.begin(), h_X.end(), drand48);
193 std::cout<<
"C) Generate uniform random [0,1] on <host> : \nh_Y =\n";
194 thrust::generate(h_Y.begin(), h_Y.end(), drand48);
197 std::cout<<
"D) Copy <host> array h_x on <device> array d_X : \nd_X =\n";
200 std::cout<<
"E) Copy <host> array h_y on <device> array d_Y : \nd_Y =\n";
204 std::cout<<
"F) Compute on <device>, d_Y <- d_Y - d_X : \nd_Y =\n";
206 stat=cublasDaxpy(handle,N,&alpha,thrust::raw_pointer_cast(d_X.data()),1,
207 thrust::raw_pointer_cast(d_Y.data()),1);
208 if (stat != CUBLAS_STATUS_SUCCESS) {
209 printf (
"CUBLAS cublasDaxpy failed\n");
214 std::cout<<
"G) Copy <device> array d_Y on <host> array h_Zgpu : \nh_Zgpu =\n";
218 std::cout<<
"H) Compute h_Z <- h_Y - h_X : \nh_Z =\n";
222 std::cout<<
"I) Compute max|h_Zgpu - h_Z| = " <<
errorinf(&h_Zgpu[0],&h_Z[0],N);
223 std::cout <<
"\n...End\n";