TP CUDA 5.0 / Exemple axpy
axpy (CUDA 5.0)
 Tout Classes Fichiers Fonctions Pages
thrust_axpy.cu
Aller à la documentation de ce fichier.
1 
8 // includes, system
9 #include <iostream>
10 #include <stdlib.h>
11 #include <time.h>
12 
13 // Utilities and system includes
14 #include <cuda_runtime.h>
15 #include <curand.h>
16 
17 #include <helper_functions.h>
18 #include <helper_cuda.h>
19 
20 #include <curand.h>
21 #include <cublas_v2.h>
22 
23 #include <thrust/device_vector.h>
24 #include <thrust/device_ptr.h>
25 #include <thrust/host_vector.h>
26 #include <thrust/transform.h>
27 
37 void vector_axpy(const double a,const double *X, const double *Y, double *Z, int N){
38  for (int i=0;i<N;i++)
39  Z[i]=a*X[i]+Y[i];
40 }
41 
46 double errorinf(const double *X,const double *Y,int N){
47  double norm=fabs(X[0]-Y[0]);
48  for (int i=1;i<N;i++){
49  double s=fabs(X[i]-Y[i]);
50  if (s>norm) norm=s;
51  }
52  return norm;
53 }
54 
58 template <typename T>
59 struct trans : public thrust::unary_function<T,T>
60 {
61  T a, b;
62 
63  __host__ __device__
64  trans(T _a, T _b) : a(_a), b(_b) {}
65 
66  __host__ __device__
67  T operator()(T x)
68  {
69  return a+(b-a)*x;
70  }
71 };
72 
73 
87 thrust::device_vector<double> UniformRandom(
88  int N, double a, double b, unsigned int mainSeed
89 ){
90  thrust::device_vector<double> V(N);
91  curandGenerator_t m_prng;
92  //Create a new generator
93  curandCreateGenerator(&m_prng, CURAND_RNG_PSEUDO_DEFAULT);
94  //Set the generator options
95  curandSetPseudoRandomGeneratorSeed(m_prng, (unsigned long) mainSeed);
96  curandGenerateUniformDouble(m_prng, thrust::raw_pointer_cast(V.data()), N);
97  thrust::transform(V.begin(),V.end(),V.begin(), trans<double>(a,b));
98  curandDestroyGenerator(m_prng);
99  return V;
100 }
101 
109 void print(const thrust::device_vector<double> &V,int n){
110  int N=V.size();
111  int nd=N-n,np=n;
112 
113  if (n>N){
114  np=N;nd=N+1;
115  }
116  std::cout << " <device_vector>["<< V.size() << "]\n";
117  //std::setprecision(16)
118  std::cout.precision(16);
119  for (int i=0;i<np;i++)
120  std::cout << " [" << i << "]: " << V[i] << std::endl;
121  if (nd<N)
122  std::cout << " ...\n";
123  for (int i=nd;i<N;i++)
124  std::cout << " [" << i << "]: " << V[i] << std::endl;
125  std::cout << "\n";
126 }
127 
135 void print(const thrust::host_vector<double> &V,int n){
136  int N=V.size();
137  int nd=N-n,np=n;
138 
139  if (n>N){
140  np=N;nd=N+1;
141  }
142  std::cout << " <host_vector>["<< V.size() << "]\n";
143  //std::setprecision(16)
144  std::cout.precision(16);
145  for (int i=0;i<np;i++)
146  std::cout << " [" << i << "]: " << V[i] << std::endl;
147  if (nd<N)
148  std::cout << " ...\n";
149  for (int i=nd;i<N;i++)
150  std::cout << " [" << i << "]: " << V[i] << std::endl;
151  std::cout << "\n";
152 }
153 
154 #define CURAND_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \
155  printf("Error at %s:%d\n",__FILE__,__LINE__);\
156  return EXIT_FAILURE;}} while(0)
157 
158 
170 int main(int argc, char** argv){
171  int N=10000;
172  thrust::host_vector<double> h_X(N),h_Y(N),h_Z(N),h_Zgpu(N);
173  thrust::device_vector<double> d_X,d_Y,d_Z;
174 
175  cublasHandle_t handle;
176  cublasStatus_t stat = cublasCreate(&handle);
177  if (stat != CUBLAS_STATUS_SUCCESS) {
178  printf ("CUBLAS initialization failed\n");
179  return EXIT_FAILURE;
180  }
181  time_t t1;
182  time(&t1);
183  srand48((long) t1); // Initialize
184 
185  std::cout<< "A) Generate uniform random [-2,2] on <device> : \nd_X =\n";
186  d_X=UniformRandom(N,-2.,2.,(long) t1);
187  print(d_X,3);
188 
189  std::cout<< "B) Generate uniform random ]0,1] on <host> : \nh_X =\n";
190  thrust::generate(h_X.begin(), h_X.end(), drand48);
191  print(h_X,3);
192 
193  std::cout<< "C) Generate uniform random [0,1] on <host> : \nh_Y =\n";
194  thrust::generate(h_Y.begin(), h_Y.end(), drand48);
195  print(h_Y,3);
196 
197  std::cout<< "D) Copy <host> array h_x on <device> array d_X : \nd_X =\n";
198  d_X=h_X;
199  print(d_X,3);
200  std::cout<< "E) Copy <host> array h_y on <device> array d_Y : \nd_Y =\n";
201  d_Y=h_Y;
202  print(d_Y,3);
203 
204  std::cout<< "F) Compute on <device>, d_Y <- d_Y - d_X : \nd_Y =\n";
205  double alpha=-1.0;
206  stat=cublasDaxpy(handle,N,&alpha,thrust::raw_pointer_cast(d_X.data()),1,
207  thrust::raw_pointer_cast(d_Y.data()),1);
208  if (stat != CUBLAS_STATUS_SUCCESS) {
209  printf ("CUBLAS cublasDaxpy failed\n");
210  return EXIT_FAILURE;
211  }
212  print(d_Y,3);
213 
214  std::cout<< "G) Copy <device> array d_Y on <host> array h_Zgpu : \nh_Zgpu =\n";
215  h_Zgpu=d_Y;
216  print(h_Zgpu,3);
217 
218  std::cout<< "H) Compute h_Z <- h_Y - h_X : \nh_Z =\n";
219  vector_axpy(-1.,&h_X[0], &h_Y[0], &h_Z[0], N);
220  print(h_Z,3);
221 
222  std::cout<< "I) Compute max|h_Zgpu - h_Z| = " << errorinf(&h_Zgpu[0],&h_Z[0],N);
223  std::cout << "\n...End\n";
224 }