代码如下:
- #define Nx 24
- #define Ny 24
- int main()
- {
- float *data_GPU,*test;
- cudaMalloc((void**)&data_GPU,sizeof(float)*Nx*Ny);cudaMemset(data_GPU,3.0,Nx*Ny*sizeof(float));
- cudaMalloc((void**)&test,sizeof(float)*Nx*Ny);cudaMemset(test,0.0,Nx*Ny*sizeof(float));
- dim3 threadsPerblock=(8,8);
- dim3 blockspergrid=(3,3);
- //mykernel<<<blockspergrid,threadsPerblock>>>(data_GPU,test);
- cudaDeviceSynchronize();
- printf("cudaGetLastError=%s\n",cudaGetErrorString(cudaGetLastError()));
- float *CPU_data;
- cudaMallocHost((void**)&CPU_data,sizeof(float)*Nx*Ny);
- cudaMemcpy(CPU_data,data_GPU,sizeof(float)*Nx*Ny,cudaMemcpyDeviceToHost);
- for(int iy=0;iy<Ny;iy++){
- for(int ix=0;ix<Nx;ix++){
- printf("CPU_DATA=%f\n",CPU_data[iy*Nx+ix]);
- }
- }
- cudaFreeHost(CPU_data);
- cudaFree(data_GPU);
- cudaFree(test);
- return 0;
- }
复制代码 kerne函数已经被屏蔽,本意是想用memset对显存上数据data_GPU初始化为3.0,但是发现好像没有成功呢?因为拷贝到CPU上发现都是0.000。
多谢斑竹
|