#include<stdio.h>
#include<time.h>
#include<stdlib.h>
#include<cuda.h>
/*
 * MyKernel - visits every float element of a pitched 3D allocation.
 *
 * devPitchedPtr : pitched pointer describing the allocation; .ptr is the base
 *                 address and .pitch is the row size in bytes (padding included).
 * width         : number of float elements per row.
 * height        : number of rows per slice.
 * depth         : number of slices.
 *
 * Launch layout: 1D grid of 1D blocks (only the .x components are used). The
 * elements are distributed over the launched threads with a stride of
 * gridDim.x * blockDim.x, so any grid/block size covers the whole volume.
 *
 * Each element is read and discarded; the kernel writes nothing.
 */
__global__ void MyKernel(cudaPitchedPtr devPitchedPtr,int width, int height, int depth)
{
    // Nothing to visit; also keeps negative sizes from wrapping to huge size_t values below.
    if (width <= 0 || height <= 0 || depth <= 0)
        return;

    // cudaPitchedPtr::ptr is a void*, which C++ will not convert to char* implicitly.
    const char* devPtr = static_cast<const char*>(devPitchedPtr.ptr);
    const size_t pitch = devPitchedPtr.pitch;
    const size_t slicePitch = pitch * height;   // bytes per 2D slice

    const size_t rowLength = (size_t)width;
    const size_t sliceElems = rowLength * (size_t)height;
    const size_t total = sliceElems * (size_t)depth;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < total; i += stride)
    {
        // x varies fastest so that consecutive threads touch consecutive floats of a row.
        const size_t x = i % rowLength;
        const size_t y = (i / rowLength) % (size_t)height;
        const size_t z = i / sliceElems;

        // Rows are addressed in bytes because the pitch may include padding.
        const char* slice = devPtr + z * slicePitch;
        const float* row = reinterpret_cast<const float*>(slice + y * pitch);
        float element = row[x];
        (void)element;   // value is intentionally unused
    }
}
/* Prints a diagnostic to stderr when a CUDA runtime call failed.
 * Returns true when `status` is cudaSuccess, false otherwise. */
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Allocates a 64x64x64 float volume as a pitched 3D allocation, zeroes it,
 * runs MyKernel over it, releases the allocation, then prints a marker line
 * and waits for a key press.
 *
 * Returns EXIT_SUCCESS when every CUDA call succeeded, EXIT_FAILURE otherwise.
 */
int main(void)
{
    const int width = 64, height = 64, depth = 64;
    // The extent's width is expressed in bytes; height and depth are in elements.
    cudaExtent extent = make_cudaExtent(width*sizeof(float),height, depth);
    cudaPitchedPtr devPitchedPtr;

    const bool allocated = cudaSucceeded(cudaMalloc3D(&devPitchedPtr, extent), "cudaMalloc3D");
    bool ok = allocated;

    // Zero the volume so the kernel does not read uninitialized device memory.
    if (ok)
        ok = cudaSucceeded(cudaMemset3D(devPitchedPtr, 0, extent), "cudaMemset3D");

    if (ok)
    {
        MyKernel<<<100, 512>>>(devPitchedPtr, width, height, depth);
        // The launch is asynchronous: cudaGetLastError reports launch-configuration
        // errors, cudaDeviceSynchronize surfaces errors raised while the kernel ran.
        ok = cudaSucceeded(cudaGetLastError(), "MyKernel launch")
          && cudaSucceeded(cudaDeviceSynchronize(), "MyKernel execution");
    }

    // Release the device allocation whether or not the kernel succeeded.
    if (allocated)
        ok = cudaSucceeded(cudaFree(devPitchedPtr.ptr), "cudaFree") && ok;

    printf("\nP=\n");
    getchar();   // wait for a key press before exiting
    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
|