// Code that is executed before the kernel is launched.
// Builds a 2D launch configuration with 32 x 16 = 512 threads per block and
// enough blocks to span DATA_W threads along x and DATA_H threads along y
// when those sizes divide evenly by the block dimensions.
int threadsInX = 32;
int threadsInY = 16;
// NOTE(review): integer division truncates, so this presumably assumes that
// DATA_W and DATA_H are exact multiples of threadsInX and threadsInY;
// otherwise the trailing columns/rows are not covered by any thread. Confirm,
// or round the block counts up and add a bounds check inside the kernel.
int blocksInX = DATA_W / threadsInX;
int blocksInY = DATA_H / threadsInY;
dimGrid = dim3(blocksInX, blocksInY);
dimBlock = dim3(threadsInX, threadsInY, 1);
// Code that is executed inside the kernel.
// Global 2D coordinates of the calling thread: the offset of its block
// (block index times block size) plus its position within that block.
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;