// Reference path-tracing kernel: each thread traces `context.SampleCount`
// paths for one pixel and writes the averaged radiance to
// `context.RenderTarget`.
//
// Thread-to-pixel mapping (as established by the index math below):
//   * threadIdx.x selects a pixel inside a tile that is 8 pixels wide
//     (tx = threadIdx.x & 7, ty = threadIdx.x >> 3).
//   * threadIdx.y / blockIdx select the tile; tiles are numbered row-major
//     with `context.width >> 3` tiles per row, offset by the tile index of
//     `context.firstline`.
// NOTE(review): the `>> 5` and `yt << 2` terms imply 8x4 = 32-pixel tiles,
// i.e. blockDim.x == 32 and context.width a multiple of 8 - confirm against
// the launch code. There is no bounds check; the launch must cover the image
// exactly.
//
// Reads : context.{width, height, firstline, SampleCount, Triangles, Materials}
// Writes: context.RenderTarget[pidx]
extern "C" __global__ void TracePixelReference()
{
    // setup path
    int numRays = context.width * context.height;
    // Linear tile index of this thread, including the offset of the first
    // scanline being rendered.
    int idx0 = threadIdx.y + blockDim.y *
        (blockIdx.x + gridDim.x * blockIdx.y) +
        ((context.firstline * context.width) >> 5);
    int tx = threadIdx.x & 7, ty = threadIdx.x >> 3;
    int tilesperline = context.width >> 3;
    int xt = idx0 % tilesperline;
    int yt = idx0 / tilesperline;
    int px = (xt << 3) + tx, py = (yt << 2) + ty;
    // Pixels are stored in reverse order in the render target.
    int pidx = numRays - 1 -
        (px + py * context.width);
    // Per-pixel random generator; the seed mixes the clock with the pixel
    // index. NOTE(review): the operator before 140167 was lost in the
    // listing; XOR is assumed - confirm against the original source.
    RNG genrand(pidx, (clock() * pidx *
        8191) ^ 140167);
    int spp = context.SampleCount;
    float rcpw = 1.0f / context.width;
    // Screen coordinates, centred on zero and scaled by the image width.
    // NOTE(review): u and v are not referenced explicitly below; presumably
    // CreatePrimaryRay is a macro that picks them up - confirm before removing.
    float u = (float)px * rcpw - 0.5f;
    float v = (float)(py + (context.width -
        context.height) * 0.5f) * rcpw - 0.5f;
    float3 E = make_float3(0, 0, 0);
    // trace path
    for (int sample = 0; sample < spp; sample++)
    {
        // construct primary ray
        float3 O, D;
        CreatePrimaryRay(O, D);
        // trace path
        float3 throughput = make_float3(1, 1, 1);
        int depth = 0;
        while (1)
        {
            int prim = 0;
            float2 BC, UV = make_float2(0, 0);
            float dist = 1000000;
            bool backfaced = false;
            intersect(O, D, dist, BC, prim, backfaced);
            O += D * dist;
            if (prim == -1)
            {
                // Nothing was hit: the path picks up sky radiance and ends.
                E += throughput * GetSkySample(D);
                break;
            }
            Triangle& tri = context.Triangles[prim];
            TracerMaterial mat =
                context.Materials[tri.GetMaterialIdx()];
            if (mat.flags & Material::EMITTER) // light
            {
                E += throughput * mat.EmissiveColor;
                break;
            }
            else // diffuse reflection
            {
                float3 matcol = tri.GetMaterialColor(
                    mat, BC, UV);
                // Flip the normal when the surface was hit from behind so
                // that it faces the incoming ray.
                float3 N = tri.GetNormal(mat, BC, UV) *
                    (backfaced ? -1 : 1);
                D = normalize(RandomReflection(
                    genrand, N));
                throughput *= matcol * dot(D, N);
            }
            // Nudge the origin along the new direction to avoid
            // re-intersecting the surface just hit.
            O += D * EPSILON;
            depth++;
            if (depth > 3)
            {
                // Russian roulette: terminate half of the paths and double
                // the weight of the survivors to keep the estimate unbiased.
                if (genrand() > 0.5f) break;
                throughput *= 2.0f;
            }
        }
    }
    // Store the average over all samples; alpha is set to 1.
    context.RenderTarget[pidx] =
        make_float4(E / (float)spp, 1);
}
Algorithm 2: Path tracing implemented in CUDA.