Research Article
Multi-GPU Support on Single Node Using Directive-Based Programming Model
Algorithm 4
Single GPU implementation of 2D heat equation.
void step_kernel(…) | { | #pragma acc parallel present(temp_in[0:ni*nj], temp_out[0:ni*nj]) | num_gangs(32) vector_length(32) | { | // loop over all points in domain (except boundary) | #pragma acc loop gang | for (j=1; j < nj-1; j++) { | #pragma acc loop vector | for (i=1; i < ni-1; i++) { | // find indices into linear memory | // for central point and neighbours | i00 = I2D(ni, i, j); | im10 = I2D(ni, i-1, j); | ip10 = I2D(ni, i+1, j); | i0m1 = I2D(ni, i, j-1); | i0p1 = I2D(ni, i, j+1); | // evaluate derivatives | d2tdx2 = temp_in[im10]-2*temp_in[i00]+temp_in[ip10]; | d2tdy2 = temp_in[i0m1]-2*temp_in[i00]+temp_in[i0p1]; | // update temperatures | temp_out[i00] = temp_in[i00]+tfac*(d2tdx2 + d2tdy2); | } | } | } | } | #pragma acc data copy(temp1[0:ni*nj]) | copyin(temp2[0:ni*nj]) | { | for (istep=0; istep < nstep; istep++) { | step_kernel(ni, nj, tfac, temp1, temp2); | // swap the temp pointers | temp = temp1; | temp1 = temp2; | temp2 = temp; | } | } |
|