(1) | if device_count>1 then | (2) | cudaMemcpyAsync (h_a, d_a, sizeof(float)*n, cudaMemcpyDeviceToHost, stream[0]); | (3) | MPI_Isend (buf, int count, MPI_Datatype, int dest, int tag, MPI_COMM_WORLD, MPI_Request request); | (4) | //Primitive_Variables_Exchange; | (5) | Boundary_Processing_GPU<<<Block_size, Thread_size, 0, stream[1]>>> ( ); | (6) | Time_Step_GPU<<<Block_size, Thread_size, 0, stream[1]>>> ( ); | (7) | //Grad_Primitive_Variables_Exchange; | (8) | Convective_Flux_GPU<<<Block_size, Thread_size, 0, stream[1]>>> ( ); | (9) | MPI_Irecv (buf, int count, MPI_Datatype, int source, int tag, MPI_COMM_WORLD, MPI_Status status, MPI_Request request); | (10) | MPI_Waitall ( ); | (11) | cudaMemcpyAsync (d_a, h_a, sizeof(float)*n, cudaMemcpyHostToDevice, stream[0]); | (12) | end if |
|