Research Article

GPU Preconditioning for Block Linear Systems Using Block Incomplete Sparse Approximate Inverses

Algorithm 2

GPU kernel for solving LGSBTS.
(1)__shared__ double sm[warps_per_block][warp_size];
(2)__shared__ double inv[warps_per_block][bsz][bsz];
(3)__shared__ double ssol[warps_per_block][bsz][bsz];
(4)tid = threadIdx.x + blockIdx.x blockDim.x;
(5)wpid = threadIdx.x/warp_size;
(6)gwpid = tid/warp_size;
(7)rpos = threadIdx.x%warp size;
(8)range = ;
(9)if gwpid < nbr then
(10)if rpos < range then
(11)  r = rpos%bsz;
(12)  c = (rpos/bsz)%bsz;
(13)  InvStIdx = NLColPtr[gwpid];
(14)  InvEdIdx = NLColPtr[gwpid + 1];
(15)  for idx = InvStIdx to InvEdIdx − 1 do
(16)   irow = NLRowVal[idx];
(17)   if rpos < then
(18)    tmpidx = LRowPtr[irow + 1] − 1;
(19)    ssol[wpid][r][c] = L[tmpidx + rpos];
(20)    inv[wpid][r][c] = 1.0/det ;
(21)    sm[wpid][rpos] = Rhs[idx + rpos];
(22)    ssol[wpid][r][c] = ;
(23)    NL[idx + rpos] = ssol[wpid][r][c];
(24)   end if
(25)   myidx = idx + 1 + ;
(26)   triEdIdx = triStIdx + (n − icol) ;
(27)   solIdx = solStIdx +  + rpos;
(28)   while myidx < InvEdIdx do
(29)     sm[wpid][rpos] = 0.0;
(30)     myrow = NLRowVal[myidx];
(31)     tmpidx = LRowPtr[myrow + 1] − 1;
(32)     while tmpidx  LRowPtr[myrow] && LColVal[tmpidx] == irow do
(33)      tmpidx−−;
(34)     end while
(35)     if tmpidx  LRowPtr[myrow] && LColVal[tmpidx] == irow then
(36)      s[wpid][lane] = L[tmpidx  + rpos%];
(37)     end if
(38)    Rhs[myidx  + lane%]− = ;
(39)    myidx+ = warp_size/;
(40)   end while
(41)  end for
(42)end if
(43)end if