Research Article
An Efficient Multi-Core SIMD Implementation for H.264/AVC Encoder
Algorithm 3
Unaligned load SIMD implementation without the concatenate instruction.
ui32_t PackCurr0 = *(orig_line);
ui32_t PackCurr1 = *(orig_line+1);
/* Pack to 128 bits */
TmpVectArray[0] = PackCurr0;
TmpVectArray[1] = PackCurr1;
Pack128In = ldqi(Pack128In, TmpVectArray,0);
/* Reorganize pixels */
Va = vmrgbeh(Va,Pack128In,VZero,permute0);
Vb = vmrgboh(Vb,Pack128In,VZero, permute1);
VPackCurr = vaddh(VPackCurr,Va,Vb,0);