Research Article
Effective SIMD Vectorization for Intel Xeon Phi Coprocessors
Algorithm 5
An example of OpenMP parallel for and SIMD combined usage.
#pragma omp declare simd uniform(max_iter) simdlen(32) | uint32_t mandel (fcomplex c, uint32_t max_iter) | { | // Computes number of iterations(count variable) | // that it takes for parameter c to be known to | // be outside mandelbrot set | uint32_t count = 1; fcomplex z = c; | for (int32_t i = 0; i < max_iter; i += 1) { | z = z * z + c; | int t = (cabsf(z) < 2.0f); | count += t; | if (t == 0) { break;} | } | return count; | } | Caller site code: | int main() { | … … … | #pragma omp parallel for schedule(guided) | for (int32_t y = 0; y < ImageHeight; ++y) { | float c_im = max_imag - y * imag_factor; | #pragma omp simd safelen(32) | for (int32_t x = 0; x < ImageWidth; ++x) { | fcomplex in_val; | in_val = (min_real + x * real_factor) + (c_im * 1.0iF); | count[y][x] = mandel (in_val, max_iter); | } | } | … … … | |
|