Implementation and Optimization of a CFD Solver Using Overlapped Meshes on Multiple MIC Coprocessors

<table class="algorithm-group"><tr><td><table class="algorithm" id="alg3">
<tr><td>(1)</td><td>!$omp parallel do private(idev, <i>ib</i>, … …)</td></tr>
<tr><td>(2)</td><td>do idev = 0, 1</td></tr>
<tr><td>(3)</td><td><b>repeat</b></td></tr>
<tr><td>(4)</td><td> offload target(mic:idev): set_boundary_condition for each block</td></tr>
<tr><td>(5)</td><td> if(icycle &gt; 1) offload target(mic:idev) wait(sgr(idev)): set_CRI_to_domain</td></tr>
<tr><td>(6)</td><td> offload target(mic:idev): exchange_interface_data</td></tr>
<tr><td>(7)</td><td> <b>do </b><i>ib</i> = 1, nb(idev)</td></tr>
<tr><td>(8)</td><td>  offload target(mic:idev): spatial_step</td></tr>
<tr><td>(9)</td><td>  offload target(mic:idev): temporal_step</td></tr>
<tr><td>(10)</td><td>  offload target(mic:idev): compute_CPI</td></tr>
<tr><td>(11)</td><td>  offload_transfer target(mic:idev) out(<span class="nowrap">CPI<sub><i>ib</i></sub></span>) signal(sgp<sub>idev</sub>(ib))</td></tr>
<tr><td>(12)</td><td> <b>end do</b></td></tr>
<tr><td>(13)</td><td> master thread: offload_wait all sgp<sub>idev</sub> related to each device</td></tr>
<tr><td>(14)</td><td> master thread on CPU: exchange_interpolation_data</td></tr>
<tr><td>(15)</td><td> master thread: offload_transfer target(mic:idev) in(<span class="nowrap">CRI<sub>imbc</sub></span>) signal(sgr(idev))</td></tr>
<tr><td>(16)</td><td><b>until</b> convergence</td></tr>
<tr><td>(17)</td><td>!$omp end parallel do</td></tr>
</table></td></tr></table>

<div>Communication optimization algorithm.</div>

Scientific Programming

alg3

Algorithm 1

Algorithm 1: Implementation and Optimization of a CFD Solver Using Overlapped Meshes on Multiple MIC Coprocessors