Visual Experience-Based Question Answering with Complex Multimodal Environments

<table class="table-group" id="tab4"><tr><td><table class="table"><tr><td class="thead-hr" colspan="5"><hr/></td></tr><tr class="thead"><td class="align_left" rowspan="2">Models</td><td class="align_center" rowspan="2">Object mAP (%)</td><td class="align_center" colspan="3">SGGen (%)</td></tr><tr class="thead"><td class="align_center">Attribute</td><td class="align_center">Relation</td><td class="align_center">Total</td></tr><tr><td class="thead-hr" colspan="5"><hr/></td></tr><tr><td class="align_left"><span style="width: 75.5995pt;"><svg aria-label="O + A + T + R" height="9.31205pt" id="M119" role="img" style="vertical-align:-0.3499298pt" version="1.1" viewbox="-0.0498162 -8.96212 75.5995 9.31205" width="75.5995pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M699 369C699 549 575 666 407 666C186 666 23 488 23 278C23 101 145 -16 312 -16C535 -16 699 153 699 369ZM600 373C600 210 500 19 321 19C186 19 120 129 120 272C120 450 232 631 399 631C541 631 600 522 600 373Z"></path></g><g transform="matrix(.013,0,0,-0.013,12.293,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,22.829,0)"><path d="M686 28C612 35 607 44 591 112C563 234 541 360 519 489L489 666L457 658L147 121C100 40 89 36 24 28L17 0H240L250 28C168 34 159 41 190 101L262 237H482C495 180 503 137 510 91C517 47 514 35 441 28L433 0H677L686 28ZM475 280H285L429 541H431L475 280Z"></path></g><g transform="matrix(.013,0,0,-0.013,34.87,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,45.406,0)"><path d="M620 675H597C578 656 570 650 541 650H144C112 650 104 653 94 675H72C59 618 42 552 23 493L53 491C71 534 88 564 105 585C124 608 144 615 238 615H290L197 121C182 40 174 34 88 28L82 0H361L367 28C275 34 266 38 281 121L374 615H441C522 615 543 608 553 583C562 560 566 531 565 493L597 494C603 551 612 629 620 675Z"></path></g><g 
transform="matrix(.013,0,0,-0.013,56.597,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,67.133,0)"><path d="M610 18C585 26 567 34 540 68C517 97 499 128 476 171C452 215 425 276 413 304C496 332 570 394 570 494C570 555 545 595 509 619S419 650 364 650H139L133 622C216 615 219 612 203 527L129 132C112 40 105 36 23 28L17 0H279L285 28C199 34 194 40 211 132L239 284H284C320 284 334 275 351 236C374 182 394 140 420 93C459 23 495 -1 592 -8H600L610 18ZM480 485C480 424 449 372 403 342C374 323 338 316 293 316H245L291 562C296 589 301 601 311 608S337 618 358 618C432 618 480 575 480 485Z"></path></g></svg></span></td><td class="align_center">68.79</td><td class="align_center">56.87</td><td class="align_center">46.79</td><td class="align_center">53.73</td></tr><tr><td class="align_left"><span style="width: 82.0491pt;"><svg aria-label="O + A + T_gt + R" height="14.4911pt" id="M120" role="img" style="vertical-align:-5.52898pt" version="1.1" viewbox="-0.0498162 -8.96212 82.0491 14.4911" width="82.0491pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M699 369C699 549 575 666 407 666C186 666 23 488 23 278C23 101 145 -16 312 -16C535 -16 699 153 699 369ZM600 373C600 210 500 19 321 19C186 19 120 129 120 272C120 450 232 631 399 631C541 631 600 522 600 373Z"></path></g><g transform="matrix(.013,0,0,-0.013,12.293,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,22.829,0)"><path d="M686 28C612 35 607 44 591 112C563 234 541 360 519 489L489 666L457 658L147 121C100 40 89 36 24 28L17 0H240L250 28C168 34 159 41 190 101L262 237H482C495 180 503 137 510 91C517 47 514 35 441 28L433 0H677L686 28ZM475 280H285L429 541H431L475 280Z"></path></g><g transform="matrix(.013,0,0,-0.013,34.87,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,45.406,0)"><path d="M620 
675H597C578 656 570 650 541 650H144C112 650 104 653 94 675H72C59 618 42 552 23 493L53 491C71 534 88 564 105 585C124 608 144 615 238 615H290L197 121C182 40 174 34 88 28L82 0H361L367 28C275 34 266 38 281 121L374 615H441C522 615 543 608 553 583C562 560 566 531 565 493L597 494C603 551 612 629 620 675Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,52.582,3.132)"><path d="M463 437C426 431 375 425 327 422C297 440 264 449 231 449H230C153 449 51 396 51 283C51 215 94 168 139 149C123 129 91 103 51 88C50 78 53 60 62 46C75 25 100 2 136 -9C112 -28 73 -59 53 -79C38 -94 29 -113 29 -135C30 -192 91 -257 203 -257C336 -257 452 -160 452 -59C452 39 371 59 309 59C275 59 240 58 203 58C158 58 140 77 140 96C140 110 157 129 170 138C186 135 205 133 221 133C306 133 396 185 396 293C396 328 384 360 366 381L423 378C439 387 459 413 468 429L463 437ZM219 418C277 418 314 362 314 284C314 205 275 166 231 165C176 165 137 221 137 299C137 376 177 418 219 418ZM241 -11C285 -11 314 -14 339 -24C367 -36 384 -61 384 -95C384 -157 335 -206 240 -206C166 -206 108 -165 108 -110C108 -82 128 -54 154 -32C172 -17 195 -11 241 -11Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,56.841,3.132)"><path d="M298 36L289 62C276 55 253 45 228 45C202 45 169 60 169 141V397H276C289 405 292 426 282 437H169V574L155 576L90 509V437H45L17 408L21 397H90V107C90 28 125 -12 188 -12C198 -12 213 -8 230 1L298 36Z"></path></g><g transform="matrix(.013,0,0,-0.013,63.046,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,73.583,0)"><path d="M610 18C585 26 567 34 540 68C517 97 499 128 476 171C452 215 425 276 413 304C496 332 570 394 570 494C570 555 545 595 509 619S419 650 364 650H139L133 622C216 615 219 612 203 527L129 132C112 40 105 36 23 28L17 0H279L285 28C199 34 194 40 211 132L239 284H284C320 284 334 275 351 236C374 182 394 140 420 93C459 23 495 -1 592 -8H600L610 18ZM480 485C480 424 449 372 403 342C374 323 338 316 293 316H245L291 562C296 589 301 601 311 608S337 618 358 
618C432 618 480 575 480 485Z"></path></g></svg></span></td><td class="align_center">85.12</td><td class="align_center">69.69</td><td class="align_center">60.09</td><td class="align_center">67.35</td></tr><tr><td class="align_left"><span style="width: 90.717pt;"><svg aria-label="O + A_gt + T_gt + R" height="14.4911pt" id="M121" role="img" style="vertical-align:-5.52898pt" version="1.1" viewbox="-0.0498162 -8.96212 90.717 14.4911" width="90.717pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M699 369C699 549 575 666 407 666C186 666 23 488 23 278C23 101 145 -16 312 -16C535 -16 699 153 699 369ZM600 373C600 210 500 19 321 19C186 19 120 129 120 272C120 450 232 631 399 631C541 631 600 522 600 373Z"></path></g><g transform="matrix(.013,0,0,-0.013,12.293,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,22.829,0)"><path d="M686 28C612 35 607 44 591 112C563 234 541 360 519 489L489 666L457 658L147 121C100 40 89 36 24 28L17 0H240L250 28C168 34 159 41 190 101L262 237H482C495 180 503 137 510 91C517 47 514 35 441 28L433 0H677L686 28ZM475 280H285L429 541H431L475 280Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,33.073,3.132)"><path d="M463 437C426 431 375 425 327 422C297 440 264 449 231 449H230C153 449 51 396 51 283C51 215 94 168 139 149C123 129 91 103 51 88C50 78 53 60 62 46C75 25 100 2 136 -9C112 -28 73 -59 53 -79C38 -94 29 -113 29 -135C30 -192 91 -257 203 -257C336 -257 452 -160 452 -59C452 39 371 59 309 59C275 59 240 58 203 58C158 58 140 77 140 96C140 110 157 129 170 138C186 135 205 133 221 133C306 133 396 185 396 293C396 328 384 360 366 381L423 378C439 387 459 413 468 429L463 437ZM219 418C277 418 314 362 314 284C314 205 275 166 231 165C176 165 137 221 137 299C137 376 177 418 219 418ZM241 -11C285 -11 314 -14 339 -24C367 -36 384 -61 384 -95C384 -157 335 -206 240 -206C166 -206 108 -165 108 -110C108 -82 128 -54 154 -32C172 -17 195 -11 241 -11Z"></path></g><g 
transform="matrix(.0091,0,0,-0.0091,37.332,3.132)"><path d="M298 36L289 62C276 55 253 45 228 45C202 45 169 60 169 141V397H276C289 405 292 426 282 437H169V574L155 576L90 509V437H45L17 408L21 397H90V107C90 28 125 -12 188 -12C198 -12 213 -8 230 1L298 36Z"></path></g><g transform="matrix(.013,0,0,-0.013,43.538,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,54.074,0)"><path d="M620 675H597C578 656 570 650 541 650H144C112 650 104 653 94 675H72C59 618 42 552 23 493L53 491C71 534 88 564 105 585C124 608 144 615 238 615H290L197 121C182 40 174 34 88 28L82 0H361L367 28C275 34 266 38 281 121L374 615H441C522 615 543 608 553 583C562 560 566 531 565 493L597 494C603 551 612 629 620 675Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,61.25,3.132)"><path d="M463 437C426 431 375 425 327 422C297 440 264 449 231 449H230C153 449 51 396 51 283C51 215 94 168 139 149C123 129 91 103 51 88C50 78 53 60 62 46C75 25 100 2 136 -9C112 -28 73 -59 53 -79C38 -94 29 -113 29 -135C30 -192 91 -257 203 -257C336 -257 452 -160 452 -59C452 39 371 59 309 59C275 59 240 58 203 58C158 58 140 77 140 96C140 110 157 129 170 138C186 135 205 133 221 133C306 133 396 185 396 293C396 328 384 360 366 381L423 378C439 387 459 413 468 429L463 437ZM219 418C277 418 314 362 314 284C314 205 275 166 231 165C176 165 137 221 137 299C137 376 177 418 219 418ZM241 -11C285 -11 314 -14 339 -24C367 -36 384 -61 384 -95C384 -157 335 -206 240 -206C166 -206 108 -165 108 -110C108 -82 128 -54 154 -32C172 -17 195 -11 241 -11Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,65.509,3.132)"><path d="M298 36L289 62C276 55 253 45 228 45C202 45 169 60 169 141V397H276C289 405 292 426 282 437H169V574L155 576L90 509V437H45L17 408L21 397H90V107C90 28 125 -12 188 -12C198 -12 213 -8 230 1L298 36Z"></path></g><g transform="matrix(.013,0,0,-0.013,71.714,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g 
transform="matrix(.013,0,0,-0.013,82.251,0)"><path d="M610 18C585 26 567 34 540 68C517 97 499 128 476 171C452 215 425 276 413 304C496 332 570 394 570 494C570 555 545 595 509 619S419 650 364 650H139L133 622C216 615 219 612 203 527L129 132C112 40 105 36 23 28L17 0H279L285 28C199 34 194 40 211 132L239 284H284C320 284 334 275 351 236C374 182 394 140 420 93C459 23 495 -1 592 -8H600L610 18ZM480 485C480 424 449 372 403 342C374 323 338 316 293 316H245L291 562C296 589 301 601 311 608S337 618 358 618C432 618 480 575 480 485Z"></path></g></svg></span></td><td class="align_center">85.12</td><td class="align_center">84.61</td><td class="align_center">60.09</td><td class="align_center">79.62</td></tr><tr><td class="align_left"><span style="width: 97.9496pt;"><svg aria-label="O_gt + A_gt + T_gt + R" height="14.4911pt" id="M122" role="img" style="vertical-align:-5.52898pt" version="1.1" viewbox="-0.0498162 -8.96212 97.9496 14.4911" width="97.9496pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M699 369C699 549 575 666 407 666C186 666 23 488 23 278C23 101 145 -16 312 -16C535 -16 699 153 699 369ZM600 373C600 210 500 19 321 19C186 19 120 129 120 272C120 450 232 631 399 631C541 631 600 522 600 373Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,9.061,3.132)"><path d="M463 437C426 431 375 425 327 422C297 440 264 449 231 449H230C153 449 51 396 51 283C51 215 94 168 139 149C123 129 91 103 51 88C50 78 53 60 62 46C75 25 100 2 136 -9C112 -28 73 -59 53 -79C38 -94 29 -113 29 -135C30 -192 91 -257 203 -257C336 -257 452 -160 452 -59C452 39 371 59 309 59C275 59 240 58 203 58C158 58 140 77 140 96C140 110 157 129 170 138C186 135 205 133 221 133C306 133 396 185 396 293C396 328 384 360 366 381L423 378C439 387 459 413 468 429L463 437ZM219 418C277 418 314 362 314 284C314 205 275 166 231 165C176 165 137 221 137 299C137 376 177 418 219 418ZM241 -11C285 -11 314 -14 339 -24C367 -36 384 -61 384 -95C384 -157 335 -206 240 -206C166 -206 108 -165 108 -110C108 -82 128 
-54 154 -32C172 -17 195 -11 241 -11Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,13.32,3.132)"><path d="M298 36L289 62C276 55 253 45 228 45C202 45 169 60 169 141V397H276C289 405 292 426 282 437H169V574L155 576L90 509V437H45L17 408L21 397H90V107C90 28 125 -12 188 -12C198 -12 213 -8 230 1L298 36Z"></path></g><g transform="matrix(.013,0,0,-0.013,19.525,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,30.062,0)"><path d="M686 28C612 35 607 44 591 112C563 234 541 360 519 489L489 666L457 658L147 121C100 40 89 36 24 28L17 0H240L250 28C168 34 159 41 190 101L262 237H482C495 180 503 137 510 91C517 47 514 35 441 28L433 0H677L686 28ZM475 280H285L429 541H431L475 280Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,40.306,3.132)"><path d="M463 437C426 431 375 425 327 422C297 440 264 449 231 449H230C153 449 51 396 51 283C51 215 94 168 139 149C123 129 91 103 51 88C50 78 53 60 62 46C75 25 100 2 136 -9C112 -28 73 -59 53 -79C38 -94 29 -113 29 -135C30 -192 91 -257 203 -257C336 -257 452 -160 452 -59C452 39 371 59 309 59C275 59 240 58 203 58C158 58 140 77 140 96C140 110 157 129 170 138C186 135 205 133 221 133C306 133 396 185 396 293C396 328 384 360 366 381L423 378C439 387 459 413 468 429L463 437ZM219 418C277 418 314 362 314 284C314 205 275 166 231 165C176 165 137 221 137 299C137 376 177 418 219 418ZM241 -11C285 -11 314 -14 339 -24C367 -36 384 -61 384 -95C384 -157 335 -206 240 -206C166 -206 108 -165 108 -110C108 -82 128 -54 154 -32C172 -17 195 -11 241 -11Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,44.565,3.132)"><path d="M298 36L289 62C276 55 253 45 228 45C202 45 169 60 169 141V397H276C289 405 292 426 282 437H169V574L155 576L90 509V437H45L17 408L21 397H90V107C90 28 125 -12 188 -12C198 -12 213 -8 230 1L298 36Z"></path></g><g transform="matrix(.013,0,0,-0.013,50.77,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,61.307,0)"><path 
d="M620 675H597C578 656 570 650 541 650H144C112 650 104 653 94 675H72C59 618 42 552 23 493L53 491C71 534 88 564 105 585C124 608 144 615 238 615H290L197 121C182 40 174 34 88 28L82 0H361L367 28C275 34 266 38 281 121L374 615H441C522 615 543 608 553 583C562 560 566 531 565 493L597 494C603 551 612 629 620 675Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,68.483,3.132)"><path d="M463 437C426 431 375 425 327 422C297 440 264 449 231 449H230C153 449 51 396 51 283C51 215 94 168 139 149C123 129 91 103 51 88C50 78 53 60 62 46C75 25 100 2 136 -9C112 -28 73 -59 53 -79C38 -94 29 -113 29 -135C30 -192 91 -257 203 -257C336 -257 452 -160 452 -59C452 39 371 59 309 59C275 59 240 58 203 58C158 58 140 77 140 96C140 110 157 129 170 138C186 135 205 133 221 133C306 133 396 185 396 293C396 328 384 360 366 381L423 378C439 387 459 413 468 429L463 437ZM219 418C277 418 314 362 314 284C314 205 275 166 231 165C176 165 137 221 137 299C137 376 177 418 219 418ZM241 -11C285 -11 314 -14 339 -24C367 -36 384 -61 384 -95C384 -157 335 -206 240 -206C166 -206 108 -165 108 -110C108 -82 128 -54 154 -32C172 -17 195 -11 241 -11Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,72.741,3.132)"><path d="M298 36L289 62C276 55 253 45 228 45C202 45 169 60 169 141V397H276C289 405 292 426 282 437H169V574L155 576L90 509V437H45L17 408L21 397H90V107C90 28 125 -12 188 -12C198 -12 213 -8 230 1L298 36Z"></path></g><g transform="matrix(.013,0,0,-0.013,78.947,0)"><path d="M535 230V280H323V490H265V280H52V230H265V-3H323V230H535Z"></path></g><g transform="matrix(.013,0,0,-0.013,89.483,0)"><path d="M610 18C585 26 567 34 540 68C517 97 499 128 476 171C452 215 425 276 413 304C496 332 570 394 570 494C570 555 545 595 509 619S419 650 364 650H139L133 622C216 615 219 612 203 527L129 132C112 40 105 36 23 28L17 0H279L285 28C199 34 194 40 211 132L239 284H284C320 284 334 275 351 236C374 182 394 140 420 93C459 23 495 -1 592 -8H600L610 18ZM480 485C480 424 449 372 403 342C374 323 338 316 293 316H245L291 562C296 589 301 601 311 608S337 618 
358 618C432 618 480 575 480 485Z"></path></g></svg></span></td><td class="align_center">100.0</td><td class="align_center">100.0</td><td class="align_center">95.89</td><td class="align_center">98.80</td></tr><tr class="table-tr"><td colspan="5"><hr class="tbody-hr"/></td></tr></table></td></tr></table>

<div>Performance analysis of scene graph generation depending on different state recognition models.</div>

Mathematical Problems in Engineering

tab4

Table 4

Table 4: Visual Experience-Based Question Answering with Complex Multimodal Environments