Ensemble Convolution Neural Network for Robust Video Emotion Recognition Using Deep Semantics

<table class="algorithm-group"><tr><td><table class="algorithm" id="alg3"><tr><td> </td><td>Input: Keyframes</td></tr><tr><td> </td><td>Output: Representation of occluded face</td></tr><tr><td> </td><td>Input the extracted keyframe <svg height="11.927pt" id="M84" style="vertical-align:-3.291101pt" version="1.1" viewbox="-0.0498162 -8.6359 20.5362 11.927" width="20.5362pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M743 650H503L496 622L527 618C563 613 564 603 532 573C449 495 371 431 323 392C301 374 272 355 246 346L280 522C297 609 300 614 379 622L385 650H135L129 622C209 614 215 609 198 522L124 133C106 39 99 35 23 28L17 0H271L277 28C193 35 192 39 208 133L239 316C264 328 280 325 303 288C368 183 435 90 502 0H652L659 28C602 34 584 43 543 94C495 154 403 283 347 369L574 554C634 603 659 612 735 624L743 650Z"></path></g><g transform="matrix(.013,0,0,-0.013,9.426,0)"><path d="M584 650H137L131 622C214 614 217 612 200 521L125 127C109 41 101 35 23 28L17 0H288L294 28C201 35 193 42 209 128L242 309H348C440 309 442 300 443 226H471L510 422H482C452 354 449 348 357 348H251L295 575C302 609 304 615 338 615H426C502 615 517 604 526 581C534 560 536 524 537 492L565 494C574 554 583 631 584 650Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,17.381,3.132)"><path d="M250 606C250 634 233 656 203 656C168 656 146 618 146 593C146 564 169 545 192 545C227 545 250 573 250 606ZM227 95L212 119C187 98 152 71 135 71C129 71 128 78 134 102L207 373C219 418 217 451 194 451C165 451 92 411 30 351L44 326C77 353 106 371 114 371C124 371 121 357 117 341L55 97C32 5 46 -12 70 -12C108 -12 191 51 227 95Z"></path></g></svg> as a face image</td></tr><tr><td> </td><td>Generate a feature map (FM) from each keyframe</td></tr><tr><td> </td><td> Return 24 local patches (<span class="nowrap"><svg height="11.8174pt" id="M85" style="vertical-align:-3.1815pt" version="1.1" viewbox="-0.0498162 -8.6359 11.727 11.8174" width="11.727pt" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M600 480C600 590 528 650 384 650H143L137 622C222 614 225 607 210 531L130 127C113 41 106 36 23 28L17 0H294L300 28C204 36 195 42 212 127L243 284L314 263C327 263 339 263 352 264C465 271 600 337 600 480ZM508 481C508 351 402 304 329 304C289 304 265 311 250 317L295 559C302 594 310 606 323 611C335 616 350 619 367 619C455 619 508 573 508 481Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,6.656,3.132)"><path d="M389 0V32C297 38 291 46 291 118V635C234 613 175 595 109 583V556L161 554C203 552 207 547 207 497V118C207 46 201 38 110 32V0H389Z"></path></g></svg>,</span> <svg height="11.8174pt" id="M86" style="vertical-align:-3.1815pt" version="1.1" viewbox="-0.0498162 -8.6359 11.727 11.8174" width="11.727pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M600 480C600 590 528 650 384 650H143L137 622C222 614 225 607 210 531L130 127C113 41 106 36 23 28L17 0H294L300 28C204 36 195 42 212 127L243 284L314 263C327 263 339 263 352 264C465 271 600 337 600 480ZM508 481C508 351 402 304 329 304C289 304 265 311 250 317L295 559C302 594 310 606 323 611C335 616 350 619 367 619C455 619 508 573 508 481Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,6.656,3.132)"><path d="M414 144C384 79 371 75 317 75H135L276 221C367 316 408 376 408 465C408 570 327 635 237 635C179 635 131 609 100 575L42 494L67 471C94 510 138 565 205 565C277 565 321 517 321 435C321 348 258 270 195 195C146 137 88 81 33 26V0H411C423 44 433 88 446 135L414 144Z"></path></g></svg>… <span class="nowrap"><svg height="11.8174pt" id="M87" style="vertical-align:-3.1815pt" version="1.1" viewbox="-0.0498162 -8.6359 16.1753 11.8174" width="16.1753pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M600 480C600 590 528 650 384 650H143L137 622C222 
614 225 607 210 531L130 127C113 41 106 36 23 28L17 0H294L300 28C204 36 195 42 212 127L243 284L314 263C327 263 339 263 352 264C465 271 600 337 600 480ZM508 481C508 351 402 304 329 304C289 304 265 311 250 317L295 559C302 594 310 606 323 611C335 616 350 619 367 619C455 619 508 573 508 481Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,6.656,3.132)"><path d="M414 144C384 79 371 75 317 75H135L276 221C367 316 408 376 408 465C408 570 327 635 237 635C179 635 131 609 100 575L42 494L67 471C94 510 138 565 205 565C277 565 321 517 321 435C321 348 258 270 195 195C146 137 88 81 33 26V0H411C423 44 433 88 446 135L414 144Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,11.088,3.132)"><path d="M462 177V227H365V632H320C217 496 116 350 21 208V177H284V109C284 43 280 38 190 31V0H451V31C369 38 365 43 365 108V177H462ZM284 227H88C155 336 218 430 282 519H284V227Z"></path></g></svg>)</span></td></tr><tr><td> </td><td> For each local patch</td></tr><tr><td> </td><td>  Decompose the feature map into 24 subfeature-maps (<svg height="11.9479pt" id="M88" style="vertical-align:-3.181499pt" version="1.1" viewbox="-0.0498162 -8.7664 29.6038 11.9479" width="29.6038pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M409 504C401 567 396 607 392 642C354 654 312 665 266 665C137 665 60 583 60 487C60 374 161 325 225 290C300 250 355 215 355 141C355 68 311 21 235 21C131 21 86 122 71 183L41 176C48 128 61 42 68 21C78 16 93 8 118 0C142 -7 175 -15 216 -15C349 -15 438 69 438 174C438 287 344 333 265 374C186 414 138 449 138 522C138 576 172 631 249 631C336 631 363 562 380 499L409 504Z"></path></g><g transform="matrix(.013,0,0,-0.013,6.005,0)"><path d="M493 503C489 551 484 614 483 650H43V622C120 616 128 611 128 525V126C128 40 120 34 40 28V0H312V28C221 34 213 40 213 126V307H316C407 307 412 296 424 227H453V420H424C412 355 407 346 316 346H213V584C213 613 216 616 246 616H322C398 616 419 607 436 579C449 559 455 539 464 499L493 
503Z"></path></g><g transform="matrix(.013,0,0,-0.013,12.882,0)"><path d="M861 0V28C774 35 771 41 768 147L759 509C756 612 762 614 851 622V650H681L449 149L221 650H57V622C148 613 153 609 144 479L130 271C123 166 117 123 111 88C104 46 85 34 26 28V0H259V28C192 35 169 42 167 90C166 130 166 173 170 256L185 541H187L411 7H431L675 555H679L683 147C683 41 680 35 598 28V0H861Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,24.558,3.132)"><path d="M389 0V32C297 38 291 46 291 118V635C234 613 175 595 109 583V556L161 554C203 552 207 547 207 497V118C207 46 201 38 110 32V0H389Z"></path></g></svg>… <span class="nowrap"><svg height="11.9479pt" id="M89" style="vertical-align:-3.181499pt" version="1.1" viewbox="-0.0498162 -8.7664 34.0521 11.9479" width="34.0521pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M409 504C401 567 396 607 392 642C354 654 312 665 266 665C137 665 60 583 60 487C60 374 161 325 225 290C300 250 355 215 355 141C355 68 311 21 235 21C131 21 86 122 71 183L41 176C48 128 61 42 68 21C78 16 93 8 118 0C142 -7 175 -15 216 -15C349 -15 438 69 438 174C438 287 344 333 265 374C186 414 138 449 138 522C138 576 172 631 249 631C336 631 363 562 380 499L409 504Z"></path></g><g transform="matrix(.013,0,0,-0.013,6.005,0)"><path d="M493 503C489 551 484 614 483 650H43V622C120 616 128 611 128 525V126C128 40 120 34 40 28V0H312V28C221 34 213 40 213 126V307H316C407 307 412 296 424 227H453V420H424C412 355 407 346 316 346H213V584C213 613 216 616 246 616H322C398 616 419 607 436 579C449 559 455 539 464 499L493 503Z"></path></g><g transform="matrix(.013,0,0,-0.013,12.882,0)"><path d="M861 0V28C774 35 771 41 768 147L759 509C756 612 762 614 851 622V650H681L449 149L221 650H57V622C148 613 153 609 144 479L130 271C123 166 117 123 111 88C104 46 85 34 26 28V0H259V28C192 35 169 42 167 90C166 130 166 173 170 256L185 541H187L411 7H431L675 555H679L683 147C683 41 680 35 598 28V0H861Z"></path></g><g 
transform="matrix(.0091,0,0,-0.0091,24.558,3.132)"><path d="M414 144C384 79 371 75 317 75H135L276 221C367 316 408 376 408 465C408 570 327 635 237 635C179 635 131 609 100 575L42 494L67 471C94 510 138 565 205 565C277 565 321 517 321 435C321 348 258 270 195 195C146 137 88 81 33 26V0H411C423 44 433 88 446 135L414 144Z"></path></g><g transform="matrix(.0091,0,0,-0.0091,28.989,3.132)"><path d="M462 177V227H365V632H320C217 496 116 350 21 208V177H284V109C284 43 280 38 190 31V0H451V31C369 38 365 43 365 108V177H462ZM284 227H88C155 336 218 430 282 519H284V227Z"></path></g></svg>)</span></td></tr><tr><td> </td><td>  Encode a weighted vector (wv) of local feature (lf) by a PG-Unit</td></tr><tr><td> </td><td>  PG-Unit computes the weight by an attention net based on its obstructed-ness</td></tr><tr><td> </td><td>  Concatenate the weighted local features</td></tr><tr><td> </td><td>  Return the representation of the occluded face.</td></tr><tr><td> </td><td> End For</td></tr></table></td></tr></table>

<div>Occlusion detection from a patched image.</div>

Scientific Programming

alg3

Algorithm 3

Algorithm 3: Ensemble Convolution Neural Network for Robust Video Emotion Recognition Using Deep Semantics