Reinforcement Learning Guided by Double Replay Memory

<!--
  Table 1: maximum and average scores from the CartPole simulations.
  The outer "table-group" table is only a wrapper around the data table.
  Three row labels are typeset as inline SVG glyph outlines (lambda = 0.1 / 0.5 / 0.9);
  each SVG carries role="img" plus an aria-label so the label has a text equivalent.
  The two italic cells are kept exactly as in the source markup.
-->
<table class="table-group" id="tab1">
  <tr>
    <td>
      <table class="table">
        <tr><td class="thead-hr" colspan="3"><hr/></td></tr>
        <tr class="thead">
          <td class="align_left"></td>
          <td class="align_center">Max score</td>
          <td class="align_center">Average score</td>
        </tr>
        <tr><td class="thead-hr" colspan="3"><hr/></td></tr>
        <tr>
          <td class="align_left">DQN</td>
          <td class="align_center">373.80</td>
          <td class="align_center">229.30</td>
        </tr>
        <tr>
          <!-- Glyph order: lambda, "=", "0", ".", "1" -->
          <td class="align_left"><span style="width: 37.7278pt;"><svg height="9.49473pt" id="M115" style="vertical-align:-0.2063999pt" version="1.1" viewBox="-0.0498162 -9.28833 37.7278 9.49473" width="37.7278pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" role="img" aria-label="&#955; = 0.1">
            <g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M529 97L508 118C475 75 449 58 438 58C428 58 421 66 415 104C393 234 374 403 364 496C345 670 307 712 254 712C220 712 174 691 153 669L161 645C176 653 194 658 206 658C237 658 261 640 278 562C287 522 290 483 293 434C223 269 110 105 23 9L32 -12C59 -6 85 0 108 7C152 64 251 252 300 366C307 297 315 221 337 82C346 24 363 -12 393 -12C425 -12 475 13 529 97Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,10.806,0)"><path d="M535 323V373H52V323H535ZM535 138V188H52V138H535Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,22.068,0)"><path d="M241 635C89 635 35 457 35 312C35 153 89 -12 240 -12C390 -12 443 166 443 312C443 466 390 635 241 635ZM238 602C329 602 354 454 354 312C354 172 330 22 240 22C152 22 124 173 124 313S148 602 238 602Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,28.308,0)"><path d="M113 -12C146 -12 170 11 170 46C170 78 146 103 114 103S58 78 58 46C58 11 82 -12 113 -12Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,31.272,0)"><path d="M384 0V27C293 34 287 42 287 114V635C232 613 172 594 109 583V559L157 557C201 555 205 550 205 499V114C205 42 199 34 109 27V0H384Z"></path></g>
          </svg></span></td>
          <td class="align_center">478.65</td>
          <td class="align_center">210.58</td>
        </tr>
        <tr>
          <!-- Glyph order: lambda, "=", "0", ".", "5" -->
          <td class="align_left"><span style="width: 37.7278pt;"><svg height="9.49473pt" id="M116" style="vertical-align:-0.2063999pt" version="1.1" viewBox="-0.0498162 -9.28833 37.7278 9.49473" width="37.7278pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" role="img" aria-label="&#955; = 0.5">
            <g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M529 97L508 118C475 75 449 58 438 58C428 58 421 66 415 104C393 234 374 403 364 496C345 670 307 712 254 712C220 712 174 691 153 669L161 645C176 653 194 658 206 658C237 658 261 640 278 562C287 522 290 483 293 434C223 269 110 105 23 9L32 -12C59 -6 85 0 108 7C152 64 251 252 300 366C307 297 315 221 337 82C346 24 363 -12 393 -12C425 -12 475 13 529 97Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,10.806,0)"><path d="M535 323V373H52V323H535ZM535 138V188H52V138H535Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,22.068,0)"><path d="M241 635C89 635 35 457 35 312C35 153 89 -12 240 -12C390 -12 443 166 443 312C443 466 390 635 241 635ZM238 602C329 602 354 454 354 312C354 172 330 22 240 22C152 22 124 173 124 313S148 602 238 602Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,28.308,0)"><path d="M113 -12C146 -12 170 11 170 46C170 78 146 103 114 103S58 78 58 46C58 11 82 -12 113 -12Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,31.272,0)"><path d="M153 550H386L412 615L406 623H120L82 318C104 327 142 338 184 338C294 338 347 275 347 187C347 112 305 39 221 39C160 39 119 71 97 89C88 97 80 96 71 90C59 80 50 67 49 57C48 45 52 36 66 23C80 9 123 -12 169 -12C221 -11 288 15 342 59C403 109 431 165 431 225C431 308 366 395 238 395C212 395 165 379 127 364L153 550Z"></path></g>
          </svg></span></td>
          <td class="align_center">500</td>
          <td class="align_center">259.23</td>
        </tr>
        <tr>
          <!-- Glyph order: lambda, "=", "0", ".", "9" -->
          <td class="align_left"><span style="width: 37.7278pt;"><svg height="9.49473pt" id="M117" style="vertical-align:-0.2063999pt" version="1.1" viewBox="-0.0498162 -9.28833 37.7278 9.49473" width="37.7278pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" role="img" aria-label="&#955; = 0.9">
            <g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M529 97L508 118C475 75 449 58 438 58C428 58 421 66 415 104C393 234 374 403 364 496C345 670 307 712 254 712C220 712 174 691 153 669L161 645C176 653 194 658 206 658C237 658 261 640 278 562C287 522 290 483 293 434C223 269 110 105 23 9L32 -12C59 -6 85 0 108 7C152 64 251 252 300 366C307 297 315 221 337 82C346 24 363 -12 393 -12C425 -12 475 13 529 97Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,10.806,0)"><path d="M535 323V373H52V323H535ZM535 138V188H52V138H535Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,22.068,0)"><path d="M241 635C89 635 35 457 35 312C35 153 89 -12 240 -12C390 -12 443 166 443 312C443 466 390 635 241 635ZM238 602C329 602 354 454 354 312C354 172 330 22 240 22C152 22 124 173 124 313S148 602 238 602Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,28.308,0)"><path d="M113 -12C146 -12 170 11 170 46C170 78 146 103 114 103S58 78 58 46C58 11 82 -12 113 -12Z"></path></g>
            <g transform="matrix(.013,0,0,-0.013,31.272,0)"><path d="M244 635C114 635 38 519 38 422C38 317 111 240 217 240C236 240 255 244 277 256L345 292C311 140 203 39 59 15L64 -15C89 -15 150 -5 204 17C339 72 440 202 440 386C440 521 368 635 244 635ZM228 602C326 602 352 479 352 390C352 370 351 347 348 324C327 308 293 296 258 296C174 296 124 369 124 458C124 517 152 602 228 602Z"></path></g>
          </svg></span></td>
          <td class="align_center"><i>500</i></td>
          <td class="align_center"><i>285.49</i></td>
        </tr>
        <tr>
          <td class="align_justify">PER</td>
          <td class="align_center">500</td>
          <td class="align_center">237.63</td>
        </tr>
        <tr class="table-tr"><td colspan="3"><hr class="tbody-hr"/></td></tr>
      </table>
    </td>
  </tr>
</table>

<div>The scores from CartPole simulations.</div>

Journal of Sensors

tab1

Table 1

Table 1: Reinforcement Learning Guided by Double Replay Memory