diff options
author | Tom Smeding <tom.smeding@gmail.com> | 2019-02-15 11:32:44 +0100 |
---|---|---|
committer | Tom Smeding <tom.smeding@gmail.com> | 2019-02-15 11:32:44 +0100 |
commit | 3711c92f43eedbd698a9c477e3248fb87892fbd9 (patch) | |
tree | 7132c9f0396304acfbab612469d6386a9a2ab2a2 /ai_mc.cpp | |
parent | 99f3ad9bc30815213af5c666150daa6080c509db (diff) |
Multiple AI's (+RAND)
Diffstat (limited to 'ai_mc.cpp')
-rw-r--r-- | ai_mc.cpp | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/ai_mc.cpp b/ai_mc.cpp new file mode 100644 index 0000000..6175d96 --- /dev/null +++ b/ai_mc.cpp @@ -0,0 +1,79 @@ +#include <iostream> +#include <cassert> +#include <climits> +#include "ai_mc.h" +#include "util.h" + +using namespace std; + +#define NPLAYOUTS 1000 +#define SCORE_WIN 1 +#define SCORE_LOSE (-1) +#define SCORE_TIE 0 + + +// Takes copy of board, since it probably isn't worth it to undo the whole rest +// of the game. +// TODO: Multiply the playout score with its cumulative probability (which is +// pretty small!) to get a probabilistically correct estimate of the expected +// score. +static int playout(Board bd, uint8_t myclr) { + // cerr << " PLAYOUT" << endl; + while (bd.bag.totalLeft() > 0) { + uint8_t clr = bd.bag.drawRandom(); + + int moves[BSZ * BSZ], nmoves = 0; + bd.forEachMove([&moves, &nmoves](int idx) { moves[nmoves++] = idx; }); + + assert(nmoves > 0); + int idx = moves[random() % nmoves]; + + // cerr << " idx = " << Idx(idx) << " clr=" << (unsigned)clr << endl; + + uint8_t win = bd.putCW(idx, clr); + if (win != 0) { + return win == myclr ? SCORE_WIN : SCORE_LOSE; + } + } + + return SCORE_TIE; +} + + +int MC::calcMove(Board &bd, uint8_t myclr) { + assert(bd.bag.totalLeft() > 0); + + float maxscore = INT_MIN; + int maxat = -1; + + bd.forEachMove([&bd, myclr, &maxscore, &maxat](int idx) { + // cerr << "MC::calcMove: trying idx=" << Idx(idx) << endl; + float score = 0; + + for (int i = 0; i < NPLAYOUTS; i++) { + // cerr << "playout " << i << endl; + + uint8_t clr = bd.bag.peekRandom(); + float probability = (float)bd.bag.numLeft(clr) / bd.bag.totalLeft(); + bd.bag.drawColour(clr); + + // cerr << " random clr=" << (unsigned)clr << endl; + uint8_t win = bd.putCW(idx, clr); + if (win != 0) { + score += probability * (win == myclr ? SCORE_WIN : SCORE_LOSE); + } else { + score += probability * playout(bd, myclr); + } + + bd.bag.replace(clr); + bd.undo(idx); + } + + if (score > maxscore) { + maxscore = score; + maxat = idx; + } + }); + + return maxat; +} |