ai_mc.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

#include <iostream>
#include <cassert>
#include <climits>
#include "ai_mc.h"
#include "util.h"

using namespace std;

#define NPLAYOUTS 3000
#define SCORE_WIN 1
#define SCORE_LOSE (-1)
#define SCORE_TIE 0


// Takes copy of board, since it probably isn't worth it to undo the whole rest
// of the game.
// TODO: Multiply the playout score with its cumulative probability (which is
// pretty small!) to get a probabilistically correct estimate of the expected
// score.
static int playout(Board bd, uint8_t myclr) {
	// cerr << "  PLAYOUT" << endl;
	while (bd.bag.totalLeft() > 0) {
		uint8_t clr = bd.bag.drawRandom();

		const vector<int> &moves = bd.getEdgeCells();

		assert(moves.size() > 0);
		int idx = moves[random() % moves.size()];

		// cerr << "    idx = " << Idx(idx) << " clr=" << (unsigned)clr << endl;

		uint8_t win = bd.putCW(idx, clr);
		if (win != 0) {
			return win == myclr ? SCORE_WIN : SCORE_LOSE;
		}
	}

	return SCORE_TIE;
}


int MC::calcMove(Board &bd, uint8_t myclr) {
	assert(bd.bag.totalLeft() > 0);

	float maxscore = INT_MIN;
	int maxat = -1;

	vector<int> edgeCells = bd.getEdgeCells();
	for (int idx : edgeCells) {
		// cerr << "MC::calcMove: trying idx=" << Idx(idx) << endl;
		float score = 0;

		for (int i = 0; i < NPLAYOUTS; i++) {
			// cerr << "playout " << i << endl;

			uint8_t clr = bd.bag.peekRandom();
			float probability = (float)bd.bag.numLeft(clr) / bd.bag.totalLeft();
			bd.bag.drawColour(clr);

			// cerr << "  random clr=" << (unsigned)clr << endl;
			uint8_t win = bd.putCW(idx, clr);
			if (win != 0) {
				score += probability * (win == myclr ? SCORE_WIN : SCORE_LOSE);
			} else {
				score += probability * playout(bd, myclr);
			}

			bd.bag.replace(clr);
			bd.undo(idx);
		}

		if (score > maxscore) {
			maxscore = score;
			maxat = idx;
		}
	}

	return maxat;
}