Add MCTS AI

author: Tom Smeding <tom.smeding@gmail.com> 2018-07-21 22:01:57 +0200
committer: Tom Smeding <tom.smeding@gmail.com> 2018-07-21 22:01:57 +0200
commit: 53291958e0cda68ed762c0dfb36c0602f876c06d (patch)
tree: 1a1ed725c0c4a993c41bc25ace5484a032764cb9
parent: d7828673e787afc76913ede36fed78ba6a179470 (diff)
4 files changed, 227 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index 7536e18..a6f0b55 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CXX = g++
-CXXFLAGS = -Wall -Wextra -O3 -std=c++17 -fwrapv -flto
+CXXFLAGS = -Wall -Wextra -O3 -g -std=c++17 -fwrapv -flto
 
 TARGET = main
 
diff --git a/ai_mcts.cpp b/ai_mcts.cpp
new file mode 100644
index 0000000..b8a50ae
--- /dev/null
+++ b/ai_mcts.cpp
@@ -0,0 +1,215 @@
+#include <fstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <climits>
+#include <cmath>
+#include <cassert>
+#include "ai_mcts.h"
+
+
+static int mcts_niterations = 5000;  // rounds of nodeSelect / expand / backPropagate that findMove runs per call
+// static int mcts_niterations = 5000;
+static int mcts_newnode_playouts = 3;  // playouts run by expand for each new node; also the ntotal that node starts with
+
+
+// Plays random moves on bd, alternating sides and starting with `player`, until some applyCW call returns
+// a nonzero result; that result is returned. A side that has no candidate move yields -side instead.
+// Only moves whose applyCW result is zero or has the mover's sign are ever played.
+static int playout(Board &bd, int player) {
+	Move candidates[N * N * N];
+
+	for (;; player = -player) {
+		int count = 0;
+		int winning = -1;  // index in candidates of a move with a nonzero result, if one was seen
+		bd.forEachMove(player, [&](Move mv) {
+			// Try the move on a copy so bd itself stays untouched while candidates are collected.
+			Board trial = bd;
+			const int result = trial.applyCW(mv);
+			if (result * player < 0) return false;  // result has the opposite sign of the mover: not a candidate
+			candidates[count++] = mv;
+			if (result == 0) return false;
+			winning = count - 1;
+			return true;  // presumably stops the enumeration early; verify against Board::forEachMove
+		});
+
+		if (count == 0) return -player;
+
+		// Take the recorded nonzero-result move when there is one, otherwise the candidate at rand() % count.
+		const int pick = winning != -1 ? winning : rand() % count;
+
+		const int outcome = bd.applyCW(candidates[pick]);
+		if (outcome != 0) return outcome;
+	}
+}
+
+// One node of the search tree; each node stores its children by value and points back at its parent.
+struct Node {
+	int nwin = 0, ntotal = 0;  // nwin: number of wins, as regarded from the player on turn after the parent node; ntotal: playouts counted
+	bool terminal = false;  // someone has won
+	Move inedge;  // move applied to get from the parent to this node
+	Node *parent = nullptr;  // nullptr marks the root; backPropagate stops there
+	vector<Node> children;  // filled by expand on the first visit, one entry per move reported by forEachMove
+	bool allExpanded = false;  // set by expand once the last child with ntotal == 0 has been picked
+};
+
+static float scoreFormula(const Node &node) {  // UCB1 value: nwin/ntotal + sqrt(2 ln(parent ntotal) / ntotal); needs ntotal > 0 and a non-null parent
+	return (float)node.nwin / node.ntotal + sqrtf(2.0f * logf(node.parent->ntotal) / node.ntotal);
+}
+
+// Walks down from `from` while the current node is fully expanded, each time moving into the child with the
+// highest scoreFormula value (the first one on ties) and applying that child's move to *bd. Returns the first
+// node reached that is terminal or not fully expanded; *onturn is negated per step into a non-terminal child.
+static Node& nodeSelect(Node &from, Board *bd, int *onturn) {
+	Node *cur = &from;
+	while (cur->allExpanded) {
+		assert(!cur->children.empty());
+
+		Node *best = nullptr;
+		float bestscore = -1;
+		for (Node &child : cur->children) {
+			const float s = scoreFormula(child);
+			if (s > bestscore) { bestscore = s; best = &child; }
+		}
+
+		bd->apply(best->inedge);
+		if (best->terminal) return *best;  // the terminal move is applied to *bd, but *onturn is left as is
+		*onturn = -*onturn;
+		cur = best;
+	}
+	return *cur;
+}
+
+// Adds one new leaf below `from`, whose position is bd with `onturn` to move: picks a random child that has no
+// playouts yet, evaluates it with mcts_newnode_playouts playouts (or marks it terminal if its move already
+// decides the game), stores the result in that child only, and returns it. Ancestors are not updated here.
+// If `from` itself is terminal, no child is created; its own counters are bumped and `from` is returned.
+static Node& expand(Node &from, const Board &bd, int onturn) {
+	if (from.terminal) {
+		from.ntotal += mcts_newnode_playouts;
+		if (from.nwin > 0) {
+			from.nwin += from.ntotal;  // NOTE(review): adds the accumulated total, not just the new playouts — confirm
+		}
+		return from;
+	}
+
+	assert(!from.allExpanded);
+
+	if (from.children.empty()) {
+		// First visit: create one unvisited child per move that forEachMove reports for the side to move.
+		bd.forEachMove(onturn, [&from](Move mv) {
+			from.children.emplace_back();
+			Node &ch = from.children.back();
+			ch.inedge = mv;
+			ch.parent = &from;
+			return false;
+		});
+	}
+
+	vector<int> poss;  // indices of the children that have not been evaluated yet
+	poss.reserve(from.children.size());
+	for (int i = 0; i < (int)from.children.size(); i++) {
+		if (from.children[i].ntotal == 0) poss.push_back(i);
+	}
+
+	assert(poss.size() != 0);
+	int index = poss[rand() % poss.size()];
+
+	// Taking the last unevaluated child completes this node, which lets nodeSelect descend through it.
+	if (poss.size() == 1) from.allExpanded = true;
+
+	Node &node = from.children[index];
+	Board bd2 = bd;
+	int win = bd2.applyCW(node.inedge);
+	if (win != 0) {
+		// The move itself produced a winner: count every playout as won or lost for the side that moved.
+		node.terminal = true;
+		node.nwin = mcts_newnode_playouts * (win == onturn);
+		node.ntotal = mcts_newnode_playouts;
+		return node;
+	}
+
+	for (int i = 0; i < mcts_newnode_playouts; i++) {
+		Board bd3 = bd2;
+		// NOTE(review): bd3 already contains onturn's move, yet the playout starts with onturn again — confirm.
+		win = playout(bd3, onturn);
+		node.nwin += win == onturn;  // accumulate, so nwin counts wins over all playouts covered by ntotal
+	}
+	node.ntotal = mcts_newnode_playouts;
+
+	return node;
+}
+
+// Adds propWins/propTotal to node and to every ancestor, swapping wins and losses at each level up the tree.
+static void backPropagate(Node &node, int propWins, int propTotal) {
+	for (Node *cur = &node; cur != nullptr; cur = cur->parent, propWins = propTotal - propWins) {
+		cur->nwin += propWins;
+		cur->ntotal += propTotal;
+	}
+}
+
+static string incrementalFilename() {  // yields "tree_1.dot", "tree_2.dot", ... on successive calls
+	static int counter = 0;
+	return "tree_" + to_string(++counter) + ".dot";
+}
+
+// Emits `node` as a DOT node statement labelled "nwin/ntotal\ninedge" and keyed by its address. While
+// maxdepth > 0, also emits an edge to every child followed by the child's own output (with maxdepth - 1),
+// visiting children in ascending order of ntotal.
+static void writeTreeNode(const Node &node, ostream &stream, int maxdepth) {
+	stream << "\"" << &node << "\" [label=\"" << node.nwin << "/" << node.ntotal << "\\n" << node.inedge << "\"];\n";
+	if (maxdepth <= 0) return;
+
+	// Sort pointers rather than the nodes themselves: the tree is const here and node addresses are the ids.
+	vector<const Node*> ordered;
+	ordered.reserve(node.children.size());
+	for (const Node &child : node.children) ordered.push_back(&child);
+	sort(ordered.begin(), ordered.end(), [](const Node *lhs, const Node *rhs) { return lhs->ntotal < rhs->ntotal; });
+
+	for (const Node *child : ordered) {
+		stream << "\"" << &node << "\" -> \"" << child << "\";\n";
+		writeTreeNode(*child, stream, maxdepth - 1);
+	}
+}
+
+// Dumps the tree under `root`, down to `maxdepth` levels below it, as a DOT digraph into the file `filename`.
+static void writeTree(const Node &root, const string &filename, int maxdepth = 1) {
+	ofstream out(filename);
+	assert(out);
+	out << "digraph G {\n";
+	writeTreeNode(root, out, maxdepth);
+	out << "}\n";
+	out.close();
+	cerr << "Wrote tree to \"" << filename << "\"" << endl;
+}
+
+// Searches from position bd with `player` to move: runs mcts_niterations rounds of nodeSelect / expand /
+// backPropagate on a fresh tree and returns the inedge of the root child with the largest ntotal (first on ties).
+Move AI::MCTS::findMove(const Board &bd, int player) {
+	Node root;
+	root.parent = nullptr;
+	root.inedge = Move(-1, -1);
+
+	for (int remaining = mcts_niterations; remaining > 0; remaining--) {
+		// nodeSelect mutates the board and the side to move, so every round starts from fresh copies.
+		Board scratch = bd;
+		int mover = player;
+
+		Node &selected = nodeSelect(root, &scratch, &mover);
+		Node &expanded = expand(selected, scratch, mover);
+		// expand has set the returned node's own counts; pass them upward with wins and losses swapped.
+		backPropagate(selected, expanded.ntotal - expanded.nwin, expanded.ntotal);
+	}
+
+	Move bestmove;  // stays default-constructed if root has no children
+	int bestvisits = -1;
+	for (const Node &child : root.children) {
+		if (child.ntotal <= bestvisits) continue;
+		bestvisits = child.ntotal;
+		bestmove = child.inedge;
+	}
+
+	// writeTree(root, incrementalFilename());
+
+	return bestmove;
+}
diff --git a/ai_mcts.h b/ai_mcts.h
new file mode 100644
index 0000000..1ef5d7f
--- /dev/null
+++ b/ai_mcts.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "board.h"
+
+
+namespace AI::MCTS {
+	Move findMove(const Board &bd, int player);  // returns the move chosen for `player` in position bd by Monte Carlo tree search
+}
diff --git a/main.cpp b/main.cpp
index 9dfdfe9..993bfc3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -4,12 +4,13 @@
 #include "board.h"
 #include "ai_mc.h"
 #include "ai_mm.h"
+#include "ai_mcts.h"
 #include "ai_rand.h"
 
 using namespace std;
 
 #ifndef AI_CHOICE
-#define AI_CHOICE MC
+#define AI_CHOICE MCTS
 #endif
 
 #define STR_(x) #x
@@ -51,7 +52,7 @@ int main() {
 		onturn = -onturn;
 	}
 
-	int win;
+	int win = 0;
 
 	string line;
 	while (true) {
author	Tom Smeding <tom.smeding@gmail.com>	2018-07-21 22:01:57 +0200
committer	Tom Smeding <tom.smeding@gmail.com>	2018-07-21 22:01:57 +0200
commit	53291958e0cda68ed762c0dfb36c0602f876c06d (patch)
tree	1a1ed725c0c4a993c41bc25ace5484a032764cb9
parent	d7828673e787afc76913ede36fed78ba6a179470 (diff)