summaryrefslogtreecommitdiff
path: root/modules/unicode/unicode.js
blob: d3e7fb9da564a63458b16f28a6a0e227bf69f4b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"use strict";

const cmn = require("../$common.js");
const fs = require("fs");
const path = require("path");
const https = require("https");


// Indices into a row produced by splitting one line of the data file on ";"
// (see importDatabase). Only the fields this module names are listed.
const fCODE      = 0;
const fNAME      = 1;
const fCATEGORY  = 2;
// Indices 3-5 (combining class, bidirectional category, decomposition
// mapping) are intentionally left without a named constant.
const fDECDIGIT  = 6;
const fDIGIT     = 7;
const fNUMERIC   = 8;
const fMIRRORED  = 9;
const fOLDNAME   = 10;
const fCOMMENT   = 11;
const fUPPERCASE = 12;
const fLOWERCASE = 13;
const fTITLECASE = 14;

// db: Map(int => [Field])  (NAME is in uppercase)
// Keyed by the numeric code point parsed from the row's fCODE field.
// Rows that describe a range ("<..., First>" / "<..., Last>") are NOT stored
// here; they go into `ranges` instead.
let db = new Map();
// ranges: [{name: String, first: int, last: int, row: [Field]}]
// `row` is the row of the range's "First" line with its name rewritten to
// "<name>"; lookupCode copies it for every code point inside the range.
let ranges = [];

/**
 * Parses the text of a UnicodeData.txt-style file and rebuilds the
 * module-level `db` and `ranges` tables from scratch.
 *
 * Each line is a ";"-separated record. A record whose name has the form
 * "<Something, First>" opens a range and the matching "<Something, Last>"
 * closes it; the pair is stored once in `ranges`. Every other record is stored
 * in `db` under its numeric code point, with its name upper-cased.
 *
 * Lines may end in "\n" or "\r\n", and the final line does not need a
 * trailing newline. Blank lines and lines without a parseable hexadecimal
 * code or without a name field are skipped instead of aborting the import.
 *
 * @param {string} csv - Full text of the data file.
 * @returns {void}
 */
function importDatabase(csv) {
	// clear the database if necessary
	db = new Map();
	ranges = [];

	const rangeMarker = /^<(.*), (First|Last)>$/;
	let currentRange = null;

	// split() also yields the last record when the file lacks a final "\n".
	for (const rawLine of csv.split("\n")) {
		// Drop the "\r" of CRLF files so it does not end up in the last field.
		const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
		const row = line.split(";");

		const code = parseInt(row[fCODE], 16);
		if (Number.isNaN(code) || row[fNAME] === undefined) {
			// Blank or malformed line: nothing usable to index.
			continue;
		}

		const m = row[fNAME].match(rangeMarker);
		if (m === null) {
			row[fNAME] = row[fNAME].toUpperCase();
			db.set(code, row);
			continue;
		}

		if (m[2] === "First") {
			row[fNAME] = "<" + m[1] + ">";
			currentRange = {name: m[1], first: code, last: null, row: row};
		} else if (currentRange !== null && m[1] === currentRange.name) {
			currentRange.last = code;
			ranges.push(currentRange);
			currentRange = null;
		}
		// A "Last" record that does not match the open range is ignored.
	}
}

/**
 * Finds the data row for a single code point.
 *
 * Ranges are consulted first: when the code point lies inside one, a copy of
 * that range's template row is returned with its code field replaced by the
 * code point in upper-case hexadecimal. Otherwise the row stored in `db` is
 * returned as-is (not copied), or `undefined` when there is none.
 *
 * @param {number} codepoint - Numeric code point to look up.
 * @returns {string[]|undefined} The matching row, if any.
 */
function lookupCode(codepoint) {
	const enclosing = ranges.find(({first, last}) => codepoint >= first && codepoint <= last);

	if (enclosing === undefined) {
		return db.get(codepoint);
	}

	// Copy so the shared template row of the range is never modified.
	const entry = [...enclosing.row];
	entry[fCODE] = codepoint.toString(16).toUpperCase();
	return entry;
}

/**
 * Returns every row in `db` whose name contains the given text.
 *
 * The text is upper-cased before comparison; names in `db` are stored
 * upper-cased, so the match is effectively case-insensitive. Only `db` is
 * scanned; entries in `ranges` are not searched. Results keep the iteration
 * order of `db`.
 *
 * @param {string} text - Substring to look for in the names.
 * @returns {string[][]} The matching rows (the stored arrays, not copies).
 */
function searchDescription(text) {
	const needle = text.toUpperCase();
	return Array.from(db.values()).filter(entry => entry[fNAME].includes(needle));
}

module.exports = function (app, io, moddir) {
	const dataFilePath = path.join(moddir, "UnicodeData.txt");
	if (fs.existsSync(dataFilePath)) {
		importDatabase(fs.readFileSync(dataFilePath).toString());
	} else {
		console.log("Downloading UnicodeData.txt...");

		https.get("https://www.unicode.org/Public/13.0.0/ucd/UnicodeData-13.0.0d6.txt", res => {
			if (res.statusCode != 200) {
				console.log(`Could not download UnicodeData.txt: status code ${res.statusCode}`);
				return;
			}

			res.setEncoding("utf8");
			let buffer = "";
			res.on("data", data => buffer += data);
			res.on("end", () => {
				fs.writeFileSync(dataFilePath, buffer);
				console.log("Downloaded UnicodeData.txt");
				importDatabase(buffer);
			});
		}).on("error", err => {
			console.log("Error downloading UnicodeData.txt:", err);
		});
	}

	app.get("/unicode", (req, res) => {
		res.sendFile(path.join(moddir, "index.html"));
	});

	app.get("/unicode/lookup/:query", (req, res) => {
		const chars = [];
		for (let codepoint of req.params.query) {
			codepoint = codepoint.codePointAt(0);
			chars.push(lookupCode(codepoint));
		}

		res.json({
			chars: chars,
			search: searchDescription(req.params.query),
		});
	});
};