summary | refs | log | tree | commit | diff
path: root/modules/unicode/unicode.js
diff options
context:
space:
mode:
Diffstat (limited to 'modules/unicode/unicode.js')
-rw-r--r-- modules/unicode/unicode.js 126
1 files changed, 126 insertions, 0 deletions
diff --git a/modules/unicode/unicode.js b/modules/unicode/unicode.js
new file mode 100644
index 0000000..3221cb9
--- /dev/null
+++ b/modules/unicode/unicode.js
@@ -0,0 +1,126 @@
+"use strict";
+
+const cmn = require("../$common.js");
+const fs = require("fs");
+const path = require("path");
+const https = require("https");
+
+
// Column positions inside one semicolon-separated UnicodeData.txt record.
// Columns 3-5 (combining class, bidirectional category, decomposition
// mapping) are not referenced by this module and have no constant.
const fCODE = 0;       // code point, written in hexadecimal
const fNAME = 1;       // character name
const fCATEGORY = 2;   // general category
const fDECDIGIT = 6;   // decimal digit value
const fDIGIT = 7;      // digit value
const fNUMERIC = 8;    // numeric value
const fMIRRORED = 9;   // mirrored flag
const fOLDNAME = 10;   // old (Unicode 1.0) name
const fCOMMENT = 11;   // comment field
const fUPPERCASE = 12; // uppercase mapping
const fLOWERCASE = 13; // lowercase mapping
const fTITLECASE = 14; // titlecase mapping

// Individually listed characters, keyed by numeric code point. Each value is
// the split record (an array of strings) with its NAME column upper-cased.
let db = new Map();

// Blocks that the data file describes only by a "<..., First>" / "<..., Last>"
// pair. Each entry is {name: String, first: int, last: int, row: [Field]},
// where `row` is the record of the "First" line and serves as a template for
// every code point inside the block.
let ranges = [];
+
/**
 * Parses the text of a UnicodeData.txt file and replaces the module-level
 * `db` and `ranges` tables with its contents.
 *
 * Ordinary records are stored in `db` under their numeric code point, with
 * the NAME column upper-cased. A "<X, First>" record followed by a matching
 * "<X, Last>" record becomes one entry in `ranges`; the "First" record (with
 * its name rewritten to "<X>") is kept as the template row for the block.
 * A "Last" record without a matching open "First" is ignored, and a "First"
 * that is never closed produces no range.
 *
 * Both LF and CRLF line endings are accepted, a final line without a
 * trailing newline is still read, and lines that do not start with a
 * hexadecimal code point followed by a name are skipped.
 *
 * @param {string} csv - Complete contents of the data file.
 * @returns {void}
 */
function importDatabase(csv) {
    // Build into locals and publish at the end, so a failure halfway through
    // never leaves the module tables half-filled.
    const nextDb = new Map();
    const nextRanges = [];
    let currentRange = null;

    for (const line of csv.split(/\r?\n/)) {
        const row = line.split(";");
        const code = Number.parseInt(row[fCODE], 16);
        const name = row[fNAME];

        // Blank lines (such as the one after the final newline) and
        // malformed records carry no usable code point or name.
        if (Number.isNaN(code) || name === undefined) {
            continue;
        }

        const m = name.match(/^<(.*), (First|Last)>$/);
        if (m === null) {
            row[fNAME] = name.toUpperCase();
            nextDb.set(code, row);
            continue;
        }

        const [, rangeName, edge] = m;
        if (edge === "First") {
            row[fNAME] = `<${rangeName}>`;
            currentRange = {name: rangeName, first: code, last: null, row};
        } else if (currentRange !== null && rangeName === currentRange.name) {
            currentRange.last = code;
            nextRanges.push(currentRange);
            currentRange = null;
        }
    }

    db = nextDb;
    ranges = nextRanges;
}
+
/**
 * Finds the database record for a single code point.
 *
 * Blocks in `ranges` are consulted first: when the code point lies inside
 * one, a copy of that block's template row is returned with the CODE column
 * replaced by the code point in upper-case hexadecimal. Otherwise the result
 * is whatever `db` holds for the code point.
 *
 * @param {number} codepoint - Numeric code point to look up.
 * @returns {string[]|undefined} The record, or undefined if none is known.
 */
function lookupCode(codepoint) {
    const enclosing = ranges.find(
        ({first, last}) => codepoint >= first && codepoint <= last
    );
    if (enclosing === undefined) {
        return db.get(codepoint);
    }

    // Copy the template so the shared range row is never modified.
    const record = [...enclosing.row];
    record[fCODE] = codepoint.toString(16).toUpperCase();
    return record;
}
+
/**
 * Returns every record in `db` whose NAME column contains the given text.
 *
 * The text is upper-cased before comparison; names in `db` are stored
 * upper-cased, so the match is effectively case-insensitive. Entries in
 * `ranges` are not searched.
 *
 * @param {string} text - Substring to look for.
 * @returns {string[][]} Matching records, in `db` iteration order.
 */
function searchDescription(text) {
    const needle = text.toUpperCase();
    return Array.from(db.values()).filter(row => row[fNAME].includes(needle));
}
+
+module.exports = function (app, io, moddir) {
+ const dataFilePath = path.join(moddir, "UnicodeData.txt");
+ if (fs.existsSync(dataFilePath)) {
+ importDatabase(fs.readFileSync(dataFilePath).toString());
+ } else {
+ console.log("Downloading UnicodeData.txt...");
+
+ https.get("https://www.unicode.org/Public/13.0.0/ucd/UnicodeData-13.0.0d6.txt", res => {
+ if (res.statusCode != 200) {
+ console.log(`Could not download UnicodeData.txt: status code ${res.statusCode}`);
+ return;
+ }
+
+ res.setEncoding("utf8");
+ let buffer = "";
+ res.on("data", data => buffer += data);
+ res.on("end", () => {
+ fs.writeFileSync(dataFilePath, buffer);
+ console.log("Downloaded UnicodeData.txt");
+ importDatabase(buffer);
+ });
+ }).on("error", err => {
+ console.log("Error downloading UnicodeData.txt:", err);
+ });
+ }
+
+ app.get("/unicode", (req, res) => {
+ res.sendFile(path.join(moddir, "index.html"));
+ });
+
+ app.get("/unicode/lookup/:query", (req, res) => {
+ const chars = [];
+ for (let codepoint of req.params.query) {
+ codepoint = codepoint.codePointAt(0);
+ chars.push(lookupCode(codepoint));
+ }
+
+ res.end(JSON.stringify({
+ chars: chars,
+ search: searchDescription(req.params.query),
+ }));
+ });
+};