From df268d8fc6b68be2a539561e89a376f0334a6e1e Mon Sep 17 00:00:00 2001
From: tomsmeding
Date: Fri, 17 Jan 2020 21:43:32 +0100
Subject: unicode: Module to search the unicode database

---
 modules/unicode/.gitignore |   1 +
 modules/unicode/index.html |  92 +++++++++++++++++++++++++++++++++
 modules/unicode/unicode.js | 126 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 219 insertions(+)
 create mode 100644 modules/unicode/.gitignore
 create mode 100644 modules/unicode/index.html
 create mode 100644 modules/unicode/unicode.js

diff --git a/modules/unicode/.gitignore b/modules/unicode/.gitignore
new file mode 100644
index 0000000..0487319
--- /dev/null
+++ b/modules/unicode/.gitignore
@@ -0,0 +1 @@
+UnicodeData.txt
diff --git a/modules/unicode/index.html b/modules/unicode/index.html
new file mode 100644
index 0000000..e44a96e
--- /dev/null
+++ b/modules/unicode/index.html
@@ -0,0 +1,92 @@
+ + + + +Unicode character lookup + + + +

Unicode character lookup

+ + + +
+ + + +

Characters

+
+ +

Found in descriptions

+ + + diff --git a/modules/unicode/unicode.js b/modules/unicode/unicode.js new file mode 100644 index 0000000..3221cb9 --- /dev/null +++ b/modules/unicode/unicode.js @@ -0,0 +1,126 @@ +"use strict"; + +const cmn = require("../$common.js"); +const fs = require("fs"); +const path = require("path"); +const https = require("https"); + + +const fCODE = 0; +const fNAME = 1; +const fCATEGORY = 2; +// combining class, bidirectional category, decomposition mapping +const fDECDIGIT = 6; +const fDIGIT = 7; +const fNUMERIC = 8; +const fMIRRORED = 9; +const fOLDNAME = 10; +const fCOMMENT = 11; +const fUPPERCASE = 12; +const fLOWERCASE = 13; +const fTITLECASE = 14; + +// db: Map(int => [Field]) (NAME is in uppercase) +let db = new Map(); +// ranges: [{name: String, first: int, last: int}] +let ranges = []; + +function importDatabase(csv) { + // clear the database if necessary + db = new Map(); + ranges = []; + + let currentRange = null; + + let cursor = 0, endidx; + while ((endidx = csv.indexOf("\n", cursor)) != -1) { + const row = csv.slice(cursor, endidx).split(";"); + cursor = endidx + 1; + + const code = parseInt(row[fCODE], 16); + + const m = row[fNAME].match(/^<(.*), (First|Last)>$/) + if (m != null) { + if (m[2] == "First") { + row[fNAME] = "<" + m[1] + ">"; + currentRange = {name: m[1], first: code, last: null, row: row}; + } else if (currentRange != null && m[1] == currentRange.name) { + currentRange.last = code; + ranges.push(currentRange); + currentRange = null; + } + } else { + row[fNAME] = row[fNAME].toUpperCase(); + db.set(code, row); + } + } +} + +function lookupCode(codepoint) { + for (const range of ranges) { + if (range.first <= codepoint && codepoint <= range.last) { + const row = range.row.slice(); + row[fCODE] = codepoint.toString(16).toUpperCase(); + return row; + } + } + + return db.get(codepoint); +} + +function searchDescription(text) { + text = text.toUpperCase(); + + const result = []; + for (const row of db.values()) { + if (row[fNAME].includes(text)) 
{ + result.push(row); + } + } + + return result; +} + +module.exports = function (app, io, moddir) { + const dataFilePath = path.join(moddir, "UnicodeData.txt"); + if (fs.existsSync(dataFilePath)) { + importDatabase(fs.readFileSync(dataFilePath).toString()); + } else { + console.log("Downloading UnicodeData.txt..."); + + https.get("https://www.unicode.org/Public/13.0.0/ucd/UnicodeData-13.0.0d6.txt", res => { + if (res.statusCode != 200) { + console.log(`Could not download UnicodeData.txt: status code ${res.statusCode}`); + return; + } + + res.setEncoding("utf8"); + let buffer = ""; + res.on("data", data => buffer += data); + res.on("end", () => { + fs.writeFileSync(dataFilePath, buffer); + console.log("Downloaded UnicodeData.txt"); + importDatabase(buffer); + }); + }).on("error", err => { + console.log("Error downloading UnicodeData.txt:", err); + }); + } + + app.get("/unicode", (req, res) => { + res.sendFile(path.join(moddir, "index.html")); + }); + + app.get("/unicode/lookup/:query", (req, res) => { + const chars = []; + for (let codepoint of req.params.query) { + codepoint = codepoint.codePointAt(0); + chars.push(lookupCode(codepoint)); + } + + res.end(JSON.stringify({ + chars: chars, + search: searchDescription(req.params.query), + })); + }); +}; -- cgit v1.2.3