summaryrefslogtreecommitdiff
path: root/modules/unicode
diff options
context:
space:
mode:
author tomsmeding <tom.smeding@gmail.com> 2020-01-17 21:43:32 +0100
committer tomsmeding <tom.smeding@gmail.com> 2020-01-17 21:43:32 +0100
commit df268d8fc6b68be2a539561e89a376f0334a6e1e (patch)
tree e19eef4b334327b8b5740e0ab3269da14c3c6160 /modules/unicode
parent f1ae140cde98ede14fd2374ab34cd28d69e9dbfb (diff)
unicode: Module to search the unicode database
Diffstat (limited to 'modules/unicode')
-rw-r--r-- modules/unicode/.gitignore 1
-rw-r--r-- modules/unicode/index.html 92
-rw-r--r-- modules/unicode/unicode.js 126
3 files changed, 219 insertions, 0 deletions
diff --git a/modules/unicode/.gitignore b/modules/unicode/.gitignore
new file mode 100644
index 0000000..0487319
--- /dev/null
+++ b/modules/unicode/.gitignore
@@ -0,0 +1 @@
+UnicodeData.txt
diff --git a/modules/unicode/index.html b/modules/unicode/index.html
new file mode 100644
index 0000000..e44a96e
--- /dev/null
+++ b/modules/unicode/index.html
@@ -0,0 +1,92 @@
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Unicode character lookup</title>
+<style>
+body {
+ font-family: sans-serif;
+}
+.table > div:nth-child(odd) {
+ background-color: #eee;
+}
+.table {
+ max-height: 300px;
+ overflow-y: scroll;
+}
+</style>
+<script>
+/**
+ * Sends the current contents of the #input box to the lookup endpoint and
+ * hands the parsed JSON reply to handleResponse().
+ *
+ * Failures (non-200 status, unparsable body) are reported with alert().
+ */
+function do_lookup() {
+    var query = document.getElementById("input").value;
+    // An empty query would request "/unicode/lookup/", which has no path
+    // segment left for the server's ":query" parameter; skip the round trip.
+    if (query === "") {
+        return;
+    }
+
+    var xhr = new XMLHttpRequest();
+    xhr.onreadystatechange = function() {
+        // 4 == DONE; earlier states carry no complete response yet.
+        if (xhr.readyState !== 4) {
+            return;
+        }
+        if (xhr.status !== 200) {
+            alert("Request failed: " + xhr.responseText);
+            return;
+        }
+
+        var json;
+        try {
+            json = JSON.parse(xhr.responseText);
+        } catch (err) {
+            // A 200 reply that is not JSON must not escape as an uncaught
+            // exception from the event handler.
+            alert("Request failed: server sent an invalid response");
+            return;
+        }
+        handleResponse(json);
+    };
+    xhr.open("GET", "/unicode/lookup/" + encodeURIComponent(query));
+    xhr.responseType = "text";
+    // No Content-Type request header: a GET carries no body to describe.
+    xhr.send();
+}
+
+/**
+ * Renders a lookup reply into the two result tables.
+ *
+ * For each of the keys "chars" and "search", the element with id
+ * "<key>_num" receives the number of rows in parentheses and the element
+ * with id "<key>" is emptied and refilled with one div.character per row.
+ *
+ * @param json  Parsed reply; json.chars and json.search are read as row lists.
+ */
+function handleResponse(json) {
+    console.log(json);
+
+    ["chars", "search"].forEach(function(key) {
+        var counter = document.getElementById(key + "_num");
+        counter.innerHTML = "(" + json[key].length + ")";
+
+        var table = document.getElementById(key);
+        table.innerHTML = "";
+
+        for (var n = 0; n < json[key].length; n++) {
+            var entry = document.createElement("div");
+            entry.classList.add("character");
+            populateCharacter(entry, json[key][n]);
+            table.appendChild(entry);
+        }
+    });
+}
+
+/**
+ * Fills `div` with the display of one result row.
+ *
+ * @param div  Element that receives the generated child nodes.
+ * @param row  Array whose element 0 is the hexadecimal code and element 1
+ *             the character name, or null/undefined when the server had no
+ *             entry for the character (an undefined array entry arrives as
+ *             null after JSON serialisation).
+ */
+function populateCharacter(div, row) {
+    var span;
+
+    if (row == null) {
+        // Without this guard row[0] throws and aborts rendering of every
+        // remaining row in the reply.
+        span = document.createElement("span");
+        span.appendChild(document.createTextNode("(no data for this character)"));
+        div.appendChild(span);
+        return;
+    }
+
+    // Fixed-width bold column holding the code point.
+    span = document.createElement("span");
+    span.setAttribute("style", "display: inline-block; width: 7em; font-weight: bold;");
+    span.appendChild(document.createTextNode("U+" + row[0]));
+    div.appendChild(span);
+
+    // Character name.
+    span = document.createElement("span");
+    span.appendChild(document.createTextNode(row[1]));
+    div.appendChild(span);
+}
+
+// Once the page has loaded, put the keyboard focus in the search box.
+window.addEventListener("load", function() {
+    var searchBox = document.getElementById("input");
+    searchBox.focus();
+});
+</script>
+</head>
+<body>
+ <h1>Unicode character lookup</h1>
+
+ <input type="text" id="input" placeholder="Character(s) to look up" onkeypress="if (event.keyCode == 10 || event.keyCode == 13) do_lookup()">
+ <input type="button" onclick="do_lookup()" value="Lookup">
+ <br>
+
+ <!-- <pre id="json"></pre><br> -->
+
+ <h3>Characters <span id="chars_num"></span></h3>
+ <div id="chars" class="table"></div>
+
+ <h3>Found in descriptions <span id="search_num"></span></h3>
+ <div id="search" class="table"></div>
+</body>
+</html>
diff --git a/modules/unicode/unicode.js b/modules/unicode/unicode.js
new file mode 100644
index 0000000..3221cb9
--- /dev/null
+++ b/modules/unicode/unicode.js
@@ -0,0 +1,126 @@
+"use strict";
+
+const cmn = require("../$common.js");
+const fs = require("fs");
+const path = require("path");
+const https = require("https");
+
+
+// Column indices into one ";"-separated record of UnicodeData.txt.
+const fCODE = 0;
+const fNAME = 1;
+const fCATEGORY = 2;
+// combining class, bidirectional category, decomposition mapping
+// (columns 3-5; no constants are defined for them)
+const fDECDIGIT = 6;
+const fDIGIT = 7;
+const fNUMERIC = 8;
+const fMIRRORED = 9;
+const fOLDNAME = 10;
+const fCOMMENT = 11;
+const fUPPERCASE = 12;
+const fLOWERCASE = 13;
+const fTITLECASE = 14;
+
+// db: Map(int => [Field]) (NAME is in uppercase)
+// Keyed by numeric code point; holds only the records that are not part of
+// a "<..., First>" / "<..., Last>" pair. Replaced by importDatabase().
+let db = new Map();
+// ranges: [{name: String, first: int, last: int, row: [Field]}]
+// One entry per completed First/Last pair; `row` is the "First" record with
+// its name rewritten to "<name>". Replaced by importDatabase().
+let ranges = [];
+
+/**
+ * Parses the text of UnicodeData.txt and replaces the in-memory database.
+ *
+ * Every line is a ";"-separated record. A record whose name has the form
+ * "<X, First>" opens a range and the following "<X, Last>" record closes it;
+ * the pair becomes one entry in `ranges` whose template row is named "<X>".
+ * All other records are stored in `db` under their numeric code point with
+ * the name converted to upper case.
+ *
+ * Blank lines and lines without a parsable code or a name column are
+ * skipped. A final line that lacks a trailing newline is still imported.
+ *
+ * @param {string} csv  Complete contents of UnicodeData.txt.
+ */
+function importDatabase(csv) {
+    // clear the database if necessary
+    db = new Map();
+    ranges = [];
+
+    const rangeMarker = /^<(.*), (First|Last)>$/;
+    let currentRange = null;
+
+    // split() (unlike scanning for "\n") also yields an unterminated last line.
+    for (let line of csv.split("\n")) {
+        // Tolerate CRLF line endings.
+        if (line.endsWith("\r")) {
+            line = line.slice(0, -1);
+        }
+
+        const row = line.split(";");
+        const code = parseInt(row[fCODE], 16);
+        // A blank or truncated line has no name column; skipping it avoids a
+        // TypeError on row[fNAME] that would abort the whole import.
+        if (row.length <= fNAME || Number.isNaN(code)) {
+            continue;
+        }
+
+        const m = row[fNAME].match(rangeMarker);
+        if (m === null) {
+            row[fNAME] = row[fNAME].toUpperCase();
+            db.set(code, row);
+        } else if (m[2] === "First") {
+            row[fNAME] = "<" + m[1] + ">";
+            currentRange = {name: m[1], first: code, last: null, row: row};
+        } else if (currentRange !== null && m[1] === currentRange.name) {
+            // "Last" record matching the open range: the range is complete.
+            currentRange.last = code;
+            ranges.push(currentRange);
+            currentRange = null;
+        }
+    }
+}
+
+/**
+ * Returns the database row for a code point.
+ *
+ * A code point inside one of `ranges` yields a copy of that range's template
+ * row with the code column replaced by the code point in upper-case hex.
+ * Otherwise the row stored in `db` is returned, or undefined if there is none.
+ *
+ * @param {number} codepoint  Numeric code point.
+ */
+function lookupCode(codepoint) {
+    const enclosing = ranges.find(r => r.first <= codepoint && codepoint <= r.last);
+    if (enclosing === undefined) {
+        return db.get(codepoint);
+    }
+
+    // Copy so the shared template row of the range is never modified.
+    const result = Array.from(enclosing.row);
+    result[fCODE] = codepoint.toString(16).toUpperCase();
+    return result;
+}
+
+/**
+ * Finds every row in `db` whose name contains `text`, ignoring the case of
+ * `text` (names in `db` are stored in upper case).
+ *
+ * @param {string} text  Substring to look for.
+ * @returns {Array}      Matching rows, in the iteration order of `db`.
+ */
+function searchDescription(text) {
+    const needle = text.toUpperCase();
+    return Array.from(db.values()).filter(row => row[fNAME].includes(needle));
+}
+
+/**
+ * Module entry point: loads the Unicode database and registers the routes.
+ *
+ * The database is read from UnicodeData.txt in `moddir` when that file
+ * exists; otherwise it is downloaded, imported and then cached there.
+ *
+ * Routes:
+ *   GET /unicode                 serves index.html from `moddir`
+ *   GET /unicode/lookup/:query   JSON {chars, search}: one row per character
+ *                                of the query, plus all rows whose name
+ *                                contains the query
+ *
+ * @param app     Application object; get() is called on it, and the response
+ *                objects must offer sendFile() and json() (presumably an
+ *                Express app - confirm against the module loader).
+ * @param io      Not used by this module.
+ * @param moddir  Directory containing index.html and the cached data file.
+ */
+module.exports = function (app, io, moddir) {
+    const dataFilePath = path.join(moddir, "UnicodeData.txt");
+    if (fs.existsSync(dataFilePath)) {
+        importDatabase(fs.readFileSync(dataFilePath, "utf8"));
+    } else {
+        console.log("Downloading UnicodeData.txt...");
+
+        // Released file name. The previous URL pointed at the draft-suffixed
+        // "UnicodeData-13.0.0d6.txt".
+        // NOTE(review): draft files appear to be removed from the release
+        // directory once a version is final - confirm.
+        https.get("https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt", res => {
+            if (res.statusCode !== 200) {
+                console.log(`Could not download UnicodeData.txt: status code ${res.statusCode}`);
+                // The body must still be consumed, or the socket stays busy.
+                res.resume();
+                return;
+            }
+
+            res.setEncoding("utf8");
+            let buffer = "";
+            res.on("data", data => {
+                buffer += data;
+            });
+            res.on("end", () => {
+                console.log("Downloaded UnicodeData.txt");
+
+                // Import before caching so that unusable data is never written
+                // to disk, and keep any failure from becoming an uncaught
+                // exception inside this event handler.
+                try {
+                    importDatabase(buffer);
+                } catch (err) {
+                    console.log("Could not parse downloaded UnicodeData.txt:", err);
+                    return;
+                }
+                try {
+                    fs.writeFileSync(dataFilePath, buffer);
+                } catch (err) {
+                    console.log("Could not cache UnicodeData.txt:", err);
+                }
+            });
+        }).on("error", err => {
+            console.log("Error downloading UnicodeData.txt:", err);
+        });
+    }
+
+    app.get("/unicode", (req, res) => {
+        res.sendFile(path.join(moddir, "index.html"));
+    });
+
+    app.get("/unicode/lookup/:query", (req, res) => {
+        const query = req.params.query;
+
+        const chars = [];
+        // for...of walks the string by code point, not by UTF-16 unit.
+        for (const ch of query) {
+            const codepoint = ch.codePointAt(0);
+            const row = lookupCode(codepoint);
+            if (row !== undefined) {
+                chars.push(row);
+                continue;
+            }
+
+            // An undefined entry would be serialised as null, which the page
+            // cannot render; send a placeholder row in the same [code, name]
+            // shape instead, with the code padded to at least four digits.
+            const hex = codepoint.toString(16).toUpperCase();
+            chars.push(["0000".slice(hex.length) + hex, "<not in database>"]);
+        }
+
+        // json() also labels the response as application/json.
+        res.json({
+            chars: chars,
+            search: searchDescription(query),
+        });
+    });
+};