diff options
author | tomsmeding <tom.smeding@gmail.com> | 2020-01-17 21:43:32 +0100 |
---|---|---|
committer | tomsmeding <tom.smeding@gmail.com> | 2020-01-17 21:43:32 +0100 |
commit | df268d8fc6b68be2a539561e89a376f0334a6e1e (patch) | |
tree | e19eef4b334327b8b5740e0ab3269da14c3c6160 /modules/unicode | |
parent | f1ae140cde98ede14fd2374ab34cd28d69e9dbfb (diff) |
unicode: Module to search the unicode database
Diffstat (limited to 'modules/unicode')
-rw-r--r-- | modules/unicode/.gitignore | 1 | ||||
-rw-r--r-- | modules/unicode/index.html | 92 | ||||
-rw-r--r-- | modules/unicode/unicode.js | 126 |
3 files changed, 219 insertions, 0 deletions
diff --git a/modules/unicode/.gitignore b/modules/unicode/.gitignore
new file mode 100644
index 0000000..0487319
--- /dev/null
+++ b/modules/unicode/.gitignore
@@ -0,0 +1 @@
+UnicodeData.txt
diff --git a/modules/unicode/index.html b/modules/unicode/index.html
new file mode 100644
index 0000000..e44a96e
--- /dev/null
+++ b/modules/unicode/index.html
@@ -0,0 +1,99 @@
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Unicode character lookup</title>
+<style>
+body {
+  font-family: sans-serif;
+}
+.table > div:nth-child(odd) {
+  background-color: #eee;
+}
+.table {
+  max-height: 300px;
+  overflow-y: scroll;
+}
+</style>
+<script>
+/* Look up the text currently in the input box and render the server's answer. */
+function do_lookup() {
+  var query = document.getElementById("input").value;
+  // An empty query would produce "/unicode/lookup/", which has no :query segment for the server route.
+  if (query === "") return;
+
+  var xhr = new XMLHttpRequest();
+  xhr.onreadystatechange = function() {
+    if (xhr.readyState != 4) return;
+    if (xhr.status == 200) {
+      handleResponse(JSON.parse(xhr.responseText));
+    } else {
+      alert("Request failed: " + xhr.responseText);
+    }
+  };
+  xhr.open("GET", "/unicode/lookup/" + encodeURIComponent(query));
+  xhr.responseType = "text";
+  // No Content-Type request header: this GET carries no body to describe.
+  xhr.send();
+}
+
+/* Fill the "chars" and "search" tables (and their counters) from the parsed response. */
+function handleResponse(json) {
+  console.log(json);
+
+  /* elem = document.getElementById("json");
+  elem.innerHTML = "";
+  elem.appendChild(document.createTextNode(JSON.stringify(json))); */
+
+  var keys = ["chars", "search"];
+
+  for (var i = 0; i < keys.length; i++) {
+    var rows = json[keys[i]];
+    document.getElementById(keys[i] + "_num").textContent = "(" + rows.length + ")";
+
+    var elem = document.getElementById(keys[i]);
+    elem.textContent = "";
+    for (var j = 0; j < rows.length; j++) {
+      // Defensive: a null entry has no fields to show, and indexing it would throw.
+      if (rows[j] == null) continue;
+      var div = document.createElement("div");
+      div.classList.add("character");
+      populateCharacter(div, rows[j]);
+      elem.appendChild(div);
+    }
+  }
+}
+
+/* Append "U+<code>" and the character name (row[0] and row[1]) to div. */
+function populateCharacter(div, row) {
+  var span = document.createElement("span");
+  span.setAttribute("style", "display: inline-block; width: 7em; font-weight: bold;");
+  span.appendChild(document.createTextNode("U+" + row[0]));
+  div.appendChild(span);
+
+  span = document.createElement("span");
+  span.appendChild(document.createTextNode(row[1]));
+  div.appendChild(span);
+}
+
+window.addEventListener("load", function() {
+  document.getElementById("input").focus();
+});
+</script>
+</head>
+<body>
+  <h1>Unicode character lookup</h1>
+
+  <input type="text" id="input" placeholder="Character(s) to look up" onkeypress="if (event.keyCode == 10 || event.keyCode == 13) do_lookup()">
+  <input type="button" onclick="do_lookup()" value="Lookup">
+  <br>
+
+  <!-- <pre id="json"></pre><br> -->
+
+  <h3>Characters <span id="chars_num"></span></h3>
+  <div id="chars" class="table"></div>
+
+  <h3>Found in descriptions <span id="search_num"></span></h3>
+  <div id="search" class="table"></div>
+</body>
+</html>
diff --git a/modules/unicode/unicode.js b/modules/unicode/unicode.js
new file mode 100644
index 0000000..3221cb9
--- /dev/null
+++ b/modules/unicode/unicode.js
@@ -0,0 +1,178 @@
+"use strict";
+
+const cmn = require("../$common.js");
+const fs = require("fs");
+const path = require("path");
+const https = require("https");
+
+
+// Column indices into a row of UnicodeData.txt (fields are separated by ";").
+const fCODE = 0;
+const fNAME = 1;
+const fCATEGORY = 2;
+// combining class, bidirectional category, decomposition mapping
+const fDECDIGIT = 6;
+const fDIGIT = 7;
+const fNUMERIC = 8;
+const fMIRRORED = 9;
+const fOLDNAME = 10;
+const fCOMMENT = 11;
+const fUPPERCASE = 12;
+const fLOWERCASE = 13;
+const fTITLECASE = 14;
+
+// Download location used when there is no local copy of the data file yet.
+const DATA_URL = "https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt";
+
+// db: Map(int => [Field]) (NAME is in uppercase)
+let db = new Map();
+// ranges: [{name: String, first: int, last: int, row: [Field]}]
+let ranges = [];
+
+// Format a code point as uppercase hex, zero-padded to at least four digits.
+function formatCode(codepoint) {
+  return codepoint.toString(16).toUpperCase().padStart(4, "0");
+}
+
+/**
+ * Parse the contents of UnicodeData.txt and rebuild `db` and `ranges` from it.
+ *
+ * Ordinary rows are stored in `db` under their code point, with the name
+ * uppercased. A "<Name, First>" / "<Name, Last>" pair of rows becomes one
+ * entry in `ranges`, keeping the First row (renamed to "<Name>") as template.
+ *
+ * @param {string} csv - Full text of the data file, one record per line.
+ */
+function importDatabase(csv) {
+  // clear the database if necessary
+  db = new Map();
+  ranges = [];
+
+  let currentRange = null;
+
+  // split() also yields a final line that is not newline-terminated.
+  for (const line of csv.split("\n")) {
+    const row = line.split(";");
+    // Blank or truncated lines have no name field; skip them instead of throwing.
+    if (row.length <= fNAME) continue;
+
+    const code = parseInt(row[fCODE], 16);
+    if (Number.isNaN(code)) continue;
+
+    const m = row[fNAME].match(/^<(.*), (First|Last)>$/);
+    if (m === null) {
+      row[fNAME] = row[fNAME].toUpperCase();
+      db.set(code, row);
+    } else if (m[2] === "First") {
+      row[fNAME] = "<" + m[1] + ">";
+      currentRange = {name: m[1], first: code, last: null, row: row};
+    } else if (currentRange !== null && m[1] === currentRange.name) {
+      currentRange.last = code;
+      ranges.push(currentRange);
+      currentRange = null;
+    }
+  }
+}
+
+/**
+ * Look up the row for one code point.
+ *
+ * @param {number} codepoint
+ * @returns {string[]|undefined} The row from `db`, or for a code point inside
+ *   a range a copy of the range's row with the code field filled in;
+ *   undefined if the code point is in neither.
+ */
+function lookupCode(codepoint) {
+  for (const range of ranges) {
+    if (range.first <= codepoint && codepoint <= range.last) {
+      const row = range.row.slice();
+      row[fCODE] = formatCode(codepoint);
+      return row;
+    }
+  }
+
+  return db.get(codepoint);
+}
+
+/**
+ * Find every row in `db` whose name contains `text`, ignoring case.
+ * Range entries are not searched.
+ *
+ * @param {string} text
+ * @returns {string[][]} Matching rows, in database order.
+ */
+function searchDescription(text) {
+  text = text.toUpperCase();
+
+  const result = [];
+  for (const row of db.values()) {
+    if (row[fNAME].includes(text)) {
+      result.push(row);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Module entry point: load UnicodeData.txt from `moddir` (downloading it on
+ * first use) and register the "/unicode" routes on `app`.
+ */
+module.exports = function (app, io, moddir) {
+  const dataFilePath = path.join(moddir, "UnicodeData.txt");
+  if (fs.existsSync(dataFilePath)) {
+    importDatabase(fs.readFileSync(dataFilePath).toString());
+  } else {
+    console.log("Downloading UnicodeData.txt...");
+
+    https.get(DATA_URL, res => {
+      if (res.statusCode !== 200) {
+        console.log(`Could not download UnicodeData.txt: status code ${res.statusCode}`);
+        // Discard the unread body so the connection is released.
+        res.resume();
+        return;
+      }
+
+      res.setEncoding("utf8");
+      let buffer = "";
+      res.on("data", data => buffer += data);
+      res.on("error", err => {
+        console.log("Error downloading UnicodeData.txt:", err);
+      });
+      res.on("end", () => {
+        // A failure to cache the file must not stop the data from being used.
+        try {
+          fs.writeFileSync(dataFilePath, buffer);
+          console.log("Downloaded UnicodeData.txt");
+        } catch (err) {
+          console.log("Could not save UnicodeData.txt:", err);
+        }
+        importDatabase(buffer);
+      });
+    }).on("error", err => {
+      console.log("Error downloading UnicodeData.txt:", err);
+    });
+  }
+
+  app.get("/unicode", (req, res) => {
+    res.sendFile(path.join(moddir, "index.html"));
+  });
+
+  app.get("/unicode/lookup/:query", (req, res) => {
+    const chars = [];
+    // Iterating a string yields one (possibly two-unit) code point at a time.
+    for (const ch of req.params.query) {
+      const codepoint = ch.codePointAt(0);
+      const row = lookupCode(codepoint);
+      // undefined would serialise to null, which the page cannot render;
+      // send a minimal [code, name] placeholder row instead.
+      chars.push(row !== undefined ? row : [formatCode(codepoint), "<not in database>"]);
+    }
+
+    res.setHeader("Content-Type", "application/json");
+    res.end(JSON.stringify({
+      chars: chars,
+      search: searchDescription(req.params.query),
+    }));
+  });
+};