diff options
| author | tomsmeding <tom.smeding@gmail.com> | 2020-01-17 21:43:32 +0100 | 
|---|---|---|
| committer | tomsmeding <tom.smeding@gmail.com> | 2020-01-17 21:43:32 +0100 | 
| commit | df268d8fc6b68be2a539561e89a376f0334a6e1e (patch) | |
| tree | e19eef4b334327b8b5740e0ab3269da14c3c6160 | |
| parent | f1ae140cde98ede14fd2374ab34cd28d69e9dbfb (diff) | |
unicode: Module to search the unicode database
| -rw-r--r-- | modules/unicode/.gitignore | 1 | ||||
| -rw-r--r-- | modules/unicode/index.html | 102 | ||||
| -rw-r--r-- | modules/unicode/unicode.js | 183 | 
3 files changed, 286 insertions, 0 deletions
diff --git a/modules/unicode/.gitignore b/modules/unicode/.gitignore
new file mode 100644
index 0000000..0487319
--- /dev/null
+++ b/modules/unicode/.gitignore
@@ -0,0 +1 @@
+UnicodeData.txt
diff --git a/modules/unicode/index.html b/modules/unicode/index.html
new file mode 100644
index 0000000..e44a96e
--- /dev/null
+++ b/modules/unicode/index.html
@@ -0,0 +1,102 @@
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Unicode character lookup</title>
+<style>
+body {
+	font-family: sans-serif;
+}
+.table > div:nth-child(odd) {
+	background-color: #eee;
+}
+.table {
+	max-height: 300px;
+	overflow-y: scroll;
+}
+</style>
+<script>
+// Looks up the contents of the input box on the server and shows the result.
+function do_lookup() {
+	var query = document.getElementById("input").value;
+	// An empty query would request "/unicode/lookup/", which matches no route
+	if (query.length === 0) return;
+
+	var xhr = new XMLHttpRequest();
+	xhr.onreadystatechange = function() {
+		if (xhr.readyState !== 4) return;
+		if (xhr.status !== 200) {
+			alert("Request failed: " + xhr.responseText);
+			return;
+		}
+
+		var json;
+		try {
+			json = JSON.parse(xhr.responseText);
+		} catch (err) {
+			alert("Invalid response from server: " + err.message);
+			return;
+		}
+		handleResponse(json);
+	};
+	xhr.open("GET", "/unicode/lookup/" + encodeURIComponent(query));
+	xhr.responseType = "text";
+	xhr.send();
+}
+
+// Fills both result lists from a response of the form
+// {chars: [row], search: [row]}; see populateCharacter for the rows.
+function handleResponse(json) {
+	var keys = ["chars", "search"];
+
+	for (var i = 0; i < keys.length; i++) {
+		var rows = json[keys[i]];
+		document.getElementById(keys[i] + "_num").textContent = "(" + rows.length + ")";
+
+		var elem = document.getElementById(keys[i]);
+		// textContent instead of innerHTML: none of this is meant as markup
+		elem.textContent = "";
+		for (var j = 0; j < rows.length; j++) {
+			var div = document.createElement("div");
+			div.classList.add("character");
+			populateCharacter(div, rows[j]);
+			elem.appendChild(div);
+		}
+	}
+}
+
+// Renders one row into div: row[0] is the hexadecimal code point and row[1]
+// the character name. A null row (a character without data) is shown as a
+// placeholder instead of raising a TypeError.
+function populateCharacter(div, row) {
+	if (row == null) row = ["????", "<unknown character>"];
+
+	var span = document.createElement("span");
+	span.setAttribute("style", "display: inline-block; width: 7em; font-weight: bold;");
+	span.appendChild(document.createTextNode("U+" + row[0]));
+	div.appendChild(span);
+
+	span = document.createElement("span");
+	span.appendChild(document.createTextNode(row[1]));
+	div.appendChild(span);
+}
+
+window.addEventListener("load", function() {
+	document.getElementById("input").focus();
+});
+</script>
+</head>
+<body>
+	<h1>Unicode character lookup</h1>
+
+	<input type="text" id="input" placeholder="Character(s) to look up" onkeypress="if (event.keyCode == 10 || event.keyCode == 13) do_lookup()">
+	<input type="button" onclick="do_lookup()" value="Lookup">
+	<br>
+
+	<h3>Characters <span id="chars_num"></span></h3>
+	<div id="chars" class="table"></div>
+
+	<h3>Found in descriptions <span id="search_num"></span></h3>
+	<div id="search" class="table"></div>
+</body>
+</html>
diff --git a/modules/unicode/unicode.js b/modules/unicode/unicode.js
new file mode 100644
index 0000000..3221cb9
--- /dev/null
+++ b/modules/unicode/unicode.js
@@ -0,0 +1,183 @@
+"use strict";
+
+const cmn = require("../$common.js");
+const fs = require("fs");
+const path = require("path");
+const https = require("https");
+
+
+// Where UnicodeData.txt is fetched from when there is no local copy yet.
+// This is the released 13.0.0 file instead of the draft
+// UnicodeData-13.0.0d6.txt, which was a pre-release file name.
+// NOTE(review): confirm the draft URL is indeed gone before merging.
+const DATA_URL = "https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt";
+
+// Indices of the semicolon-separated fields in a line of UnicodeData.txt
+const fCODE      = 0;
+const fNAME      = 1;
+const fCATEGORY  = 2;
+// combining class, bidirectional category, decomposition mapping
+const fDECDIGIT  = 6;
+const fDIGIT     = 7;
+const fNUMERIC   = 8;
+const fMIRRORED  = 9;
+const fOLDNAME   = 10;
+const fCOMMENT   = 11;
+const fUPPERCASE = 12;
+const fLOWERCASE = 13;
+const fTITLECASE = 14;
+
+// db: Map(int => [Field])  (NAME is in uppercase)
+let db = new Map();
+// ranges: [{name: String, first: int, last: int, row: [Field]}]
+//   (row is the range's "First" line with NAME rewritten to "<name>")
+let ranges = [];
+
+// Formats a code point like the first field of UnicodeData.txt: uppercase
+// hexadecimal, zero-padded to at least four digits.
+function formatCode(codepoint) {
+	let hex = codepoint.toString(16).toUpperCase();
+	while (hex.length < 4) hex = "0" + hex;
+	return hex;
+}
+
+// Parses the text of UnicodeData.txt and replaces the database with the
+// result. Lines named "<X, First>" and "<X, Last>" delimit a range of
+// characters and are collected in `ranges`; all other lines go into `db`.
+function importDatabase(csv) {
+	// Fill fresh containers and only install them at the end, so that an
+	// exception halfway through leaves the previous database intact.
+	const newDb = new Map();
+	const newRanges = [];
+
+	let currentRange = null;
+
+	// Splitting into lines (instead of scanning for "\n") also picks up a
+	// final line that is not terminated by a newline.
+	for (const line of csv.split(/\r?\n/)) {
+		const row = line.split(";");
+		const code = parseInt(row[fCODE], 16);
+		// Skip blank and malformed lines instead of crashing on them
+		if (Number.isNaN(code) || row[fNAME] === undefined) continue;
+
+		const m = row[fNAME].match(/^<(.*), (First|Last)>$/);
+		if (m !== null) {
+			if (m[2] === "First") {
+				row[fNAME] = "<" + m[1] + ">";
+				currentRange = {name: m[1], first: code, last: null, row: row};
+			} else if (currentRange !== null && m[1] === currentRange.name) {
+				currentRange.last = code;
+				newRanges.push(currentRange);
+				currentRange = null;
+			}
+		} else {
+			row[fNAME] = row[fNAME].toUpperCase();
+			newDb.set(code, row);
+		}
+	}
+
+	db = newDb;
+	ranges = newRanges;
+}
+
+// Returns the row for a code point, or undefined if the database has none.
+// For a code point inside a range this is a copy of the range's row with
+// the code field filled in.
+function lookupCode(codepoint) {
+	for (const range of ranges) {
+		if (range.first <= codepoint && codepoint <= range.last) {
+			const row = range.row.slice();
+			row[fCODE] = formatCode(codepoint);
+			return row;
+		}
+	}
+
+	return db.get(codepoint);
+}
+
+// Returns the rows in db whose name contains text, ignoring case. Ranges
+// are not searched.
+function searchDescription(text) {
+	const needle = text.toUpperCase();
+
+	const result = [];
+	for (const row of db.values()) {
+		if (row[fNAME].includes(needle)) {
+			result.push(row);
+		}
+	}
+
+	return result;
+}
+
+// Downloads UnicodeData.txt, imports it and caches it at dataFilePath.
+// Failures are logged and leave the database as it was.
+function downloadDatabase(dataFilePath) {
+	console.log("Downloading UnicodeData.txt...");
+
+	https.get(DATA_URL, res => {
+		if (res.statusCode !== 200) {
+			console.log(`Could not download UnicodeData.txt: status code ${res.statusCode}`);
+			// The body must still be consumed to free the connection
+			res.resume();
+			return;
+		}
+
+		res.setEncoding("utf8");
+		let buffer = "";
+		res.on("data", data => buffer += data);
+		res.on("error", err => {
+			console.log("Error downloading UnicodeData.txt:", err);
+		});
+		res.on("end", () => {
+			console.log("Downloaded UnicodeData.txt");
+			importDatabase(buffer);
+			try {
+				fs.writeFileSync(dataFilePath, buffer);
+			} catch (err) {
+				// Failing to cache the file should not take the server down
+				console.log("Could not save UnicodeData.txt:", err);
+			}
+		});
+	}).on("error", err => {
+		console.log("Error downloading UnicodeData.txt:", err);
+	});
+}
+
+// Module entry point: loads the database (downloading it if there is no
+// cached copy) and registers the /unicode page and its lookup endpoint.
+module.exports = function (app, io, moddir) {
+	const dataFilePath = path.join(moddir, "UnicodeData.txt");
+	if (fs.existsSync(dataFilePath)) {
+		importDatabase(fs.readFileSync(dataFilePath).toString());
+	} else {
+		downloadDatabase(dataFilePath);
+	}
+
+	app.get("/unicode", (req, res) => {
+		res.sendFile(path.join(moddir, "index.html"));
+	});
+
+	// Responds with {chars: [row], search: [row]}: one row per character
+	// of the query, and all rows whose name contains the query.
+	app.get("/unicode/lookup/:query", (req, res) => {
+		if (db.size === 0) {
+			res.status(503).type("text").send("The Unicode database has not been loaded (yet)");
+			return;
+		}
+
+		const chars = [];
+		for (const ch of req.params.query) {
+			const codepoint = ch.codePointAt(0);
+			const row = lookupCode(codepoint);
+			// undefined would turn into null in the JSON and trip up the
+			// client, so send a placeholder row for unknown characters
+			chars.push(row !== undefined ? row : [formatCode(codepoint), "<not in database>"]);
+		}
+
+		res.json({
+			chars: chars,
+			search: searchDescription(req.params.query),
+		});
+	});
+};
