From 29d26068ad98e865272198090375fca57e5cf156 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Mon, 12 Jun 2023 23:07:05 +0200 Subject: unicode: Recognise multiple index literals in input --- modules/unicode/index.html | 32 ++++++++++++++++++++++---------- modules/unicode/unicode.js | 45 ++++++++++++++++++++++++++++++--------------- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/modules/unicode/index.html b/modules/unicode/index.html index 924f86e..8738b6f 100644 --- a/modules/unicode/index.html +++ b/modules/unicode/index.html @@ -49,7 +49,7 @@ function do_lookup(override) { xhr.onreadystatechange = function() { if (xhr.readyState == 4) { if (xhr.status == 200) { - handleResponse(JSON.parse(xhr.responseText), input); + handleResponse(JSON.parse(xhr.responseText)); } else { alert("Request failed: " + xhr.responseText); } @@ -62,7 +62,7 @@ function do_lookup(override) { xhr.send(); } -function handleResponse(json, input) { +function handleResponse(json) { var elem; /* elem = document.getElementById("json"); @@ -70,12 +70,20 @@ function handleResponse(json, input) { elem.appendChild(document.createTextNode(JSON.stringify(json))); */ var index_container = document.getElementById("index_container"); - if ("index" in json) { + if (json.indices.length > 0) { index_container.classList.remove("invisible"); - document.getElementById("index_input").innerHTML = input; - elem = document.getElementById("index"); - elem.innerHTML = ""; - elem.appendChild(makeCodepointDiv(json["index"])); + setTableRows("indices", json.indices); + if (json.indices.length > 1) { + document.getElementById("indices_concatenated_container").classList.remove("invisible"); + elem = document.getElementById("indices_concatenated"); + elem.innerHTML = ""; + var str = ""; + for (var i = 0; i < json.indices.length; i++) + str += String.fromCodePoint(parseInt(json.indices[i][0], 16)); + elem.appendChild(document.createTextNode(str)); + } else { + 
document.getElementById("indices_concatenated_container").classList.add("invisible"); + } } else if (!index_container.classList.contains("invisible")) { index_container.classList.add("invisible"); } @@ -141,15 +149,19 @@ window.addEventListener("load", function() {
- +

Codepoints

diff --git a/modules/unicode/unicode.js b/modules/unicode/unicode.js index ee83179..ab8ad53 100644 --- a/modules/unicode/unicode.js +++ b/modules/unicode/unicode.js @@ -82,22 +82,37 @@ function searchDescription(text) { return result; } -function recogniseIndex(text) { - let m; - - m = text.match(/^([0-9]+)$/); // 1234 - if (m) return lookupCode(parseInt(m[1], 10)); - - m = text.match(/^(?:0[Xx]|[Uu]\+)([0-9a-fA-F]+)$/); // 0x34ab / U+34ab - if (m) return lookupCode(parseInt(m[1], 16)); - - m = text.match(/^&#([0-9]+);$/); // &#1234; - if (m) return lookupCode(parseInt(m[1], 10)); +function recogniseIndices(text) { + const results = []; + + // A: 1234 (decimal) + // B: 0x34ab / U+34ab (hexadecimal) + // C: &#1234; (xml-style decimal) + // D: &#x34ab; (xml-style hexadecimal) + // within word boundaries. + // AAAAAA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB CCCCCCCCCCC DDDDDDDDDDDDDDDDDDDDD + const regex = /\b([0-9]+|(?:0[Xx]|[Uu]\+)([0-9a-fA-F]+)|&#([0-9]+);|&#[Xx]([0-9a-fA-F]+);)\b/g; + const matches = text.match(regex); + if (matches == null) return []; + + for (const m of matches) { + let code = -1; + + if ("Xx+".indexOf(m[1]) != -1) { // hexadecimal + code = parseInt(m.slice(2), 16); + } else if (m[0] == "&" && "Xx".indexOf(m[2]) != -1) { // xml-style hexadecimal + code = parseInt(m.slice(3, -1), 16); + } else if (m[0] == "&") { // xml-style decimal + code = parseInt(m.slice(2, -1), 10); + } else { // decimal + code = parseInt(m, 10); + } - m = text.match(/^&#[Xx]([0-9a-fA-F]+);$/); // &#x34ab; - if (m) return lookupCode(parseInt(m[1], 16)); + const row = lookupCode(code); + if (row != null) results.push(row); + } - return null; + return results; } module.exports = function (app, io, moddir) { @@ -123,7 +138,7 @@ module.exports = function (app, io, moddir) { } res.json({ - index: recogniseIndex(req.params.query) || undefined, + indices: recogniseIndices(req.params.query), codepoints: codepoints, notfound: notfound, search: searchDescription(req.params.query), -- cgit v1.2.3-54-g00ecf