From 29d26068ad98e865272198090375fca57e5cf156 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Mon, 12 Jun 2023 23:07:05 +0200 Subject: unicode: Recognise multiple index literals in input --- modules/unicode/unicode.js | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'modules/unicode/unicode.js') diff --git a/modules/unicode/unicode.js b/modules/unicode/unicode.js index ee83179..ab8ad53 100644 --- a/modules/unicode/unicode.js +++ b/modules/unicode/unicode.js @@ -82,22 +82,37 @@ function searchDescription(text) { return result; } -function recogniseIndex(text) { - let m; - - m = text.match(/^([0-9]+)$/); // 1234 - if (m) return lookupCode(parseInt(m[1], 10)); - - m = text.match(/^(?:0[Xx]|[Uu]\+)([0-9a-fA-F]+)$/); // 0x34ab / U+34ab - if (m) return lookupCode(parseInt(m[1], 16)); - - m = text.match(/^&#([0-9]+);$/); // Ӓ - if (m) return lookupCode(parseInt(m[1], 10)); +function recogniseIndices(text) { + const results = []; + + // A: 1234 (decimal) + // B: 0x34ab / U+34ab (hexadecimal) + // C: Ӓ (xml-style decimal) + // D: 㒫 (xml-style hexadecimal) + // within word boundaries. + // AAAAAA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB CCCCCCCCCCC DDDDDDDDDDDDDDDDDDDDD + const regex = /\b([0-9]+|(?:0[Xx]|[Uu]\+)([0-9a-fA-F]+)|&#([0-9]+);|&#[Xx]([0-9a-fA-F]+);)\b/g; + const matches = text.match(regex); + if (matches == null) return []; + + for (const m of matches) { + let code = -1; + + if ("Xx+".indexOf(m[1]) != -1) { // hexadecimal + code = parseInt(m.slice(2), 16); + } else if (m[0] == "&" && "Xx".indexOf(m[2]) != -1) { // xml-style hexadecimal + code = parseInt(m.slice(3, -1), 16); + } else if (m[0] == "&") { // xml-style decimal + code = parseInt(m.slice(2, -1), 10); + } else { // decimal + code = parseInt(m, 10); + } - m = text.match(/^&#[Xx]([0-9a-fA-F]+);$/); // 㒫 - if (m) return lookupCode(parseInt(m[1], 16)); + const row = lookupCode(code); + if (row != null) results.push(row); + } - return null; + return results; } module.exports = function (app, io, moddir) { @@ -123,7 +138,7 @@ module.exports = function (app, io, moddir) { } res.json({ - index: recogniseIndex(req.params.query) || undefined, + indices: recogniseIndices(req.params.query), codepoints: codepoints, notfound: notfound, search: searchDescription(req.params.query), -- cgit v1.2.3-54-g00ecf