summary | refs | log | tree | commit | diff
path: root/modules/unicode/unicode.js
diff options
context:
space:
mode:
author Tom Smeding <tom@tomsmeding.com> 2023-06-12 23:07:05 +0200
committer Tom Smeding <tom@tomsmeding.com> 2023-06-12 23:07:05 +0200
commit 29d26068ad98e865272198090375fca57e5cf156 (patch)
tree e30c4edd65fa58d32ee8865c02f4a49e04d303f5 /modules/unicode/unicode.js
parent fdbedfe021aca37dbb17c47fb613c2b9b4c134d9 (diff)
unicode: Recognise multiple index literals in input
Diffstat (limited to 'modules/unicode/unicode.js')
-rw-r--r-- modules/unicode/unicode.js 45
1 files changed, 30 insertions, 15 deletions
diff --git a/modules/unicode/unicode.js b/modules/unicode/unicode.js
index ee83179..ab8ad53 100644
--- a/modules/unicode/unicode.js
+++ b/modules/unicode/unicode.js
@@ -82,22 +82,37 @@ function searchDescription(text) {
return result;
}
-function recogniseIndex(text) {
- let m;
-
- m = text.match(/^([0-9]+)$/); // 1234
- if (m) return lookupCode(parseInt(m[1], 10));
-
- m = text.match(/^(?:0[Xx]|[Uu]\+)([0-9a-fA-F]+)$/); // 0x34ab / U+34ab
- if (m) return lookupCode(parseInt(m[1], 16));
-
- m = text.match(/^&#([0-9]+);$/); // &#1234;
- if (m) return lookupCode(parseInt(m[1], 10));
+/**
+ * Find every code point index literal in `text` and look each one up.
+ * Recognised forms: 1234 (decimal), 0x34ab / U+34ab (hexadecimal),
+ * &#1234; (xml-style decimal) and &#x34ab; (xml-style hexadecimal).
+ * @param {string} text - input to scan
+ * @returns {Array} lookupCode() results that are not null/undefined, in input order
+ */
+function recogniseIndices(text) {
+    const results = [];
+
+    // Only the bare forms are wrapped in \b. The xml-style forms start with
+    // '&' and end with ';', which are non-word characters, so a surrounding
+    // \b would demand a letter/digit directly next to them and reject a
+    // free-standing "&#x34ab;".
+    // Groups: 1 = decimal, 2 = hexadecimal, 3 = xml decimal, 4 = xml hex.
+    const regex = /\b(?:([0-9]+)|(?:0[Xx]|[Uu]\+)([0-9a-fA-F]+))\b|&#(?:([0-9]+)|[Xx]([0-9a-fA-F]+));/g;
+
+    // The regex literal is created on every call, so lastIndex starts at 0.
+    let m;
+    while ((m = regex.exec(text)) !== null) {
+        // A matched digit group is never empty, so truthiness is safe here.
+        const decimal = m[1] || m[3];
+        const code = decimal
+            ? parseInt(decimal, 10)
+            : parseInt(m[2] || m[4], 16);
- m = text.match(/^&#[Xx]([0-9a-fA-F]+);$/); // &#x34ab;
- if (m) return lookupCode(parseInt(m[1], 16));
+        const row = lookupCode(code);
+        if (row != null) results.push(row);
+    }
- return null;
+    return results;
}
module.exports = function (app, io, moddir) {
@@ -123,7 +138,7 @@ module.exports = function (app, io, moddir) {
}
res.json({
- index: recogniseIndex(req.params.query) || undefined,
+ indices: recogniseIndices(req.params.query),
codepoints: codepoints,
notfound: notfound,
search: searchDescription(req.params.query),