summaryrefslogtreecommitdiff
path: root/escapexml
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2026-05-10 18:54:45 +0200
committerTom Smeding <tom@tomsmeding.com>2026-05-10 18:54:45 +0200
commit5d6f1f42eec48ae1671e54cdd127f39318416498 (patch)
tree58672af74066d702fef81c5fdc9587de77d42c57 /escapexml
parent66e9a4f242b9f02a7fcf6b5fc610417a50e1ba87 (diff)
Generate renderer functions using TH
These take (and project from) a data type instead of hash maps, _and_ use text-builder-linear, _and_ have the full template inlined. The result is that this is the fastest renderer yet.
Diffstat (limited to 'escapexml')
-rw-r--r--escapexml/EscapeXML.hs37
-rw-r--r--escapexml/escapexml.c49
2 files changed, 86 insertions, 0 deletions
diff --git a/escapexml/EscapeXML.hs b/escapexml/EscapeXML.hs
new file mode 100644
index 0000000..662d2ed
--- /dev/null
+++ b/escapexml/EscapeXML.hs
@@ -0,0 +1,37 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+module EscapeXML (escapeXML) where
+
+import Data.Array.Byte
+import Data.Text.Internal
+import Foreign.C.Types
+import GHC.Exts
+import GHC.IO (IO(IO))
+import System.IO.Unsafe (unsafePerformIO)
+
+
+foreign import ccall unsafe "tirclogv_escapexml_len"
+ c_escapexml_len :: ByteArray# -> CSize -> CSize -> IO CSize
+
+foreign import ccall unsafe "tirclogv_escapexml"
+ c_escapexml :: MutableByteArray# RealWorld -> ByteArray# -> CSize -> CSize -> IO ()
+
+{-# NOINLINE escapeXML #-}
+escapeXML :: Text -> Text
+escapeXML (Text (ByteArray src#) off len) = unsafePerformIO $ do
+ let offCS = fromIntegral @Int @CSize off
+ lenCS = fromIntegral @Int @CSize len
+
+ reslen <- c_escapexml_len src# offCS lenCS
+ let !reslenI@(I# reslen#) = fromIntegral @CSize @Int reslen
+
+ MutableByteArray dst# <-
+ IO $ \s -> case newByteArray# reslen# s of
+ (# s', mba# #) -> (# s', MutableByteArray mba# #)
+ c_escapexml dst# src# offCS lenCS
+ ba <- IO $ \s -> case unsafeFreezeByteArray# dst# s of
+ (# s', ba# #) -> (# s', ByteArray ba# #)
+
+ return (Text ba 0 reslenI)
diff --git a/escapexml/escapexml.c b/escapexml/escapexml.c
new file mode 100644
index 0000000..6eb366d
--- /dev/null
+++ b/escapexml/escapexml.c
@@ -0,0 +1,49 @@
+#include <string.h>
+#include <stdint.h>
+
+// https://hackage-content.haskell.org/package/mustache-2.4.3.1/src/src/Text/Mustache/Internal.hs
+//
+// escapeXML :: String -> String
+// escapeXML = concatMap $ \x -> IntMap.findWithDefault [x] (ord x) mp
+// where mp = IntMap.fromList [(ord b, "&"++a++";") | (a,[b]) <- xmlEntities]
+//
+// xmlEntities :: [(String, String)]
+// xmlEntities =
+// [ ("quot", "\"")
+// , ("#39", "'")
+// , ("amp" , "&")
+// , ("lt" , "<")
+// , ("gt" , ">")
+// ]
+
+#define ENTITIES_XLIST \
+ X('"', 6, "&quot;") \
+ X('\'', 5, "&#39;") \
+ X('&', 5, "&amp;") \
+ X('<', 4, "&lt;") \
+ X('>', 4, "&gt;")
+
+size_t tirclogv_escapexml_len(const uint8_t *src, size_t src_off, size_t src_len) {
+ size_t reslen = 0;
+ for (size_t i = 0; i < src_len; i++) {
+ switch (src[src_off + i]) {
+#define X(ch, len, _str) case ch: reslen += len; break;
+ ENTITIES_XLIST
+#undef X
+ default: reslen += 1; break;
+ }
+ }
+ return reslen;
+}
+
+void tirclogv_escapexml(uint8_t *dst, const uint8_t *src, size_t src_off, size_t src_len) {
+ for (size_t i = 0, j = 0; i < src_len; i++) {
+ const uint8_t c = src[src_off + i];
+ switch (c) {
+#define X(ch, len, str) case ch: memcpy(&dst[j], str, len); j += len; break;
+ ENTITIES_XLIST
+#undef X
+ default: dst[j++] = c; break;
+ }
+ }
+}