From 5d6f1f42eec48ae1671e54cdd127f39318416498 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Sun, 10 May 2026 17:54:45 +0100 Subject: Generate renderer functions using TH These take (and project from) a data type instead of hash maps, _and_ use text-builder-linear, _and_ have the full template inlined. The result is that this is the fastest renderer yet. --- escapexml/EscapeXML.hs | 37 +++++++++++++++++++++++++++++++++++++ escapexml/escapexml.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 escapexml/EscapeXML.hs create mode 100644 escapexml/escapexml.c (limited to 'escapexml') diff --git a/escapexml/EscapeXML.hs b/escapexml/EscapeXML.hs new file mode 100644 index 0000000..662d2ed --- /dev/null +++ b/escapexml/EscapeXML.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedFFITypes #-} +module EscapeXML (escapeXML) where + +import Data.Array.Byte +import Data.Text.Internal +import Foreign.C.Types +import GHC.Exts +import GHC.IO (IO(IO)) +import System.IO.Unsafe (unsafePerformIO) + + +foreign import ccall unsafe "tirclogv_escapexml_len" + c_escapexml_len :: ByteArray# -> CSize -> CSize -> IO CSize + +foreign import ccall unsafe "tirclogv_escapexml" + c_escapexml :: MutableByteArray# RealWorld -> ByteArray# -> CSize -> CSize -> IO () + +{-# NOINLINE escapeXML #-} +escapeXML :: Text -> Text +escapeXML (Text (ByteArray src#) off len) = unsafePerformIO $ do + let offCS = fromIntegral @Int @CSize off + lenCS = fromIntegral @Int @CSize len + + reslen <- c_escapexml_len src# offCS lenCS + let !reslenI@(I# reslen#) = fromIntegral @CSize @Int reslen + + MutableByteArray dst# <- + IO $ \s -> case newByteArray# reslen# s of + (# s', mba# #) -> (# s', MutableByteArray mba# #) + c_escapexml dst# src# offCS lenCS + ba <- IO $ \s -> case unsafeFreezeByteArray# dst# s of + (# s', ba# #) -> (# s', ByteArray ba# #) + + return (Text ba 0 reslenI) diff --git a/escapexml/escapexml.c b/escapexml/escapexml.c new file mode 100644 index 0000000..6eb366d --- /dev/null +++ b/escapexml/escapexml.c @@ -0,0 +1,49 @@ +#include +#include + +// https://hackage-content.haskell.org/package/mustache-2.4.3.1/src/src/Text/Mustache/Internal.hs +// +// escapeXML :: String -> String +// escapeXML = concatMap $ \x -> IntMap.findWithDefault [x] (ord x) mp +// where mp = IntMap.fromList [(ord b, "&"++a++";") | (a,[b]) <- xmlEntities] +// +// xmlEntities :: [(String, String)] +// xmlEntities = +// [ ("quot", "\"") +// , ("#39", "'") +// , ("amp" , "&") +// , ("lt" , "<") +// , ("gt" , ">") +// ] + +#define ENTITIES_XLIST \ + X('"', 6, """) \ + X('\'', 5, "'") \ + X('&', 5, "&") \ + X('<', 4, "<") \ + X('>', 4, ">") + +size_t tirclogv_escapexml_len(const uint8_t *src, size_t src_off, size_t src_len) { + size_t reslen = 0; + for (size_t i = 0; i < src_len; i++) { + switch (src[src_off + i]) { +#define X(ch, len, _str) case ch: reslen += len; break; + ENTITIES_XLIST +#undef X + default: reslen += 1; break; + } + } + return reslen; +} + +void tirclogv_escapexml(uint8_t *dst, const uint8_t *src, size_t src_off, size_t src_len) { + for (size_t i = 0, j = 0; i < src_len; i++) { + const uint8_t c = src[src_off + i]; + switch (c) { +#define X(ch, len, str) case ch: memcpy(&dst[j], str, len); j += len; break; + ENTITIES_XLIST +#undef X + default: dst[j++] = c; break; + } + } +} -- cgit v1.3.1