diff options
| author | Tom Smeding <tom@tomsmeding.com> | 2026-05-10 18:54:45 +0200 |
|---|---|---|
| committer | Tom Smeding <tom@tomsmeding.com> | 2026-05-10 18:54:45 +0200 |
| commit | 5d6f1f42eec48ae1671e54cdd127f39318416498 (patch) | |
| tree | 58672af74066d702fef81c5fdc9587de77d42c57 /escapexml | |
| parent | 66e9a4f242b9f02a7fcf6b5fc610417a50e1ba87 (diff) | |
Generate renderer functions using TH
These take (and project from) a data type instead of hash maps, _and_
use text-builder-linear, _and_ have the full template inlined. The
result is that this is the fastest renderer yet.
Diffstat (limited to 'escapexml')
| -rw-r--r-- | escapexml/EscapeXML.hs | 37 | ||||
| -rw-r--r-- | escapexml/escapexml.c | 49 |
2 files changed, 86 insertions, 0 deletions
diff --git a/escapexml/EscapeXML.hs b/escapexml/EscapeXML.hs new file mode 100644 index 0000000..662d2ed --- /dev/null +++ b/escapexml/EscapeXML.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedFFITypes #-} +module EscapeXML (escapeXML) where + +import Data.Array.Byte +import Data.Text.Internal +import Foreign.C.Types +import GHC.Exts +import GHC.IO (IO(IO)) +import System.IO.Unsafe (unsafePerformIO) + + +foreign import ccall unsafe "tirclogv_escapexml_len" + c_escapexml_len :: ByteArray# -> CSize -> CSize -> IO CSize + +foreign import ccall unsafe "tirclogv_escapexml" + c_escapexml :: MutableByteArray# RealWorld -> ByteArray# -> CSize -> CSize -> IO () + +{-# NOINLINE escapeXML #-} +escapeXML :: Text -> Text +escapeXML (Text (ByteArray src#) off len) = unsafePerformIO $ do + let offCS = fromIntegral @Int @CSize off + lenCS = fromIntegral @Int @CSize len + + reslen <- c_escapexml_len src# offCS lenCS + let !reslenI@(I# reslen#) = fromIntegral @CSize @Int reslen + + MutableByteArray dst# <- + IO $ \s -> case newByteArray# reslen# s of + (# s', mba# #) -> (# s', MutableByteArray mba# #) + c_escapexml dst# src# offCS lenCS + ba <- IO $ \s -> case unsafeFreezeByteArray# dst# s of + (# s', ba# #) -> (# s', ByteArray ba# #) + + return (Text ba 0 reslenI) diff --git a/escapexml/escapexml.c b/escapexml/escapexml.c new file mode 100644 index 0000000..6eb366d --- /dev/null +++ b/escapexml/escapexml.c @@ -0,0 +1,49 @@ +#include <string.h> +#include <stdint.h> + +// https://hackage-content.haskell.org/package/mustache-2.4.3.1/src/src/Text/Mustache/Internal.hs +// +// escapeXML :: String -> String +// escapeXML = concatMap $ \x -> IntMap.findWithDefault [x] (ord x) mp +// where mp = IntMap.fromList [(ord b, "&"++a++";") | (a,[b]) <- xmlEntities] +// +// xmlEntities :: [(String, String)] +// xmlEntities = +// [ ("quot", "\"") +// , ("#39", "'") +// , ("amp" , "&") +// , ("lt" , "<") +// , ("gt" , ">") +// ] + +#define ENTITIES_XLIST \ + X('"', 6, """) \ + X('\'', 5, "'") \ + X('&', 5, "&") \ + X('<', 4, "<") \ + X('>', 4, ">") + +size_t tirclogv_escapexml_len(const uint8_t *src, size_t src_off, size_t src_len) { + size_t reslen = 0; + for (size_t i = 0; i < src_len; i++) { + switch (src[src_off + i]) { +#define X(ch, len, _str) case ch: reslen += len; break; + ENTITIES_XLIST +#undef X + default: reslen += 1; break; + } + } + return reslen; +} + +void tirclogv_escapexml(uint8_t *dst, const uint8_t *src, size_t src_off, size_t src_len) { + for (size_t i = 0, j = 0; i < src_len; i++) { + const uint8_t c = src[src_off + i]; + switch (c) { +#define X(ch, len, str) case ch: memcpy(&dst[j], str, len); j += len; break; + ENTITIES_XLIST +#undef X + default: dst[j++] = c; break; + } + } +} |
