summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortomsmeding <tom.smeding@gmail.com>2019-11-22 22:38:03 +0100
committertomsmeding <tom.smeding@gmail.com>2019-11-22 22:38:15 +0100
commit9de16e245424e62318cdce4909e33c256f585cb6 (patch)
treeac258835905ece97ea252ca57068a4f9085f0d4a
parent8cfb2879d9d5e9df9abb23bae8420f61005980e8 (diff)
Full tokeniser in lispparser.lisp (also strings now)
-rw-r--r--tests/lispparser.lisp122
-rw-r--r--tests/stdlib.lisp11
2 files changed, 126 insertions, 7 deletions
diff --git a/tests/lispparser.lisp b/tests/lispparser.lisp
index 1d8c79c..533e92f 100644
--- a/tests/lispparser.lisp
+++ b/tests/lispparser.lisp
@@ -1,29 +1,65 @@
#include "stdlib.lisp"
+; string -> string
+(define string-escape (str)
+ (let ((hexdigit (lambda (n) (substr n 1 "0123456789abcdef")))
+ (go (lambdarec rec (str yet)
+ (if (= str "") yet
+ (let ((ch (substr 0 1 str))
+ (n (ord ch))
+ (out (cond
+ (= ch "\n") "\\n"
+ (= ch "\r") "\\r"
+ (= ch "\t") "\\t"
+ (= ch "\0") "\\0"
+ (= ch "\"") "\\\""
+ (= ch "\\") "\\\\"
+ (< n 32) (concat "\\x" (concat (hexdigit (/ n 16)) (hexdigit (mod n 16))))
+ ch)))
+ (rec (substr 1 -1 str) (concat yet out)))))))
+ (go str "")))
+
+; tag string -> string
+(define ansi-color (clr str)
+ (let ((style (cond
+ (= clr 'red) "31"
+ (= clr 'yellow) "33"
+ (= clr 'blue) "34;1"
+ (error "Unknown color in ansi-color" clr))))
+ (concat (concat "\x1B[" (concat style "m")) (concat str "\x1B[0m"))))
+
+; char string -> bool
(define str-elem (ch str)
(if (= str "") 0
(if (= ch (substr 0 1 str)) 1
(str-elem ch (substr 1 -1 str)))))
+; char -> bool
(define isspace? (ch)
(str-elem ch " \n\t\r"))
+; char -> bool
(define isdigit? (ch)
(let ((n (ord ch))) (and (<= 48 n) (<= n 57))))
+; char -> bool
(define lowercase? (ch)
(let ((n (ord ch))) (and (<= 97 n) (<= n 122))))
+; char -> bool
(define uppercase? (ch)
(let ((n (ord ch))) (and (<= 65 n) (<= n 90))))
+; char -> bool
(define iswordchar? (ch)
(or (str-elem ch "-_?+/*")
(or (lowercase? ch) (uppercase? ch))))
+; char -> bool
(define isrestwordchar? (ch)
(or (iswordchar? ch) (isdigit? ch)))
+; string -> int (does not perform any checking)
(define parse-int (str)
(let ((helper (lambdarec rec (str n)
(let ((ch (substr 0 1 str))
@@ -33,6 +69,56 @@
n)))))
(helper str 0)))
+; char -> int
+(define parse-hex-digit (ch)
+ (let ((n (ord ch)))
+ (cond
+ (and (<= 48 n) (<= n 57))
+ (- n 48)
+ (and (<= 97 n) (<= n 102))
+ (- n 87)
+ (and (<= 65 n) (<= n 70))
+ (- n 55)
+ (error "Invalid hex escape digit " ch))))
+
+; string -> (string[parsed escape] string[rest])
+(define parse-string-escape (str)
+ (let ((ch (substr 0 1 str)))
+ (cond
+ (= ch "n")
+ (list "\n" (substr 1 -1 str))
+ (= ch "r")
+ (list "\r" (substr 1 -1 str))
+ (= ch "t")
+ (list "\t" (substr 1 -1 str))
+ (= ch "0")
+ (list "\0" (substr 1 -1 str))
+ (= ch "\"")
+ (list "\"" (substr 1 -1 str))
+ (= ch "\\")
+ (list "\\" (substr 1 -1 str))
+ (= ch "x")
+ (let ((h1 (parse-hex-digit (substr 1 1 str)))
+ (h2 (parse-hex-digit (substr 2 1 str))))
+ (list (chr (16 * h1 + h2)) (substr 3 -1 str)))
+ (error "Invalid string escape character " ch))))
+
+; string[input] string[string to prepend to result] -> string[parsed till bare "]
+(define parse-string-contents (str yet)
+ (let ((ch (substr 0 1 str))
+ (rest (substr 1 -1 str)))
+ (cond
+ (= ch "")
+ (error "Non-terminated string in source")
+ (= ch "\"")
+ yet
+ (= ch "\\")
+ (let ((pair (parse-string-escape rest)))
+ (parse-string-contents (cadr pair) (concat yet (car pair))))
+ (parse-string-contents rest (concat yet ch)))))
+
+; deftype token = ('tag <field>), where the type of <field> depends on the tag
+; string -> (token string[rest])
(define next-token (str)
(let ((ch (substr 0 1 str))
(rest (substr 1 -1 str)))
@@ -44,17 +130,37 @@
(= ch ";")
(next-token (drop-while (lambda (c) (not (= c "\n"))) rest))
(= ch "(")
- (list "(" rest)
+ (list (list 'open '()) rest)
(= ch ")")
- (list ")" rest)
+ (list (list 'close '()) rest)
+ (= ch "\"")
+ (let ((text (parse-string-contents rest "")))
+ (list (list 'string text) (substr (+ (length text) 1) -1 rest)))
(or (iswordchar? ch) (isdigit? ch))
(let ((restword (take-while isrestwordchar? rest))
(rest2 (substr (length restword) -1 rest)))
(if (and (all isdigit? restword)
(or (isdigit? ch) (and (= ch "-") (> (length restword) 0))))
- (list (parse-int (concat ch restword)) rest2)
- (list (concat ch restword) rest2)))
- (error (concat "Invalid token: " ch)))))
+ (list (list 'number (parse-int (concat ch restword))) rest2)
+ (list (list 'name (concat ch restword)) rest2)))
+ (error "Invalid token" ch))))
+
+; token -> string
+(define pretty-token (token)
+ (let ((tag (car token))
+ (field (cadr token)))
+ (cond
+ (= tag 'open)
+ (ansi-color 'red "(")
+ (= tag 'close)
+ (ansi-color 'red ")")
+ (= tag 'string)
+ (ansi-color 'yellow (concat "\"" (concat (string-escape field) "\"")))
+ (= tag 'number)
+ (ansi-color 'blue (number->string field))
+ (= tag 'name)
+ field
+ (error "Invalid token tag in pretty-token" tag))))
(define go (lambdarec rec (str)
(let ((pair (next-token str)))
@@ -63,7 +169,9 @@
(let ((token (car pair))
(rest (cadr pair)))
(do
- (print token)
+ (sys-put-string stdout (pretty-token token))
+ (sys-put-string stdout " ")
(rec rest)))))))
-(go (read-file "tests/closuretest.lisp"))
+(go (read-file "tests/f.lisp"))
+(print)
diff --git a/tests/stdlib.lisp b/tests/stdlib.lisp
index ece8953..86dabe2 100644
--- a/tests/stdlib.lisp
+++ b/tests/stdlib.lisp
@@ -99,3 +99,14 @@
(define read-line (fid)
(read-until fid (lambda (ch) (or (= ch "\n") (null? ch)))))
+
+(define number->string (num)
+ (let ((helper (lambdarec rec (n yet)
+ (if (= n 0) yet
+ (rec (/ n 10) (concat (chr (+ (mod n 10) 48)) yet))))))
+ (cond
+ (= num 0)
+ "0"
+ (< num 0)
+ (concat "-" (number->string (- 0 num)))
+ (helper num ""))))