From 9de16e245424e62318cdce4909e33c256f585cb6 Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Fri, 22 Nov 2019 22:38:03 +0100 Subject: Full tokeniser in lispparser.lisp (also strings now) --- tests/lispparser.lisp | 122 +++++++++++++++++++++++++++++++++++++++++++++++--- tests/stdlib.lisp | 11 +++++ 2 files changed, 126 insertions(+), 7 deletions(-) diff --git a/tests/lispparser.lisp b/tests/lispparser.lisp index 1d8c79c..533e92f 100644 --- a/tests/lispparser.lisp +++ b/tests/lispparser.lisp @@ -1,29 +1,65 @@ #include "stdlib.lisp" +; string -> string +(define string-escape (str) + (let ((hexdigit (lambda (n) (substr n 1 "0123456789abcdef"))) + (go (lambdarec rec (str yet) + (if (= str "") yet + (let ((ch (substr 0 1 str)) + (n (ord ch)) + (out (cond + (= ch "\n") "\\n" + (= ch "\r") "\\r" + (= ch "\t") "\\t" + (= ch "\0") "\\0" + (= ch "\"") "\\\"" + (= ch "\\") "\\\\" + (< n 32) (concat "\\x" (concat (hexdigit (/ n 16)) (hexdigit (mod n 16)))) + ch))) + (rec (substr 1 -1 str) (concat yet out))))))) + (go str ""))) + +; tag string -> string +(define ansi-color (clr str) + (let ((style (cond + (= clr 'red) "31" + (= clr 'yellow) "33" + (= clr 'blue) "34;1" + (error "Unknown color in ansi-color" clr)))) + (concat (concat "\x1B[" (concat style "m")) (concat str "\x1B[0m")))) + +; char string -> bool (define str-elem (ch str) (if (= str "") 0 (if (= ch (substr 0 1 str)) 1 (str-elem ch (substr 1 -1 str))))) +; char -> bool (define isspace? (ch) (str-elem ch " \n\t\r")) +; char -> bool (define isdigit? (ch) (let ((n (ord ch))) (and (<= 48 n) (<= n 57)))) +; char -> bool (define lowercase? (ch) (let ((n (ord ch))) (and (<= 97 n) (<= n 122)))) +; char -> bool (define uppercase? (ch) (let ((n (ord ch))) (and (<= 65 n) (<= n 90)))) +; char -> bool (define iswordchar? (ch) (or (str-elem ch "-_?+/*") (or (lowercase? ch) (uppercase? ch)))) +; char -> bool (define isrestwordchar? (ch) (or (iswordchar? ch) (isdigit? ch))) +; string -> int (does not perform any checking) (define parse-int (str) (let ((helper (lambdarec rec (str n) (let ((ch (substr 0 1 str)) @@ -33,6 +69,56 @@ n))))) (helper str 0))) +; char -> int +(define parse-hex-digit (ch) + (let ((n (ord ch))) + (cond + (and (<= 48 n) (<= n 57)) + (- n 48) + (and (<= 97 n) (<= n 102)) + (- n 87) + (and (<= 65 n) (<= n 70)) + (- n 55) + (error "Invalid hex escape digit " ch)))) + +; string -> (string[parsed escape] string[rest]) +(define parse-string-escape (str) + (let ((ch (substr 0 1 str))) + (cond + (= ch "n") + (list "\n" (substr 1 -1 str)) + (= ch "r") + (list "\r" (substr 1 -1 str)) + (= ch "t") + (list "\t" (substr 1 -1 str)) + (= ch "0") + (list "\0" (substr 1 -1 str)) + (= ch "\"") + (list "\"" (substr 1 -1 str)) + (= ch "\\") + (list "\\" (substr 1 -1 str)) + (= ch "x") + (let ((h1 (parse-hex-digit (substr 1 1 str))) + (h2 (parse-hex-digit (substr 2 1 str)))) + (list (chr (16 * h1 + h2)) (substr 3 -1 str))) + (error "Invalid string escape character " ch)))) + +; string[input] string[string to prepend to result] -> string[parsed till bare "] +(define parse-string-contents (str yet) + (let ((ch (substr 0 1 str)) + (rest (substr 1 -1 str))) + (cond + (= ch "") + (error "Non-terminated string in source") + (= ch "\"") + yet + (= ch "\\") + (let ((pair (parse-string-escape rest))) + (parse-string-contents (cadr pair) (concat yet (car pair)))) + (parse-string-contents rest (concat yet ch))))) + +; deftype token = ('tag ), where the type of depends on the tag +; string -> (token string[rest]) (define next-token (str) (let ((ch (substr 0 1 str)) (rest (substr 1 -1 str))) @@ -44,17 +130,37 @@ (= ch ";") (next-token (drop-while (lambda (c) (not (= c "\n"))) rest)) (= ch "(") - (list "(" rest) + (list (list 'open '()) rest) (= ch ")") - (list ")" rest) + (list (list 'close '()) rest) + (= ch "\"") + (let ((text (parse-string-contents rest ""))) + (list (list 'string text) (substr (+ (length text) 1) -1 rest))) (or (iswordchar? ch) (isdigit? ch)) (let ((restword (take-while isrestwordchar? rest)) (rest2 (substr (length restword) -1 rest))) (if (and (all isdigit? restword) (or (isdigit? ch) (and (= ch "-") (> (length restword) 0)))) - (list (parse-int (concat ch restword)) rest2) - (list (concat ch restword) rest2))) - (error (concat "Invalid token: " ch))))) + (list (list 'number (parse-int (concat ch restword))) rest2) + (list (list 'name (concat ch restword)) rest2))) + (error "Invalid token" ch)))) + +; token -> string +(define pretty-token (token) + (let ((tag (car token)) + (field (cadr token))) + (cond + (= tag 'open) + (ansi-color 'red "(") + (= tag 'close) + (ansi-color 'red ")") + (= tag 'string) + (ansi-color 'yellow (concat "\"" (concat (string-escape field) "\""))) + (= tag 'number) + (ansi-color 'blue (number->string field)) + (= tag 'name) + field + (error "Invalid token tag in pretty-token" tag)))) (define go (lambdarec rec (str) (let ((pair (next-token str))) @@ -63,7 +169,9 @@ (let ((token (car pair)) (rest (cadr pair))) (do - (print token) + (sys-put-string stdout (pretty-token token)) + (sys-put-string stdout " ") (rec rest))))))) -(go (read-file "tests/closuretest.lisp")) +(go (read-file "tests/f.lisp")) +(print) diff --git a/tests/stdlib.lisp b/tests/stdlib.lisp index ece8953..86dabe2 100644 --- a/tests/stdlib.lisp +++ b/tests/stdlib.lisp @@ -99,3 +99,14 @@ (define read-line (fid) (read-until fid (lambda (ch) (or (= ch "\n") (null? ch))))) + +(define number->string (num) + (let ((helper (lambdarec rec (n yet) + (if (= n 0) yet + (rec (/ n 10) (concat (chr (+ (mod n 10) 48)) yet)))))) + (cond + (= num 0) + "0" + (< num 0) + (concat "-" (number->string (- 0 num))) + (helper num "")))) -- cgit v1.2.3-54-g00ecf