From c9127b2bd399ce5d3e9483a82434948202592309 Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Fri, 19 Aug 2016 20:44:53 +0200 Subject: Parsing and stringification works --- Makefile | 8 +++- ast.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ast.h | 4 ++ code.lysp | 1 + main.c | 87 +++++++++++++++++++++++++++++++++++++++++++ parser.c | 68 ++++++++++++++++++++++++++++----- 6 files changed, 284 insertions(+), 10 deletions(-) create mode 100644 code.lysp create mode 100644 main.c diff --git a/Makefile b/Makefile index 2434358..2749caa 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,13 @@ CC := gcc -CFLAGS := -Wall -Wextra -std=c11 -O2 -fwrapv +CFLAGS := -Wall -Wextra -std=c11 -fwrapv BIN := lysp +ifdef DEBUG + CFLAGS += -g +else + CFLAGS += -O2 +endif + .PHONY: all clean remake all: $(BIN) diff --git a/ast.c b/ast.c index 8120feb..a6f308d 100644 --- a/ast.c +++ b/ast.c @@ -1,3 +1,5 @@ +#define _GNU_SOURCE //asprintf +#include #include #include #include @@ -22,6 +24,11 @@ void ast_free(AST *ast){ free(ast->w.word); break; + case AST_STRING: + assert(ast->S.str); + free(ast->S.str); + break; + case AST_NUMBER: case AST_SYMBOL: break; @@ -53,6 +60,9 @@ AST* ast_copy(const AST *ast){ case AST_NUMBER: return ast_number(ast->n.num); + case AST_STRING: + return ast_string(copybufasstring(ast->S.str,ast->S.len),ast->S.len); + case AST_SYMBOL:{ assert(ast->s.name); AST *sym=ast_symbol(ast->s.name); @@ -66,10 +76,115 @@ AST* ast_copy(const AST *ast){ } +typedef struct Buffer{ + char *buf; + int sz,len; +} Buffer; + +static Buffer buf_make(int capacity){ + assert(capacity>0); + Buffer buf={malloc(capacity,char),capacity,0}; + buf.buf[0]='\0'; + return buf; +} + +static void buf_append(Buffer *buf,const char *str,int len){ + assert(buf); + assert(str); + assert(len>=0); + if(len==0)return; + if(buf->len+len>buf->sz-1){ + do buf->sz*=2; + while(buf->len+len>buf->sz-1); + buf->buf=realloc(buf->buf,buf->sz,char); + } + memcpy(buf->buf+buf->len,str,len); + buf->len+=len; + buf->buf[buf->len]='\0'; +} + +static char hexchar(int n){ + assert(n>=0&&n<16); + if(n<10)return n+'0'; + return n-10+'a'; +} + +static void ast_stringify_(const AST *ast,Buffer *buf){ + assert(ast); + assert(buf); + switch(ast->type){ + case AST_LIST: + if(ast->l.quoted)buf_append(buf,"'",1); + buf_append(buf,"(",1); + for(int i=0;il.len;i++){ + if(i!=0)buf_append(buf," ",1); + ast_stringify_(ast->l.nodes[i],buf); + } + buf_append(buf,")",1); + break; + + case AST_WORD: + buf_append(buf,ast->w.word,strlen(ast->w.word)); + break; + + case AST_NUMBER:{ + char *s; + int len=asprintf(&s,"%g",ast->n.num); + if(!s)outofmem(); + buf_append(buf,s,len); + free(s); + break; + } + + case AST_STRING:{ + buf_append(buf,"\"",1); + const char *str=ast->S.str; + for(int i=0;iS.len;i++){ + if(str[i]>=32&&str[i]<=126)buf_append(buf,str+i,1); + else switch(str[i]){ + case '\n': buf_append(buf,"\\n",2); break; + case '\t': buf_append(buf,"\\t",2); break; + case '\r': buf_append(buf,"\\r",2); break; + case '\b': buf_append(buf,"\\b",2); break; + case '\a': buf_append(buf,"\\a",2); break; + default:{ + char hexbuf[4]; + hexbuf[0]='\\'; + hexbuf[1]='x'; + hexbuf[2]=hexchar((unsigned char)str[i]/16); + hexbuf[3]=hexchar((unsigned char)str[i]%16); + buf_append(buf,hexbuf,4); + break; + } + } + } + buf_append(buf,"\"",1); + break; + } + + case AST_SYMBOL: + buf_append(buf,"'",1); + buf_append(buf,ast->s.name,strlen(ast->s.name)); + break; + + default: + assert(false); + } +} + +char* ast_stringify(const AST *ast){ + assert(ast); + Buffer buf=buf_make(32); + ast_stringify_(ast,&buf); + return buf.buf; +} + + AST* ast_list(int len,AST **nodes){ assert(len>=0); assert(nodes); AST *ast=malloc(1,AST); + ast->type=AST_LIST; ast->l.len=len; ast->l.nodes=malloc(len,AST*); memcpy(ast->l.nodes,nodes,len*sizeof(AST*)); @@ -80,19 +195,30 @@ AST* ast_list(int len,AST **nodes){ AST* ast_word(char *word){ assert(word); AST *ast=malloc(1,AST); + ast->type=AST_WORD; ast->w.word=word; return ast; } AST* ast_number(double num){ AST *ast=malloc(1,AST); + ast->type=AST_NUMBER; ast->n.num=num; return ast; } +AST* ast_string(char *str,int len){ + AST *ast=malloc(1,AST); + ast->type=AST_STRING; + ast->S.str=str; + ast->S.len=len; + return ast; +} + AST* ast_symbol(char *name){ assert(name); AST *ast=malloc(1,AST); + ast->type=AST_SYMBOL; ast->s.name=name; ast->s.symid=-1; return ast; diff --git a/ast.h b/ast.h index f55d798..8d23ef9 100644 --- a/ast.h +++ b/ast.h @@ -39,6 +39,7 @@ typedef struct ASTsymbol{ //if you're not the interpreter: // if you just allocated the ASTsymbol yourself, set symid to -1; // else, leave symid alone. + //You should probably use ast_symbol(), in which case you don't have to do anything. } ASTsymbol; struct AST{ @@ -57,7 +58,10 @@ void ast_free(AST *ast); AST* ast_copy(const AST *ast); +char* ast_stringify(const AST *ast); + AST* ast_list(int len,AST **nodes); //these convenience functions DO NOT copy their arguments AST* ast_word(char *word); AST* ast_number(double num); +AST* ast_string(char *str,int len); AST* ast_symbol(char *name); diff --git a/code.lysp b/code.lysp new file mode 100644 index 0000000..cebf2e1 --- /dev/null +++ b/code.lysp @@ -0,0 +1 @@ +(print (+ 1 (% 10 3)) 'kaas "kazen enzo") diff --git a/main.c b/main.c new file mode 100644 index 0000000..c3a5618 --- /dev/null +++ b/main.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include + +#include "parser.h" +#include "util.h" + + +char* readfile(const char *fname,size_t *length){ + FILE *f=fopen(fname,"rb"); + if(!f)return NULL; + if(fseek(f,0,SEEK_END)==-1){fclose(f); return NULL;} + long flen=ftell(f); + if(flen==-1){fclose(f); return NULL;} + rewind(f); + + char *buf=malloc(flen+1,char); + fread(buf,1,flen,f); + if(ferror(f)){fclose(f); free(buf); return NULL;} + if(memchr(buf,'\0',flen)!=NULL){ + fprintf(stderr,"Invalid null char in file '%s'\n",fname); + exit(1); + } + buf[flen]='\0'; + fclose(f); + + *length=flen; + return buf; +} + +char *readstdin(size_t *length){ + int bufsz=1024,cursor=0; + char *buf=malloc(bufsz,char); + while(true){ + if(cursor==bufsz-1){ + bufsz*=2; + buf=realloc(buf,bufsz,char); + } + int nread=fread(buf,1,bufsz-cursor-1,stdin); + if(nread>0&&memchr(buf,'\0',nread)!=NULL){ + fprintf(stderr,"Invalid null char on stdin file\n"); + exit(1); + } + cursor+=nread; + if(nreads=='"'){ int i; - for(i=0;il;i++){ + for(i=1;il;i++){ if(cursor->s[i]=='"')break; if(cursor->s[i]=='\\')i++; } if(i==cursor->l){ return tt_err("Unclosed string in source"); } + i++; advance(cursor,i); return tt_make(TT_STRING,cursor->s-i,i); } @@ -107,6 +108,15 @@ static Token nexttoken(Cursor *cursor){ } +static bool ishexdigit(char c){ + return (c>='0'&&c<='9')||(c>='a'&&c<='f')||(c>='A'&&c<='F'); +} + +static int hexnum(char c){ + return c<='9'?c-'0':(c&~32)-'A'; +} + + static ParseRet pr_ast(AST *ast){ ParseRet pr={ast,NULL}; return pr; @@ -164,23 +174,63 @@ static ParseRet parse_(Cursor *cursor){ nodes[len++]=pr.ast; } return pr_ast(ast_list(len,nodes)); - break; } case TT_WORD: - break; + return pr_ast(ast_word(copybufasstring(tok.str,tok.len))); case TT_QUOTEDWORD: - break; + return pr_ast(ast_symbol(copybufasstring(tok.str,tok.len))); case TT_NUMBER: - break; - - case TT_STRING: - break; + return pr_ast(ast_number(strtod(tok.str,NULL))); + + case TT_STRING:{ + assert(tok.len>=2&&tok.str[0]=='"'&&tok.str[tok.len-1]=='"'); + int len=0; + for(int i=1;i=tok.len-3||!ishexdigit(tok.str[i+1])||!ishexdigit(tok.str[i+2])){ + return pr_err_c("\"\\x\" in string needs two hexadecimal digits"); + } + i+=2; + } + } + len++; + } + char *buf=malloc(len==0?1:len,char); + int j=0; + for(int i=1;i