summary refs log tree commit diff
diff options
context:
space:
mode:
author tomsmeding <tom.smeding@gmail.com> 2016-08-19 20:44:53 +0200
committer tomsmeding <tom.smeding@gmail.com> 2016-08-19 20:52:28 +0200
commit c9127b2bd399ce5d3e9483a82434948202592309 (patch)
tree 7a6f682fb2fc47c83191fab0b7ad00c5dd45e604
parent 15fb03902ff5550b3a8c44bde3e08df876449f7a (diff)
Parsing and stringification works
-rw-r--r-- Makefile 8
-rw-r--r-- ast.c 126
-rw-r--r-- ast.h 4
-rw-r--r-- code.lysp 1
-rw-r--r-- main.c 87
-rw-r--r-- parser.c 68
6 files changed, 284 insertions, 10 deletions
diff --git a/Makefile b/Makefile
index 2434358..2749caa 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,13 @@
CC := gcc
-CFLAGS := -Wall -Wextra -std=c11 -O2 -fwrapv
+CFLAGS := -Wall -Wextra -std=c11 -fwrapv
BIN := lysp
+ifdef DEBUG
+ CFLAGS += -g
+else
+ CFLAGS += -O2
+endif
+
.PHONY: all clean remake
all: $(BIN)
diff --git a/ast.c b/ast.c
index 8120feb..a6f308d 100644
--- a/ast.c
+++ b/ast.c
@@ -1,3 +1,5 @@
+#define _GNU_SOURCE //asprintf
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
@@ -22,6 +24,11 @@ void ast_free(AST *ast){
free(ast->w.word);
break;
+ case AST_STRING:
+ assert(ast->S.str);
+ free(ast->S.str);
+ break;
+
case AST_NUMBER:
case AST_SYMBOL:
break;
@@ -53,6 +60,9 @@ AST* ast_copy(const AST *ast){
case AST_NUMBER:
return ast_number(ast->n.num);
+ case AST_STRING:
+ return ast_string(copybufasstring(ast->S.str,ast->S.len),ast->S.len);
+
case AST_SYMBOL:{
assert(ast->s.name);
AST *sym=ast_symbol(ast->s.name);
@@ -66,10 +76,115 @@ AST* ast_copy(const AST *ast){
}
+//Growable character buffer; buf_make and buf_append keep its contents NUL-terminated.
+typedef struct Buffer{
+	char *buf; //storage, allocated in buf_make and regrown in buf_append
+	int sz; //number of chars currently allocated for buf
+	int len; //number of chars in use, not counting the terminating NUL
+} Buffer;
+
+//Creates an empty Buffer with room for `capacity` chars, the terminating NUL included.
+//capacity must be positive.
+static Buffer buf_make(int capacity){
+	assert(capacity>0);
+	Buffer fresh;
+	//The two-argument (count,type) malloc is not the libc one; presumably a project macro.
+	fresh.buf=malloc(capacity,char);
+	fresh.sz=capacity;
+	fresh.len=0;
+	fresh.buf[0]='\0'; //an empty buffer is still a valid C string
+	return fresh;
+}
+
+//Appends the first len chars of str to buf, growing the storage by doubling when
+//the new contents plus the terminating NUL would not fit. A len of 0 is a no-op.
+static void buf_append(Buffer *buf,const char *str,int len){
+	assert(buf);
+	assert(str);
+	assert(len>=0);
+	if(len==0)return;
+
+	const int used=buf->len+len; //length of the contents after this append
+	int cap=buf->sz;
+	while(used>cap-1)cap*=2; //one slot stays reserved for the NUL
+	if(cap!=buf->sz){
+		buf->sz=cap;
+		buf->buf=realloc(buf->buf,buf->sz,char);
+	}
+
+	memcpy(buf->buf+buf->len,str,len);
+	buf->len=used;
+	buf->buf[used]='\0';
+}
+
+//Returns the lowercase hexadecimal digit character for n, which must be in 0..15.
+static char hexchar(int n){
+	assert(n>=0&&n<16);
+	return "0123456789abcdef"[n];
+}
+
+//Appends the textual form of ast, and recursively of its children, to buf.
+// - list:   "(" items separated by single spaces ")", preceded by "'" if quoted
+// - word:   the word verbatim
+// - number: printf "%g" formatting
+// - string: double-quoted; '"' and '\' are backslash-escaped, \n \t \r \b \a use
+//           their letter escapes, any other byte outside 32..126 becomes \xHH
+// - symbol: "'" followed by the name
+static void ast_stringify_(const AST *ast,Buffer *buf){
+	assert(ast);
+	assert(buf);
+	switch(ast->type){
+		case AST_LIST:
+			if(ast->l.quoted)buf_append(buf,"'",1);
+			buf_append(buf,"(",1);
+			for(int i=0;i<ast->l.len;i++){
+				if(i!=0)buf_append(buf," ",1);
+				ast_stringify_(ast->l.nodes[i],buf);
+			}
+			buf_append(buf,")",1);
+			break;
+
+		case AST_WORD:
+			buf_append(buf,ast->w.word,strlen(ast->w.word));
+			break;
+
+		case AST_NUMBER:{
+			char *s=NULL;
+			int len=asprintf(&s,"%g",ast->n.num);
+			//On failure asprintf returns -1 and leaves s unspecified, so test the
+			//return value rather than the pointer.
+			if(len<0)outofmem();
+			buf_append(buf,s,len);
+			free(s);
+			break;
+		}
+
+		case AST_STRING:{
+			buf_append(buf,"\"",1);
+			const char *str=ast->S.str;
+			for(int i=0;i<ast->S.len;i++){
+				switch(str[i]){
+					//A raw quote or backslash would end or corrupt the literal when
+					//the output is read back, so both are escaped.
+					case '"':  buf_append(buf,"\\\"",2); break;
+					case '\\': buf_append(buf,"\\\\",2); break;
+					case '\n': buf_append(buf,"\\n",2); break;
+					case '\t': buf_append(buf,"\\t",2); break;
+					case '\r': buf_append(buf,"\\r",2); break;
+					case '\b': buf_append(buf,"\\b",2); break;
+					case '\a': buf_append(buf,"\\a",2); break;
+					default:
+						if(str[i]>=32&&str[i]<=126){
+							buf_append(buf,str+i,1);
+						} else {
+							//Go through unsigned char so bytes >=0x80 do not yield
+							//negative nibbles where char is signed.
+							char hexbuf[4];
+							hexbuf[0]='\\';
+							hexbuf[1]='x';
+							hexbuf[2]=hexchar((unsigned char)str[i]/16);
+							hexbuf[3]=hexchar((unsigned char)str[i]%16);
+							buf_append(buf,hexbuf,4);
+						}
+						break;
+				}
+			}
+			buf_append(buf,"\"",1);
+			break;
+		}
+
+		case AST_SYMBOL:
+			buf_append(buf,"'",1);
+			buf_append(buf,ast->s.name,strlen(ast->s.name));
+			break;
+
+		default:
+			assert(false);
+	}
+}
+
+//Returns a newly allocated, NUL-terminated textual rendering of ast.
+//The buffer comes from buf_make; main.c releases the result with free().
+char* ast_stringify(const AST *ast){
+	enum{INITIAL_CAPACITY=32}; //starting size only; buf_append grows the buffer on demand
+	assert(ast);
+	Buffer out=buf_make(INITIAL_CAPACITY);
+	ast_stringify_(ast,&out);
+	return out.buf;
+}
+
+
AST* ast_list(int len,AST **nodes){
assert(len>=0);
assert(nodes);
AST *ast=malloc(1,AST);
+ ast->type=AST_LIST;
ast->l.len=len;
ast->l.nodes=malloc(len,AST*);
memcpy(ast->l.nodes,nodes,len*sizeof(AST*));
@@ -80,19 +195,30 @@ AST* ast_list(int len,AST **nodes){
AST* ast_word(char *word){
assert(word);
AST *ast=malloc(1,AST);
+ ast->type=AST_WORD;
ast->w.word=word;
return ast;
}
AST* ast_number(double num){
AST *ast=malloc(1,AST);
+ ast->type=AST_NUMBER;
ast->n.num=num;
return ast;
}
+//Wraps the len bytes at str in a new AST_STRING node.
+//str is stored as-is, not copied; ast_free later calls free() on it, so it must
+//be a non-NULL heap pointer even when len is 0.
+AST* ast_string(char *str,int len){
+	assert(str); //same argument check as ast_word/ast_symbol; ast_free asserts this too
+	assert(len>=0);
+	AST *ast=malloc(1,AST);
+	ast->type=AST_STRING;
+	ast->S.str=str;
+	ast->S.len=len;
+	return ast;
+}
+
AST* ast_symbol(char *name){
assert(name);
AST *ast=malloc(1,AST);
+ ast->type=AST_SYMBOL;
ast->s.name=name;
ast->s.symid=-1;
return ast;
diff --git a/ast.h b/ast.h
index f55d798..8d23ef9 100644
--- a/ast.h
+++ b/ast.h
@@ -39,6 +39,7 @@ typedef struct ASTsymbol{
//if you're not the interpreter:
// if you just allocated the ASTsymbol yourself, set symid to -1;
// else, leave symid alone.
+ //You should probably use ast_symbol(), in which case you don't have to do anything.
} ASTsymbol;
struct AST{
@@ -57,7 +58,10 @@ void ast_free(AST *ast);
AST* ast_copy(const AST *ast);
+char* ast_stringify(const AST *ast);
+
AST* ast_list(int len,AST **nodes); //these convenience functions DO NOT copy their arguments
AST* ast_word(char *word);
AST* ast_number(double num);
+AST* ast_string(char *str,int len);
AST* ast_symbol(char *name);
diff --git a/code.lysp b/code.lysp
new file mode 100644
index 0000000..cebf2e1
--- /dev/null
+++ b/code.lysp
@@ -0,0 +1 @@
+(print (+ 1 (% 10 3)) 'kaas "kazen enzo")
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..c3a5618
--- /dev/null
+++ b/main.c
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "parser.h"
+#include "util.h"
+
+
+//Reads the whole file fname into a newly allocated, NUL-terminated buffer.
+//On success stores the number of bytes actually read in *length and returns the
+//buffer, which the caller releases with free().
+//Returns NULL if the file cannot be opened, measured or read.
+//Exits with status 1 if the file contains a NUL byte.
+char* readfile(const char *fname,size_t *length){
+	FILE *f=fopen(fname,"rb");
+	if(!f)return NULL;
+	if(fseek(f,0,SEEK_END)!=0){fclose(f); return NULL;} //fseek reports failure as any nonzero value
+	long flen=ftell(f);
+	if(flen<0){fclose(f); return NULL;}
+	rewind(f);
+
+	char *buf=malloc(flen+1,char);
+	//fread may deliver fewer bytes than ftell announced; only the first nread
+	//bytes of buf are initialised, so everything below works on nread.
+	size_t nread=fread(buf,1,(size_t)flen,f);
+	if(ferror(f)){fclose(f); free(buf); return NULL;}
+	fclose(f);
+	if(memchr(buf,'\0',nread)!=NULL){
+		fprintf(stderr,"Invalid null char in file '%s'\n",fname);
+		exit(1);
+	}
+	buf[nread]='\0';
+
+	*length=nread;
+	return buf;
+}
+
+//Reads all of stdin into a newly allocated, NUL-terminated buffer that doubles
+//in size whenever it fills up.
+//On success stores the number of bytes read in *length and returns the buffer,
+//which the caller releases with free().
+//Returns NULL on a read error. Exits with status 1 if the input contains a NUL byte.
+char *readstdin(size_t *length){
+	size_t bufsz=1024,cursor=0;
+	char *buf=malloc(bufsz,char);
+	for(;;){
+		if(cursor==bufsz-1){ //only the slot reserved for the NUL is left
+			bufsz*=2;
+			buf=realloc(buf,bufsz,char);
+		}
+		const size_t want=bufsz-cursor-1;
+		//Read behind the data gathered so far; reading to buf itself would
+		//overwrite the earlier chunks.
+		const size_t nread=fread(buf+cursor,1,want,stdin);
+		if(nread>0&&memchr(buf+cursor,'\0',nread)!=NULL){
+			fprintf(stderr,"Invalid null char on stdin file\n");
+			exit(1);
+		}
+		cursor+=nread;
+		if(nread<want){
+			//A short fread means either an error or end of input.
+			if(ferror(stdin)){
+				free(buf);
+				return NULL;
+			}
+			break;
+		}
+	}
+	buf[cursor]='\0';
+	*length=cursor;
+	return buf;
+}
+
+
+//Entry point: reads the source named by the single argument ('-' selects stdin),
+//parses it, and prints the AST pointer followed by the stringified AST.
+//Exit status: 0 on success, 1 on usage, read or parse errors, 2 if the source
+//length does not fit in an int.
+int main(int argc,char **argv){
+	if(argc!=2){
+		fprintf(stderr,"Pass source file (or '-') as a command-line argument.\n");
+		return 1;
+	}
+	char *source;
+	size_t length=0;
+	if(strcmp(argv[1],"-")==0)source=readstdin(&length);
+	else source=readfile(argv[1],&length);
+	//Both readers return NULL on failure and then leave length untouched.
+	if(!source){
+		fprintf(stderr,"Could not read source from '%s'\n",argv[1]);
+		return 1;
+	}
+
+	//parse() is handed the length as written here; reject sizes that do not
+	//survive a round trip through int.
+	if((size_t)(int)length!=length){
+		fprintf(stderr,"Source file too long!\n");
+		free(source);
+		return 2;
+	}
+
+	ParseRet pr=parse(source,length);
+	if(pr.errstr){
+		fprintf(stderr,"\x1B[1;31m%s\x1B[0m\n",pr.errstr);
+		free(pr.errstr);
+		free(source);
+		return 1;
+	}
+	assert(pr.ast);
+	printf("%p\n",(void*)pr.ast); //%p requires a void* argument
+	char *s=ast_stringify(pr.ast);
+	printf("%s\n",s);
+	free(s);
+	ast_free(pr.ast);
+	free(source); //released last, after everything derived from the parse is gone
+	return 0;
+}
diff --git a/parser.c b/parser.c
index 5069904..d91af7e 100644
--- a/parser.c
+++ b/parser.c
@@ -75,13 +75,14 @@ static Token nexttoken(Cursor *cursor){
if(*cursor->s=='"'){
int i;
- for(i=0;i<cursor->l;i++){
+ for(i=1;i<cursor->l;i++){
if(cursor->s[i]=='"')break;
if(cursor->s[i]=='\\')i++;
}
if(i==cursor->l){
return tt_err("Unclosed string in source");
}
+ i++;
advance(cursor,i);
return tt_make(TT_STRING,cursor->s-i,i);
}
@@ -107,6 +108,15 @@ static Token nexttoken(Cursor *cursor){
}
+//Reports whether c is one of 0-9, a-f or A-F.
+static bool ishexdigit(char c){
+	if(c>='0'&&c<='9')return true;
+	if(c>='a'&&c<='f')return true;
+	return c>='A'&&c<='F';
+}
+
+//Returns the value 0..15 of the hexadecimal digit c.
+//c must be a character accepted by ishexdigit(); other input gives a meaningless result.
+static int hexnum(char c){
+	if(c<='9')return c-'0';
+	//Clearing bit 5 folds ASCII lowercase onto uppercase; 'A' is worth 10, not 0.
+	return (c&~32)-'A'+10;
+}
+
+
static ParseRet pr_ast(AST *ast){
ParseRet pr={ast,NULL};
return pr;
@@ -164,23 +174,63 @@ static ParseRet parse_(Cursor *cursor){
nodes[len++]=pr.ast;
}
return pr_ast(ast_list(len,nodes));
- break;
}
case TT_WORD:
- break;
+ return pr_ast(ast_word(copybufasstring(tok.str,tok.len)));
case TT_QUOTEDWORD:
- break;
+ return pr_ast(ast_symbol(copybufasstring(tok.str,tok.len)));
case TT_NUMBER:
- break;
-
- case TT_STRING:
- break;
+ return pr_ast(ast_number(strtod(tok.str,NULL)));
+
+ case TT_STRING:{
+ assert(tok.len>=2&&tok.str[0]=='"'&&tok.str[tok.len-1]=='"');
+ int len=0;
+ for(int i=1;i<tok.len-1;i++){
+ if(tok.str[i]=='\\'){
+ i++;
+ assert(i<tok.len-1);
+ if(tok.str[i]=='x'){
+ if(i>=tok.len-3||!ishexdigit(tok.str[i+1])||!ishexdigit(tok.str[i+2])){
+ return pr_err_c("\"\\x\" in string needs two hexadecimal digits");
+ }
+ i+=2;
+ }
+ }
+ len++;
+ }
+ char *buf=malloc(len==0?1:len,char);
+ int j=0;
+ for(int i=1;i<tok.len-1;i++){
+ if(tok.str[i]=='\\'){
+ i++;
+ switch(tok.str[i]){
+ case 'x':
+ buf[j++]=16*hexnum(tok.str[i+1])+hexnum(tok.str[i+2]);
+ i+=2;
+ break;
+
+ case 'n': buf[j++]='\n'; break;
+ case 't': buf[j++]='\t'; break;
+ case 'r': buf[j++]='\r'; break;
+ case 'b': buf[j++]='\b'; break;
+ case 'a': buf[j++]='\a'; break;
+ default: buf[j++]=tok.str[i]; break;
+ }
+ } else {
+ buf[j++]=tok.str[i];
+ }
+ }
+ return pr_ast(ast_string(buf,len));
+ }
case TT_ERR:
- break;
+ return pr_err_c(tok.str);
+
+ default:
+ assert(false);
}
}