diff options
| -rw-r--r-- | Makefile | 8 | ||||
| -rw-r--r-- | ast.c | 126 | ||||
| -rw-r--r-- | ast.h | 4 | ||||
| -rw-r--r-- | code.lysp | 1 | ||||
| -rw-r--r-- | main.c | 87 | ||||
| -rw-r--r-- | parser.c | 66 | 
6 files changed, 283 insertions, 9 deletions
@@ -1,7 +1,13 @@  CC := gcc -CFLAGS := -Wall -Wextra -std=c11 -O2 -fwrapv +CFLAGS := -Wall -Wextra -std=c11 -fwrapv  BIN := lysp +ifdef DEBUG +	CFLAGS += -g +else +	CFLAGS += -O2 +endif +  .PHONY: all clean remake  all: $(BIN) @@ -1,3 +1,5 @@ +#define _GNU_SOURCE //asprintf +#include <stdio.h>  #include <stdlib.h>  #include <string.h>  #include <assert.h> @@ -22,6 +24,11 @@ void ast_free(AST *ast){  			free(ast->w.word);  			break; +		case AST_STRING: +			assert(ast->S.str); +			free(ast->S.str); +			break; +  		case AST_NUMBER:  		case AST_SYMBOL:  			break; @@ -53,6 +60,9 @@ AST* ast_copy(const AST *ast){  		case AST_NUMBER:  			return ast_number(ast->n.num); +		case AST_STRING: +			return ast_string(copybufasstring(ast->S.str,ast->S.len),ast->S.len); +  		case AST_SYMBOL:{  			assert(ast->s.name);  			AST *sym=ast_symbol(ast->s.name); @@ -66,10 +76,115 @@ AST* ast_copy(const AST *ast){  } +typedef struct Buffer{ +	char *buf; +	int sz,len; +} Buffer; + +static Buffer buf_make(int capacity){ +	assert(capacity>0); +	Buffer buf={malloc(capacity,char),capacity,0}; +	buf.buf[0]='\0'; +	return buf; +} + +static void buf_append(Buffer *buf,const char *str,int len){ +	assert(buf); +	assert(str); +	assert(len>=0); +	if(len==0)return; +	if(buf->len+len>buf->sz-1){ +		do buf->sz*=2; +		while(buf->len+len>buf->sz-1); +		buf->buf=realloc(buf->buf,buf->sz,char); +	} +	memcpy(buf->buf+buf->len,str,len); +	buf->len+=len; +	buf->buf[buf->len]='\0'; +} + +static char hexchar(int n){ +	assert(n>=0&&n<16); +	if(n<10)return n+'0'; +	return n-10+'a'; +} + +static void ast_stringify_(const AST *ast,Buffer *buf){ +	assert(ast); +	assert(buf); +	switch(ast->type){ +		case AST_LIST: +			if(ast->l.quoted)buf_append(buf,"'",1); +			buf_append(buf,"(",1); +			for(int i=0;i<ast->l.len;i++){ +				if(i!=0)buf_append(buf," ",1); +				ast_stringify_(ast->l.nodes[i],buf); +			} +			buf_append(buf,")",1); +			break; + +		case AST_WORD: +			buf_append(buf,ast->w.word,strlen(ast->w.word)); +			break; + +		case AST_NUMBER:{ +			char *s; +			int len=asprintf(&s,"%g",ast->n.num); +			if(!s)outofmem(); +			buf_append(buf,s,len); +			free(s); +			break; +		} + +		case AST_STRING:{ +			buf_append(buf,"\"",1); +			const char *str=ast->S.str; +			for(int i=0;i<ast->S.len;i++){ +				if(str[i]>=32&&str[i]<=126)buf_append(buf,str+i,1); +				else switch(str[i]){ +					case '\n': buf_append(buf,"\\n",2); break; +					case '\t': buf_append(buf,"\\t",2); break; +					case '\r': buf_append(buf,"\\r",2); break; +					case '\b': buf_append(buf,"\\b",2); break; +					case '\a': buf_append(buf,"\\a",2); break; +					default:{ +						char hexbuf[4]; +						hexbuf[0]='\\'; +						hexbuf[1]='x'; +						hexbuf[2]=hexchar((unsigned char)str[i]/16); +						hexbuf[3]=hexchar((unsigned char)str[i]%16); +						buf_append(buf,hexbuf,4); +						break; +					} +				} +			} +			buf_append(buf,"\"",1); +			break; +		} + +		case AST_SYMBOL: +			buf_append(buf,"'",1); +			buf_append(buf,ast->s.name,strlen(ast->s.name)); +			break; + +		default: +			assert(false); +	} +} + +char* ast_stringify(const AST *ast){ +	assert(ast); +	Buffer buf=buf_make(32); +	ast_stringify_(ast,&buf); +	return buf.buf; +} + +  AST* ast_list(int len,AST **nodes){  	assert(len>=0);  	assert(nodes);  	AST *ast=malloc(1,AST); +	ast->type=AST_LIST;  	ast->l.len=len;  	ast->l.nodes=malloc(len,AST*);  	memcpy(ast->l.nodes,nodes,len*sizeof(AST*)); @@ -80,19 +195,30 @@ AST* ast_list(int len,AST **nodes){  AST* ast_word(char *word){  	assert(word);  	AST *ast=malloc(1,AST); +	ast->type=AST_WORD;  	ast->w.word=word;  	return ast;  }  AST* ast_number(double num){  	AST *ast=malloc(1,AST); +	ast->type=AST_NUMBER;  	ast->n.num=num;  	return ast;  } +AST* ast_string(char *str,int len){ +	AST *ast=malloc(1,AST); +	ast->type=AST_STRING; +	ast->S.str=str; +	ast->S.len=len; +	return ast; +} +  AST* ast_symbol(char *name){  	assert(name);  	AST *ast=malloc(1,AST); +	ast->type=AST_SYMBOL;  	ast->s.name=name;  	ast->s.symid=-1;  	return ast; @@ -39,6 +39,7 @@ typedef struct ASTsymbol{  	//if you're not the interpreter:  	// if you just allocated the ASTsymbol yourself, set symid to -1;  	// else, leave symid alone. +	//You should probably use ast_symbol(), in which case you don't have to do anything.  } ASTsymbol;  struct AST{ @@ -57,7 +58,10 @@ void ast_free(AST *ast);  AST* ast_copy(const AST *ast); +char* ast_stringify(const AST *ast); +  AST* ast_list(int len,AST **nodes); //these convenience functions DO NOT copy their arguments  AST* ast_word(char *word);  AST* ast_number(double num); +AST* ast_string(char *str,int len);  AST* ast_symbol(char *name); diff --git a/code.lysp b/code.lysp new file mode 100644 index 0000000..cebf2e1 --- /dev/null +++ b/code.lysp @@ -0,0 +1 @@ +(print (+ 1 (% 10 3)) 'kaas "kazen enzo") @@ -0,0 +1,87 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "parser.h" +#include "util.h" + + +char* readfile(const char *fname,size_t *length){ +	FILE *f=fopen(fname,"rb"); +	if(!f)return NULL; +	if(fseek(f,0,SEEK_END)==-1){fclose(f); return NULL;} +	long flen=ftell(f); +	if(flen==-1){fclose(f); return NULL;} +	rewind(f); + +	char *buf=malloc(flen+1,char); +	fread(buf,1,flen,f); +	if(ferror(f)){fclose(f); free(buf); return NULL;} +	if(memchr(buf,'\0',flen)!=NULL){ +		fprintf(stderr,"Invalid null char in file '%s'\n",fname); +		exit(1); +	} +	buf[flen]='\0'; +	fclose(f); + +	*length=flen; +	return buf; +} + +char *readstdin(size_t *length){ +	int bufsz=1024,cursor=0; +	char *buf=malloc(bufsz,char); +	while(true){ +		if(cursor==bufsz-1){ +			bufsz*=2; +			buf=realloc(buf,bufsz,char); +		} +		int nread=fread(buf,1,bufsz-cursor-1,stdin); +		if(nread>0&&memchr(buf,'\0',nread)!=NULL){ +			fprintf(stderr,"Invalid null char on stdin file\n"); +			exit(1); +		} +		cursor+=nread; +		if(nread<bufsz-cursor-1){ +			if(feof(stdin))break; +			if(ferror(stdin)){ +				free(buf); +				return NULL; +			} +		} +	} +	buf[cursor]='\0'; +	*length=cursor; +	return buf; +} + + +int main(int argc,char **argv){ +	if(argc!=2){ +		fprintf(stderr,"Pass source file (or '-') as a command-line argument.\n"); +		return 1; +	} +	char *source; +	size_t length; +	if(strcmp(argv[1],"-")==0)source=readstdin(&length); +	else source=readfile(argv[1],&length); + +	if((size_t)(int)length!=length){ +		fprintf(stderr,"Source file too long!\n"); +		return 2; +	} + +	ParseRet pr=parse(source,length); +	if(pr.errstr){ +		fprintf(stderr,"\x1B[1;31m%s\x1B[0m\n",pr.errstr); +		free(pr.errstr); +		return 1; +	} +	assert(pr.ast); +	printf("%p\n",pr.ast); +	char *s=ast_stringify(pr.ast); +	printf("%s\n",s); +	free(s); +	ast_free(pr.ast); +} @@ -75,13 +75,14 @@ static Token nexttoken(Cursor *cursor){  	if(*cursor->s=='"'){  		int i; -		for(i=0;i<cursor->l;i++){ +		for(i=1;i<cursor->l;i++){  			if(cursor->s[i]=='"')break;  			if(cursor->s[i]=='\\')i++;  		}  		if(i==cursor->l){  			return tt_err("Unclosed string in source");  		} +		i++;  		advance(cursor,i);  		return tt_make(TT_STRING,cursor->s-i,i);  	} @@ -107,6 +108,15 @@ static Token nexttoken(Cursor *cursor){  } +static bool ishexdigit(char c){ +	return (c>='0'&&c<='9')||(c>='a'&&c<='f')||(c>='A'&&c<='F'); +} + +static int hexnum(char c){ +	return c<='9'?c-'0':(c&~32)-'A'; +} + +  static ParseRet pr_ast(AST *ast){  	ParseRet pr={ast,NULL};  	return pr; @@ -164,23 +174,63 @@ static ParseRet parse_(Cursor *cursor){  				nodes[len++]=pr.ast;  			}  			return pr_ast(ast_list(len,nodes)); -			break;  		}  		case TT_WORD: -			break; +			return pr_ast(ast_word(copybufasstring(tok.str,tok.len)));  		case TT_QUOTEDWORD: -			break; +			return pr_ast(ast_symbol(copybufasstring(tok.str,tok.len)));  		case TT_NUMBER: -			break; +			return pr_ast(ast_number(strtod(tok.str,NULL))); + +		case TT_STRING:{ +			assert(tok.len>=2&&tok.str[0]=='"'&&tok.str[tok.len-1]=='"'); +			int len=0; +			for(int i=1;i<tok.len-1;i++){ +				if(tok.str[i]=='\\'){ +					i++; +					assert(i<tok.len-1); +					if(tok.str[i]=='x'){ +						if(i>=tok.len-3||!ishexdigit(tok.str[i+1])||!ishexdigit(tok.str[i+2])){ +							return pr_err_c("\"\\x\" in string needs two hexadecimal digits"); +						} +						i+=2; +					} +				} +				len++; +			} +			char *buf=malloc(len==0?1:len,char); +			int j=0; +			for(int i=1;i<tok.len-1;i++){ +				if(tok.str[i]=='\\'){ +					i++; +					switch(tok.str[i]){ +						case 'x': +							buf[j++]=16*hexnum(tok.str[i+1])+hexnum(tok.str[i+2]); +							i+=2; +							break; -		case TT_STRING: -			break; +						case 'n': buf[j++]='\n'; break; +						case 't': buf[j++]='\t'; break; +						case 'r': buf[j++]='\r'; break; +						case 'b': buf[j++]='\b'; break; +						case 'a': buf[j++]='\a'; break; +						default: buf[j++]=tok.str[i]; break; +					} +				} else { +					buf[j++]=tok.str[i]; +				} +			} +			return pr_ast(ast_string(buf,len)); +		}  		case TT_ERR: -			break; +			return pr_err_c(tok.str); + +		default: +			assert(false);  	}  }  | 
