diff options
| -rw-r--r-- | LANGUAGE.txt | 26 | ||||
| -rwxr-xr-x | genops.js | 88 | ||||
| -rw-r--r-- | parser.c | 131 | ||||
| -rw-r--r-- | parser.h | 9 | 
4 files changed, 230 insertions, 24 deletions
diff --git a/LANGUAGE.txt b/LANGUAGE.txt index 9d72f08..0e7066b 100644 --- a/LANGUAGE.txt +++ b/LANGUAGE.txt @@ -1,21 +1,21 @@  Statements are terminated by ';'.  The usual infix expression rules apply, with the following precedence table: -(lower precedence number means tighter binding) +(higher precedence number means tighter binding)   Operators   Precedence   Associativity -    **            1           Right -    - ! ~         2        Prefix unary -    * / %         3           Left -    + -           4           Left -    &             5           Left -    ^             6           Left +    **           14           Right +    - ! ~        12          Prefix (unary) + * / // %        11           Left +    + -          10           Left +    &             9           Left +    ^             8           Left      |             7           Left - < > <= >=        8       Nonassociative -   == !=          9       Nonassociative -    &&           10           Left (short-circuiting) -    ^^           11           Left -    ||           12           Left (short-circuiting) -    =            13           Right  (also += -= *= /= %= **= &= ^= |=) + < > <= >=        6       Nonassociative +   == !=          5       Nonassociative +    &&            4           Left (short-circuiting) +    ^^            3           Left +    ||            2           Left (short-circuiting) +    =             1           Right  (also += -= *= /= %= **= &= ^= |=)  break and continue get parsed to calls to the __break() and __continue() diff --git a/genops.js b/genops.js new file mode 100755 index 0000000..33b688d --- /dev/null +++ b/genops.js @@ -0,0 +1,88 @@ +#!/usr/bin/env node +const fs=require("fs"); + + +function print(/*arguments*/){ +	//console.log.apply(console,arguments); +	process.stdout.write.apply(process.stdout,arguments); +} + +function pad(s,w,c){ +	s=s+""; +	if(s.length>=Math.abs(w))return s; +	if(w<0)return s+Array(-w-s.length+1).join(c?c:" "); +	else return Array(w-s.length+1).join(c?c:" ")+s; +} + + +function readopmap(fname){ +	let table=String(fs.readFileSync(fname)).split("\n"); +	let i; +	for(i=0;i<table.length;i++){ +		if(/Oper.*Prec.*Assoc/.test(table[i]))break; +	} +	let header=table[i]; +	table=table.slice(i+1); +	for(i=0;i<table.length;i++){ +		if(table[i].length==0)break; +	} +	table=table.slice(0,i); + +	let opsidx=header.indexOf("Oper"), +	    precidx=header.indexOf("Prec"), +	    associdx=header.indexOf("Assoc"); + +	let opmap={}; + +	for(let row of table){ +		let ops=row.slice(opsidx,precidx).trim().split(" "), +		    prec=parseInt(row.slice(precidx,associdx).trim(),10), +		    assoc=row.slice(associdx).replace(/ *([^ ]+).*/,"$1"); +		for(let op of ops){ +			opmap[op]={prec,assoc}; +		} +	} +	return opmap; +} + +function outputfunc(opmap,name,gen,padw){ +	print("int "+name+"(const char *op){\n"); +	print("\tswitch(op[0]){\n"); + +	let firstchars={}; +	for(let k in opmap){ +		if(firstchars[k[0]])firstchars[k[0]].push(k); +		else firstchars[k[0]]=[k]; +	} +	let arr=[]; +	for(k in firstchars){ +		arr.push([k,firstchars[k].sort()]); +	} +	arr=arr.sort(); +	for(let tup of arr){ +		let k=tup[0],ops=tup[1]; +		print("\t\tcase '"+k+"': return "); +		for(let op of ops){ +			let cond=""; +			for(j=1;j<op.length;j++)cond+="op["+j+"]=='"+op[j]+"'&&"; +			cond+="op["+j+"]=='\\0'"; +			print(cond+" ? "+pad(gen(opmap[op]),padw)+" : "); +		} +		print("-1;\n"); +	} + +	print("\t\tdefault: return -1;\n"); +	print("\t}\n"); +	print("}\n"); +} + + +const opmap=readopmap("LANGUAGE.txt"); +outputfunc(opmap,"precedence",o=>o.prec,2); +print("\n"); +const assocenum={ +	"Prefix": "AS_PREFIX", "Suffix": "AS_SUFFIX", +	"Left": "AS_LEFT", "Right": "AS_RIGHT", +	"Nonassociative": "AS_NONASSOC" +}; +outputfunc(opmap,"associativity",o=>assocenum[o.assoc],-11); @@ -7,6 +7,115 @@  #include "parser.h" +typedef enum Tokentype{ +	TT_NUM, +	TT_STR, +	TT_WORD, +	TT_SYM +} Tokentype; + +typedef struct Token{ +	const char *str; +	int len; +} Token; + +Token nexttoken(const char **sourcep){ +	const char *source=*sourcep; +	while(isspace(*source))source++; +	if(isdigit(*source)||(*source=='-'&&isdigit(source[1]))){ +		char *endp; +		strtod(source,&endp); +		assert(endp!=source); +		Token tok={source,endp-source}; +		return tok; +	} +} + + +int precedence(const char *op){ +	switch(op[0]){ +		case '!': return op[1]=='\0' ? 12 : op[1]=='='&&op[2]=='\0' ?  5 : -1; +		case '%': return op[1]=='\0' ? 11 : -1; +		case '&': return op[1]=='\0' ?  9 : op[1]=='&'&&op[2]=='\0' ?  4 : -1; +		case '*': return op[1]=='\0' ? 11 : op[1]=='*'&&op[2]=='\0' ? 14 : -1; +		case '+': return op[1]=='\0' ? 10 : -1; +		case '-': return op[1]=='\0' ? 10 : -1; +		case '/': return op[1]=='\0' ? 11 : op[1]=='/'&&op[2]=='\0' ? 11 : -1; +		case '<': return op[1]=='\0' ?  6 : op[1]=='='&&op[2]=='\0' ?  6 : -1; +		case '=': return op[1]=='\0' ?  1 : op[1]=='='&&op[2]=='\0' ?  5 : -1; +		case '>': return op[1]=='\0' ?  6 : op[1]=='='&&op[2]=='\0' ?  6 : -1; +		case '^': return op[1]=='\0' ?  8 : op[1]=='^'&&op[2]=='\0' ?  3 : -1; +		case '|': return op[1]=='\0' ?  7 : op[1]=='|'&&op[2]=='\0' ?  2 : -1; +		case '~': return op[1]=='\0' ? 12 : -1; +		default: return -1; +	} +} + +int associativity(const char *op){ +	switch(op[0]){ +		case '!': return op[1]=='\0' ? AS_PREFIX   : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; +		case '%': return op[1]=='\0' ? AS_LEFT     : -1; +		case '&': return op[1]=='\0' ? AS_LEFT     : op[1]=='&'&&op[2]=='\0' ? AS_LEFT     : -1; +		case '*': return op[1]=='\0' ? AS_LEFT     : op[1]=='*'&&op[2]=='\0' ? AS_RIGHT    : -1; +		case '+': return op[1]=='\0' ? AS_LEFT     : -1; +		case '-': return op[1]=='\0' ? AS_LEFT     : -1; +		case '/': return op[1]=='\0' ? AS_LEFT     : op[1]=='/'&&op[2]=='\0' ? AS_LEFT     : -1; +		case '<': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; +		case '=': return op[1]=='\0' ? AS_RIGHT    : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; +		case '>': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; +		case '^': return op[1]=='\0' ? AS_LEFT     : op[1]=='^'&&op[2]=='\0' ? AS_LEFT     : -1; +		case '|': return op[1]=='\0' ? AS_LEFT     : op[1]=='|'&&op[2]=='\0' ? AS_LEFT     : -1; +		case '~': return op[1]=='\0' ? AS_PREFIX   : -1; +		default: return -1; +	} +} + + +static bool parsecomment(const char *source,int *reslen){ +	int cursor=0; +	if(source[cursor]!='#')return false; +	if(source[cursor+1]=='#'&&source[cursor+2]=='#'){ +		cursor+=3; +		while(source[cursor]&& +			  (source[cursor]!='#'||source[cursor+1]!='#'||source[cursor+2]!='#')){ +			cursor++; +		} +		if(!source[cursor])return false; //unclosed block comment +		cursor+=2; +	} else { +		while(source[cursor]&&source[cursor]!='\n')cursor++; +		if(source[cursor])cursor++; +	} +	*reslen=cursor; +	return true; +} + +static void parseintermediate(const char *source,int *reslen){ +	int cursor=0; +	bool acted; +	do { +		acted=false; +		while(source[cursor]&&isspace(source[cursor])){ +			cursor++; +			acted=true; +		} +		int partlen; +		if(parsecomment(source+cursor,&partlen)){ +			cursor+=partlen; +			acted=true; +		} +	} while(acted); +	*reslen=cursor; +} + +static AST* parseexpr(const char *source,int *reslen,int minprec){ +	; +} + +static AST* parsestmt(const char *source,int *reslen){ +	return parseexpr(source,reslen,0); +} +  ASTblock* parse(const char *source){  	ASTblock *bl=malloc(sizeof(ASTblock));  	int sz=32; @@ -21,7 +130,7 @@ ASTblock* parse(const char *source){  			bl->exprs=realloc(bl->exprs,sz*sizeof(AST*));  			if(!bl->exprs)outofmem();  		} -		while(source[cursor]&isspace(source[cursor]))cursor++; +		parseintermediate(source+cursor,&reslen);  		if(!source[cursor])break;  		AST *node=parsestmt(source+cursor,&reslen);  		if(!node){ @@ -34,15 +143,15 @@ ASTblock* parse(const char *source){  	return bl;  } -void ast_free(AST *ast){ -	switch(ast->type){ -		case AST_BLOCK:{ ASTblock *ast=ast; +void ast_free(AST *ast_){ +	switch(ast_->type){ +		case AST_BLOCK:{ ASTblock *ast=(ASTblock*)ast_;  			for(int i=0;i<ast->len;i++)if(ast->exprs[i])ast_free(ast->exprs[i]);  			free(ast->exprs);  			break;  		} -		case AST_OP:{ ASTop *ast=ast; +		case AST_OP:{ ASTop *ast=(ASTop*)ast_;  			if(ast->left)ast_free(ast->left);  			if(ast->right)ast_free(ast->right);  			break; @@ -51,35 +160,35 @@ void ast_free(AST *ast){  		case AST_NUM:  			break; -		case AST_STR:{ ASTstr *ast=ast; +		case AST_STR:{ ASTstr *ast=(ASTstr*)ast_;  			if(ast->str)free(ast->str);  			break;  		} -		case AST_VAR:{ ASTvar *ast=ast; +		case AST_VAR:{ ASTvar *ast=(ASTvar*)ast_;  			if(ast->name)free(ast->name);  			break;  		} -		case AST_CALL:{ ASTcall *ast=ast; +		case AST_CALL:{ ASTcall *ast=(ASTcall*)ast_;  			if(ast->func)free(ast->func);  			for(int i=0;i<ast->nargs;i++)if(ast->args[i])ast_free(ast->args[i]);  			free(ast->args);  			break;  		} -		case AST_IF:{ ASTif *ast=ast; +		case AST_IF:{ ASTif *ast=(ASTif*)ast_;  			if(ast->cond)free(ast->cond);  			if(ast->thenb)free(ast->thenb);  			if(ast->elseb)free(ast->elseb);  			break;  		} -		case AST_WHILE:{ ASTwhile *ast=ast; +		case AST_WHILE:{ ASTwhile *ast=(ASTwhile*)ast_;  			if(ast->cond)free(ast->cond);  			if(ast->body)free(ast->body);  			break;  		}  	} -	free(ast); +	free(ast_);  } @@ -77,5 +77,14 @@ typedef struct AST{  } AST; +typedef enum Associativity{ +	AS_PREFIX, +	AS_SUFFIX, +	AS_LEFT, +	AS_RIGHT, +	AS_NONASSOC +} Associativity; + +  ASTblock* parse(const char *source);  void ast_free(AST *ast);  | 
