%{
/*
 * flex scanner specification.
 *
 * Produces the tokens declared in tokens.h and reports lexical errors
 * through the EM_* interface from errormsg.h.  Two exclusive start
 * conditions are used:
 *
 *   string   - entered on an opening double quote; the decoded characters
 *              are accumulated in string_buf and handed to the parser as
 *              yylval.sval when the closing quote is seen.
 *   comment  - entered on a comment opener; commentLevel counts nesting so
 *              that the scanner only returns to INITIAL when the outermost
 *              comment is closed.
 *
 * Every action calls adjust() first so that EM_tokPos / charPos stay in
 * step with the input.
 */
#include <string.h>
#include "util.h"
#include "tokens.h"
#include "errormsg.h"

/* Position of the next unread character; advanced by adjust(). */
int charPos=1;

/* Current comment nesting depth (0 when not inside a comment). */
int commentLevel = 0;

/* Initial capacity, in bytes, of the string-literal buffer. */
const int INITIAL_BUF_LENGTH = 32;

/* Buffer holding the string literal currently being scanned. */
char *string_buf;
/* Not used by this file; retained in case another translation unit
   references it. */
char *string_buf_ptr;
/* Allocated size of string_buf in bytes, including the terminator slot. */
size_t string_buf_cap;
/* Number of characters stored in string_buf, excluding the terminator. */
static size_t string_buf_len;

/*
 * Start a fresh, empty string literal.  A new buffer is allocated each
 * time because the previous one has been handed to the parser through
 * yylval.sval.
 */
void new_string(void)
{
    string_buf = checked_malloc(INITIAL_BUF_LENGTH);
    string_buf[0] = '\0';
    string_buf_cap = INITIAL_BUF_LENGTH;
    string_buf_len = 0;
}

/*
 * Append one character to the current string literal, doubling the buffer
 * when it is full.  The buffer is always kept NUL-terminated.
 *
 * The length is tracked in string_buf_len rather than recomputed with
 * strlen() on every call, so building a literal is linear in its length.
 */
void append(char c)
{
    /* A NUL cannot be represented inside the NUL-terminated sval; the
       earlier strlen()-based implementation effectively dropped it, and
       that behaviour is preserved here. */
    if (c == '\0') {
        return;
    }

    if (string_buf_len + 1 == string_buf_cap) {
        size_t new_cap = string_buf_cap * 2;
        char *grown = checked_malloc(new_cap);

        memcpy(grown, string_buf, string_buf_len + 1);
        /* NOTE(review): the old buffer is not released because this file
           does not show whether checked_malloc() storage may be passed to
           free() - confirm in util.c and free it here if so. */
        string_buf = grown;
        string_buf_cap = new_cap;
    }

    string_buf[string_buf_len++] = c;
    string_buf[string_buf_len] = '\0';
}

/*
 * Return a freshly allocated, NUL-terminated copy of the first len bytes
 * of text.  Used for identifiers: yytext points into a buffer that flex
 * reuses on the next call to yylex(), so the parser must not keep it.
 */
static char *copy_lexeme(const char *text, size_t len)
{
    char *copy = checked_malloc(len + 1);

    memcpy(copy, text, len);
    copy[len] = '\0';
    return copy;
}

/* Called by flex at end of input: reset the position and stop scanning. */
int yywrap(void)
{
    charPos=1;
    return 1;
}

/* Record the start of the current lexeme in EM_tokPos and step past it. */
void adjust(void)
{
    EM_tokPos=charPos;
    charPos+=yyleng;
}
%}

%x string
%x comment

%%

 /* whitespace: flex has no \s class (it would match the letter s), so the
    blank characters are listed explicitly */
[ \t\r]+  {adjust(); continue;}
\n        {adjust(); EM_newline(); continue;}
"/*"      {adjust(); commentLevel++; BEGIN(comment);}

 /* reserved words */
array     {adjust(); return ARRAY;}
if        {adjust(); return IF;}
then      {adjust(); return THEN;}
else      {adjust(); return ELSE;}
while     {adjust(); return WHILE;}
for       {adjust(); return FOR;}
to        {adjust(); return TO;}
do        {adjust(); return DO;}
let       {adjust(); return LET;}
in        {adjust(); return IN;}
end       {adjust(); return END;}
of        {adjust(); return OF;}
break     {adjust(); return BREAK;}
nil       {adjust(); return NIL;}
function  {adjust(); return FUNCTION;}
var       {adjust(); return VAR;}
type      {adjust(); return TYPE;}

 /* punctuation */
","       {adjust(); return COMMA;}
":"       {adjust(); return COLON;}
";"       {adjust(); return SEMICOLON;}
"("       {adjust(); return LPAREN;}
")"       {adjust(); return RPAREN;}
"["       {adjust(); return LBRACK;}
"]"       {adjust(); return RBRACK;}
"{"       {adjust(); return LBRACE;}
"}"       {adjust(); return RBRACE;}
"."       {adjust(); return DOT;}
"+"       {adjust(); return PLUS;}
"-"       {adjust(); return MINUS;}
"*"       {adjust(); return TIMES;}
"/"       {adjust(); return DIVIDE;}
"="       {adjust(); return EQ;}
"!="      {adjust(); return NEQ;}
 /* NOTE(review): the Tiger reference manual spells not-equal as <> ;
    accepted here in addition to != so existing inputs keep working */
"<>"      {adjust(); return NEQ;}
"<"       {adjust(); return LT;}
"<="      {adjust(); return LE;}
">"       {adjust(); return GT;}
">="      {adjust(); return GE;}
"&"       {adjust(); return AND;}
"|"       {adjust(); return OR;}
":="      {adjust(); return ASSIGN;}

 /* strings: an opening quote starts a new buffer and enters the exclusive
    string state; the rules inside decode escapes into that buffer */
\"        {adjust(); new_string(); BEGIN(string);}

<string>{
    \" {
        adjust();
        yylval.sval=string_buf;
        BEGIN(INITIAL);
        return STRING;
    }

    \\[0-7]{3} {
        /* Three-digit numeric escape, read as octal as before.  The
           pattern is limited to octal digits so that 8 and 9 are reported
           by the fallback rule instead of being silently mis-parsed.
           NOTE(review): the Tiger manual defines this escape as decimal -
           confirm which meaning is intended. */
        adjust();
        int value = (yytext[1] - '0') * 64
                  + (yytext[2] - '0') * 8
                  + (yytext[3] - '0');
        if (value > 0xff) {
            EM_error(EM_tokPos, "Invalid ASCII escape sequence");
            yyterminate();
        }
        append((char)value);
    }

    \\n       {adjust(); append('\n');}
    \\t       {adjust(); append('\t');}
    \\r       {adjust(); append('\r');}
    \\b       {adjust(); append('\b');}
    \\f       {adjust(); append('\f');}
    \\\"      {adjust(); append('"');}
    \\\\      {adjust(); append('\\');}

    <<EOF>> {
        /* An EOF action must return or terminate; otherwise the scanner
           never leaves this state. */
        adjust();
        EM_error(EM_tokPos, "string EOF");
        yyterminate();
    }

    \n {
        /* Without this rule a raw newline would reach the default ECHO
           rule and the line table would miss it. */
        adjust();
        EM_error(EM_tokPos, "newline in string");
        EM_newline();
    }

    \\ {
        /* Fallback: a backslash that starts none of the escapes above.
           Longer escape rules win by longest match. */
        adjust();
        EM_error(EM_tokPos, "illegal escape sequence");
    }

    [^\\\n\"] {adjust(); append(*yytext);}
}

 /* comments: nested; only the closer that brings commentLevel back to
    zero returns the scanner to the INITIAL state */
<comment>{
    "*/" {
        adjust();
        commentLevel--;
        if (commentLevel == 0) {
            BEGIN(INITIAL);
        }
    }

    "/*"      {adjust(); commentLevel++;}

    <<EOF>>   {adjust(); EM_error(EM_tokPos, "comment EOF"); yyterminate();}

    \n {
        /* The dot rule below does not match a newline, so it needs its
           own rule to keep positions and the line table correct. */
        adjust();
        EM_newline();
    }

    .         {adjust();}
}

 /* semantic tokens */
[0-9]+    {adjust(); yylval.ival=atoi(yytext); return INT;}

[a-zA-Z][a-zA-Z0-9]* {
    /* Hand the parser its own copy; yytext is overwritten by the next
       call to yylex. */
    adjust();
    yylval.sval=copy_lexeme(yytext, (size_t)yyleng);
    return ID;
}

.         {adjust(); EM_error(EM_tokPos,"illegal token");}