/* Flex scanner specification (file listing: 155 lines, 3.4 KiB, plaintext). */
%{
|
|
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "util.h"
#include "tokens.h"
#include "errormsg.h"
|
|
|
|
/* Running character position; adjust() copies it into EM_tokPos and then
 * advances it by yyleng, and yywrap() resets it to 1. */
int charPos = 1;

/* Current comment nesting depth: incremented on every comment opener and
 * decremented on every closer; the scanner leaves the comment state when it
 * returns to 0. */
int commentLevel = 0;

/* Initial capacity, in bytes, of the string-literal buffer. */
const int INITIAL_BUF_LENGTH = 32;

/* NUL-terminated buffer holding the string literal being scanned, plus its
 * allocated capacity.  string_buf_ptr is not referenced by any code in the
 * visible source. */
char *string_buf, *string_buf_ptr;
size_t string_buf_cap;
|
|
|
|
void new_string(void) {
|
|
string_buf = checked_malloc(INITIAL_BUF_LENGTH);
|
|
*string_buf = '\0';
|
|
string_buf_cap = INITIAL_BUF_LENGTH;
|
|
}
|
|
|
|
void append(char c) {
|
|
size_t length = strlen(string_buf);
|
|
|
|
if (length + 1 == string_buf_cap) {
|
|
size_t new_cap = string_buf_cap * 2;
|
|
char *new_string_buf = checked_malloc(new_cap);
|
|
memcpy(new_string_buf, string_buf, string_buf_cap);
|
|
string_buf = new_string_buf;
|
|
string_buf_cap = new_cap;
|
|
}
|
|
string_buf[length] = c;
|
|
string_buf[length + 1] = '\0';
|
|
}
|
|
|
|
int yywrap(void)
|
|
{
|
|
charPos=1;
|
|
return 1;
|
|
}
|
|
|
|
void adjust(void)
|
|
{
|
|
EM_tokPos=charPos;
|
|
charPos+=yyleng;
|
|
}
|
|
|
|
%}
|
|
|
|
/* Exclusive start conditions: "string" is active between the quotes of a
 * string literal, "comment" while inside a (possibly nested) comment. */
%x string comment
|
|
%%
|
|
|
|
[ \t\r]+ {
    /* Skip runs of blanks, tabs and carriage returns.
     * The former pattern \s is not a whitespace class in flex: a backslash
     * followed by a letter that is not a C escape matches that letter, so it
     * swallowed a lone identifier s and left tabs to the illegal-token rule. */
    adjust();
    continue;
}
\n {
    /* Line break: record the position, then tell the error module that a
     * new line starts here. */
    adjust();
    EM_newline();
    continue;
}
"/*" {
    /* Comment opener: enter the comment start condition at depth 1. */
    adjust();
    commentLevel++;
    BEGIN(comment);
}
|
|
|
|
"array" {
    /* Reserved words.  Each rule is a literal match and all of them are
     * listed before the identifier rule; flex breaks equal-length ties in
     * favour of the earlier rule, so a keyword yields its own token rather
     * than ID. */
    adjust();
    return ARRAY;
}
"if"        { adjust(); return IF; }
"then"      { adjust(); return THEN; }
"else"      { adjust(); return ELSE; }
"while"     { adjust(); return WHILE; }
"for"       { adjust(); return FOR; }
"to"        { adjust(); return TO; }
"do"        { adjust(); return DO; }
"let"       { adjust(); return LET; }
"in"        { adjust(); return IN; }
"end"       { adjust(); return END; }
"of"        { adjust(); return OF; }
"break"     { adjust(); return BREAK; }
"nil"       { adjust(); return NIL; }
"function"  { adjust(); return FUNCTION; }
"var"       { adjust(); return VAR; }
"type"      { adjust(); return TYPE; }
|
|
|
|
|
|
"," {
    /* Punctuation and operators.  Two-character operators win over their
     * one-character prefixes because flex prefers the longest match. */
    adjust();
    return COMMA;
}
":"   { adjust(); return COLON; }
";"   { adjust(); return SEMICOLON; }
"("   { adjust(); return LPAREN; }
")"   { adjust(); return RPAREN; }
"["   { adjust(); return LBRACK; }
"]"   { adjust(); return RBRACK; }
"{"   { adjust(); return LBRACE; }
"}"   { adjust(); return RBRACE; }
"."   { adjust(); return DOT; }
"+"   { adjust(); return PLUS; }
"-"   { adjust(); return MINUS; }
"*"   { adjust(); return TIMES; }
"/"   { adjust(); return DIVIDE; }
"="   { adjust(); return EQ; }
"<>" {
    /* NOTE(review): the token set matches the Tiger language, whose
     * reference manual spells not-equal this way; without this rule the
     * operator was split into LT followed by GT.  Confirm the target
     * language.  The != spelling below is kept for existing inputs. */
    adjust();
    return NEQ;
}
"!="  { adjust(); return NEQ; }
"<"   { adjust(); return LT; }
"<="  { adjust(); return LE; }
">"   { adjust(); return GT; }
">="  { adjust(); return GE; }
"&"   { adjust(); return AND; }
"|"   { adjust(); return OR; }
":="  { adjust(); return ASSIGN; }
|
|
|
|
\" {
    /* Opening quote: start a fresh accumulation buffer and switch to the
     * string start condition. */
    adjust();
    new_string();
    BEGIN(string);
}
<string>{
    \" {
        /* Closing quote: hand the accumulated buffer to the parser and
         * return to normal scanning. */
        adjust();
        yylval.sval = string_buf;
        BEGIN(INITIAL);
        return STRING;
    }
    \\[0-9]{3} {
        /* Numeric escape, read as three octal digits.  The pattern admits
         * 8 and 9, which the former sscanf with %o could not convert (it
         * stopped early or left the result uninitialised, and it wrote an
         * unsigned value through an int pointer); such digits are now
         * rejected with the same error as an out-of-range code.
         * NOTE(review): if the target language is Tiger, its manual defines
         * this escape as a decimal code -- confirm the intended base. */
        unsigned int code = 0;
        int valid = 1;
        int i;

        adjust();
        for (i = 1; i <= 3; i++) {
            if (yytext[i] > '7') {
                valid = 0;
            }
            code = code * 8 + (unsigned int)(yytext[i] - '0');
        }
        if (!valid || code > 0xff) {
            EM_error(EM_tokPos, "Invalid ASCII escape sequence");
            yyterminate();
        }
        append((char)code);
    }
    \\n     { adjust(); append('\n'); }
    \\t     { adjust(); append('\t'); }
    \\r     { adjust(); append('\r'); }
    \\b     { adjust(); append('\b'); }
    \\f     { adjust(); append('\f'); }
    \\\"    { adjust(); append('"'); }
    \\\\    { adjust(); append('\\'); }
    <<EOF>> {
        /* Input ended inside a string literal.  An end-of-file action has to
         * return or terminate; without yyterminate the scanner kept
         * re-entering this rule forever. */
        adjust();
        EM_error(EM_tokPos, "string EOF");
        yyterminate();
    }
    [^\\\n\"] {
        /* Ordinary character.
         * NOTE(review): a raw newline or an unlisted backslash escape matches
         * no rule in this start condition, so the default flex rule echoes
         * it to yyout without calling adjust -- decide how those should be
         * reported. */
        adjust();
        append(*yytext);
    }
}
|
|
|
|
<comment>{
    "*/" {
        /* Comment closer: drop one nesting level and resume normal scanning
         * once the outermost comment has ended. */
        adjust();
        commentLevel--;
        if (commentLevel == 0) {
            BEGIN(INITIAL);
        }
    }
    "/*" {
        /* Nested comment opener: go one level deeper. */
        adjust();
        commentLevel++;
    }
    \n {
        /* The dot rule below does not match a newline, so line breaks inside
         * comments used to reach the default echo rule: they were printed
         * and neither the position nor the line table was updated. */
        adjust();
        EM_newline();
    }
    <<EOF>> {
        /* Input ended inside an unterminated comment. */
        adjust();
        EM_error(EM_tokPos, "comment EOF");
        yyterminate();
    }
    . {
        /* Any other character of the comment body is skipped. */
        adjust();
    }
}
|
|
|
|
[0-9]+ {
    /* Integer literal.  strtol replaces atoi, whose behaviour is undefined
     * when the value does not fit; a literal beyond INT_MAX is reported and
     * clamped so that scanning can go on. */
    long value;

    adjust();
    errno = 0;
    value = strtol(yytext, NULL, 10);
    if (errno == ERANGE || value > INT_MAX) {
        EM_error(EM_tokPos, "integer literal out of range");
        value = INT_MAX;
    }
    yylval.ival = (int)value;
    return INT;
}
[a-zA-Z][a-zA-Z0-9]* {
    /* Identifier.  yytext points into the scanner buffer and is overwritten
     * by the next match, so the parser receives a private copy instead of
     * the raw pointer.  The copy is owned by whoever consumes yylval. */
    adjust();
    yylval.sval = strcpy(checked_malloc(yyleng + 1), yytext);
    return ID;
}
. {
    /* Anything no earlier rule accepted. */
    adjust();
    EM_error(EM_tokPos, "illegal token");
}
|
|
|
|
|