2024-12-11 15:28:32 -05:00

155 lines
3.4 KiB
Plaintext

%{
#include <stdlib.h>
#include <string.h>
#include "util.h"
#include "tokens.h"
#include "errormsg.h"
/* Running character offset of the next token; starts at 1 and is advanced by adjust(). */
int charPos=1;
/* Nesting depth of block comments: bumped on each opener, dropped on each closer. */
int commentLevel = 0;
/* Capacity, in bytes, given to every freshly started string-literal buffer. */
const int INITIAL_BUF_LENGTH = 32;
/* Text of the string literal currently being scanned; always NUL-terminated. */
char *string_buf;
/* Write cursor: points at the terminating NUL inside string_buf. */
char *string_buf_ptr;
/* Number of bytes currently allocated for string_buf. */
size_t string_buf_cap;

/*
 * Begin a new, empty string literal.
 *
 * A fresh buffer is allocated on every call. The previous buffer is not
 * released here because the closing-quote rule stores that pointer in
 * yylval.sval, so it may still be in use by the caller of the scanner.
 */
void new_string(void) {
    string_buf = checked_malloc(INITIAL_BUF_LENGTH);
    string_buf_cap = INITIAL_BUF_LENGTH;
    string_buf_ptr = string_buf;
    *string_buf_ptr = '\0';
}

/*
 * Append one character to the string literal under construction.
 *
 * c: character to add. The buffer doubles in size whenever the character
 *    plus the terminating NUL would no longer fit.
 *
 * new_string() must have been called first.
 */
void append(char c) {
    /* The cursor already knows the length, so no strlen() pass per character. */
    size_t length = (size_t)(string_buf_ptr - string_buf);

    /* The token value is a NUL-terminated string, so a NUL cannot be
     * represented inside it and is dropped. */
    if (c == '\0') {
        return;
    }

    if (length + 1 == string_buf_cap) {
        size_t new_cap = string_buf_cap * 2;
        char *new_string_buf = checked_malloc(new_cap);

        memcpy(new_string_buf, string_buf, length + 1);
        /* The partially built buffer has not been handed to anyone yet,
         * so the old copy can be released instead of leaked. */
        free(string_buf);
        string_buf = new_string_buf;
        string_buf_ptr = new_string_buf + length;
        string_buf_cap = new_cap;
    }

    *string_buf_ptr++ = c;
    *string_buf_ptr = '\0';
}
/*
 * End-of-input hook invoked by the scanner.
 *
 * Rewinds the character counter to 1 and returns 1, which tells the
 * scanner that no further input follows.
 */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    charPos = 1;
    return NO_MORE_INPUT;
}
/*
 * Position bookkeeping performed by every rule action.
 *
 * Publishes the offset at which the current match starts through EM_tokPos,
 * then moves charPos past the matched text (yyleng characters).
 */
void adjust(void)
{
    const int token_start = charPos;

    charPos = token_start + yyleng;
    EM_tokPos = token_start;
}
%}
/* Exclusive start condition entered after an opening double quote; only the <string> rules apply. */
%x string
/* Exclusive start condition entered after a comment opener; only the <comment> rules apply. */
%x comment
%%
    /* Whitespace, line breaks, and comment openers. */
[ \t\r\f]+ {
    /* flex has no \s shorthand: that pattern matches the letter s, which
       silently swallowed a lone identifier s and left tabs and carriage
       returns to the illegal-token rule. An explicit class skips blanks. */
    adjust();
    continue;
}
\n         { /* Record the line start so error positions stay accurate. */ adjust(); EM_newline(); continue; }
"/*"       { /* Comment opener: track the depth and switch to the comment rules. */ adjust(); commentLevel++; BEGIN(comment); }
    /* Reserved words. */
array {
    /* Reserved words are listed ahead of the identifier rule so that they
       win when both match the same number of characters. */
    adjust();
    return ARRAY;
}
if          { adjust(); return IF; }
then        { adjust(); return THEN; }
else        { adjust(); return ELSE; }
while       { adjust(); return WHILE; }
for         { adjust(); return FOR; }
to          { adjust(); return TO; }
do          { adjust(); return DO; }
let         { adjust(); return LET; }
in          { adjust(); return IN; }
end         { adjust(); return END; }
of          { adjust(); return OF; }
break       { adjust(); return BREAK; }
nil         { adjust(); return NIL; }
function    { adjust(); return FUNCTION; }
var         { adjust(); return VAR; }
type        { adjust(); return TYPE; }
    /* Punctuation and operators. Two-character operators need no special
       ordering because the scanner always prefers the longest match. */
","         { adjust(); return COMMA; }
":"         { adjust(); return COLON; }
";"         { adjust(); return SEMICOLON; }
"("         { adjust(); return LPAREN; }
")"         { adjust(); return RPAREN; }
"["         { adjust(); return LBRACK; }
"]"         { adjust(); return RBRACK; }
"{"         { adjust(); return LBRACE; }
"}"         { adjust(); return RBRACE; }
"."         { adjust(); return DOT; }
"+"         { adjust(); return PLUS; }
"-"         { adjust(); return MINUS; }
"*"         { adjust(); return TIMES; }
"/"         { adjust(); return DIVIDE; }
"="         { adjust(); return EQ; }
"<>"        { /* Tiger spells not-equal this way; without this rule it lexed as LT GT. */ adjust(); return NEQ; }
"!="        { /* Non-standard spelling, kept so existing inputs still scan. */ adjust(); return NEQ; }
"<"         { adjust(); return LT; }
"<="        { adjust(); return LE; }
">"         { adjust(); return GT; }
">="        { adjust(); return GE; }
"&"         { adjust(); return AND; }
"|"         { adjust(); return OR; }
":="        { adjust(); return ASSIGN; }
    /* String literals: the opening quote starts a fresh buffer and hands
       control to the string rules. */
\"          { adjust(); new_string(); BEGIN(string); }
<string>{
    \" {
        /* Closing quote: publish the collected text and resume normal scanning. */
        adjust();
        yylval.sval = string_buf;
        BEGIN(INITIAL);
        return STRING;
    }
    \\[0-9]{3} {
        /* Backslash followed by three decimal digits gives the character code.
           The digits are combined by hand: the pattern guarantees exactly three,
           so the value is always defined, and 8 and 9 are handled correctly. */
        int result = (yytext[1] - '0') * 100
                   + (yytext[2] - '0') * 10
                   + (yytext[3] - '0');
        adjust();
        if (result > 0xff) {
            EM_error(EM_tokPos, "Invalid ASCII escape sequence");
            yyterminate();
        }
        append((char)result);
    }
    \\n       { adjust(); append('\n'); }
    \\t       { adjust(); append('\t'); }
    \\r       { adjust(); append('\r'); }
    \\b       { adjust(); append('\b'); }
    \\f       { adjust(); append('\f'); }
    \\\"      { adjust(); append('"'); }
    \\\\      { adjust(); append('\\'); }
    \\ {
        /* A backslash that starts none of the escapes above. Without this rule
           the default action would echo it to the output unreported. */
        adjust();
        EM_error(EM_tokPos, "illegal escape sequence");
    }
    \n {
        /* A raw line break inside a literal: report it, and keep the line
           table in step so later positions are still correct. */
        adjust();
        EM_error(EM_tokPos, "newline in string");
        EM_newline();
    }
    <<EOF>> {
        /* An end-of-file action has to stop the scanner itself; otherwise the
           scanner re-enters this action forever. */
        adjust();
        EM_error(EM_tokPos, "string EOF");
        yyterminate();
    }
    [^\\\n\"] { adjust(); append(*yytext); }
}
    /* Block comments, which may nest. */
<comment>{
    "*/" {
        /* Leave the comment rules only when the outermost comment closes. */
        adjust();
        commentLevel--;
        if (commentLevel == 0) {
            BEGIN(INITIAL);
        }
    }
    "/*"    { /* Nested opener: one level deeper. */ adjust(); commentLevel++; }
    \n {
        /* The dot rule below never matches a line break, so without this rule
           the break was echoed to the output and the line table fell behind. */
        adjust();
        EM_newline();
    }
    <<EOF>> { adjust(); EM_error(EM_tokPos, "comment EOF"); yyterminate(); }
    .       { /* Any other character inside a comment is skipped. */ adjust(); }
}
    /* Tokens that carry a value, followed by the catch-all for illegal characters. */
[0-9]+ {
    /* NOTE(review): atoi cannot report overflow for oversized literals. */
    adjust();
    yylval.ival = atoi(yytext);
    return INT;
}
[a-zA-Z][a-zA-Z0-9_]* {
    /* yytext is overwritten by the next match, so the parser receives its own
       copy of the name. Underscores are accepted after the first letter. */
    char *name = checked_malloc((size_t)yyleng + 1);
    memcpy(name, yytext, (size_t)yyleng + 1);
    adjust();
    yylval.sval = name;
    return ID;
}
. { /* Anything no other rule accepts. */ adjust(); EM_error(EM_tokPos,"illegal token"); }