aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAryadev Chavali <aryadev@aryadevchavali.com>2023-11-02 23:27:25 +0000
committerAryadev Chavali <aryadev@aryadevchavali.com>2023-11-02 23:29:07 +0000
commitbd39c2b2835974c4ad9313f49a273df1af5887af (patch)
tree927c192628547b8f876b08ad477cce4434e2faca
parentc74c36333b33452a27f5da428c1087a547c2170b (diff)
downloadovm-bd39c2b2835974c4ad9313f49a273df1af5887af.tar.gz
ovm-bd39c2b2835974c4ad9313f49a273df1af5887af.tar.bz2
ovm-bd39c2b2835974c4ad9313f49a273df1af5887af.zip
Made the lexer more error-prone so that the parser is less so
The lexer now immediately attempts to consume any type suffix or later portion of an opcode, rather than consuming only the root and leaving everything else behind. This means that checking for a type in the parser is a direct comparison against the type name rather than against the name prefixed with a dot. The checks are also somewhat stricter, so more tokens are classified as symbols straight away rather than being checked one routine later on the parser side.
-rw-r--r--asm/lexer.c110
-rw-r--r--asm/parser.c12
2 files changed, 61 insertions, 61 deletions
diff --git a/asm/lexer.c b/asm/lexer.c
index 73859c3..baa5e92 100644
--- a/asm/lexer.c
+++ b/asm/lexer.c
@@ -165,149 +165,149 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column)
offset = 4;
type = TOKEN_HALT;
}
- else if (sym_size >= 8 && strncmp(opcode, "PUSH.REG", 8) == 0)
+ else if (sym_size > 9 && strncmp(opcode, "PUSH.REG.", 9) == 0)
{
- offset = 8;
+ offset = 9;
type = TOKEN_PUSH_REG;
}
- else if (sym_size >= 4 && strncmp(opcode, "PUSH", 4) == 0)
+ else if (sym_size > 5 && strncmp(opcode, "PUSH.", 5) == 0)
{
- offset = 4;
+ offset = 5;
type = TOKEN_PUSH;
}
- else if (sym_size >= 3 && strncmp(opcode, "POP", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "POP.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_POP;
}
- else if (sym_size >= 3 && strncmp(opcode, "MOV", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "MOV.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_MOV;
}
- else if (sym_size >= 3 && strncmp(opcode, "DUP", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "DUP.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_DUP;
}
- else if (sym_size >= 12 && strncmp(opcode, "MALLOC.STACK", 12) == 0)
+ else if (sym_size > 13 && strncmp(opcode, "MALLOC.STACK.", 13) == 0)
{
- offset = 12;
+ offset = 13;
type = TOKEN_MALLOC_STACK;
}
- else if (sym_size >= 6 && strncmp(opcode, "MALLOC", 6) == 0)
+ else if (sym_size > 7 && strncmp(opcode, "MALLOC.", 7) == 0)
{
- offset = 6;
+ offset = 7;
type = TOKEN_MALLOC;
}
- else if (sym_size >= 10 && strncmp(opcode, "MSET.STACK", 10) == 0)
+ else if (sym_size > 11 && strncmp(opcode, "MSET.STACK.", 11) == 0)
{
- offset = 10;
+ offset = 11;
type = TOKEN_MSET_STACK;
}
- else if (sym_size >= 4 && strncmp(opcode, "MSET", 4) == 0)
+ else if (sym_size > 5 && strncmp(opcode, "MSET.", 5) == 0)
{
- offset = 4;
+ offset = 5;
type = TOKEN_MSET;
}
- else if (sym_size >= 10 && strncmp(opcode, "MGET.STACK", 10) == 0)
+ else if (sym_size > 11 && strncmp(opcode, "MGET.STACK.", 11) == 0)
{
- offset = 10;
+ offset = 11;
type = TOKEN_MGET_STACK;
}
- else if (sym_size >= 4 && strncmp(opcode, "MGET", 4) == 0)
+ else if (sym_size > 5 && strncmp(opcode, "MGET.", 5) == 0)
{
- offset = 4;
+ offset = 5;
type = TOKEN_MGET;
}
- else if (sym_size >= 7 && strncmp(opcode, "MDELETE", 7) == 0)
+ else if (sym_size == 7 && strncmp(opcode, "MDELETE", 7) == 0)
{
offset = 7;
type = TOKEN_MDELETE;
}
- else if (sym_size >= 5 && strncmp(opcode, "MSIZE", 5) == 0)
+ else if (sym_size == 5 && strncmp(opcode, "MSIZE", 5) == 0)
{
offset = 5;
type = TOKEN_MSIZE;
}
- else if (sym_size >= 3 && strncmp(opcode, "NOT", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "NOT.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_NOT;
}
- else if (sym_size >= 2 && strncmp(opcode, "OR", 2) == 0)
+ else if (sym_size > 3 && strncmp(opcode, "OR.", 3) == 0)
{
- offset = 2;
+ offset = 3;
type = TOKEN_OR;
}
- else if (sym_size >= 3 && strncmp(opcode, "AND", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "AND.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_AND;
}
- else if (sym_size >= 3 && strncmp(opcode, "XOR", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "XOR.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_XOR;
}
- else if (sym_size >= 2 && strncmp(opcode, "EQ", 2) == 0)
+ else if (sym_size >= 3 && strncmp(opcode, "EQ.", 3) == 0)
{
- offset = 2;
+ offset = 3;
type = TOKEN_EQ;
}
- else if (sym_size >= 3 && strncmp(opcode, "LTE", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "LTE.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_LTE;
}
- else if (sym_size >= 2 && strncmp(opcode, "LT", 2) == 0)
+ else if (sym_size > 3 && strncmp(opcode, "LT.", 3) == 0)
{
- offset = 2;
+ offset = 3;
type = TOKEN_LT;
}
- else if (sym_size >= 3 && strncmp(opcode, "GTE", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "GTE.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_GTE;
}
- else if (sym_size >= 2 && strncmp(opcode, "GT", 2) == 0)
+ else if (sym_size > 3 && strncmp(opcode, "GT.", 3) == 0)
{
- offset = 2;
+ offset = 3;
type = TOKEN_GT;
}
- else if (sym_size >= 3 && strncmp(opcode, "SUB", 3) == 0)
+ else if (sym_size > 4 && strncmp(opcode, "SUB.", 4) == 0)
{
- offset = 3;
+ offset = 4;
type = TOKEN_SUB;
}
- else if (sym_size >= 4 && strncmp(opcode, "PLUS", 4) == 0)
+ else if (sym_size > 5 && strncmp(opcode, "PLUS.", 5) == 0)
{
- offset = 4;
+ offset = 5;
type = TOKEN_PLUS;
}
- else if (sym_size >= 4 && strncmp(opcode, "MULT", 4) == 0)
+ else if (sym_size > 5 && strncmp(opcode, "MULT.", 5) == 0)
{
- offset = 4;
+ offset = 5;
type = TOKEN_MULT;
}
- else if (sym_size >= 5 && strncmp(opcode, "PRINT", 5) == 0)
+ else if (sym_size > 6 && strncmp(opcode, "PRINT.", 6) == 0)
{
- offset = 5;
+ offset = 6;
type = TOKEN_PRINT;
}
- else if (sym_size >= 8 && strncmp(opcode, "JUMP.ABS", 8) == 0)
+ else if (sym_size == 8 && strncmp(opcode, "JUMP.ABS", 8) == 0)
{
offset = 8;
type = TOKEN_JUMP_ABS;
}
- else if (sym_size >= 10 && strncmp(opcode, "JUMP.STACK", 10) == 0)
+ else if (sym_size == 10 && strncmp(opcode, "JUMP.STACK", 10) == 0)
{
offset = 10;
type = TOKEN_JUMP_STACK;
}
- else if (sym_size >= 7 && strncmp(opcode, "JUMP.IF", 7) == 0)
+ else if (sym_size > 8 && strncmp(opcode, "JUMP.IF.", 8) == 0)
{
- offset = 7;
+ offset = 8;
type = TOKEN_JUMP_IF;
}
else
@@ -344,7 +344,7 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column)
}
ret.str_size = sym_size - offset;
}
- *column += sym_size;
+ *column += sym_size - 1;
buffer->used += sym_size;
return ret;
}
diff --git a/asm/parser.c b/asm/parser.c
index 44c09b4..4f76fe5 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -157,17 +157,17 @@ enum Type
T_WORD,
} parse_details_to_type(token_t details)
{
- if (details.str_size == 5 && strncmp(details.str, ".BYTE", 5) == 0)
+ if (details.str_size == 4 && strncmp(details.str, "BYTE", 4) == 0)
return T_BYTE;
- else if (details.str_size == 5 && strncmp(details.str, ".CHAR", 5) == 0)
+ else if (details.str_size == 4 && strncmp(details.str, "CHAR", 4) == 0)
return T_CHAR;
- else if (details.str_size == 6 && strncmp(details.str, ".HWORD", 6) == 0)
+ else if (details.str_size == 5 && strncmp(details.str, "HWORD", 5) == 0)
return T_HWORD;
- else if (details.str_size == 4 && strncmp(details.str, ".INT", 4) == 0)
+ else if (details.str_size == 3 && strncmp(details.str, "INT", 3) == 0)
return T_INT;
- else if (details.str_size == 5 && strncmp(details.str, ".LONG", 5) == 0)
+ else if (details.str_size == 4 && strncmp(details.str, "LONG", 4) == 0)
return T_LONG;
- else if (details.str_size == 5 && strncmp(details.str, ".WORD", 5) == 0)
+ else if (details.str_size == 4 && strncmp(details.str, "WORD", 4) == 0)
return T_WORD;
else
return T_NIL;