From 83678ad29a03623d7c13771c0bfa36657b554db4 Mon Sep 17 00:00:00 2001 From: Aryadev Chavali Date: Wed, 1 Nov 2023 18:09:00 +0000 Subject: Add MULT to lexer and parser for assembler --- asm/lexer.c | 150 ++++------------------------------------------------------- asm/lexer.h | 1 + asm/parser.c | 4 +- 3 files changed, 13 insertions(+), 142 deletions(-) (limited to 'asm') diff --git a/asm/lexer.c b/asm/lexer.c index a4d9f5a..f0408ad 100644 --- a/asm/lexer.c +++ b/asm/lexer.c @@ -62,6 +62,8 @@ const char *token_type_as_cstr(token_type_t type) return "GTE"; case TOKEN_PLUS: return "PLUS"; + case TOKEN_MULT: + return "MULT"; case TOKEN_PRINT: return "PRINT"; case TOKEN_JUMP: @@ -115,7 +117,7 @@ bool is_valid_hex_char(char c) token_t tokenise_symbol(buffer_t *buffer, size_t *column) { - static_assert(NUMBER_OF_OPCODES == 70, "tokenise_buffer: Out of date!"); + static_assert(NUMBER_OF_OPCODES == 73, "tokenise_buffer: Out of date!"); size_t sym_size = 0; for (; sym_size < space_left(buffer) && @@ -216,6 +218,11 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column) offset = 4; type = TOKEN_PLUS; } + else if (sym_size >= 4 && strncmp(opcode, "MULT", 4) == 0) + { + offset = 4; + type = TOKEN_MULT; + } else if (sym_size >= 5 && strncmp(opcode, "PRINT", 5) == 0) { offset = 5; @@ -368,146 +375,7 @@ lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr) is_valid_hex_char(buffer->data[buffer->used + 1])) t = tokenise_hex_literal(buffer, &column); else if (is_symbol(c)) - { - static_assert(NUMBER_OF_OPCODES == 70, "tokenise_buffer: Out of date!"); - token_t token = tokenise_symbol(buffer, &column); - char *opcode = token.str; - - bool is_opcode = true; - token_type_t type = 0; - size_t offset = 0; - - if (token.str_size == 4 && strncmp(opcode, "NOOP", 4) == 0) - { - offset = 4; - type = TOKEN_NOOP; - } - else if (token.str_size == 4 && strncmp(opcode, "HALT", 4) == 0) - { - offset = 4; - type = TOKEN_HALT; - } - else if (token.str_size >= 8 && strncmp(opcode, "PUSH.REG", 8) == 0) - { - offset = 8; - type = TOKEN_PUSH; - } - else if (token.str_size >= 4 && strncmp(opcode, "PUSH", 4) == 0) - { - offset = 4; - type = TOKEN_PUSH; - } - else if (token.str_size >= 3 && strncmp(opcode, "POP", 3) == 0) - { - offset = 3; - type = TOKEN_POP; - } - else if (token.str_size >= 3 && strncmp(opcode, "MOV", 3) == 0) - { - offset = 3; - type = TOKEN_MOV; - } - else if (token.str_size >= 3 && strncmp(opcode, "DUP", 3) == 0) - { - offset = 3; - type = TOKEN_DUP; - } - else if (token.str_size >= 3 && strncmp(opcode, "NOT", 3) == 0) - { - offset = 3; - type = TOKEN_NOT; - } - else if (token.str_size >= 2 && strncmp(opcode, "OR", 2) == 0) - { - offset = 2; - type = TOKEN_OR; - } - else if (token.str_size >= 3 && strncmp(opcode, "AND", 3) == 0) - { - offset = 3; - type = TOKEN_AND; - } - else if (token.str_size >= 3 && strncmp(opcode, "XOR", 3) == 0) - { - offset = 3; - type = TOKEN_XOR; - } - else if (token.str_size >= 2 && strncmp(opcode, "EQ", 2) == 0) - { - offset = 2; - type = TOKEN_EQ; - } - else if (token.str_size >= 3 && strncmp(opcode, "LTE", 3) == 0) - { - offset = 3; - type = TOKEN_LTE; - } - else if (token.str_size >= 2 && strncmp(opcode, "LT", 2) == 0) - { - offset = 2; - type = TOKEN_LT; - } - else if (token.str_size >= 3 && strncmp(opcode, "GTE", 3) == 0) - { - offset = 3; - type = TOKEN_GTE; - } - else if (token.str_size >= 2 && strncmp(opcode, "GT", 2) == 0) - { - offset = 2; - type = TOKEN_GT; - } - else if (token.str_size >= 4 && strncmp(opcode, "PLUS", 4) == 0) - { - offset = 4; - type = TOKEN_PLUS; - } - else if (token.str_size >= 5 && strncmp(opcode, "PRINT", 5) == 0) - { - offset = 5; - type = TOKEN_PRINT; - } - else if (token.str_size >= 1 && strncmp(opcode, "JUMP.IF", 7) == 0) - { - offset = 7; - type = TOKEN_JUMP_IF; - } - else if (token.str_size >= 6 && strncmp(opcode, "JUMP", 6) == 0) - { - offset = 6; - type = TOKEN_JUMP; - } - else - { - is_opcode = false; - t = token; - } - - if (!is_opcode) - // Just a symbol, so no further manipulation - t = token; - else - { - t.type = type; - t.column = token.column; - if (offset == token.str_size) - { - // There's no more to the string - t.str = malloc(1); - t.str[0] = '\0'; - } - else - { - // t.str is the remaining part of the string after the - // opcode - t.str = calloc(token.str_size - offset + 1, 1); - memcpy(t.str, token.str + offset, token.str_size - offset); - t.str[token.str_size - offset] = '\0'; - } - t.str_size = token.str_size - offset; - free(token.str); - } - } + t = tokenise_symbol(buffer, &column); else if (c == '\'') { if (space_left(buffer) < 2) diff --git a/asm/lexer.h b/asm/lexer.h index 78cf4c6..9ae6fcf 100644 --- a/asm/lexer.h +++ b/asm/lexer.h @@ -36,6 +36,7 @@ typedef enum TokenType TOKEN_GT, TOKEN_GTE, TOKEN_PLUS, + TOKEN_MULT, TOKEN_PRINT, TOKEN_JUMP, TOKEN_JUMP_IF, diff --git a/asm/parser.c b/asm/parser.c index a6ddd86..41ffdb9 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -194,7 +194,6 @@ perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret) perr_t parse_next_inst(token_stream_t *stream, inst_t *ret) { - static_assert(NUMBER_OF_OPCODES == 70, "parse_next_inst: Out of date!"); const token_t token = TOKEN_STREAM_AT(stream->data, stream->used); switch (token.type) { @@ -252,6 +251,9 @@ perr_t parse_next_inst(token_stream_t *stream, inst_t *ret) case TOKEN_PLUS: ret->opcode = OP_PLUS_BYTE; return parse_utype_inst(stream, ret); + case TOKEN_MULT: + ret->opcode = OP_MULT_BYTE; + return parse_utype_inst(stream, ret); case TOKEN_PRINT: ret->opcode = OP_PRINT_BYTE; return parse_type_inst(stream, ret); -- cgit v1.2.3-13-gbd6f