diff options
author | Aryadev Chavali <aryadev@aryadevchavali.com> | 2023-10-26 11:17:55 +0100 |
---|---|---|
committer | Aryadev Chavali <aryadev@aryadevchavali.com> | 2023-10-26 11:17:55 +0100 |
commit | 2fe2af22a9b7b2be36d51e24797330c5b6cdbb13 (patch) | |
tree | 542b5222432748993475825e9d04b1c2b58cc3b7 | |
parent | dca51106a205a14380446705d68a12d810eeb20f (diff) | |
download | ovm-2fe2af22a9b7b2be36d51e24797330c5b6cdbb13.tar.gz ovm-2fe2af22a9b7b2be36d51e24797330c5b6cdbb13.tar.bz2 ovm-2fe2af22a9b7b2be36d51e24797330c5b6cdbb13.zip |
Implemented a rudimentary parser with support for 4 instruction types
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | asm/parser.c | 203 |
2 files changed, 204 insertions, 1 deletions
@@ -31,7 +31,7 @@ VM_OUT=$(DIST)/ovm.out
 ## ASSEMBLY setup
 ASM_DIST=$(DIST)/asm
 ASM_SRC=asm
-ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.c)
+ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.c parser.c)
 ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.c=$(ASM_DIST)/%.o)
 ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d
 ASM_CFLAGS=$(CFLAGS)
diff --git a/asm/parser.c b/asm/parser.c
index 5521585..5d96c72 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -9,3 +9,206 @@
  * Author: Aryadev Chavali
  * Description: Parser for assembly language
  */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "./parser.h"
+
+/* Map the BYTE opcode of a typed instruction family to the opcode for
+ * `type`.  The HWORD and WORD variants are taken to sit directly after
+ * the BYTE variant (base_code + 1, base_code + 2).  Returns 0 when
+ * `type` is not BYTE, HWORD or WORD. */
+opcode_t get_typed_opcode(opcode_t base_code, data_type_t type)
+{
+  switch (type)
+  {
+  case DATA_TYPE_NIL:
+    // TODO: Parse error (Not something we should pass here)
+    return 0;
+  case DATA_TYPE_BYTE:
+    return base_code;
+  case DATA_TYPE_HWORD:
+    return base_code + 1;
+  case DATA_TYPE_WORD:
+    return base_code + 2;
+  default:
+    // TODO: Parse error (EXPECTED_TYPE_TAG)
+    return 0;
+  }
+}
+
+/* Identify the type tag ("BYTE", "HWORD" or "WORD") at the start of
+ * the `length` bytes at `cstr`.  Only the prefix is compared, so any
+ * characters following the tag are ignored.  Returns DATA_TYPE_NIL
+ * when no tag matches. */
+data_type_t parse_data_type(const char *cstr, size_t length)
+{
+  if (length >= 4 && strncmp(cstr, "BYTE", 4) == 0)
+    return DATA_TYPE_BYTE;
+  else if (length >= 5 && strncmp(cstr, "HWORD", 5) == 0)
+    return DATA_TYPE_HWORD;
+  else if (length >= 4 && strncmp(cstr, "WORD", 4) == 0)
+    return DATA_TYPE_WORD;
+  else
+    return DATA_TYPE_NIL;
+}
+
+/* Convert a number literal token into a word.  A literal with a
+ * leading '-' is read with strtoll and its bytes are copied into the
+ * word; anything else is read with strtoull.  Base 0 lets the C
+ * library pick the radix from the literal's prefix.  Returns 0 when
+ * the text is not entirely a number or is out of range.
+ * NOTE(review): assumes token.str is NUL terminated - confirm against
+ * the lexer. */
+word parse_word(token_t token)
+{
+  assert(token.type == TOKEN_LITERAL_NUMBER);
+  bool is_negative = token.str_size > 1 && token.str[0] == '-';
+  char *end = NULL;
+  word w = 0;
+  // strtoll/strtoull never clear errno, so reset it here; otherwise a
+  // stale ERANGE from earlier code is mistaken for an overflow.
+  errno = 0;
+  if (is_negative)
+  {
+    // TODO: Make a standardised type of the same size as word in
+    // base.h
+    int64_t i = strtoll(token.str, &end, 0);
+    memcpy(&w, &i, sizeof(w));
+  }
+  else
+    w = strtoull(token.str, &end, 0);
+  if (end == token.str || end[0] != '\0')
+    // TODO: Parse error (NOT_A_NUMBER)
+    return 0;
+  else if (errno == ERANGE)
+  {
+    // TODO: Parse error (INTEGER_OVERFLOW or WORD_OVERFLOW)
+    errno = 0;
+    return 0;
+  }
+  return w;
+}
+
+/* Read the operand that follows the opcode token at stream->used into
+ * inst->operand and advance the stream past both tokens.  Returns
+ * false, without touching the stream, when the next token is missing
+ * or is not a number literal. */
+static bool parse_operand(token_stream_t *stream, inst_t *inst)
+{
+  if (stream->used + 1 >= stream->available)
+    // TODO: Parse error (EXPECTED_OPERAND)
+    return false;
+  token_t operand = TOKEN_STREAM_AT(stream->data, stream->used + 1);
+  if (operand.type != TOKEN_LITERAL_NUMBER)
+    // TODO: Parse error (EXPECTED_OPERAND); checked here because
+    // parse_word asserts on any other token type
+    return false;
+  inst->operand = DWORD(parse_word(operand));
+  stream->used += 2;
+  return true;
+}
+
+/* Parse the instruction that starts at the token at stream->used.  On
+ * success the stream is advanced past every token consumed.  On a
+ * parse error INST_NOOP is returned and the stream is not advanced. */
+inst_t parse_next_inst(token_stream_t *stream)
+{
+  if (stream->used >= stream->available)
+    // No token left to read
+    return INST_NOOP;
+  token_t token = TOKEN_STREAM_AT(stream->data, stream->used);
+  if (token.type != TOKEN_SYMBOL)
+    // TODO Parser Error (EXPECTED_SYMBOL)
+    return INST_NOOP;
+  inst_t inst = {0};
+  char *opcode = token.str;
+  if (token.str_size >= 4 && strncmp(opcode, "HALT", 4) == 0)
+  {
+    inst = INST_HALT;
+    ++stream->used;
+  }
+  else if (token.str_size >= 5 && strncmp(opcode, "PUSH.", 5) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 5, token.str_size - 5);
+    inst.opcode = get_typed_opcode(OP_PUSH_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    else if (!parse_operand(stream, &inst))
+      return INST_NOOP;
+  }
+  else if (token.str_size >= 4 && strncmp(opcode, "POP.", 4) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
+    inst.opcode = get_typed_opcode(OP_POP_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_POP))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    ++stream->used;
+  }
+  else if (token.str_size >= 9 && strncmp(opcode, "PUSH-REG.", 9) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 9, token.str_size - 9);
+    inst.opcode = get_typed_opcode(OP_PUSH_REGISTER_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH_REGISTER))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    else if (!parse_operand(stream, &inst))
+      return INST_NOOP;
+  }
+  else if (token.str_size >= 4 && strncmp(opcode, "MOV.", 4) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
+    inst.opcode = get_typed_opcode(OP_MOV_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_MOV))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    else if (!parse_operand(stream, &inst))
+      return INST_NOOP;
+  }
+  else if (token.str_size >= 4 && strncmp(opcode, "DUP.", 4) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
+    inst.opcode = get_typed_opcode(OP_DUP_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_DUP))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    else if (!parse_operand(stream, &inst))
+      return INST_NOOP;
+  }
+  else
+  {
+    // TODO: Parse error (UNKNOWN_OPCODE)
+    return INST_NOOP;
+  }
+  return inst;
+}
+
+/* Parse every remaining token of `stream` into an array of
+ * instructions collected in a darr_t.  The instruction count is stored
+ * in *size and the array's data pointer is returned.
+ * NOTE(review): the caller presumably owns the returned buffer -
+ * confirm against the darr API. */
+inst_t *parse_stream(token_stream_t *stream, size_t *size)
+{
+  darr_t instructions = {0};
+  darr_init(&instructions, sizeof(inst_t));
+  while (stream->used < stream->available)
+  {
+    size_t start = stream->used;
+    inst_t inst = parse_next_inst(stream);
+    if (stream->used == start)
+      // parse_next_inst consumes nothing on a parse error; skip the
+      // offending token so this loop always terminates
+      ++stream->used;
+    darr_append_bytes(&instructions, (byte *)&inst, sizeof(inst_t));
+  }
+  *size = instructions.used / sizeof(inst_t);
+  return (inst_t *)instructions.data;
+}