diff options
Diffstat (limited to 'asm')
-rw-r--r-- | asm/parser.c | 295 | ||||
-rw-r--r-- | asm/parser.h | 17 |
2 files changed, 221 insertions, 91 deletions
diff --git a/asm/parser.c b/asm/parser.c index 5d96c72..cc97e1c 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -19,21 +19,41 @@ #define OPCODE_ON_TYPE(BASE_CODE, TYPE) +const char *perr_as_cstr(perr_t perr) +{ + switch (perr) + { + case PERR_OK: + return "OK"; + case PERR_INTEGER_OVERFLOW: + return "INTEGER_OVERFLOW"; + case PERR_NOT_A_NUMBER: + return "NOT_A_NUMBER"; + case PERR_EXPECTED_TYPE: + return "EXPECTED_TYPE"; + case PERR_EXPECTED_SYMBOL: + return "EXPECTED_SYMBOL"; + case PERR_EXPECTED_OPERAND: + return "EXPECTED_OPERAND"; + case PERR_UNKNOWN_OPERATOR: + return "UNKNOWN_OPERATOR"; + default: + return ""; + } +} + opcode_t get_typed_opcode(opcode_t base_code, data_type_t type) { switch (type) { - case DATA_TYPE_NIL: - // TODO: Parse error (Not something we should pass here) - return 0; case DATA_TYPE_BYTE: return base_code; case DATA_TYPE_HWORD: return base_code + 1; case DATA_TYPE_WORD: return base_code + 2; + case DATA_TYPE_NIL: default: - // TODO: Parse error (EXPECTED_TYPE_TAG) return 0; } } @@ -50,7 +70,7 @@ data_type_t parse_data_type(const char *cstr, size_t length) return DATA_TYPE_NIL; } -word parse_word(token_t token) +perr_t parse_word(token_t token, word *ret) { assert(token.type == TOKEN_LITERAL_NUMBER); bool is_negative = token.str_size > 1 && token.str[0] == '-'; @@ -62,14 +82,13 @@ word parse_word(token_t token) // base.h int64_t i = strtoll(token.str, &end, 0); if (!(end && end[0] == '\0')) - // TODO: Parse error (NOT_A_NUMBER) - return 0; + return PERR_NOT_A_NUMBER; else if (errno == ERANGE) { - // TODO: Parse error (INTEGER_OVERFLOW) errno = 0; - return 0; + return PERR_INTEGER_OVERFLOW; } + // Copy bits, do not cast memcpy(&w, &i, sizeof(w)); } else @@ -77,113 +96,211 @@ word parse_word(token_t token) char *end = NULL; w = strtoull(token.str, &end, 0); if (!(end && end[0] == '\0')) - // TODO: Parse error (NOT_A_NUMBER) - return 0; + return PERR_NOT_A_NUMBER; else if (errno == ERANGE) { - // TODO: Parse error (WORD_OVERFLOW) errno = 0; - return 0; + return PERR_INTEGER_OVERFLOW; } } - return w; + *ret = w; + return PERR_OK; +} + +perr_t parse_inst_with_type(token_stream_t *stream, inst_t *ret, + size_t oplength) +{ + // Assume the base type OP_*_BYTE is in ret->opcode + token_t token = TOKEN_STREAM_AT(stream->data, stream->used); + char *opcode = token.str; + data_type_t type = parse_data_type(opcode + oplength, + WORD_SAFE_SUB(token.str_size, oplength)); + if (type == DATA_TYPE_NIL) + return PERR_EXPECTED_TYPE; + ++stream->used; + ret->opcode = get_typed_opcode(ret->opcode, type); + return PERR_OK; +} + +perr_t parse_inst_with_operand(token_stream_t *stream, inst_t *ret) +{ + // Parse operand + perr_t word_parse_error = parse_word( + TOKEN_STREAM_AT(stream->data, stream->used), &ret->operand.as_word); + if (word_parse_error) + return word_parse_error; + ++stream->used; + return PERR_OK; } -inst_t parse_next_inst(token_stream_t *stream) +perr_t parse_inst_with_typed_operand(token_stream_t *stream, inst_t *ret, + size_t oplength) { - token_t token = TOKEN_STREAM_AT(stream->data, stream->used); + perr_t type_parse_error = parse_inst_with_type(stream, ret, oplength); + if (type_parse_error) + return type_parse_error; + + // Parse operand + perr_t word_parse_error = parse_word( + TOKEN_STREAM_AT(stream->data, stream->used), &ret->operand.as_word); + if (word_parse_error) + return word_parse_error; + ++stream->used; + return PERR_OK; +} + +perr_t parse_next_inst(token_stream_t *stream, inst_t *ret) +{ + const token_t token = TOKEN_STREAM_AT(stream->data, stream->used); if (token.type != TOKEN_SYMBOL) - // TODO Parser Error (EXPECTED_SYMBOL) - return INST_NOOP; + return PERR_EXPECTED_SYMBOL; inst_t inst = {0}; char *opcode = token.str; - if (token.str_size >= 4 && strncmp(opcode, "HALT", 4) == 0) + if (token.str_size == 4 && strncmp(opcode, "NOOP", 4) == 0) { - inst = INST_HALT; + inst = INST_NOOP; ++stream->used; } - else if (token.str_size >= 5 && strncmp(opcode, "PUSH.", 5) == 0) - { - data_type_t type = parse_data_type(opcode + 5, token.str_size - 5); - inst.opcode = get_typed_opcode(OP_PUSH_BYTE, type); - if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH)) - // TODO: Parse error (EXPECTED_TYPE_TAG) - return INST_NOOP; - else if (stream->used == stream->available - 1) - // TODO: Parse error (EXPECTED_OPERAND) - return INST_NOOP; - inst.operand = - DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); - stream->used += 2; - } - else if (token.str_size >= 4 && strncmp(opcode, "POP.", 4) == 0) - { - data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); - inst.opcode = get_typed_opcode(OP_POP_BYTE, type); - if (!OPCODE_IS_TYPE(inst.opcode, OP_POP)) - // TODO: Parse error (EXPECTED_TYPE_TAG) - return INST_NOOP; + else if (token.str_size == 4 && strncmp(opcode, "HALT", 4) == 0) + { + inst = INST_HALT; ++stream->used; } - else if (token.str_size >= 9 && strncmp(opcode, "PUSH-REG.", 9) == 0) - { - data_type_t type = parse_data_type(opcode + 9, token.str_size - 9); - inst.opcode = get_typed_opcode(OP_PUSH_REGISTER_BYTE, type); - if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH_REGISTER)) - // TODO: Parse error (EXPECTED_TYPE_TAG) - return INST_NOOP; - else if (stream->used == stream->available - 1) - // TODO: Parse error (EXPECTED_OPERAND) - return INST_NOOP; - inst.operand = - DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); - stream->used += 2; - } - else if (token.str_size >= 4 && strncmp(opcode, "MOV.", 4) == 0) - { - data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); - inst.opcode = get_typed_opcode(OP_MOV_BYTE, type); - if (!OPCODE_IS_TYPE(inst.opcode, OP_MOV)) - // TODO: Parse error (EXPECTED_TYPE_TAG) - return INST_NOOP; - else if (stream->used == stream->available - 1) - // TODO: Parse error (EXPECTED_OPERAND) - return INST_NOOP; - inst.operand = - DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); - stream->used += 2; - } - else if (token.str_size >= 4 && strncmp(opcode, "DUP.", 4) == 0) - { - data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); - inst.opcode = get_typed_opcode(OP_DUP_BYTE, type); - if (!OPCODE_IS_TYPE(inst.opcode, OP_DUP)) - // TODO: Parse error (EXPECTED_TYPE_TAG) - return INST_NOOP; - else if (stream->used == stream->available - 1) - // TODO: Parse error (EXPECTED_OPERAND) - return INST_NOOP; - inst.operand = - DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); - stream->used += 2; + else if (token.str_size >= 4 && strncmp(opcode, "PUSH", 4) == 0) + { + size_t oplen = 5; + if (token.str_size >= 8 && strncmp(opcode, "PUSH-REG", 8) == 0) + { + oplen = 9; + ret->opcode = OP_PUSH_REGISTER_BYTE; + } + else + ret->opcode = OP_PUSH_BYTE; + return parse_inst_with_typed_operand(stream, ret, oplen); } - else + else if (token.str_size >= 3 && strncmp(opcode, "POP", 3) == 0) + { + ret->opcode = OP_POP_BYTE; + return parse_inst_with_type(stream, ret, 4); + } + else if (token.str_size >= 3 && strncmp(opcode, "MOV", 3) == 0) + { + ret->opcode = OP_MOV_BYTE; + return parse_inst_with_typed_operand(stream, ret, 4); + } + else if (token.str_size >= 3 && strncmp(opcode, "DUP", 3) == 0) { - // TODO: Parse error (UNKNOWN_OPCODE) - return INST_NOOP; + ret->opcode = OP_DUP_BYTE; + return parse_inst_with_typed_operand(stream, ret, 4); } - return inst; + else if (token.str_size >= 3 && strncmp(opcode, "NOT", 3) == 0) + { + ret->opcode = OP_NOT_BYTE; + return parse_inst_with_type(stream, ret, 4); + } + else if (token.str_size >= 2 && strncmp(opcode, "OR", 2) == 0) + { + ret->opcode = OP_OR_BYTE; + return parse_inst_with_type(stream, ret, 3); + } + else if (token.str_size >= 3 && strncmp(opcode, "AND", 3) == 0) + { + ret->opcode = OP_AND_BYTE; + return parse_inst_with_type(stream, ret, 4); + } + else if (token.str_size >= 3 && strncmp(opcode, "XOR", 3) == 0) + { + ret->opcode = OP_XOR_BYTE; + return parse_inst_with_type(stream, ret, 4); + } + else if (token.str_size >= 2 && strncmp(opcode, "EQ", 2) == 0) + { + ret->opcode = OP_EQ_BYTE; + return parse_inst_with_type(stream, ret, 3); + } + else if (token.str_size >= 4 && strncmp(opcode, "PLUS", 4) == 0) + { + ret->opcode = OP_PLUS_BYTE; + return parse_inst_with_type(stream, ret, 5); + } + else if (token.str_size >= 6 && strncmp(opcode, "PRINT.", 6) == 0) + { + const char *type = opcode + 6; + const size_t type_size = WORD_SAFE_SUB(token.str_size, 6); + if (type_size == 4 && strncmp(type, "CHAR", 4) == 0) + inst.opcode = OP_PRINT_CHAR; + else if (type_size == 4 && strncmp(type, "BYTE", 4) == 0) + inst.opcode = OP_PRINT_BYTE; + else if (type_size == 3 && strncmp(type, "INT", 3) == 0) + inst.opcode = OP_PRINT_INT; + else if (type_size == 5 && strncmp(type, "HWORD", 5) == 0) + inst.opcode = OP_PRINT_HWORD; + else if (type_size == 4 && strncmp(type, "LONG", 4) == 0) + inst.opcode = OP_PRINT_LONG; + else if (type_size == 4 && strncmp(type, "WORD", 4) == 0) + inst.opcode = OP_PRINT_WORD; + else + return PERR_UNKNOWN_OPERATOR; + ++stream->used; + } + else if (token.str_size >= 5 && strncmp(opcode, "JUMP.", 5) == 0) + { + const char *type = opcode + 5; + const size_t type_size = WORD_SAFE_SUB(token.str_size, 5); + if (type_size == 3 && strncmp(type, "ABS", 3) == 0) + { + ret->opcode = OP_JUMP_ABS; + ++stream->used; + return parse_inst_with_operand(stream, ret); + } + else if (type_size == 5 && strncmp(type, "STACK", 5) == 0) + inst.opcode = OP_JUMP_STACK; + else if (type_size == 8 && strncmp(type, "REGISTER", 8) == 0) + { + ret->opcode = OP_JUMP_REGISTER; + ++stream->used; + return parse_inst_with_operand(stream, ret); + } + else if (type_size >= 2 && strncmp(type, "IF", 2) == 0) + { + // Parse a typed operand JUMP.IF.<TYPE> + token_t prev = TOKEN_STREAM_AT(stream->data, stream->used); + size_t prev_ptr = stream->used; + + TOKEN_STREAM_AT(stream->data, stream->used).str = (char *)type; + TOKEN_STREAM_AT(stream->data, stream->used).str_size = type_size; + ret->opcode = OP_JUMP_IF_BYTE; + perr_t perr = parse_inst_with_typed_operand(stream, ret, 3); + + TOKEN_STREAM_AT(stream->data, prev_ptr) = prev; + return perr; + } + else + return PERR_UNKNOWN_OPERATOR; + ++stream->used; + } + else + return PERR_UNKNOWN_OPERATOR; + *ret = inst; + return PERR_OK; } -inst_t *parse_stream(token_stream_t *stream, size_t *size) +perr_t parse_stream(token_stream_t *stream, inst_t **ret, size_t *size) { darr_t instructions = {0}; darr_init(&instructions, sizeof(inst_t)); while (stream->used < stream->available) { - inst_t inst = parse_next_inst(stream); + inst_t inst = INST_NOOP; + perr_t err = parse_next_inst(stream, &inst); + if (err) + { + free(instructions.data); + return err; + } darr_append_bytes(&instructions, (byte *)&inst, sizeof(inst_t)); } *size = instructions.used / sizeof(inst_t); - return (inst_t *)instructions.data; + *ret = (inst_t *)instructions.data; + return PERR_OK; } diff --git a/asm/parser.h b/asm/parser.h index 09a11d4..9da1117 100644 --- a/asm/parser.h +++ b/asm/parser.h @@ -17,7 +17,20 @@ #include <vm/inst.h> -inst_t parse_next_inst(token_stream_t *); -inst_t *parse_stream(token_stream_t *, size_t *); +typedef enum +{ + PERR_OK = 0, + PERR_INTEGER_OVERFLOW, + PERR_NOT_A_NUMBER, + PERR_EXPECTED_TYPE, + PERR_EXPECTED_SYMBOL, + PERR_EXPECTED_OPERAND, + PERR_UNKNOWN_OPERATOR, +} perr_t; + +const char *perr_as_cstr(perr_t); + +perr_t parse_next_inst(token_stream_t *, inst_t *); +perr_t parse_stream(token_stream_t *, inst_t **, size_t *); #endif |