diff options
author | Aryadev Chavali <aryadev@aryadevchavali.com> | 2023-11-02 20:31:55 +0000 |
---|---|---|
committer | Aryadev Chavali <aryadev@aryadevchavali.com> | 2023-11-02 20:31:55 +0000 |
commit | 4990d93a1c333c032e5ee0f1fc4aa3a02a7d41fc (patch) | |
tree | 39a1d0bf5c89de639b45c779cc0a3bbeb09d2a87 /asm/parser.c | |
parent | d5e311c9d44793d269e9b8ee5e8c0fa7a1a25a50 (diff) | |
download | ovm-4990d93a1c333c032e5ee0f1fc4aa3a02a7d41fc.tar.gz ovm-4990d93a1c333c032e5ee0f1fc4aa3a02a7d41fc.tar.bz2 ovm-4990d93a1c333c032e5ee0f1fc4aa3a02a7d41fc.zip |
Created a preprocessing unit, presult_t, and a function to process a list of them
Essentially a presult_t contains one of these:
1) A label construction, which stores the label symbol into
`label` (PRES_LABEL)
2) An instruction that calls upon a label, storing the instruction
in `instruction` and the label name in `label` (PRES_LABEL_ADDRESS)
3) An instruction that uses a relative address offset, storing the
instruction in `instruction` and the requested offset in
`relative_address` (PRES_RELATIVE_ADDRESS)
4) An instruction that requires no further processing, storing the
instruction into `instruction` (PRES_COMPLETE_RESULT)
In the processing stage, we resolve all label and relative-address
references by iterating over the results one by one while maintaining
an absolute instruction address. Pretty nice, lots more machinery
involved in parsing now.
Diffstat (limited to 'asm/parser.c')
-rw-r--r-- | asm/parser.c | 376 |
1 files changed, 305 insertions, 71 deletions
diff --git a/asm/parser.c b/asm/parser.c index 08e067c..f9eb975 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -37,6 +37,10 @@ const char *perr_as_cstr(perr_t perr) return "EXPECTED_SYMBOL"; case PERR_EXPECTED_OPERAND: return "EXPECTED_OPERAND"; + case PERR_UNKNOWN_LABEL: + return "UNKNOWN_LABEL"; + case PERR_INVALID_RELATIVE_ADDRESS: + return "INVALID_RELATIVE_ADDRESS"; case PERR_UNKNOWN_OPERATOR: return "UNKNOWN_OPERATOR"; default: @@ -88,6 +92,60 @@ perr_t parse_word(token_t token, word *ret) return PERR_NOT_A_NUMBER; } +perr_t parse_sword(token_t token, i64 *ret) +{ + if (token.type == TOKEN_LITERAL_NUMBER) + { + char *end = NULL; + s_word i = strtoll(token.str, &end, 0); + if (!(end && end[0] == '\0')) + return PERR_NOT_A_NUMBER; + else if (errno == ERANGE) + { + errno = 0; + return PERR_INTEGER_OVERFLOW; + } + *ret = i; + return PERR_OK; + } + else if (token.type == TOKEN_LITERAL_CHAR) + { + *ret = token.str[0]; + return PERR_OK; + } + else + return PERR_NOT_A_NUMBER; +} + +perr_t parse_word_label_or_relative(token_stream_t *stream, presult_t *res) +{ + token_t token = TOKEN_STREAM_AT(stream->data, stream->used); + if (token.type == TOKEN_SYMBOL) + { + res->type = PRES_LABEL_ADDRESS; + res->label = calloc(token.str_size + 1, 1); + memcpy(res->label, token.str, token.str_size); + res->label[token.str_size] = '\0'; + return PERR_OK; + } + else if (token.type == TOKEN_LITERAL_CHAR || + token.type == TOKEN_LITERAL_NUMBER) + { + res->type = PRES_COMPLETE_RESULT; + return parse_word(token, &res->instruction.operand.as_word); + } + else if (token.type == TOKEN_STAR) + { + if (stream->used + 1 >= stream->available) + return PERR_EXPECTED_OPERAND; + res->type = PRES_RELATIVE_ADDRESS; + ++stream->used; + return parse_sword(TOKEN_STREAM_AT(stream->data, stream->used), + &res->relative_address); + } + return PERR_EXPECTED_OPERAND; +} + enum Type { T_NIL = -1, @@ -179,6 +237,18 @@ perr_t parse_utype_inst_with_operand(token_stream_t *stream, inst_t *ret) return PERR_OK; } 
+perr_t parse_jump_inst_operand(token_stream_t *stream, presult_t *res) +{ + perr_t inst_err = parse_utype_inst(stream, &res->instruction); + if (inst_err) + return inst_err; + ++stream->used; + perr_t op_err = parse_word_label_or_relative(stream, res); + if (op_err) + return op_err; + return PERR_OK; +} + perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret) { perr_t inst_err = parse_type_inst(stream, ret); @@ -192,151 +262,315 @@ perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret) return PERR_OK; } -perr_t parse_next_inst(token_stream_t *stream, inst_t *ret) +perr_t parse_next(token_stream_t *stream, presult_t *ret) { const token_t token = TOKEN_STREAM_AT(stream->data, stream->used); + perr_t perr = PERR_OK; switch (token.type) { case TOKEN_LITERAL_NUMBER: case TOKEN_LITERAL_CHAR: return PERR_EXPECTED_SYMBOL; case TOKEN_NOOP: - *ret = INST_NOOP; + *ret = (presult_t){.instruction = INST_NOOP, .type = PRES_COMPLETE_RESULT}; break; case TOKEN_HALT: - *ret = INST_HALT; + *ret = (presult_t){.instruction = INST_HALT, .type = PRES_COMPLETE_RESULT}; break; case TOKEN_PUSH: - ret->opcode = OP_PUSH_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_PUSH(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_POP: - ret->opcode = OP_POP_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_POP(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_PUSH_REG: - ret->opcode = OP_PUSH_REGISTER_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_PUSH_REG(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MOV: - ret->opcode = OP_MOV_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = 
(presult_t){.instruction = INST_MOV(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_DUP: - ret->opcode = OP_DUP_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_DUP(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MALLOC: - ret->opcode = OP_MALLOC_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_MALLOC(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MSET: - ret->opcode = OP_MSET_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_MSET(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MGET: - ret->opcode = OP_MGET_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_MGET(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MALLOC_STACK: - ret->opcode = OP_MALLOC_STACK_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_MALLOC_STACK(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_MSET_STACK: - ret->opcode = OP_MSET_STACK_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_MSET_STACK(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_MGET_STACK: - ret->opcode = OP_MGET_STACK_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_MGET_STACK(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case 
TOKEN_MDELETE: - ret->opcode = OP_MDELETE; + *ret = + (presult_t){.instruction = INST_MDELETE, .type = PRES_COMPLETE_RESULT}; break; case TOKEN_MSIZE: - ret->opcode = OP_MSIZE; + *ret = (presult_t){.instruction = INST_MSIZE, .type = PRES_COMPLETE_RESULT}; break; case TOKEN_NOT: - ret->opcode = OP_NOT_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_NOT(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_OR: - ret->opcode = OP_OR_BYTE; - return parse_utype_inst(stream, ret); + *ret = + (presult_t){.instruction = INST_OR(BYTE), .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_AND: - ret->opcode = OP_AND_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_AND(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_XOR: - ret->opcode = OP_XOR_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_XOR(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_EQ: - ret->opcode = OP_EQ_BYTE; - return parse_utype_inst(stream, ret); + *ret = + (presult_t){.instruction = INST_EQ(BYTE), .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_LT: - ret->opcode = OP_LT_BYTE; - return parse_type_inst(stream, ret); + *ret = + (presult_t){.instruction = INST_LT(BYTE), .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_LTE: - ret->opcode = OP_LTE_BYTE; - return parse_type_inst(stream, ret); + *ret = (presult_t){.instruction = INST_LTE(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_GT: - ret->opcode = OP_GT_BYTE; - return parse_type_inst(stream, ret); + *ret = + (presult_t){.instruction 
= INST_GT(BYTE), .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_GTE: - ret->opcode = OP_GTE_BYTE; - return parse_type_inst(stream, ret); + *ret = (presult_t){.instruction = INST_GTE(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_PLUS: - ret->opcode = OP_PLUS_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_PLUS(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_SUB: - ret->opcode = OP_SUB_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_SUB(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_MULT: - ret->opcode = OP_MULT_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_MULT(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_PRINT: - ret->opcode = OP_PRINT_BYTE; - return parse_type_inst(stream, ret); + *ret = (presult_t){.instruction = INST_PRINT(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_JUMP: { if (token.str_size == 4 && strncmp(token.str, ".ABS", 4) == 0) { - ret->opcode = OP_JUMP_ABS; + *ret = (presult_t){.instruction = INST_JUMP_ABS(0)}; ++stream->used; if (stream->used >= stream->available) return PERR_EXPECTED_OPERAND; - return parse_word(TOKEN_STREAM_AT(stream->data, stream->used), - &ret->operand.as_word); + return parse_word_label_or_relative(stream, ret); } else if (token.str_size == 9 && strncmp(token.str, ".REGISTER", 9) == 0) { - ret->opcode = OP_JUMP_REGISTER; + *ret = (presult_t){.instruction = INST_JUMP_REGISTER(0), + .type = PRES_COMPLETE_RESULT}; ++stream->used; if (stream->used >= stream->available) return PERR_EXPECTED_OPERAND; return 
parse_word(TOKEN_STREAM_AT(stream->data, stream->used), - &ret->operand.as_word); + &ret->instruction.operand.as_word); } else if (token.str_size == 6 && strncmp(token.str, ".STACK", 6) == 0) - ret->opcode = OP_JUMP_STACK; + *ret = (presult_t){.instruction = INST_JUMP_STACK, + .type = PRES_COMPLETE_RESULT}; else return PERR_UNKNOWN_OPERATOR; break; } case TOKEN_JUMP_IF: { - ret->opcode = OP_JUMP_IF_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_JUMP_IF(BYTE, 0)}; + return parse_jump_inst_operand(stream, ret); } - case TOKEN_SYMBOL: + case TOKEN_SYMBOL: { + size_t label_size = strcspn(token.str, ":"); + if (label_size == strlen(token.str)) + return PERR_UNKNOWN_OPERATOR; + *ret = (presult_t){.type = PRES_LABEL}; + ret->label = calloc(label_size + 1, 1); + memcpy(ret->label, token.str, label_size); + ret->label[label_size] = '\0'; + break; + } + case TOKEN_STAR: default: return PERR_UNKNOWN_OPERATOR; } + return perr; +} + +struct LabelPair +{ + char *label; + size_t label_size; + word addr; +}; + +perr_t process_presults(presult_t *results, size_t res_count, + inst_t **instructions, size_t *inst_count) +{ + darr_t label_pairs = {0}; + darr_init(&label_pairs, sizeof(struct LabelPair)); + *inst_count = 0; + for (size_t i = 0; i < res_count; ++i) + { + presult_t res = results[i]; + switch (res.type) + { + case PRES_LABEL: { + struct LabelPair pair = {0}; + pair.label = res.label; + pair.addr = (*inst_count); + pair.label_size = strlen(res.label); + darr_append_bytes(&label_pairs, (byte *)&pair, sizeof(pair)); + break; + } + case PRES_RELATIVE_ADDRESS: { + s_word offset = res.relative_address; + if (offset < 0 && ((word)(-offset)) > *inst_count) + { + free(label_pairs.data); + return PERR_INVALID_RELATIVE_ADDRESS; + } + results[i].instruction.operand.as_word = ((s_word)*inst_count) + offset; + (*inst_count)++; + break; + } + case PRES_LABEL_ADDRESS: + case PRES_COMPLETE_RESULT: + default: { + (*inst_count)++; + break; 
+ } + } + } + + darr_t instr_darr = {0}; + darr_init(&instr_darr, sizeof(**instructions)); + for (size_t i = 0; i < res_count; ++i) + { + presult_t res = results[i]; + switch (res.type) + { + case PRES_LABEL_ADDRESS: { + inst_t inst = {0}; + for (size_t j = 0; j < (label_pairs.used / sizeof(struct LabelPair)); ++j) + { + struct LabelPair pair = ((struct LabelPair *)label_pairs.data)[j]; + if (pair.label_size == strlen(res.label) && + strncmp(pair.label, res.label, pair.label_size) == 0) + { + inst = res.instruction; + inst.operand = DWORD(pair.addr); + } + } + + if (inst.opcode == OP_NOOP) + { + free(instr_darr.data); + free(label_pairs.data); + return PERR_UNKNOWN_LABEL; + } + darr_append_bytes(&instr_darr, (byte *)&inst, sizeof(inst)); + break; + } + case PRES_RELATIVE_ADDRESS: + case PRES_COMPLETE_RESULT: + darr_append_bytes(&instr_darr, (byte *)&res.instruction, + sizeof(res.instruction)); + case PRES_LABEL: + break; + } + } + + free(label_pairs.data); + *instructions = (inst_t *)instr_darr.data; return PERR_OK; } perr_t parse_stream(token_stream_t *stream, inst_t **ret, size_t *size) { - darr_t instructions = {0}; - darr_init(&instructions, sizeof(inst_t)); + darr_t presults = {0}; + darr_init(&presults, sizeof(presult_t)); while (stream->used < stream->available) { - inst_t inst = INST_NOOP; - perr_t err = parse_next_inst(stream, &inst); + presult_t pres = {0}; + perr_t err = parse_next(stream, &pres); if (err) { - free(instructions.data); + for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i) + { + presult_t res = ((presult_t *)presults.data)[i]; + if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL) + free(res.label); + } + free(presults.data); return err; } - darr_append_bytes(&instructions, (byte *)&inst, sizeof(inst_t)); + darr_append_bytes(&presults, (byte *)&pres, sizeof(presult_t)); ++stream->used; } - *size = instructions.used / sizeof(inst_t); - *ret = (inst_t *)instructions.data; - return PERR_OK; + + perr_t perr = 
process_presults((presult_t *)presults.data, + presults.used / sizeof(presult_t), ret, size); + for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i) + { + presult_t res = ((presult_t *)presults.data)[i]; + if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL) + free(res.label); + } + free(presults.data); + return perr; } |