diff options
-rw-r--r-- | asm/parser.c | 376 | ||||
-rw-r--r-- | asm/parser.h | 19 |
2 files changed, 323 insertions, 72 deletions
diff --git a/asm/parser.c b/asm/parser.c index 08e067c..f9eb975 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -37,6 +37,10 @@ const char *perr_as_cstr(perr_t perr) return "EXPECTED_SYMBOL"; case PERR_EXPECTED_OPERAND: return "EXPECTED_OPERAND"; + case PERR_UNKNOWN_LABEL: + return "UNKNOWN_LABEL"; + case PERR_INVALID_RELATIVE_ADDRESS: + return "INVALID_RELATIVE_ADDRESS"; case PERR_UNKNOWN_OPERATOR: return "UNKNOWN_OPERATOR"; default: @@ -88,6 +92,60 @@ perr_t parse_word(token_t token, word *ret) return PERR_NOT_A_NUMBER; } +perr_t parse_sword(token_t token, i64 *ret) +{ + if (token.type == TOKEN_LITERAL_NUMBER) + { + char *end = NULL; + s_word i = strtoll(token.str, &end, 0); + if (!(end && end[0] == '\0')) + return PERR_NOT_A_NUMBER; + else if (errno == ERANGE) + { + errno = 0; + return PERR_INTEGER_OVERFLOW; + } + *ret = i; + return PERR_OK; + } + else if (token.type == TOKEN_LITERAL_CHAR) + { + *ret = token.str[0]; + return PERR_OK; + } + else + return PERR_NOT_A_NUMBER; +} + +perr_t parse_word_label_or_relative(token_stream_t *stream, presult_t *res) +{ + token_t token = TOKEN_STREAM_AT(stream->data, stream->used); + if (token.type == TOKEN_SYMBOL) + { + res->type = PRES_LABEL_ADDRESS; + res->label = calloc(token.str_size + 1, 1); + memcpy(res->label, token.str, token.str_size); + res->label[token.str_size] = '\0'; + return PERR_OK; + } + else if (token.type == TOKEN_LITERAL_CHAR || + token.type == TOKEN_LITERAL_NUMBER) + { + res->type = PRES_COMPLETE_RESULT; + return parse_word(token, &res->instruction.operand.as_word); + } + else if (token.type == TOKEN_STAR) + { + if (stream->used + 1 >= stream->available) + return PERR_EXPECTED_OPERAND; + res->type = PRES_RELATIVE_ADDRESS; + ++stream->used; + return parse_sword(TOKEN_STREAM_AT(stream->data, stream->used), + &res->relative_address); + } + return PERR_EXPECTED_OPERAND; +} + enum Type { T_NIL = -1, @@ -179,6 +237,18 @@ perr_t parse_utype_inst_with_operand(token_stream_t *stream, inst_t *ret) return PERR_OK; } +perr_t parse_jump_inst_operand(token_stream_t *stream, presult_t *res) +{ + perr_t inst_err = parse_utype_inst(stream, &res->instruction); + if (inst_err) + return inst_err; + ++stream->used; + perr_t op_err = parse_word_label_or_relative(stream, res); + if (op_err) + return op_err; + return PERR_OK; +} + perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret) { perr_t inst_err = parse_type_inst(stream, ret); @@ -192,151 +262,315 @@ perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret) return PERR_OK; } -perr_t parse_next_inst(token_stream_t *stream, inst_t *ret) +perr_t parse_next(token_stream_t *stream, presult_t *ret) { const token_t token = TOKEN_STREAM_AT(stream->data, stream->used); + perr_t perr = PERR_OK; switch (token.type) { case TOKEN_LITERAL_NUMBER: case TOKEN_LITERAL_CHAR: return PERR_EXPECTED_SYMBOL; case TOKEN_NOOP: - *ret = INST_NOOP; + *ret = (presult_t){.instruction = INST_NOOP, .type = PRES_COMPLETE_RESULT}; break; case TOKEN_HALT: - *ret = INST_HALT; + *ret = (presult_t){.instruction = INST_HALT, .type = PRES_COMPLETE_RESULT}; break; case TOKEN_PUSH: - ret->opcode = OP_PUSH_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_PUSH(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_POP: - ret->opcode = OP_POP_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_POP(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_PUSH_REG: - ret->opcode = OP_PUSH_REGISTER_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_PUSH_REG(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MOV: - ret->opcode = OP_MOV_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_MOV(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_DUP: - ret->opcode = OP_DUP_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_DUP(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MALLOC: - ret->opcode = OP_MALLOC_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_MALLOC(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MSET: - ret->opcode = OP_MSET_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_MSET(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MGET: - ret->opcode = OP_MGET_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_MGET(BYTE, 0), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst_with_operand(stream, &ret->instruction); + break; case TOKEN_MALLOC_STACK: - ret->opcode = OP_MALLOC_STACK_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_MALLOC_STACK(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_MSET_STACK: - ret->opcode = OP_MSET_STACK_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_MSET_STACK(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_MGET_STACK: - ret->opcode = OP_MGET_STACK_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_MGET_STACK(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_MDELETE: - ret->opcode = OP_MDELETE; + *ret = + (presult_t){.instruction = INST_MDELETE, .type = PRES_COMPLETE_RESULT}; break; case TOKEN_MSIZE: - ret->opcode = OP_MSIZE; + *ret = (presult_t){.instruction = INST_MSIZE, .type = PRES_COMPLETE_RESULT}; break; case TOKEN_NOT: - ret->opcode = OP_NOT_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_NOT(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_OR: - ret->opcode = OP_OR_BYTE; - return parse_utype_inst(stream, ret); + *ret = + (presult_t){.instruction = INST_OR(BYTE), .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_AND: - ret->opcode = OP_AND_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_AND(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_XOR: - ret->opcode = OP_XOR_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_XOR(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_EQ: - ret->opcode = OP_EQ_BYTE; - return parse_utype_inst(stream, ret); + *ret = + (presult_t){.instruction = INST_EQ(BYTE), .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_LT: - ret->opcode = OP_LT_BYTE; - return parse_type_inst(stream, ret); + *ret = + (presult_t){.instruction = INST_LT(BYTE), .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_LTE: - ret->opcode = OP_LTE_BYTE; - return parse_type_inst(stream, ret); + *ret = (presult_t){.instruction = INST_LTE(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_GT: - ret->opcode = OP_GT_BYTE; - return parse_type_inst(stream, ret); + *ret = + (presult_t){.instruction = INST_GT(BYTE), .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_GTE: - ret->opcode = OP_GTE_BYTE; - return parse_type_inst(stream, ret); + *ret = (presult_t){.instruction = INST_GTE(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_PLUS: - ret->opcode = OP_PLUS_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_PLUS(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_SUB: - ret->opcode = OP_SUB_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_SUB(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_MULT: - ret->opcode = OP_MULT_BYTE; - return parse_utype_inst(stream, ret); + *ret = (presult_t){.instruction = INST_MULT(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_utype_inst(stream, &ret->instruction); + break; case TOKEN_PRINT: - ret->opcode = OP_PRINT_BYTE; - return parse_type_inst(stream, ret); + *ret = (presult_t){.instruction = INST_PRINT(BYTE), + .type = PRES_COMPLETE_RESULT}; + perr = parse_type_inst(stream, &ret->instruction); + break; case TOKEN_JUMP: { if (token.str_size == 4 && strncmp(token.str, ".ABS", 4) == 0) { - ret->opcode = OP_JUMP_ABS; + *ret = (presult_t){.instruction = INST_JUMP_ABS(0)}; ++stream->used; if (stream->used >= stream->available) return PERR_EXPECTED_OPERAND; - return parse_word(TOKEN_STREAM_AT(stream->data, stream->used), - &ret->operand.as_word); + return parse_word_label_or_relative(stream, ret); } else if (token.str_size == 9 && strncmp(token.str, ".REGISTER", 9) == 0) { - ret->opcode = OP_JUMP_REGISTER; + *ret = (presult_t){.instruction = INST_JUMP_REGISTER(0), + .type = PRES_COMPLETE_RESULT}; ++stream->used; if (stream->used >= stream->available) return PERR_EXPECTED_OPERAND; return parse_word(TOKEN_STREAM_AT(stream->data, stream->used), - &ret->operand.as_word); + &ret->instruction.operand.as_word); } else if (token.str_size == 6 && strncmp(token.str, ".STACK", 6) == 0) - ret->opcode = OP_JUMP_STACK; + *ret = (presult_t){.instruction = INST_JUMP_STACK, + .type = PRES_COMPLETE_RESULT}; else return PERR_UNKNOWN_OPERATOR; break; } case TOKEN_JUMP_IF: { - ret->opcode = OP_JUMP_IF_BYTE; - return parse_utype_inst_with_operand(stream, ret); + *ret = (presult_t){.instruction = INST_JUMP_IF(BYTE, 0)}; + return parse_jump_inst_operand(stream, ret); } - case TOKEN_SYMBOL: + case TOKEN_SYMBOL: { + size_t label_size = strcspn(token.str, ":"); + if (label_size == strlen(token.str)) + return PERR_UNKNOWN_OPERATOR; + *ret = (presult_t){.type = PRES_LABEL}; + ret->label = calloc(label_size + 1, 1); + memcpy(ret->label, token.str, label_size); + ret->label[label_size] = '\0'; + break; + } + case TOKEN_STAR: default: return PERR_UNKNOWN_OPERATOR; } + return perr; +} + +struct LabelPair +{ + char *label; + size_t label_size; + word addr; +}; + +perr_t process_presults(presult_t *results, size_t res_count, + inst_t **instructions, size_t *inst_count) +{ + darr_t label_pairs = {0}; + darr_init(&label_pairs, sizeof(struct LabelPair)); + *inst_count = 0; + for (size_t i = 0; i < res_count; ++i) + { + presult_t res = results[i]; + switch (res.type) + { + case PRES_LABEL: { + struct LabelPair pair = {0}; + pair.label = res.label; + pair.addr = (*inst_count); + pair.label_size = strlen(res.label); + darr_append_bytes(&label_pairs, (byte *)&pair, sizeof(pair)); + break; + } + case PRES_RELATIVE_ADDRESS: { + s_word offset = res.relative_address; + if (offset < 0 && ((word)(-offset)) > *inst_count) + { + free(label_pairs.data); + return PERR_INVALID_RELATIVE_ADDRESS; + } + results[i].instruction.operand.as_word = ((s_word)*inst_count) + offset; + (*inst_count)++; + break; + } + case PRES_LABEL_ADDRESS: + case PRES_COMPLETE_RESULT: + default: { + (*inst_count)++; + break; + } + } + } + + darr_t instr_darr = {0}; + darr_init(&instr_darr, sizeof(**instructions)); + for (size_t i = 0; i < res_count; ++i) + { + presult_t res = results[i]; + switch (res.type) + { + case PRES_LABEL_ADDRESS: { + inst_t inst = {0}; + for (size_t j = 0; j < (label_pairs.used / sizeof(struct LabelPair)); ++j) + { + struct LabelPair pair = ((struct LabelPair *)label_pairs.data)[j]; + if (pair.label_size == strlen(res.label) && + strncmp(pair.label, res.label, pair.label_size) == 0) + { + inst = res.instruction; + inst.operand = DWORD(pair.addr); + } + } + + if (inst.opcode == OP_NOOP) + { + free(instr_darr.data); + free(label_pairs.data); + return PERR_UNKNOWN_LABEL; + } + darr_append_bytes(&instr_darr, (byte *)&inst, sizeof(inst)); + break; + } + case PRES_RELATIVE_ADDRESS: + case PRES_COMPLETE_RESULT: + darr_append_bytes(&instr_darr, (byte *)&res.instruction, + sizeof(res.instruction)); + case PRES_LABEL: + break; + } + } + + free(label_pairs.data); + *instructions = (inst_t *)instr_darr.data; return PERR_OK; } perr_t parse_stream(token_stream_t *stream, inst_t **ret, size_t *size) { - darr_t instructions = {0}; - darr_init(&instructions, sizeof(inst_t)); + darr_t presults = {0}; + darr_init(&presults, sizeof(presult_t)); while (stream->used < stream->available) { - inst_t inst = INST_NOOP; - perr_t err = parse_next_inst(stream, &inst); + presult_t pres = {0}; + perr_t err = parse_next(stream, &pres); if (err) { - free(instructions.data); + for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i) + { + presult_t res = ((presult_t *)presults.data)[i]; + if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL) + free(res.label); + } + free(presults.data); return err; } - darr_append_bytes(&instructions, (byte *)&inst, sizeof(inst_t)); + darr_append_bytes(&presults, (byte *)&pres, sizeof(presult_t)); ++stream->used; } - *size = instructions.used / sizeof(inst_t); - *ret = (inst_t *)instructions.data; - return PERR_OK; + + perr_t perr = process_presults((presult_t *)presults.data, + presults.used / sizeof(presult_t), ret, size); + for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i) + { + presult_t res = ((presult_t *)presults.data)[i]; + if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL) + free(res.label); + } + free(presults.data); + return perr; } diff --git a/asm/parser.h b/asm/parser.h index ee12b40..0a65310 100644 --- a/asm/parser.h +++ b/asm/parser.h @@ -27,11 +27,28 @@ typedef enum PERR_EXPECTED_SYMBOL, PERR_EXPECTED_OPERAND, PERR_UNKNOWN_OPERATOR, + PERR_INVALID_RELATIVE_ADDRESS, + PERR_UNKNOWN_LABEL, } perr_t; const char *perr_as_cstr(perr_t); -perr_t parse_next_inst(token_stream_t *, inst_t *); +typedef struct +{ + inst_t instruction; + char *label; + s_word relative_address; + enum PResult_Type + { + PRES_LABEL = 0, + PRES_LABEL_ADDRESS, + PRES_RELATIVE_ADDRESS, + PRES_COMPLETE_RESULT, + } type; +} presult_t; + +perr_t parse_next(token_stream_t *, presult_t *); +perr_t process_presults(presult_t *, size_t, inst_t **, size_t *); perr_t parse_stream(token_stream_t *, inst_t **, size_t *); #endif |