diff options
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | asm/parser.c | 178 | 
2 files changed, 179 insertions, 1 deletions
| @@ -31,7 +31,7 @@ VM_OUT=$(DIST)/ovm.out  ## ASSEMBLY setup  ASM_DIST=$(DIST)/asm  ASM_SRC=asm -ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.c) +ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.c parser.c)  ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.c=$(ASM_DIST)/%.o)  ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d  ASM_CFLAGS=$(CFLAGS) diff --git a/asm/parser.c b/asm/parser.c index 5521585..5d96c72 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -9,3 +9,181 @@   * Author: Aryadev Chavali   * Description: Parser for assembly language   */ + +#include <assert.h> +#include <errno.h> +#include <stdbool.h> +#include <string.h> + +#include "./parser.h" + +#define OPCODE_ON_TYPE(BASE_CODE, TYPE) + +opcode_t get_typed_opcode(opcode_t base_code, data_type_t type) +{ +  switch (type) +  { +  case DATA_TYPE_NIL: +    // TODO: Parse error (Not something we should pass here) +    return 0; +  case DATA_TYPE_BYTE: +    return base_code; +  case DATA_TYPE_HWORD: +    return base_code + 1; +  case DATA_TYPE_WORD: +    return base_code + 2; +  default: +    // TODO: Parse error (EXPECTED_TYPE_TAG) +    return 0; +  } +} + +data_type_t parse_data_type(const char *cstr, size_t length) +{ +  if (length >= 4 && strncmp(cstr, "BYTE", 4) == 0) +    return DATA_TYPE_BYTE; +  else if (length >= 5 && strncmp(cstr, "HWORD", 5) == 0) +    return DATA_TYPE_HWORD; +  else if (length >= 4 && strncmp(cstr, "WORD", 4) == 0) +    return DATA_TYPE_WORD; +  else +    return DATA_TYPE_NIL; +} + +word parse_word(token_t token) +{ +  assert(token.type == TOKEN_LITERAL_NUMBER); +  bool is_negative = token.str_size > 1 && token.str[0] == '-'; +  word w           = 0; +  if (is_negative) +  { +    char *end = NULL; +    // TODO: Make a standardised type of the same size as word in +    // base.h +    int64_t i = strtoll(token.str, &end, 0); +    if (!(end && end[0] == '\0')) +      // TODO: Parse error (NOT_A_NUMBER) +      return 0; +    else if (errno == ERANGE) +    { +      // TODO: Parse error (INTEGER_OVERFLOW) +      errno = 0; +      return 0; +    } +    memcpy(&w, &i, sizeof(w)); +  } +  else +  { +    char *end = NULL; +    w         = strtoull(token.str, &end, 0); +    if (!(end && end[0] == '\0')) +      // TODO: Parse error (NOT_A_NUMBER) +      return 0; +    else if (errno == ERANGE) +    { +      // TODO: Parse error (WORD_OVERFLOW) +      errno = 0; +      return 0; +    } +  } +  return w; +} + +inst_t parse_next_inst(token_stream_t *stream) +{ +  token_t token = TOKEN_STREAM_AT(stream->data, stream->used); +  if (token.type != TOKEN_SYMBOL) +    // TODO Parser Error (EXPECTED_SYMBOL) +    return INST_NOOP; +  inst_t inst  = {0}; +  char *opcode = token.str; +  if (token.str_size >= 4 && strncmp(opcode, "HALT", 4) == 0) +  { +    inst = INST_HALT; +    ++stream->used; +  } +  else if (token.str_size >= 5 && strncmp(opcode, "PUSH.", 5) == 0) +  { +    data_type_t type = parse_data_type(opcode + 5, token.str_size - 5); +    inst.opcode      = get_typed_opcode(OP_PUSH_BYTE, type); +    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH)) +      // TODO: Parse error (EXPECTED_TYPE_TAG) +      return INST_NOOP; +    else if (stream->used == stream->available - 1) +      // TODO: Parse error (EXPECTED_OPERAND) +      return INST_NOOP; +    inst.operand = +        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); +    stream->used += 2; +  } +  else if (token.str_size >= 4 && strncmp(opcode, "POP.", 4) == 0) +  { +    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); +    inst.opcode      = get_typed_opcode(OP_POP_BYTE, type); +    if (!OPCODE_IS_TYPE(inst.opcode, OP_POP)) +      // TODO: Parse error (EXPECTED_TYPE_TAG) +      return INST_NOOP; +    ++stream->used; +  } +  else if (token.str_size >= 9 && strncmp(opcode, "PUSH-REG.", 9) == 0) +  { +    data_type_t type = parse_data_type(opcode + 9, token.str_size - 9); +    inst.opcode      = get_typed_opcode(OP_PUSH_REGISTER_BYTE, type); +    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH_REGISTER)) +      // TODO: Parse error (EXPECTED_TYPE_TAG) +      return INST_NOOP; +    else if (stream->used == stream->available - 1) +      // TODO: Parse error (EXPECTED_OPERAND) +      return INST_NOOP; +    inst.operand = +        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); +    stream->used += 2; +  } +  else if (token.str_size >= 4 && strncmp(opcode, "MOV.", 4) == 0) +  { +    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); +    inst.opcode      = get_typed_opcode(OP_MOV_BYTE, type); +    if (!OPCODE_IS_TYPE(inst.opcode, OP_MOV)) +      // TODO: Parse error (EXPECTED_TYPE_TAG) +      return INST_NOOP; +    else if (stream->used == stream->available - 1) +      // TODO: Parse error (EXPECTED_OPERAND) +      return INST_NOOP; +    inst.operand = +        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); +    stream->used += 2; +  } +  else if (token.str_size >= 4 && strncmp(opcode, "DUP.", 4) == 0) +  { +    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); +    inst.opcode      = get_typed_opcode(OP_DUP_BYTE, type); +    if (!OPCODE_IS_TYPE(inst.opcode, OP_DUP)) +      // TODO: Parse error (EXPECTED_TYPE_TAG) +      return INST_NOOP; +    else if (stream->used == stream->available - 1) +      // TODO: Parse error (EXPECTED_OPERAND) +      return INST_NOOP; +    inst.operand = +        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); +    stream->used += 2; +  } +  else +  { +    // TODO: Parse error (UNKNOWN_OPCODE) +    return INST_NOOP; +  } +  return inst; +} + +inst_t *parse_stream(token_stream_t *stream, size_t *size) +{ +  darr_t instructions = {0}; +  darr_init(&instructions, sizeof(inst_t)); +  while (stream->used < stream->available) +  { +    inst_t inst = parse_next_inst(stream); +    darr_append_bytes(&instructions, (byte *)&inst, sizeof(inst_t)); +  } +  *size = instructions.used / sizeof(inst_t); +  return (inst_t *)instructions.data; +} | 
