diff options
Diffstat (limited to 'asm/parser.c')
| -rw-r--r-- | asm/parser.c | 295 | 
1 files changed, 206 insertions, 89 deletions
diff --git a/asm/parser.c b/asm/parser.c index 5d96c72..cc97e1c 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -19,21 +19,41 @@  #define OPCODE_ON_TYPE(BASE_CODE, TYPE) +const char *perr_as_cstr(perr_t perr) +{ +  switch (perr) +  { +  case PERR_OK: +    return "OK"; +  case PERR_INTEGER_OVERFLOW: +    return "INTEGER_OVERFLOW"; +  case PERR_NOT_A_NUMBER: +    return "NOT_A_NUMBER"; +  case PERR_EXPECTED_TYPE: +    return "EXPECTED_TYPE"; +  case PERR_EXPECTED_SYMBOL: +    return "EXPECTED_SYMBOL"; +  case PERR_EXPECTED_OPERAND: +    return "EXPECTED_OPERAND"; +  case PERR_UNKNOWN_OPERATOR: +    return "UNKNOWN_OPERATOR"; +  default: +    return ""; +  } +} +  opcode_t get_typed_opcode(opcode_t base_code, data_type_t type)  {    switch (type)    { -  case DATA_TYPE_NIL: -    // TODO: Parse error (Not something we should pass here) -    return 0;    case DATA_TYPE_BYTE:      return base_code;    case DATA_TYPE_HWORD:      return base_code + 1;    case DATA_TYPE_WORD:      return base_code + 2; +  case DATA_TYPE_NIL:    default: -    // TODO: Parse error (EXPECTED_TYPE_TAG)      return 0;    }  } @@ -50,7 +70,7 @@ data_type_t parse_data_type(const char *cstr, size_t length)      return DATA_TYPE_NIL;  } -word parse_word(token_t token) +perr_t parse_word(token_t token, word *ret)  {    assert(token.type == TOKEN_LITERAL_NUMBER);    bool is_negative = token.str_size > 1 && token.str[0] == '-'; @@ -62,14 +82,13 @@ word parse_word(token_t token)      // base.h      int64_t i = strtoll(token.str, &end, 0);      if (!(end && end[0] == '\0')) -      // TODO: Parse error (NOT_A_NUMBER) -      return 0; +      return PERR_NOT_A_NUMBER;      else if (errno == ERANGE)      { -      // TODO: Parse error (INTEGER_OVERFLOW)        errno = 0; -      return 0; +      return PERR_INTEGER_OVERFLOW;      } +    // Copy bits, do not cast      memcpy(&w, &i, sizeof(w));    }    else @@ -77,113 +96,211 @@ word parse_word(token_t token)      char *end = NULL;      w         = strtoull(token.str, &end, 0);      if (!(end && end[0] == '\0')) -      // TODO: Parse error (NOT_A_NUMBER) -      return 0; +      return PERR_NOT_A_NUMBER;      else if (errno == ERANGE)      { -      // TODO: Parse error (WORD_OVERFLOW)        errno = 0; -      return 0; +      return PERR_INTEGER_OVERFLOW;      }    } -  return w; +  *ret = w; +  return PERR_OK; +} + +perr_t parse_inst_with_type(token_stream_t *stream, inst_t *ret, +                            size_t oplength) +{ +  // Assume the base type OP_*_BYTE is in ret->opcode +  token_t token    = TOKEN_STREAM_AT(stream->data, stream->used); +  char *opcode     = token.str; +  data_type_t type = parse_data_type(opcode + oplength, +                                     WORD_SAFE_SUB(token.str_size, oplength)); +  if (type == DATA_TYPE_NIL) +    return PERR_EXPECTED_TYPE; +  ++stream->used; +  ret->opcode = get_typed_opcode(ret->opcode, type); +  return PERR_OK; +} + +perr_t parse_inst_with_operand(token_stream_t *stream, inst_t *ret) +{ +  // Parse operand +  perr_t word_parse_error = parse_word( +      TOKEN_STREAM_AT(stream->data, stream->used), &ret->operand.as_word); +  if (word_parse_error) +    return word_parse_error; +  ++stream->used; +  return PERR_OK;  } -inst_t parse_next_inst(token_stream_t *stream) +perr_t parse_inst_with_typed_operand(token_stream_t *stream, inst_t *ret, +                                     size_t oplength)  { -  token_t token = TOKEN_STREAM_AT(stream->data, stream->used); +  perr_t type_parse_error = parse_inst_with_type(stream, ret, oplength); +  if (type_parse_error) +    return type_parse_error; + +  // Parse operand +  perr_t word_parse_error = parse_word( +      TOKEN_STREAM_AT(stream->data, stream->used), &ret->operand.as_word); +  if (word_parse_error) +    return word_parse_error; +  ++stream->used; +  return PERR_OK; +} + +perr_t parse_next_inst(token_stream_t *stream, inst_t *ret) +{ +  const token_t token = TOKEN_STREAM_AT(stream->data, stream->used);    if (token.type != TOKEN_SYMBOL) -    // TODO Parser Error (EXPECTED_SYMBOL) -    return INST_NOOP; +    return PERR_EXPECTED_SYMBOL;    inst_t inst  = {0};    char *opcode = token.str; -  if (token.str_size >= 4 && strncmp(opcode, "HALT", 4) == 0) +  if (token.str_size == 4 && strncmp(opcode, "NOOP", 4) == 0)    { -    inst = INST_HALT; +    inst = INST_NOOP;      ++stream->used;    } -  else if (token.str_size >= 5 && strncmp(opcode, "PUSH.", 5) == 0) -  { -    data_type_t type = parse_data_type(opcode + 5, token.str_size - 5); -    inst.opcode      = get_typed_opcode(OP_PUSH_BYTE, type); -    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH)) -      // TODO: Parse error (EXPECTED_TYPE_TAG) -      return INST_NOOP; -    else if (stream->used == stream->available - 1) -      // TODO: Parse error (EXPECTED_OPERAND) -      return INST_NOOP; -    inst.operand = -        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); -    stream->used += 2; -  } -  else if (token.str_size >= 4 && strncmp(opcode, "POP.", 4) == 0) -  { -    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); -    inst.opcode      = get_typed_opcode(OP_POP_BYTE, type); -    if (!OPCODE_IS_TYPE(inst.opcode, OP_POP)) -      // TODO: Parse error (EXPECTED_TYPE_TAG) -      return INST_NOOP; +  else if (token.str_size == 4 && strncmp(opcode, "HALT", 4) == 0) +  { +    inst = INST_HALT;      ++stream->used;    } -  else if (token.str_size >= 9 && strncmp(opcode, "PUSH-REG.", 9) == 0) -  { -    data_type_t type = parse_data_type(opcode + 9, token.str_size - 9); -    inst.opcode      = get_typed_opcode(OP_PUSH_REGISTER_BYTE, type); -    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH_REGISTER)) -      // TODO: Parse error (EXPECTED_TYPE_TAG) -      return INST_NOOP; -    else if (stream->used == stream->available - 1) -      // TODO: Parse error (EXPECTED_OPERAND) -      return INST_NOOP; -    inst.operand = -        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); -    stream->used += 2; -  } -  else if (token.str_size >= 4 && strncmp(opcode, "MOV.", 4) == 0) -  { -    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); -    inst.opcode      = get_typed_opcode(OP_MOV_BYTE, type); -    if (!OPCODE_IS_TYPE(inst.opcode, OP_MOV)) -      // TODO: Parse error (EXPECTED_TYPE_TAG) -      return INST_NOOP; -    else if (stream->used == stream->available - 1) -      // TODO: Parse error (EXPECTED_OPERAND) -      return INST_NOOP; -    inst.operand = -        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); -    stream->used += 2; -  } -  else if (token.str_size >= 4 && strncmp(opcode, "DUP.", 4) == 0) -  { -    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4); -    inst.opcode      = get_typed_opcode(OP_DUP_BYTE, type); -    if (!OPCODE_IS_TYPE(inst.opcode, OP_DUP)) -      // TODO: Parse error (EXPECTED_TYPE_TAG) -      return INST_NOOP; -    else if (stream->used == stream->available - 1) -      // TODO: Parse error (EXPECTED_OPERAND) -      return INST_NOOP; -    inst.operand = -        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1))); -    stream->used += 2; +  else if (token.str_size >= 4 && strncmp(opcode, "PUSH", 4) == 0) +  { +    size_t oplen = 5; +    if (token.str_size >= 8 && strncmp(opcode, "PUSH-REG", 8) == 0) +    { +      oplen       = 9; +      ret->opcode = OP_PUSH_REGISTER_BYTE; +    } +    else +      ret->opcode = OP_PUSH_BYTE; +    return parse_inst_with_typed_operand(stream, ret, oplen);    } -  else +  else if (token.str_size >= 3 && strncmp(opcode, "POP", 3) == 0) +  { +    ret->opcode = OP_POP_BYTE; +    return parse_inst_with_type(stream, ret, 4); +  } +  else if (token.str_size >= 3 && strncmp(opcode, "MOV", 3) == 0) +  { +    ret->opcode = OP_MOV_BYTE; +    return parse_inst_with_typed_operand(stream, ret, 4); +  } +  else if (token.str_size >= 3 && strncmp(opcode, "DUP", 3) == 0)    { -    // TODO: Parse error (UNKNOWN_OPCODE) -    return INST_NOOP; +    ret->opcode = OP_DUP_BYTE; +    return parse_inst_with_typed_operand(stream, ret, 4);    } -  return inst; +  else if (token.str_size >= 3 && strncmp(opcode, "NOT", 3) == 0) +  { +    ret->opcode = OP_NOT_BYTE; +    return parse_inst_with_type(stream, ret, 4); +  } +  else if (token.str_size >= 2 && strncmp(opcode, "OR", 2) == 0) +  { +    ret->opcode = OP_OR_BYTE; +    return parse_inst_with_type(stream, ret, 3); +  } +  else if (token.str_size >= 3 && strncmp(opcode, "AND", 3) == 0) +  { +    ret->opcode = OP_AND_BYTE; +    return parse_inst_with_type(stream, ret, 4); +  } +  else if (token.str_size >= 3 && strncmp(opcode, "XOR", 3) == 0) +  { +    ret->opcode = OP_XOR_BYTE; +    return parse_inst_with_type(stream, ret, 4); +  } +  else if (token.str_size >= 2 && strncmp(opcode, "EQ", 2) == 0) +  { +    ret->opcode = OP_EQ_BYTE; +    return parse_inst_with_type(stream, ret, 3); +  } +  else if (token.str_size >= 4 && strncmp(opcode, "PLUS", 4) == 0) +  { +    ret->opcode = OP_PLUS_BYTE; +    return parse_inst_with_type(stream, ret, 5); +  } +  else if (token.str_size >= 6 && strncmp(opcode, "PRINT.", 6) == 0) +  { +    const char *type       = opcode + 6; +    const size_t type_size = WORD_SAFE_SUB(token.str_size, 6); +    if (type_size == 4 && strncmp(type, "CHAR", 4) == 0) +      inst.opcode = OP_PRINT_CHAR; +    else if (type_size == 4 && strncmp(type, "BYTE", 4) == 0) +      inst.opcode = OP_PRINT_BYTE; +    else if (type_size == 3 && strncmp(type, "INT", 3) == 0) +      inst.opcode = OP_PRINT_INT; +    else if (type_size == 5 && strncmp(type, "HWORD", 5) == 0) +      inst.opcode = OP_PRINT_HWORD; +    else if (type_size == 4 && strncmp(type, "LONG", 4) == 0) +      inst.opcode = OP_PRINT_LONG; +    else if (type_size == 4 && strncmp(type, "WORD", 4) == 0) +      inst.opcode = OP_PRINT_WORD; +    else +      return PERR_UNKNOWN_OPERATOR; +    ++stream->used; +  } +  else if (token.str_size >= 5 && strncmp(opcode, "JUMP.", 5) == 0) +  { +    const char *type       = opcode + 5; +    const size_t type_size = WORD_SAFE_SUB(token.str_size, 5); +    if (type_size == 3 && strncmp(type, "ABS", 3) == 0) +    { +      ret->opcode = OP_JUMP_ABS; +      ++stream->used; +      return parse_inst_with_operand(stream, ret); +    } +    else if (type_size == 5 && strncmp(type, "STACK", 5) == 0) +      inst.opcode = OP_JUMP_STACK; +    else if (type_size == 8 && strncmp(type, "REGISTER", 8) == 0) +    { +      ret->opcode = OP_JUMP_REGISTER; +      ++stream->used; +      return parse_inst_with_operand(stream, ret); +    } +    else if (type_size >= 2 && strncmp(type, "IF", 2) == 0) +    { +      // Parse a typed operand JUMP.IF.<TYPE> +      token_t prev    = TOKEN_STREAM_AT(stream->data, stream->used); +      size_t prev_ptr = stream->used; + +      TOKEN_STREAM_AT(stream->data, stream->used).str      = (char *)type; +      TOKEN_STREAM_AT(stream->data, stream->used).str_size = type_size; +      ret->opcode                                          = OP_JUMP_IF_BYTE; +      perr_t perr = parse_inst_with_typed_operand(stream, ret, 3); + +      TOKEN_STREAM_AT(stream->data, prev_ptr) = prev; +      return perr; +    } +    else +      return PERR_UNKNOWN_OPERATOR; +    ++stream->used; +  } +  else +    return PERR_UNKNOWN_OPERATOR; +  *ret = inst; +  return PERR_OK;  } -inst_t *parse_stream(token_stream_t *stream, size_t *size) +perr_t parse_stream(token_stream_t *stream, inst_t **ret, size_t *size)  {    darr_t instructions = {0};    darr_init(&instructions, sizeof(inst_t));    while (stream->used < stream->available)    { -    inst_t inst = parse_next_inst(stream); +    inst_t inst = INST_NOOP; +    perr_t err  = parse_next_inst(stream, &inst); +    if (err) +    { +      free(instructions.data); +      return err; +    }      darr_append_bytes(&instructions, (byte *)&inst, sizeof(inst_t));    }    *size = instructions.used / sizeof(inst_t); -  return (inst_t *)instructions.data; +  *ret  = (inst_t *)instructions.data; +  return PERR_OK;  }  | 
