2 files changed, 221 insertions, 91 deletions
diff --git a/asm/parser.c b/asm/parser.c
index 5d96c72..cc97e1c 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -19,21 +19,41 @@
 
 #define OPCODE_ON_TYPE(BASE_CODE, TYPE)
 
+// Gives the name of a parser error code as a string literal. Each name is
+// the enumerator's name without the PERR_ prefix; a value that matches no
+// enumerator yields the empty string.
+const char *perr_as_cstr(perr_t perr)
+{
+  if (perr == PERR_OK)
+    return "OK";
+  if (perr == PERR_INTEGER_OVERFLOW)
+    return "INTEGER_OVERFLOW";
+  if (perr == PERR_NOT_A_NUMBER)
+    return "NOT_A_NUMBER";
+  if (perr == PERR_EXPECTED_TYPE)
+    return "EXPECTED_TYPE";
+  if (perr == PERR_EXPECTED_SYMBOL)
+    return "EXPECTED_SYMBOL";
+  if (perr == PERR_EXPECTED_OPERAND)
+    return "EXPECTED_OPERAND";
+  if (perr == PERR_UNKNOWN_OPERATOR)
+    return "UNKNOWN_OPERATOR";
+  // Not one of the known codes
+  return "";
+}
+
 opcode_t get_typed_opcode(opcode_t base_code, data_type_t type)
 {
   switch (type)
   {
-  case DATA_TYPE_NIL:
-    // TODO: Parse error (Not something we should pass here)
-    return 0;
   case DATA_TYPE_BYTE:
     return base_code;
   case DATA_TYPE_HWORD:
     return base_code + 1;
   case DATA_TYPE_WORD:
     return base_code + 2;
+  case DATA_TYPE_NIL: // falls through: NIL gives 0, same as an unrecognised type
   default:
-    // TODO: Parse error (EXPECTED_TYPE_TAG)
     return 0;
   }
 }
@@ -50,7 +70,7 @@ data_type_t parse_data_type(const char *cstr, size_t length)
     return DATA_TYPE_NIL;
 }
 
-word parse_word(token_t token)
+perr_t parse_word(token_t token, word *ret)
 {
   assert(token.type == TOKEN_LITERAL_NUMBER);
   bool is_negative = token.str_size > 1 && token.str[0] == '-';
@@ -62,14 +82,13 @@ word parse_word(token_t token)
     // base.h
     int64_t i = strtoll(token.str, &end, 0);
     if (!(end && end[0] == '\0'))
-      // TODO: Parse error (NOT_A_NUMBER)
-      return 0;
+      return PERR_NOT_A_NUMBER;
     else if (errno == ERANGE)
     {
-      // TODO: Parse error (INTEGER_OVERFLOW)
       errno = 0;
-      return 0;
+      return PERR_INTEGER_OVERFLOW;
     }
+    // Copy bits, do not cast
     memcpy(&w, &i, sizeof(w));
   }
   else
@@ -77,113 +96,211 @@ word parse_word(token_t token)
     char *end = NULL;
     w         = strtoull(token.str, &end, 0);
     if (!(end && end[0] == '\0'))
-      // TODO: Parse error (NOT_A_NUMBER)
-      return 0;
+      return PERR_NOT_A_NUMBER;
     else if (errno == ERANGE)
     {
-      // TODO: Parse error (WORD_OVERFLOW)
       errno = 0;
-      return 0;
+      return PERR_INTEGER_OVERFLOW;
     }
   }
-  return w;
+  *ret = w;
+  return PERR_OK;
+}
+
+perr_t parse_inst_with_type(token_stream_t *stream, inst_t *ret,
+                            size_t oplength)
+{
+  // Expects ret->opcode to hold the OP_*_BYTE base opcode; the type tag is
+  // whatever follows the first oplength characters of the current token
+  const token_t current = TOKEN_STREAM_AT(stream->data, stream->used);
+  const size_t tag_size = WORD_SAFE_SUB(current.str_size, oplength);
+  const data_type_t tag = parse_data_type(current.str + oplength, tag_size);
+  if (tag == DATA_TYPE_NIL)
+    return PERR_EXPECTED_TYPE;
+  ret->opcode = get_typed_opcode(ret->opcode, tag);
+  ++stream->used;
+  return PERR_OK;
+}
+
+perr_t parse_inst_with_operand(token_stream_t *stream, inst_t *ret)
+{
+  // Parse the current token as ret's word operand, consuming it on success
+  if (stream->used >= stream->available) // nothing left to read an operand from
+    return PERR_EXPECTED_OPERAND;
+  perr_t word_parse_error = parse_word(
+      TOKEN_STREAM_AT(stream->data, stream->used), &ret->operand.as_word);
+  stream->used += (word_parse_error == PERR_OK) ? 1 : 0;
+  return word_parse_error;
 }
 
-inst_t parse_next_inst(token_stream_t *stream)
+perr_t parse_inst_with_typed_operand(token_stream_t *stream, inst_t *ret,
+                                     size_t oplength)
 {
-  token_t token = TOKEN_STREAM_AT(stream->data, stream->used);
+  // Resolve the type tag (this consumes the opcode token), then the operand
+  perr_t perr = parse_inst_with_type(stream, ret, oplength);
+  if (perr)
+    return perr;
+  if (stream->used >= stream->available) // opcode was the final token
+    return PERR_EXPECTED_OPERAND;
+  perr = parse_word(TOKEN_STREAM_AT(stream->data, stream->used),
+                    &ret->operand.as_word);
+  if (perr == PERR_OK)
+    ++stream->used; // the operand is consumed only if it parsed
+  return perr;
+}
+
+perr_t parse_next_inst(token_stream_t *stream, inst_t *ret)
+{ // Parse the instruction whose mnemonic is the token at stream->used
+  const token_t token = TOKEN_STREAM_AT(stream->data, stream->used);
   if (token.type != TOKEN_SYMBOL)
-    // TODO Parser Error (EXPECTED_SYMBOL)
-    return INST_NOOP;
+    return PERR_EXPECTED_SYMBOL; // every instruction starts with a symbol token
   inst_t inst  = {0};
   char *opcode = token.str;
-  if (token.str_size >= 4 && strncmp(opcode, "HALT", 4) == 0)
+  if (token.str_size == 4 && strncmp(opcode, "NOOP", 4) == 0) // exact match
   {
-    inst = INST_HALT;
+    inst = INST_NOOP;
     ++stream->used;
   }
-  else if (token.str_size >= 5 && strncmp(opcode, "PUSH.", 5) == 0)
-  {
-    data_type_t type = parse_data_type(opcode + 5, token.str_size - 5);
-    inst.opcode      = get_typed_opcode(OP_PUSH_BYTE, type);
-    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH))
-      // TODO: Parse error (EXPECTED_TYPE_TAG)
-      return INST_NOOP;
-    else if (stream->used == stream->available - 1)
-      // TODO: Parse error (EXPECTED_OPERAND)
-      return INST_NOOP;
-    inst.operand =
-        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1)));
-    stream->used += 2;
-  }
-  else if (token.str_size >= 4 && strncmp(opcode, "POP.", 4) == 0)
-  {
-    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
-    inst.opcode      = get_typed_opcode(OP_POP_BYTE, type);
-    if (!OPCODE_IS_TYPE(inst.opcode, OP_POP))
-      // TODO: Parse error (EXPECTED_TYPE_TAG)
-      return INST_NOOP;
+  else if (token.str_size == 4 && strncmp(opcode, "HALT", 4) == 0)
+  {
+    inst = INST_HALT;
     ++stream->used;
   }
-  else if (token.str_size >= 9 && strncmp(opcode, "PUSH-REG.", 9) == 0)
-  {
-    data_type_t type = parse_data_type(opcode + 9, token.str_size - 9);
-    inst.opcode      = get_typed_opcode(OP_PUSH_REGISTER_BYTE, type);
-    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH_REGISTER))
-      // TODO: Parse error (EXPECTED_TYPE_TAG)
-      return INST_NOOP;
-    else if (stream->used == stream->available - 1)
-      // TODO: Parse error (EXPECTED_OPERAND)
-      return INST_NOOP;
-    inst.operand =
-        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1)));
-    stream->used += 2;
-  }
-  else if (token.str_size >= 4 && strncmp(opcode, "MOV.", 4) == 0)
-  {
-    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
-    inst.opcode      = get_typed_opcode(OP_MOV_BYTE, type);
-    if (!OPCODE_IS_TYPE(inst.opcode, OP_MOV))
-      // TODO: Parse error (EXPECTED_TYPE_TAG)
-      return INST_NOOP;
-    else if (stream->used == stream->available - 1)
-      // TODO: Parse error (EXPECTED_OPERAND)
-      return INST_NOOP;
-    inst.operand =
-        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1)));
-    stream->used += 2;
-  }
-  else if (token.str_size >= 4 && strncmp(opcode, "DUP.", 4) == 0)
-  {
-    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
-    inst.opcode      = get_typed_opcode(OP_DUP_BYTE, type);
-    if (!OPCODE_IS_TYPE(inst.opcode, OP_DUP))
-      // TODO: Parse error (EXPECTED_TYPE_TAG)
-      return INST_NOOP;
-    else if (stream->used == stream->available - 1)
-      // TODO: Parse error (EXPECTED_OPERAND)
-      return INST_NOOP;
-    inst.operand =
-        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1)));
-    stream->used += 2;
+  else if (token.str_size >= 4 && strncmp(opcode, "PUSH", 4) == 0)
+  {
+    size_t oplen = 5; // "PUSH" plus one separator character (not checked to be '.')
+    if (token.str_size >= 8 && strncmp(opcode, "PUSH-REG", 8) == 0)
+    {
+      oplen       = 9; // "PUSH-REG" plus one separator character
+      ret->opcode = OP_PUSH_REGISTER_BYTE;
+    }
+    else
+      ret->opcode = OP_PUSH_BYTE;
+    return parse_inst_with_typed_operand(stream, ret, oplen);
   }
-  else
+  else if (token.str_size >= 3 && strncmp(opcode, "POP", 3) == 0)
+  {
+    ret->opcode = OP_POP_BYTE; // base opcode; the helper replaces it with the typed one
+    return parse_inst_with_type(stream, ret, 4); // 4 = "POP" plus one separator
+  }
+  else if (token.str_size >= 3 && strncmp(opcode, "MOV", 3) == 0)
+  {
+    ret->opcode = OP_MOV_BYTE;
+    return parse_inst_with_typed_operand(stream, ret, 4);
+  }
+  else if (token.str_size >= 3 && strncmp(opcode, "DUP", 3) == 0)
   {
-    // TODO: Parse error (UNKNOWN_OPCODE)
-    return INST_NOOP;
+    ret->opcode = OP_DUP_BYTE;
+    return parse_inst_with_typed_operand(stream, ret, 4);
   }
-  return inst;
+  else if (token.str_size >= 3 && strncmp(opcode, "NOT", 3) == 0)
+  {
+    ret->opcode = OP_NOT_BYTE;
+    return parse_inst_with_type(stream, ret, 4);
+  }
+  else if (token.str_size >= 2 && strncmp(opcode, "OR", 2) == 0)
+  {
+    ret->opcode = OP_OR_BYTE;
+    return parse_inst_with_type(stream, ret, 3);
+  }
+  else if (token.str_size >= 3 && strncmp(opcode, "AND", 3) == 0)
+  {
+    ret->opcode = OP_AND_BYTE;
+    return parse_inst_with_type(stream, ret, 4);
+  }
+  else if (token.str_size >= 3 && strncmp(opcode, "XOR", 3) == 0)
+  {
+    ret->opcode = OP_XOR_BYTE;
+    return parse_inst_with_type(stream, ret, 4);
+  }
+  else if (token.str_size >= 2 && strncmp(opcode, "EQ", 2) == 0)
+  {
+    ret->opcode = OP_EQ_BYTE;
+    return parse_inst_with_type(stream, ret, 3);
+  }
+  else if (token.str_size >= 4 && strncmp(opcode, "PLUS", 4) == 0)
+  {
+    ret->opcode = OP_PLUS_BYTE;
+    return parse_inst_with_type(stream, ret, 5);
+  }
+  else if (token.str_size >= 6 && strncmp(opcode, "PRINT.", 6) == 0)
+  {
+    const char *type       = opcode + 6; // text after "PRINT."
+    const size_t type_size = WORD_SAFE_SUB(token.str_size, 6);
+    if (type_size == 4 && strncmp(type, "CHAR", 4) == 0)
+      inst.opcode = OP_PRINT_CHAR;
+    else if (type_size == 4 && strncmp(type, "BYTE", 4) == 0)
+      inst.opcode = OP_PRINT_BYTE;
+    else if (type_size == 3 && strncmp(type, "INT", 3) == 0)
+      inst.opcode = OP_PRINT_INT;
+    else if (type_size == 5 && strncmp(type, "HWORD", 5) == 0)
+      inst.opcode = OP_PRINT_HWORD;
+    else if (type_size == 4 && strncmp(type, "LONG", 4) == 0)
+      inst.opcode = OP_PRINT_LONG;
+    else if (type_size == 4 && strncmp(type, "WORD", 4) == 0)
+      inst.opcode = OP_PRINT_WORD;
+    else
+      return PERR_UNKNOWN_OPERATOR;
+    ++stream->used; // PRINT.* takes no operand token
+  }
+  else if (token.str_size >= 5 && strncmp(opcode, "JUMP.", 5) == 0)
+  {
+    const char *type       = opcode + 5; // text after "JUMP."
+    const size_t type_size = WORD_SAFE_SUB(token.str_size, 5);
+    if (type_size == 3 && strncmp(type, "ABS", 3) == 0)
+    {
+      ret->opcode = OP_JUMP_ABS;
+      ++stream->used; // step past the JUMP.ABS token before reading its operand
+      return parse_inst_with_operand(stream, ret);
+    }
+    else if (type_size == 5 && strncmp(type, "STACK", 5) == 0)
+      inst.opcode = OP_JUMP_STACK; // no operand; finishes via the shared tail below
+    else if (type_size == 8 && strncmp(type, "REGISTER", 8) == 0)
+    {
+      ret->opcode = OP_JUMP_REGISTER;
+      ++stream->used; // step past the JUMP.REGISTER token before reading its operand
+      return parse_inst_with_operand(stream, ret);
+    }
+    else if (type_size >= 2 && strncmp(type, "IF", 2) == 0)
+    {
+      // JUMP.IF.<TYPE>: view the token as "IF.<TYPE>" so the typed helper applies
+      token_t prev    = TOKEN_STREAM_AT(stream->data, stream->used);
+      size_t prev_ptr = stream->used; // the helper advances stream->used, so keep the index
+
+      TOKEN_STREAM_AT(stream->data, stream->used).str      = (char *)type;
+      TOKEN_STREAM_AT(stream->data, stream->used).str_size = type_size;
+      ret->opcode                                          = OP_JUMP_IF_BYTE;
+      perr_t perr = parse_inst_with_typed_operand(stream, ret, 3); // 3 = "IF" plus separator
+
+      TOKEN_STREAM_AT(stream->data, prev_ptr) = prev; // undo the temporary rewrite
+      return perr;
+    }
+    else
+      return PERR_UNKNOWN_OPERATOR;
+    ++stream->used; // reached only by JUMP.STACK; every other JUMP.* case returned
+  }
+  else
+    return PERR_UNKNOWN_OPERATOR;
+  *ret = inst; // only NOOP, HALT, PRINT.* and JUMP.STACK get here
+  return PERR_OK;
 }
 
-inst_t *parse_stream(token_stream_t *stream, size_t *size)
+perr_t parse_stream(token_stream_t *stream, inst_t **ret, size_t *size)
 {
   darr_t instructions = {0};
   darr_init(&instructions, sizeof(inst_t));
   while (stream->used < stream->available)
   {
-    inst_t inst = parse_next_inst(stream);
+    inst_t inst = INST_NOOP; // filled in by parse_next_inst below
+    perr_t err  = parse_next_inst(stream, &inst);
+    if (err) // any non-zero perr_t is a failure (PERR_OK is 0)
+    {
+      free(instructions.data); // drop the instructions gathered so far
+      return err; // *ret and *size are not written on failure
+    }
     darr_append_bytes(&instructions, (byte *)&inst, sizeof(inst_t));
   }
   *size = instructions.used / sizeof(inst_t);
-  return (inst_t *)instructions.data;
+  *ret  = (inst_t *)instructions.data; // not freed here; presumably the caller frees it
+  return PERR_OK;
 }
diff --git a/asm/parser.h b/asm/parser.h
index 09a11d4..9da1117 100644
--- a/asm/parser.h
+++ b/asm/parser.h
@@ -17,7 +17,20 @@
 
 #include <vm/inst.h>
 
-inst_t parse_next_inst(token_stream_t *);
-inst_t *parse_stream(token_stream_t *, size_t *);
+typedef enum // result code returned by the parser functions
+{
+  PERR_OK = 0,            // success; zero so callers can test `if (err)`
+  PERR_INTEGER_OVERFLOW,  // parse_word: strtoll/strtoull reported ERANGE
+  PERR_NOT_A_NUMBER,      // parse_word: literal was not fully consumed as a number
+  PERR_EXPECTED_TYPE,     // type tag after the mnemonic parsed as DATA_TYPE_NIL
+  PERR_EXPECTED_SYMBOL,   // parse_next_inst: current token is not a TOKEN_SYMBOL
+  PERR_EXPECTED_OPERAND,  // intended for an instruction with no operand token after it
+  PERR_UNKNOWN_OPERATOR,  // mnemonic, or PRINT./JUMP. suffix, was not recognised
+} perr_t;
+
+const char *perr_as_cstr(perr_t); // name of the error code, "" if unrecognised
+
+perr_t parse_next_inst(token_stream_t *, inst_t *); // parses one instruction into the inst_t
+perr_t parse_stream(token_stream_t *, inst_t **, size_t *); // parses the stream into an array and its count
 
 #endif