From 2fe2af22a9b7b2be36d51e24797330c5b6cdbb13 Mon Sep 17 00:00:00 2001
From: Aryadev Chavali <aryadev@aryadevchavali.com>
Date: Thu, 26 Oct 2023 11:17:55 +0100
Subject: Implemented a rudimentary parser with support for 4 instruction types

---
 Makefile     |   2 +-
 asm/parser.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index e1fa63c..ae48255 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ VM_OUT=$(DIST)/ovm.out
 ## ASSEMBLY setup
 ASM_DIST=$(DIST)/asm
 ASM_SRC=asm
-ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.c)
+ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.c parser.c)
 ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.c=$(ASM_DIST)/%.o)
 ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d
 ASM_CFLAGS=$(CFLAGS)
diff --git a/asm/parser.c b/asm/parser.c
index 5521585..5d96c72 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -9,3 +9,181 @@
  * Author: Aryadev Chavali
  * Description: Parser for assembly language
  */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "./parser.h"
+
+#define OPCODE_ON_TYPE(BASE_CODE, TYPE)
+
+opcode_t get_typed_opcode(opcode_t base_code, data_type_t type)
+{
+  switch (type)
+  {
+  case DATA_TYPE_NIL:
+    // TODO: Parse error (Not something we should pass here)
+    return 0;
+  case DATA_TYPE_BYTE:
+    return base_code;
+  case DATA_TYPE_HWORD:
+    return base_code + 1;
+  case DATA_TYPE_WORD:
+    return base_code + 2;
+  default:
+    // TODO: Parse error (EXPECTED_TYPE_TAG)
+    return 0;
+  }
+}
+
+data_type_t parse_data_type(const char *cstr, size_t length)
+{
+  if (length >= 4 && strncmp(cstr, "BYTE", 4) == 0)
+    return DATA_TYPE_BYTE;
+  else if (length >= 5 && strncmp(cstr, "HWORD", 5) == 0)
+    return DATA_TYPE_HWORD;
+  else if (length >= 4 && strncmp(cstr, "WORD", 4) == 0)
+    return DATA_TYPE_WORD;
+  else
+    return DATA_TYPE_NIL;
+}
+
+word parse_word(token_t token)
+{
+  assert(token.type == TOKEN_LITERAL_NUMBER);
+  bool is_negative = token.str_size > 1 && token.str[0] == '-';
+  word w           = 0;
+  if (is_negative)
+  {
+    char *end = NULL;
+    // TODO: Make a standardised type of the same size as word in
+    // base.h
+    int64_t i = strtoll(token.str, &end, 0);
+    if (!(end && end[0] == '\0'))
+      // TODO: Parse error (NOT_A_NUMBER)
+      return 0;
+    else if (errno == ERANGE)
+    {
+      // TODO: Parse error (INTEGER_OVERFLOW)
+      errno = 0;
+      return 0;
+    }
+    memcpy(&w, &i, sizeof(w));
+  }
+  else
+  {
+    char *end = NULL;
+    w         = strtoull(token.str, &end, 0);
+    if (!(end && end[0] == '\0'))
+      // TODO: Parse error (NOT_A_NUMBER)
+      return 0;
+    else if (errno == ERANGE)
+    {
+      // TODO: Parse error (WORD_OVERFLOW)
+      errno = 0;
+      return 0;
+    }
+  }
+  return w;
+}
+
+inst_t parse_next_inst(token_stream_t *stream)
+{
+  token_t token = TOKEN_STREAM_AT(stream->data, stream->used);
+  if (token.type != TOKEN_SYMBOL)
+    // TODO Parser Error (EXPECTED_SYMBOL)
+    return INST_NOOP;
+  inst_t inst  = {0};
+  char *opcode = token.str;
+  if (token.str_size >= 4 && strncmp(opcode, "HALT", 4) == 0)
+  {
+    inst = INST_HALT;
+    ++stream->used;
+  }
+  else if (token.str_size >= 5 && strncmp(opcode, "PUSH.", 5) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 5, token.str_size - 5);
+    inst.opcode      = get_typed_opcode(OP_PUSH_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    else if (stream->used == stream->available - 1)
+      // TODO: Parse error (EXPECTED_OPERAND)
+      return INST_NOOP;
+    inst.operand =
+        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1)));
+    stream->used += 2;
+  }
+  else if (token.str_size >= 4 && strncmp(opcode, "POP.", 4) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
+    inst.opcode      = get_typed_opcode(OP_POP_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_POP))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    ++stream->used;
+  }
+  else if (token.str_size >= 9 && strncmp(opcode, "PUSH-REG.", 9) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 9, token.str_size - 9);
+    inst.opcode      = get_typed_opcode(OP_PUSH_REGISTER_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_PUSH_REGISTER))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    else if (stream->used == stream->available - 1)
+      // TODO: Parse error (EXPECTED_OPERAND)
+      return INST_NOOP;
+    inst.operand =
+        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1)));
+    stream->used += 2;
+  }
+  else if (token.str_size >= 4 && strncmp(opcode, "MOV.", 4) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
+    inst.opcode      = get_typed_opcode(OP_MOV_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_MOV))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    else if (stream->used == stream->available - 1)
+      // TODO: Parse error (EXPECTED_OPERAND)
+      return INST_NOOP;
+    inst.operand =
+        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1)));
+    stream->used += 2;
+  }
+  else if (token.str_size >= 4 && strncmp(opcode, "DUP.", 4) == 0)
+  {
+    data_type_t type = parse_data_type(opcode + 4, token.str_size - 4);
+    inst.opcode      = get_typed_opcode(OP_DUP_BYTE, type);
+    if (!OPCODE_IS_TYPE(inst.opcode, OP_DUP))
+      // TODO: Parse error (EXPECTED_TYPE_TAG)
+      return INST_NOOP;
+    else if (stream->used == stream->available - 1)
+      // TODO: Parse error (EXPECTED_OPERAND)
+      return INST_NOOP;
+    inst.operand =
+        DWORD(parse_word(TOKEN_STREAM_AT(stream->data, stream->used + 1)));
+    stream->used += 2;
+  }
+  else
+  {
+    // TODO: Parse error (UNKNOWN_OPCODE)
+    return INST_NOOP;
+  }
+  return inst;
+}
+
+inst_t *parse_stream(token_stream_t *stream, size_t *size)
+{
+  darr_t instructions = {0};
+  darr_init(&instructions, sizeof(inst_t));
+  while (stream->used < stream->available)
+  {
+    inst_t inst = parse_next_inst(stream);
+    darr_append_bytes(&instructions, (byte *)&inst, sizeof(inst_t));
+  }
+  *size = instructions.used / sizeof(inst_t);
+  return (inst_t *)instructions.data;
+}
-- 
cgit v1.2.3-13-gbd6f