about | summary | refs | log | tree | commit | diff
path: root/asm
diff options
context:
space:
mode:
author Aryadev Chavali <aryadev@aryadevchavali.com> 2024-04-14 02:45:48 +0630
committer Aryadev Chavali <aryadev@aryadevchavali.com> 2024-04-14 02:45:48 +0630
commit 0ebbf3ca751e638a90cf886625992bb028f9b587 (patch)
tree 626e1cbddb50f0ec7d5fb8e93c4a3adfa9943349 /asm
parent b7a40f4ab0fc5c0f6b68f24437f479a29e72c9af (diff)
download ovm-0ebbf3ca751e638a90cf886625992bb028f9b587.tar.gz
ovm-0ebbf3ca751e638a90cf886625992bb028f9b587.tar.bz2
ovm-0ebbf3ca751e638a90cf886625992bb028f9b587.zip
Start writing assembler in C++
C++ is the best language to use here: it's already compatible with the headers I'm using, it can pretty neatly enter the build system, and it can use the functions I've built for converting to and from bytecode!
Diffstat (limited to 'asm')
-rw-r--r-- asm/lexer.c 623
-rw-r--r-- asm/lexer.h 91
-rw-r--r-- asm/main.c 148
-rw-r--r-- asm/main.cpp 19
-rw-r--r-- asm/parser.c 935
-rw-r--r-- asm/parser.h 104
6 files changed, 19 insertions, 1901 deletions
diff --git a/asm/lexer.c b/asm/lexer.c
deleted file mode 100644
index a4905fb..0000000
--- a/asm/lexer.c
+++ /dev/null
@@ -1,623 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-24
- * Author: Aryadev Chavali
- * Description: Lexer for assembly language
- */
-
-#include <assert.h>
-#include <ctype.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <lib/inst.h>
-
-#include "./lexer.h"
-
-const char *token_type_as_cstr(token_type_t type)
-{
- switch (type)
- {
- case TOKEN_PP_USE:
- return "PP_USE";
- case TOKEN_PP_CONST:
- return "PP_CONST";
- case TOKEN_PP_END:
- return "PP_END";
- case TOKEN_PP_REFERENCE:
- return "PP_REFERENCE";
- case TOKEN_GLOBAL:
- return "GLOBAL";
- case TOKEN_STAR:
- return "STAR";
- case TOKEN_LITERAL_STRING:
- return "LITERAL_STRING";
- case TOKEN_LITERAL_NUMBER:
- return "LITERAL_NUMBER";
- case TOKEN_LITERAL_CHAR:
- return "LITERAL_CHAR";
- case TOKEN_NOOP:
- return "NOOP";
- case TOKEN_HALT:
- return "HALT";
- case TOKEN_PUSH:
- return "PUSH";
- case TOKEN_POP:
- return "POP";
- case TOKEN_PUSH_REG:
- return "PUSH_REG";
- case TOKEN_MOV:
- return "MOV";
- case TOKEN_DUP:
- return "DUP";
- case TOKEN_MALLOC:
- return "MALLOC";
- case TOKEN_MALLOC_STACK:
- return "MALLOC_STACK";
- case TOKEN_MSET:
- return "MSET";
- case TOKEN_MSET_STACK:
- return "MSET_STACK";
- case TOKEN_MGET:
- return "MGET";
- case TOKEN_MGET_STACK:
- return "MGET_STACK";
- case TOKEN_MDELETE:
- return "MDELETE";
- case TOKEN_MSIZE:
- return "MSIZE";
- case TOKEN_NOT:
- return "NOT";
- case TOKEN_OR:
- return "OR";
- case TOKEN_AND:
- return "AND";
- case TOKEN_XOR:
- return "XOR";
- case TOKEN_EQ:
- return "EQ";
- case TOKEN_LT:
- return "LT";
- case TOKEN_LTE:
- return "LTE";
- case TOKEN_GT:
- return "GT";
- case TOKEN_GTE:
- return "GTE";
- case TOKEN_PLUS:
- return "PLUS";
- case TOKEN_SUB:
- return "SUB";
- case TOKEN_MULT:
- return "MULT";
- case TOKEN_PRINT:
- return "PRINT";
- case TOKEN_JUMP_ABS:
- return "JUMP_ABS";
- case TOKEN_JUMP_STACK:
- return "JUMP_STACK";
- case TOKEN_JUMP_IF:
- return "JUMP_IF";
- case TOKEN_CALL:
- return "CALL";
- case TOKEN_CALL_STACK:
- return "CALL_STACK";
- case TOKEN_RET:
- return "RET";
- case TOKEN_SYMBOL:
- return "SYMBOL";
- }
- return "";
-}
-
-const char *lerr_as_cstr(lerr_t lerr)
-{
- switch (lerr)
- {
- case LERR_OK:
- return "OK";
- case LERR_INVALID_CHAR_LITERAL:
- return "INVALID_CHAR_LITERAL";
- case LERR_INVALID_PREPROCESSOR_DIRECTIVE:
- return "INVALID_PREPROCESSOR_DIRECTIVE";
- }
- return "";
-}
-
-token_t token_copy(token_t t)
-{
- token_t new = t;
- new.str = malloc(t.str_size + 1);
- memcpy(new.str, t.str, t.str_size);
- new.str[t.str_size] = '\0';
- return new;
-}
-
-size_t space_left(buffer_t *buffer)
-{
- if (buffer->available == buffer->used)
- return 0;
- return buffer->available - 1 - buffer->used;
-}
-
-char uppercase(char c)
-{
- if (c >= 'a' && c <= 'z')
- return (c - 'a') + 'A';
- return c;
-}
-
-bool is_symbol(char c)
-{
- return isalpha(c) || isdigit(c) || c == '-' || c == '_' || c == '.' ||
- c == ':' || c == '(' || c == ')' || c == '%' || c == '$';
-}
-
-bool is_valid_hex_char(char c)
-{
- return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
- (c >= 'A' && c <= 'F');
-}
-
-lerr_t tokenise_symbol(buffer_t *buffer, size_t *column, token_t *token)
-{
- static_assert(NUMBER_OF_OPCODES == 98, "tokenise_buffer: Out of date!");
-
- size_t sym_size = 0;
- for (; sym_size < space_left(buffer) &&
- is_symbol(buffer->data[buffer->used + sym_size]);
- ++sym_size)
- buffer->data[buffer->used + sym_size] =
- uppercase(buffer->data[buffer->used + sym_size]);
-
- token_t ret = {0};
- char *opcode = (char *)buffer->data + buffer->used;
-
- bool is_opcode = true;
- token_type_t type = 0;
- size_t offset = 0;
-
- if (sym_size > 1 && strncmp(opcode, "%", 1) == 0)
- {
- // Some preprocessing directive
- if (sym_size > 6 && strncmp(opcode + 1, "CONST", 5) == 0)
- {
- type = TOKEN_PP_CONST;
- offset = 6;
- }
- else if (sym_size == 4 && strncmp(opcode + 1, "USE", 3) == 0)
- {
- type = TOKEN_PP_USE;
- offset = 4;
- }
- else if (sym_size == 4 && strncmp(opcode + 1, "END", 3) == 0)
- {
- type = TOKEN_PP_END;
- offset = 4;
- }
- else
- return LERR_INVALID_PREPROCESSOR_DIRECTIVE;
- }
- else if (sym_size > 1 && strncmp(opcode, "$", 1) == 0)
- {
- // A reference to a preprocessing constant
- offset = 1;
- type = TOKEN_PP_REFERENCE;
- }
- else if (sym_size == 4 && strncmp(opcode, "NOOP", 4) == 0)
- {
- offset = 4;
- type = TOKEN_NOOP;
- }
- else if (sym_size == 4 && strncmp(opcode, "HALT", 4) == 0)
- {
- offset = 4;
- type = TOKEN_HALT;
- }
- else if (sym_size > 9 && strncmp(opcode, "PUSH.REG.", 9) == 0)
- {
- offset = 9;
- type = TOKEN_PUSH_REG;
- }
- else if (sym_size > 5 && strncmp(opcode, "PUSH.", 5) == 0)
- {
- offset = 5;
- type = TOKEN_PUSH;
- }
- else if (sym_size > 4 && strncmp(opcode, "POP.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_POP;
- }
- else if (sym_size > 4 && strncmp(opcode, "MOV.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_MOV;
- }
- else if (sym_size > 4 && strncmp(opcode, "DUP.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_DUP;
- }
- else if (sym_size > 13 && strncmp(opcode, "MALLOC.STACK.", 13) == 0)
- {
- offset = 13;
- type = TOKEN_MALLOC_STACK;
- }
- else if (sym_size > 7 && strncmp(opcode, "MALLOC.", 7) == 0)
- {
- offset = 7;
- type = TOKEN_MALLOC;
- }
- else if (sym_size > 11 && strncmp(opcode, "MSET.STACK.", 11) == 0)
- {
- offset = 11;
- type = TOKEN_MSET_STACK;
- }
- else if (sym_size > 5 && strncmp(opcode, "MSET.", 5) == 0)
- {
- offset = 5;
- type = TOKEN_MSET;
- }
- else if (sym_size > 11 && strncmp(opcode, "MGET.STACK.", 11) == 0)
- {
- offset = 11;
- type = TOKEN_MGET_STACK;
- }
- else if (sym_size > 5 && strncmp(opcode, "MGET.", 5) == 0)
- {
- offset = 5;
- type = TOKEN_MGET;
- }
- else if (sym_size == 7 && strncmp(opcode, "MDELETE", 7) == 0)
- {
- offset = 7;
- type = TOKEN_MDELETE;
- }
- else if (sym_size == 5 && strncmp(opcode, "MSIZE", 5) == 0)
- {
- offset = 5;
- type = TOKEN_MSIZE;
- }
- else if (sym_size > 4 && strncmp(opcode, "NOT.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_NOT;
- }
- else if (sym_size > 3 && strncmp(opcode, "OR.", 3) == 0)
- {
- offset = 3;
- type = TOKEN_OR;
- }
- else if (sym_size > 4 && strncmp(opcode, "AND.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_AND;
- }
- else if (sym_size > 4 && strncmp(opcode, "XOR.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_XOR;
- }
- else if (sym_size >= 3 && strncmp(opcode, "EQ.", 3) == 0)
- {
- offset = 3;
- type = TOKEN_EQ;
- }
- else if (sym_size > 4 && strncmp(opcode, "LTE.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_LTE;
- }
- else if (sym_size > 3 && strncmp(opcode, "LT.", 3) == 0)
- {
- offset = 3;
- type = TOKEN_LT;
- }
- else if (sym_size > 4 && strncmp(opcode, "GTE.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_GTE;
- }
- else if (sym_size > 3 && strncmp(opcode, "GT.", 3) == 0)
- {
- offset = 3;
- type = TOKEN_GT;
- }
- else if (sym_size > 4 && strncmp(opcode, "SUB.", 4) == 0)
- {
- offset = 4;
- type = TOKEN_SUB;
- }
- else if (sym_size > 5 && strncmp(opcode, "PLUS.", 5) == 0)
- {
- offset = 5;
- type = TOKEN_PLUS;
- }
- else if (sym_size > 5 && strncmp(opcode, "MULT.", 5) == 0)
- {
- offset = 5;
- type = TOKEN_MULT;
- }
- else if (sym_size > 6 && strncmp(opcode, "PRINT.", 6) == 0)
- {
- offset = 6;
- type = TOKEN_PRINT;
- }
- else if (sym_size == 8 && strncmp(opcode, "JUMP.ABS", 8) == 0)
- {
- offset = 8;
- type = TOKEN_JUMP_ABS;
- }
- else if (sym_size == 10 && strncmp(opcode, "JUMP.STACK", 10) == 0)
- {
- offset = 10;
- type = TOKEN_JUMP_STACK;
- }
- else if (sym_size > 8 && strncmp(opcode, "JUMP.IF.", 8) == 0)
- {
- offset = 8;
- type = TOKEN_JUMP_IF;
- }
- else if (sym_size == 10 && strncmp(opcode, "CALL.STACK", 10) == 0)
- {
- offset = 10;
- type = TOKEN_CALL_STACK;
- }
- else if (sym_size == 4 && strncmp(opcode, "CALL", 4) == 0)
- {
- offset = 4;
- type = TOKEN_CALL;
- }
- else if (sym_size == 3 && strncmp(opcode, "RET", 3) == 0)
- {
- offset = 3;
- type = TOKEN_RET;
- }
- else if (sym_size == 6 && strncmp(opcode, "GLOBAL", 6) == 0)
- {
- offset = 6;
- type = TOKEN_GLOBAL;
- }
- else
- is_opcode = false;
-
- if (!is_opcode)
- {
- // Just a symbol, so no further manipulation
- char *sym = malloc(sym_size + 1);
- memcpy(sym, opcode, sym_size);
- sym[sym_size] = '\0';
- ret = (token_t){.type = TOKEN_SYMBOL,
- .str = sym,
- .column = *column,
- .str_size = sym_size};
- }
- else
- {
- ret.type = type;
- ret.column = *column;
- if (offset == sym_size)
- {
- // There's no more to the string
- ret.str = malloc(1);
- ret.str[0] = '\0';
- }
- else
- {
- // t.str is the remaining part of the string after the
- // opcode
- ret.str = calloc(sym_size - offset + 1, 1);
- memcpy(ret.str, opcode + offset, sym_size - offset);
- ret.str[sym_size - offset] = '\0';
- }
- ret.str_size = sym_size - offset;
- }
- *column += sym_size - 1;
- buffer->used += sym_size;
- *token = ret;
- return LERR_OK;
-}
-
-token_t tokenise_number_literal(buffer_t *buffer, size_t *column)
-{
- token_t token = {
- .type = TOKEN_LITERAL_NUMBER, .str_size = 0, .column = *column};
- if (buffer->data[buffer->used] == '-')
- ++token.str_size;
- for (; token.str_size < space_left(buffer) &&
- isdigit(buffer->data[buffer->used + token.str_size]);
- ++token.str_size)
- continue;
- token.str = calloc(token.str_size + 1, 1);
- memcpy(token.str, buffer->data + buffer->used, token.str_size);
- token.str[token.str_size] = '\0';
- buffer->used += token.str_size;
- *column += token.str_size;
- return token;
-}
-
-token_t tokenise_hex_literal(buffer_t *buffer, size_t *column)
-{
- // For the x part of the literal
- ++buffer->used;
- token_t token = {
- .type = TOKEN_LITERAL_NUMBER, .str_size = 0, .column = *column};
- for (; token.str_size < space_left(buffer) &&
- is_valid_hex_char(buffer->data[buffer->used + token.str_size]);
- ++token.str_size)
- continue;
- // Setup a proper C hex literal
- token.str = calloc(token.str_size + 3, 1);
- token.str[0] = '0';
- token.str[1] = 'x';
- memcpy(token.str + 2, buffer->data + buffer->used, token.str_size);
- token.str[token.str_size + 2] = '\0';
- buffer->used += token.str_size;
- *column += token.str_size;
-
- // Setup the first two characters
- token.str_size += 2;
- return token;
-}
-
-token_t tokenise_char_literal(buffer_t *buffer, size_t *column)
-{
- token_t token = {
- .type = TOKEN_LITERAL_CHAR, .str_size = 1, .column = *column};
- token.str = calloc(2, 1);
- token.str[0] = buffer->data[buffer->used + 1];
- token.str[1] = '\0';
- buffer->used += 3;
- *column += 3;
- return token;
-}
-
-token_t tokenise_string_literal(buffer_t *buffer, size_t *column)
-{
- ++buffer->used;
- size_t string_size;
- for (string_size = 0; string_size + buffer->used < buffer->available &&
- buffer->data[buffer->used + string_size] != '\"';
- ++string_size)
- continue;
- token_t t = {.type = TOKEN_LITERAL_STRING,
- .column = *column,
- .str = malloc(string_size + 1),
- .str_size = string_size};
- memcpy(t.str, buffer->data + buffer->used, string_size);
- t.str[string_size] = '\0';
- *column += string_size + 1;
- buffer->used += string_size + 1;
- return t;
-}
-
-lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr)
-{
- size_t column = 0, line = 1;
- token_stream_t tokens = {0};
- darr_init(&tokens, sizeof(token_t));
- while (space_left(buffer) != 0)
- {
- bool is_token = true;
- token_t t = {0};
- char c = buffer->data[buffer->used];
- if (isspace(c) || c == '\0')
- {
- // Clean whitespace
- for (; space_left(buffer) > 0 && (isspace(c) || c == '\0');
- ++buffer->used, c = buffer->data[buffer->used])
- {
- ++column;
- if (c == '\n')
- {
- column = 0;
- ++line;
- }
- }
- ++column;
- is_token = false;
- }
- else if (c == ';')
- {
- // Start lexing at next line
- for (; space_left(buffer) > 0 && c != '\n';
- ++buffer->used, c = buffer->data[buffer->used])
- continue;
- column = 0;
- ++line;
- ++buffer->used;
- is_token = false;
- }
- else if (c == '*')
- {
- t = (token_t){.type = TOKEN_STAR,
- .column = column,
- .str = malloc(1),
- .str_size = 1};
- t.str[0] = '\0';
- ++buffer->used;
- }
- else if (c == '\"')
- t = tokenise_string_literal(buffer, &column);
- else if (isdigit(c) || (space_left(buffer) > 1 && c == '-' &&
- isdigit(buffer->data[buffer->used + 1])))
- t = tokenise_number_literal(buffer, &column);
- else if (c == 'x' && space_left(buffer) > 1 &&
- is_valid_hex_char(buffer->data[buffer->used + 1]))
- t = tokenise_hex_literal(buffer, &column);
- else if (is_symbol(c))
- {
- lerr_t lerr = tokenise_symbol(buffer, &column, &t);
- if (lerr)
- {
- free(tokens.data);
- return lerr;
- }
- }
- else if (c == '\'')
- {
- if (space_left(buffer) < 2)
- {
- free(tokens.data);
- return LERR_INVALID_CHAR_LITERAL;
- }
- else if (buffer->data[buffer->used + 1] == '\\')
- {
- char escape = '\0';
- if (space_left(buffer) < 3 || buffer->data[buffer->used + 3] != '\'')
- {
- free(tokens.data);
- return LERR_INVALID_CHAR_LITERAL;
- }
- switch (buffer->data[buffer->used + 2])
- {
- case 'n':
- escape = '\n';
- break;
- case 't':
- escape = '\t';
- break;
- case 'r':
- escape = '\r';
- break;
- case '\\':
- escape = '\\';
- break;
- default:
- column += 2;
- free(tokens.data);
- return LERR_INVALID_CHAR_LITERAL;
- break;
- }
-
- t = (token_t){.type = TOKEN_LITERAL_CHAR,
- .str = malloc(2),
- .str_size = 1,
- .column = column};
- column += 2;
- buffer->used += 4;
- t.str[0] = escape;
- t.str[1] = '\0';
- }
- else
- t = tokenise_char_literal(buffer, &column);
- }
-
- if (is_token)
- {
- t.line = line;
- darr_append_bytes(&tokens, (byte *)&t, sizeof(t));
- }
- }
- tokens.available = tokens.used / sizeof(token_t);
- tokens.used = 0;
- *tokens_ptr = tokens;
- return LERR_OK;
-}
diff --git a/asm/lexer.h b/asm/lexer.h
deleted file mode 100644
index 734ffa1..0000000
--- a/asm/lexer.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-24
- * Author: Aryadev Chavali
- * Description: Lexer for assembly language
- */
-
-#ifndef LEXER_H
-#define LEXER_H
-
-#include <lib/darr.h>
-
-typedef enum TokenType
-{
- TOKEN_PP_CONST, // %const(<symbol>)...
- TOKEN_PP_USE, // %use <string>
- TOKEN_PP_END, // %end
- TOKEN_PP_REFERENCE, // $<symbol>
- TOKEN_GLOBAL,
- TOKEN_STAR,
- TOKEN_LITERAL_NUMBER,
- TOKEN_LITERAL_CHAR,
- TOKEN_LITERAL_STRING,
- TOKEN_NOOP,
- TOKEN_HALT,
- TOKEN_PUSH,
- TOKEN_POP,
- TOKEN_PUSH_REG,
- TOKEN_MOV,
- TOKEN_DUP,
- TOKEN_MALLOC,
- TOKEN_MALLOC_STACK,
- TOKEN_MSET,
- TOKEN_MSET_STACK,
- TOKEN_MGET,
- TOKEN_MGET_STACK,
- TOKEN_MDELETE,
- TOKEN_MSIZE,
- TOKEN_NOT,
- TOKEN_OR,
- TOKEN_AND,
- TOKEN_XOR,
- TOKEN_EQ,
- TOKEN_LT,
- TOKEN_LTE,
- TOKEN_GT,
- TOKEN_GTE,
- TOKEN_PLUS,
- TOKEN_SUB,
- TOKEN_MULT,
- TOKEN_PRINT,
- TOKEN_JUMP_ABS,
- TOKEN_JUMP_STACK,
- TOKEN_JUMP_IF,
- TOKEN_CALL,
- TOKEN_CALL_STACK,
- TOKEN_RET,
- TOKEN_SYMBOL,
-} token_type_t;
-
-typedef struct
-{
- token_type_t type;
- size_t column, line;
- char *str;
- size_t str_size;
-} token_t;
-
-token_t token_copy(token_t);
-
-typedef enum
-{
- LERR_OK = 0,
- LERR_INVALID_CHAR_LITERAL,
- LERR_INVALID_PREPROCESSOR_DIRECTIVE,
-} lerr_t;
-const char *lerr_as_cstr(lerr_t);
-
-typedef darr_t buffer_t;
-typedef darr_t token_stream_t;
-#define TOKEN_STREAM_AT(STREAM_DATA, INDEX) (((token_t *)(STREAM_DATA))[INDEX])
-
-const char *token_type_as_cstr(token_type_t type);
-lerr_t tokenise_buffer(buffer_t *, token_stream_t *);
-
-#endif
diff --git a/asm/main.c b/asm/main.c
deleted file mode 100644
index 32b8187..0000000
--- a/asm/main.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-23
- * Author: Aryadev Chavali
- * Description: Assembly source code compiler, targeting OVM
- */
-
-#include <lib/darr.h>
-
-#include "./lexer.h"
-#include "./parser.h"
-
-void usage(const char *program_name, FILE *fp)
-{
- fprintf(fp,
- "Usage: %s FILE OUT-FILE\n"
- "\tFILE: Source code to compile\n"
- "\tOUT-FILE: Name of file to store bytecode\n",
- program_name);
-}
-
-int main(int argc, char *argv[])
-{
- int ret = 0;
- char *source_file = "";
- char *out_file = "";
- if (argc < 3)
- {
- usage(argv[0], stderr);
- return 1;
- }
-
- source_file = argv[1];
- out_file = argv[2];
-
-#if VERBOSE >= 1
- printf("[%sASSEMBLER%s]: Assembling `%s` to `%s`\n", TERM_YELLOW, TERM_RESET,
- source_file, out_file);
-#endif
- FILE *fp = fopen(source_file, "rb");
- darr_t buffer = darr_read_file(fp);
- fclose(fp);
-
-#if VERBOSE >= 1
- printf("[%sASSEMBLER%s]: Read `%s` -> %lu bytes\n", TERM_YELLOW, TERM_RESET,
- source_file, buffer.available);
-#endif
-
- token_stream_t tokens = {0};
- lerr_t lex_error = tokenise_buffer(&buffer, &tokens);
- if (lex_error)
- {
- // Compute the line/newlines by hand
- size_t column = 0, line = 1;
- for (size_t i = 0; i < buffer.used; ++i)
- {
- if (buffer.data[i] == '\n')
- {
- column = 0;
- ++line;
- }
- else
- ++column;
- }
- fprintf(stderr, "%s:%lu:%lu: %s\n", source_file, line, column,
- lerr_as_cstr(lex_error));
- ret = 255 - lex_error;
- goto end;
- }
-#if VERBOSE >= 1
- printf("[%sTOKENISER%s]: %lu bytes -> %lu tokens\n", TERM_GREEN, TERM_RESET,
- buffer.available, tokens.available);
-#endif
-
-#if VERBOSE >= 2
- printf("[%sTOKENISER%s]: Tokens parsed:\n", TERM_GREEN, TERM_RESET);
- for (size_t i = 0; i < tokens.available; ++i)
- {
- token_t token = TOKEN_STREAM_AT(tokens.data, i);
- printf("\t[%lu]: %s(`%s`)@%lu,%lu\n", i, token_type_as_cstr(token.type),
- token.str, token.line, token.column);
- }
-#endif
-
- free(buffer.data);
- buffer.data = NULL;
-
-#if VERBOSE >= 2
- printf("[%sPARSER%s]: Beginning parse...\n", TERM_YELLOW, TERM_RESET);
-#endif
- prog_t *program = NULL;
- perr_t parse_error = parse_stream(&tokens, &program);
- if (parse_error)
- {
- size_t column = 0;
- size_t line = 0;
- if (tokens.used < tokens.available)
- {
- token_t t = TOKEN_STREAM_AT(tokens.data, tokens.used);
- column = t.column;
- line = t.line;
- }
- fprintf(stderr, "%s:%lu:%lu: %s\n", source_file, line, column,
- perr_as_cstr(parse_error));
- ret = 255 - parse_error;
- goto end;
- }
-#if VERBOSE >= 1
- printf("[%sPARSER%s]: %lu tokens -> %lu instructions\n", TERM_GREEN,
- TERM_RESET, tokens.available, program->count);
-#endif
-
-#if VERBOSE >= 2
- printf("[%sPARSER%s]: Program parsed(COUNT=%lu, START=%lu):\n", TERM_GREEN,
- TERM_RESET, program->count, program->header.start_address);
- for (size_t i = 0; i < program->count; ++i)
- {
- printf("\t[%lu]: ", i);
- inst_print(program->instructions[i], stdout);
- printf("\n");
- }
-#endif
-
- fp = fopen(out_file, "wb");
- prog_write_file(program, fp);
- fclose(fp);
-#if VERBOSE >= 1
- printf("[%sASSEMBLER%s]: Wrote bytecode to `%s`\n", TERM_GREEN, TERM_RESET,
- out_file);
-#endif
-end:
- if (buffer.data)
- free(buffer.data);
- if (tokens.data)
- {
- for (size_t i = 0; i < tokens.available; ++i)
- free(TOKEN_STREAM_AT(tokens.data, i).str);
- free(tokens.data);
- }
- if (program)
- free(program);
- return ret;
-}
diff --git a/asm/main.cpp b/asm/main.cpp
new file mode 100644
index 0000000..1ad17b1
--- /dev/null
+++ b/asm/main.cpp
@@ -0,0 +1,19 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license. You should have received a copy of the GPLv2
+ * license with this file. If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Entrypoint for assembly program
+ */
+
+#include <iostream>
+
+int main(void)
+{
+ std::cout << "Hello, world!" << std::endl;
+ return 0;
+}
diff --git a/asm/parser.c b/asm/parser.c
deleted file mode 100644
index d326d14..0000000
--- a/asm/parser.c
+++ /dev/null
@@ -1,935 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-24
- * Author: Aryadev Chavali
- * Description: Parser for assembly language
- */
-
-#include <assert.h>
-#include <errno.h>
-#include <stdbool.h>
-#include <string.h>
-
-#include "./parser.h"
-
-#define OPCODE_ON_TYPE(BASE_CODE, TYPE)
-
-const char *perr_as_cstr(perr_t perr)
-{
- switch (perr)
- {
- case PERR_OK:
- return "OK";
- case PERR_INTEGER_OVERFLOW:
- return "INTEGER_OVERFLOW";
- case PERR_NOT_A_NUMBER:
- return "NOT_A_NUMBER";
- case PERR_EXPECTED_TYPE:
- return "EXPECTED_TYPE";
- case PERR_EXPECTED_UTYPE:
- return "EXPECTED_UTYPE";
- case PERR_EXPECTED_SYMBOL:
- return "EXPECTED_SYMBOL";
- case PERR_EXPECTED_LABEL:
- return "EXPECTED_LABEL";
- case PERR_EXPECTED_OPERAND:
- return "EXPECTED_OPERAND";
- case PERR_PREPROCESSOR_EXPECTED_STRING:
- return "PREPROCESSOR_EXPECTED_STRING";
- case PERR_PREPROCESSOR_FILE_NONEXISTENT:
- return "PREPROCESSOR_FILE_NONEXISTENT";
- case PERR_PREPROCESSOR_FILE_PARSE_ERROR:
- return "PREPROCESSOR_FILE_PARSE_ERROR";
- case PERR_PREPROCESSOR_EXPECTED_END:
- return "PREPROCESSOR_EXPECTED_END";
- case PERR_PREPROCESSOR_EXPECTED_NAME:
- return "PREPROCESSOR_EXPECTED_NAME";
- case PERR_PREPROCESSOR_UNKNOWN_NAME:
- return "PREPROCESSOR_UNKNOWN_NAME";
- case PERR_INVALID_RELATIVE_ADDRESS:
- return "INVALID_RELATIVE_ADDRESS";
- case PERR_UNKNOWN_LABEL:
- return "UNKNOWN_LABEL";
- case PERR_UNKNOWN_OPERATOR:
- return "UNKNOWN_OPERATOR";
- default:
- return "";
- }
-}
-
-presult_t presult_label(size_t stream_index, const char *name, size_t size,
- s_word addr)
-{
- presult_t res = {.stream_index = stream_index,
- .address = addr,
- .label = {.name = malloc(size + 1), .size = size}};
- memcpy(res.label.name, name, size);
- res.label.name[size] = '\0';
- return res;
-}
-
-presult_t presult_label_ref(size_t stream_index, inst_t base, const char *label,
- size_t size)
-{
- presult_t pres = presult_label(stream_index, label, size, 0);
- pres.instruction = base;
- pres.type = PRES_LABEL_ADDRESS;
- return pres;
-}
-
-presult_t presult_instruction(size_t stream_index, inst_t inst)
-{
- return (presult_t){.stream_index = stream_index,
- .instruction = inst,
- .type = PRES_COMPLETE_RESULT};
-}
-
-presult_t presult_relative(size_t stream_index, inst_t inst, s_word addr)
-{
- return (presult_t){.stream_index = stream_index,
- .instruction = inst,
- .address = addr,
- .type = PRES_RELATIVE_ADDRESS};
-}
-
-presult_t presult_global(size_t stream_index, const char *name, size_t size,
- s_word addr)
-{
- presult_t res = presult_label(stream_index, name, size, addr);
- res.type = PRES_GLOBAL_LABEL;
- return res;
-}
-
-void presult_free(presult_t res)
-{
- switch (res.type)
- {
- case PRES_LABEL_ADDRESS:
- case PRES_GLOBAL_LABEL:
- case PRES_LABEL:
- free(res.label.name);
- break;
- case PRES_RELATIVE_ADDRESS:
- case PRES_COMPLETE_RESULT:
- break;
- }
-}
-
-void presults_free(presult_t *ptr, size_t number)
-{
- for (size_t i = 0; i < number; ++i)
- presult_free(ptr[i]);
-}
-
-perr_t parse_word(token_t token, word *ret)
-{
- if (token.type == TOKEN_LITERAL_NUMBER)
- {
- bool is_negative = token.str_size > 1 && token.str[0] == '-';
- word w = 0;
- if (is_negative)
- {
- char *end = NULL;
- s_word i = strtoll(token.str, &end, 0);
- if (!(end && end[0] == '\0'))
- return PERR_NOT_A_NUMBER;
- else if (errno == ERANGE)
- {
- errno = 0;
- return PERR_INTEGER_OVERFLOW;
- }
- // Copy bits, do not cast
- memcpy(&w, &i, sizeof(w));
- }
- else
- {
- char *end = NULL;
- w = strtoull(token.str, &end, 0);
- if (!(end && end[0] == '\0'))
- return PERR_NOT_A_NUMBER;
- else if (errno == ERANGE)
- {
- errno = 0;
- return PERR_INTEGER_OVERFLOW;
- }
- }
- *ret = w;
- return PERR_OK;
- }
- else if (token.type == TOKEN_LITERAL_CHAR)
- {
- *ret = token.str[0];
- return PERR_OK;
- }
- else
- return PERR_NOT_A_NUMBER;
-}
-
-perr_t parse_sword(token_t token, i64 *ret)
-{
- if (token.type == TOKEN_LITERAL_NUMBER)
- {
- char *end = NULL;
- s_word i = strtoll(token.str, &end, 0);
- if (!(end && end[0] == '\0'))
- return PERR_NOT_A_NUMBER;
- else if (errno == ERANGE)
- {
- errno = 0;
- return PERR_INTEGER_OVERFLOW;
- }
- *ret = i;
- return PERR_OK;
- }
- else if (token.type == TOKEN_LITERAL_CHAR)
- {
- *ret = token.str[0];
- return PERR_OK;
- }
- else
- return PERR_NOT_A_NUMBER;
-}
-
-perr_t parse_word_label_or_relative(token_stream_t *stream, presult_t *res)
-{
- token_t token = TOKEN_STREAM_AT(stream->data, stream->used);
- if (token.type == TOKEN_SYMBOL)
- {
- *res = presult_label_ref(stream->used, res->instruction, token.str,
- token.str_size);
- return PERR_OK;
- }
- else if (token.type == TOKEN_LITERAL_CHAR ||
- token.type == TOKEN_LITERAL_NUMBER)
- {
- res->type = PRES_COMPLETE_RESULT;
- return parse_word(token, &res->instruction.operand.as_word);
- }
- else if (token.type == TOKEN_STAR)
- {
- if (stream->used + 1 >= stream->available)
- return PERR_EXPECTED_OPERAND;
- res->type = PRES_RELATIVE_ADDRESS;
- ++stream->used;
- return parse_sword(TOKEN_STREAM_AT(stream->data, stream->used),
- &res->address);
- }
- return PERR_EXPECTED_OPERAND;
-}
-
-enum Type
-{
- T_NIL = -1,
- T_BYTE,
- T_CHAR,
- T_HWORD,
- T_INT,
- T_LONG,
- T_WORD,
-} parse_details_to_type(token_t details)
-{
- if (details.str_size == 4 && strncmp(details.str, "BYTE", 4) == 0)
- return T_BYTE;
- else if (details.str_size == 4 && strncmp(details.str, "CHAR", 4) == 0)
- return T_CHAR;
- else if (details.str_size == 5 && strncmp(details.str, "HWORD", 5) == 0)
- return T_HWORD;
- else if (details.str_size == 3 && strncmp(details.str, "INT", 3) == 0)
- return T_INT;
- else if (details.str_size == 4 && strncmp(details.str, "LONG", 4) == 0)
- return T_LONG;
- else if (details.str_size == 4 && strncmp(details.str, "WORD", 4) == 0)
- return T_WORD;
- else
- return T_NIL;
-}
-
-enum UType
-{
- U_NIL = -1,
- U_BYTE,
- U_HWORD,
- U_WORD,
-} convert_type_to_utype(enum Type type)
-{
- if (type == T_CHAR || type == T_INT || type == T_LONG)
- return U_NIL;
- switch (type)
- {
- case T_NIL:
- case T_LONG:
- case T_INT:
- case T_CHAR:
- return U_NIL;
- case T_BYTE:
- return U_BYTE;
- case T_HWORD:
- return U_HWORD;
- case T_WORD:
- return U_WORD;
- }
- return 0;
-}
-
-perr_t parse_utype_inst(token_stream_t *stream, inst_t *ret)
-{
- if (stream->used + 1 > stream->available)
- return PERR_EXPECTED_OPERAND;
- enum UType type = convert_type_to_utype(
- parse_details_to_type(TOKEN_STREAM_AT(stream->data, stream->used)));
- if (type == U_NIL)
- return PERR_EXPECTED_UTYPE;
- ret->opcode += type;
- return PERR_OK;
-}
-
-perr_t parse_type_inst(token_stream_t *stream, inst_t *ret)
-{
- if (stream->used + 1 > stream->available)
- return PERR_EXPECTED_OPERAND;
- enum Type type =
- parse_details_to_type(TOKEN_STREAM_AT(stream->data, stream->used));
- if (type == T_NIL)
- return PERR_EXPECTED_TYPE;
- ret->opcode += type;
- return PERR_OK;
-}
-
-perr_t parse_utype_inst_with_operand(token_stream_t *stream, inst_t *ret)
-{
- perr_t inst_err = parse_utype_inst(stream, ret);
- if (inst_err)
- return inst_err;
- ++stream->used;
- perr_t word_err = parse_word(TOKEN_STREAM_AT(stream->data, stream->used),
- &ret->operand.as_word);
- if (word_err)
- return word_err;
- return PERR_OK;
-}
-
-perr_t parse_jump_inst_operand(token_stream_t *stream, presult_t *res)
-{
- perr_t inst_err = parse_utype_inst(stream, &res->instruction);
-
- if (inst_err)
- return inst_err;
- ++stream->used;
- perr_t op_err = parse_word_label_or_relative(stream, res);
- if (op_err)
- return op_err;
- return PERR_OK;
-}
-
-perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret)
-{
- perr_t inst_err = parse_type_inst(stream, ret);
- if (inst_err)
- return inst_err;
- ++stream->used;
- perr_t word_err = parse_word(TOKEN_STREAM_AT(stream->data, stream->used),
- &ret->operand.as_word);
- if (word_err)
- return word_err;
- return PERR_OK;
-}
-
-label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size)
-{
- for (size_t i = 0; i < n; ++i)
- {
- label_t label = labels[i];
- if (label.name_size == name_size &&
- strncmp(label.name, name, name_size) == 0)
- return label;
- }
-
- return (label_t){0};
-}
-
-block_t search_blocks(block_t *blocks, size_t n, char *name, size_t name_size)
-{
- for (size_t i = 0; i < n; ++i)
- {
- block_t block = blocks[i];
- if (block.name_size == name_size &&
- strncmp(block.name, name, name_size) == 0)
- return block;
- }
-
- return (block_t){0};
-}
-
-perr_t preprocess_use_blocks(token_stream_t *stream, token_stream_t *new)
-{
- token_stream_t new_stream = {0};
- darr_init(&new_stream, sizeof(token_t));
- // %USE <STRING FILENAME> -> #TOKENS_IN(FILENAME)
- for (size_t i = 0; i < stream->available; ++i)
- {
- token_t t = DARR_AT(token_t, stream->data, i);
- if (t.type == TOKEN_PP_USE)
- {
- if (i + 1 >= stream->available ||
- DARR_AT(token_t, stream->data, i + 1).type != TOKEN_LITERAL_STRING)
- {
- stream->used = i + 1 >= stream->available ? i : i + 1;
- for (size_t i = 0; i < (new_stream.used / sizeof(token_t)); ++i)
- free(TOKEN_STREAM_AT(new_stream.data, i).str);
- free(new_stream.data);
- return PERR_PREPROCESSOR_EXPECTED_STRING;
- }
- // Load and tokenise another file
- ++i;
- t = DARR_AT(token_t, stream->data, i);
- FILE *fp = fopen(t.str, "rb");
- if (!fp)
- {
- for (size_t i = 0; i < (new_stream.used / sizeof(token_t)); ++i)
- free(TOKEN_STREAM_AT(new_stream.data, i).str);
- free(new_stream.data);
- stream->used = i;
- return PERR_PREPROCESSOR_FILE_NONEXISTENT;
- }
- buffer_t buffer = darr_read_file(fp);
- fclose(fp);
-
- token_stream_t fstream = {0};
- lerr_t lerr = tokenise_buffer(&buffer, &fstream);
- free(buffer.data);
- if (lerr)
- {
- if (fstream.data)
- {
- for (size_t i = 0; i < fstream.available; ++i)
- free(TOKEN_STREAM_AT(fstream.data, i).str);
- free(fstream.data);
- }
- for (size_t i = 0; i < (new_stream.used / sizeof(token_t)); ++i)
- free(TOKEN_STREAM_AT(new_stream.data, i).str);
- free(new_stream.data);
- stream->used = i;
- return PERR_PREPROCESSOR_FILE_PARSE_ERROR;
- }
- darr_append_bytes(&new_stream, fstream.data,
- sizeof(token_t) * fstream.available);
- free(fstream.data);
- }
- else
- {
- token_t copy = token_copy(t);
- darr_append_bytes(&new_stream, (byte *)&copy, sizeof(copy));
- }
- }
-
- new_stream.available = new_stream.used / sizeof(token_t);
- new_stream.used = 0;
- *new = new_stream;
-
- return PERR_OK;
-}
-
-// Inlines named token blocks.
-//
-// Pass 1 registers every block: a TOKEN_PP_CONST token (whose text carries
-// the block name between parentheses) opens a block and the next
-// TOKEN_PP_END closes it.  Pass 2 builds the output: block definitions are
-// dropped, each TOKEN_PP_REFERENCE is replaced by copies of the tokens of
-// the block it names, and every other token is copied across.
-//
-// On success *new receives the result (available = token count, used = 0)
-// and PERR_OK is returned.  On failure *new is not written and stream->used
-// indexes the offending token (the opening token for a malformed or
-// unterminated block, the reference for an unknown name).
-perr_t preprocess_macro_blocks(token_stream_t *stream, token_stream_t *new)
-{
-  darr_t block_registry = {0};
-  darr_init(&block_registry, sizeof(block_t));
-
-  // Pass 1: record where each block's body lives inside stream
-  for (size_t i = 0; i < stream->available; ++i)
-  {
-    token_t t = DARR_AT(token_t, stream->data, i);
-    if (t.type != TOKEN_PP_CONST)
-      continue;
-
-    char *sym = t.str;
-    size_t start = strcspn(sym, "(");
-    size_t end = strcspn(sym, ")");
-    // Reject a missing parenthesis, a ")" that precedes the "(" (which
-    // would underflow the name size) and an empty "()"
-    if (end == t.str_size || start == t.str_size || end <= start + 1)
-    {
-      stream->used = i;
-      free(block_registry.data);
-      return PERR_PREPROCESSOR_EXPECTED_NAME;
-    }
-    // The name is a view into the token's own string, not a copy
-    block_t block = {.name = sym + start + 1, .name_size = end - start - 1};
-
-    const size_t opening = i;
-    const size_t body = i + 1;
-    // Look for the terminator without ever indexing past the last token
-    for (i = body; i < stream->available; ++i)
-      if (DARR_AT(token_t, stream->data, i).type == TOKEN_PP_END)
-        break;
-    if (i >= stream->available)
-    {
-      // Report the unterminated block itself; `i` is past the end here
-      stream->used = opening;
-      free(block_registry.data);
-      return PERR_PREPROCESSOR_EXPECTED_END;
-    }
-
-    // Set the block's token DARR by hand: it aliases stream's storage and
-    // counts tokens (not bytes) in both available and used
-    block.code.data = stream->data + (body * sizeof(token_t));
-    block.code.available = i - body;
-    block.code.used = block.code.available;
-    darr_append_bytes(&block_registry, (byte *)&block, sizeof(block));
-  }
-
-  // darr_append_bytes grows `used` in bytes, so convert to an element count
-  const size_t block_count = block_registry.used / sizeof(block_t);
-
-  if (block_count == 0)
-  {
-    // Nothing to preprocess so just copy wholesale
-    free(block_registry.data);
-    *new = (token_stream_t){0};
-    darr_init(new, sizeof(token_t));
-    for (size_t i = 0; i < stream->available; ++i)
-    {
-      token_t token = DARR_AT(token_t, stream->data, i);
-      token_t copy = token_copy(token);
-      darr_append_bytes(new, (byte *)&copy, sizeof(copy));
-    }
-    new->available = new->used / sizeof(token_t);
-    new->used = 0;
-    return PERR_OK;
-  }
-
-  // Pass 2: stream with blocks now inlined
-  token_stream_t new_stream = {0};
-  darr_init(&new_stream, sizeof(token_t));
-  for (size_t i = 0; i < stream->available; ++i)
-  {
-    token_t t = DARR_AT(token_t, stream->data, i);
-    if (t.type == TOKEN_PP_CONST)
-    {
-      // Definitions emit nothing: stop on the matching end token (pass 1
-      // verified there is one) and let the outer loop step over it
-      while (i < stream->available &&
-             DARR_AT(token_t, stream->data, i).type != TOKEN_PP_END)
-        ++i;
-    }
-    else if (t.type == TOKEN_PP_REFERENCE)
-    {
-      // Find the reference in the block registry
-      block_t block = search_blocks((block_t *)block_registry.data,
-                                    block_count, t.str, t.str_size);
-      if (!block.name)
-      {
-        // Tokens already placed in new_stream are copies that own their
-        // strings, so release those before the array itself
-        for (size_t k = 0; k < (new_stream.used / sizeof(token_t)); ++k)
-          free(DARR_AT(token_t, new_stream.data, k).str);
-        free(new_stream.data);
-        free(block_registry.data);
-        stream->used = i;
-        return PERR_PREPROCESSOR_UNKNOWN_NAME;
-      }
-
-      // Inline the block found
-      for (size_t j = 0; j < block.code.used; j++)
-      {
-        token_t b_token = DARR_AT(token_t, block.code.data, j);
-        token_t copy = token_copy(b_token);
-        darr_append_bytes(&new_stream, (byte *)&copy, sizeof(token_t));
-      }
-    }
-    else
-    {
-      // Insert into stream as is
-      token_t copy = token_copy(t);
-      darr_append_bytes(&new_stream, (byte *)&copy, sizeof(copy));
-    }
-  }
-
-  // Free block registry (the blocks only alias stream, nothing else to free)
-  free(block_registry.data);
-
-  new_stream.available = new_stream.used / sizeof(token_t);
-  new_stream.used = 0;
-  *new = new_stream;
-
-  return PERR_OK;
-}
-
-// Runs both preprocessing passes (preprocess_use_blocks, then
-// preprocess_macro_blocks) and, when both succeed, releases the tokens
-// stream held and replaces *stream with the fully expanded result.
-//
-// On failure the error of the failing pass is returned and stream keeps its
-// tokens; if the second pass failed, stream->used is set to the position
-// that pass reported.
-perr_t preprocessor(token_stream_t *stream)
-{
-  token_stream_t with_includes = {0};
-  perr_t status = preprocess_use_blocks(stream, &with_includes);
-  if (status != PERR_OK)
-    return status;
-
-  token_stream_t expanded = {0};
-  status = preprocess_macro_blocks(&with_includes, &expanded);
-  if (status != PERR_OK)
-    stream->used = with_includes.used;
-
-  // The intermediate stream is finished with whichever way pass two went
-  size_t k = 0;
-  while (k < with_includes.available)
-  {
-    free(TOKEN_STREAM_AT(with_includes.data, k).str);
-    ++k;
-  }
-  free(with_includes.data);
-
-  if (status != PERR_OK)
-    return status;
-
-  // Success: retire the caller's original tokens and hand over the new ones
-  for (k = 0; k < stream->available; ++k)
-    free(TOKEN_STREAM_AT(stream->data, k).str);
-  free(stream->data);
-  *stream = expanded;
-
-  return PERR_OK;
-}
-
-perr_t parse_next(token_stream_t *stream, presult_t *ret)
-{ /* Parses one unit (label definition, global directive or instruction)
-   * starting at the token indexed by stream->used and stores it in *ret.
-   *
-   * The token at stream->used is read without a bounds check, so the caller
-   * must ensure stream->used < stream->available.  Cases that consume an
-   * extra token here (global directive, absolute jump, call) advance
-   * stream->used onto that token; stepping past the last token of the unit
-   * is left to the caller.  The operand helpers are also given stream and
-   * may advance it themselves -- NOTE(review): they are defined outside
-   * this block, confirm.
-   *
-   * Returns PERR_OK on success, otherwise the perr_t describing the failure.
-   */
- token_t token = TOKEN_STREAM_AT(stream->data, stream->used);
- perr_t perr = PERR_OK;
- switch (token.type)
- {
- case TOKEN_LITERAL_STRING:
- case TOKEN_PP_CONST:
- case TOKEN_PP_USE:
- case TOKEN_PP_REFERENCE:
- case TOKEN_PP_END:
- case TOKEN_LITERAL_NUMBER:
- case TOKEN_LITERAL_CHAR:
- return PERR_EXPECTED_SYMBOL; // literals and preprocessor tokens cannot begin a unit
- case TOKEN_GLOBAL: { // start-label directive: the next token must be a symbol
- if (stream->used + 1 >= stream->available ||
- TOKEN_STREAM_AT(stream->data, stream->used + 1).type != TOKEN_SYMBOL)
- return PERR_EXPECTED_LABEL;
- ++stream->used; // step onto the symbol that names the label
- token_t label = TOKEN_STREAM_AT(stream->data, stream->used);
- *ret = presult_global(stream->used, label.str, label.str_size, 0);
- return PERR_OK;
- }
- case TOKEN_NOOP:
- *ret = presult_instruction(stream->used, INST_NOOP);
- ret->type = PRES_COMPLETE_RESULT; // no type suffix or operand is parsed for this opcode
- break;
- case TOKEN_HALT:
- *ret = presult_instruction(stream->used, INST_HALT);
- ret->type = PRES_COMPLETE_RESULT;
- break;
- case TOKEN_PUSH:
- *ret = presult_instruction(stream->used, INST_PUSH(BYTE, 0)); // BYTE/0 look like placeholders the helper below replaces -- TODO confirm
- perr = parse_utype_inst_with_operand(stream, &ret->instruction);
- break;
- case TOKEN_POP:
- *ret = presult_instruction(stream->used, INST_POP(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_PUSH_REG:
- *ret = presult_instruction(stream->used, INST_PUSH_REG(BYTE, 0));
- perr = parse_utype_inst_with_operand(stream, &ret->instruction);
- break;
- case TOKEN_MOV:
- *ret = presult_instruction(stream->used, INST_MOV(BYTE, 0));
- perr = parse_utype_inst_with_operand(stream, &ret->instruction);
- break;
- case TOKEN_DUP:
- *ret = presult_instruction(stream->used, INST_DUP(BYTE, 0));
- perr = parse_utype_inst_with_operand(stream, &ret->instruction);
- break;
- case TOKEN_MALLOC:
- *ret = presult_instruction(stream->used, INST_MALLOC(BYTE, 0));
- perr = parse_utype_inst_with_operand(stream, &ret->instruction);
- break;
- case TOKEN_MSET:
- *ret = presult_instruction(stream->used, INST_MSET(BYTE, 0));
- perr = parse_utype_inst_with_operand(stream, &ret->instruction);
- break;
- case TOKEN_MGET:
- *ret = presult_instruction(stream->used, INST_MGET(BYTE, 0));
- perr = parse_utype_inst_with_operand(stream, &ret->instruction);
- break;
- case TOKEN_MALLOC_STACK:
- *ret = presult_instruction(stream->used, INST_MALLOC_STACK(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_MSET_STACK:
- *ret = presult_instruction(stream->used, INST_MSET_STACK(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_MGET_STACK:
- *ret = presult_instruction(stream->used, INST_MGET_STACK(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_MDELETE:
- *ret = presult_instruction(stream->used, INST_MDELETE);
- break;
- case TOKEN_MSIZE:
- *ret = presult_instruction(stream->used, INST_MSIZE);
- break;
- case TOKEN_NOT:
- *ret = presult_instruction(stream->used, INST_NOT(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_OR:
- *ret = presult_instruction(stream->used, INST_OR(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_AND:
- *ret = presult_instruction(stream->used, INST_AND(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_XOR:
- *ret = presult_instruction(stream->used, INST_XOR(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_EQ:
- *ret = presult_instruction(stream->used, INST_EQ(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_LT:
- *ret = presult_instruction(stream->used, INST_LT(BYTE));
- perr = parse_type_inst(stream, &ret->instruction); // NOTE(review): ordering comparisons and print use parse_type_inst, not parse_utype_inst -- presumably to accept signed types; confirm
- break;
- case TOKEN_LTE:
- *ret = presult_instruction(stream->used, INST_LTE(BYTE));
- perr = parse_type_inst(stream, &ret->instruction);
- break;
- case TOKEN_GT:
- *ret = presult_instruction(stream->used, INST_GT(BYTE));
- perr = parse_type_inst(stream, &ret->instruction);
- break;
- case TOKEN_GTE:
- *ret = presult_instruction(stream->used, INST_GTE(BYTE));
- perr = parse_type_inst(stream, &ret->instruction);
- break;
- case TOKEN_PLUS:
- *ret = presult_instruction(stream->used, INST_PLUS(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_SUB:
- *ret = presult_instruction(stream->used, INST_SUB(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_MULT:
- *ret = presult_instruction(stream->used, INST_MULT(BYTE));
- perr = parse_utype_inst(stream, &ret->instruction);
- break;
- case TOKEN_PRINT:
- *ret = presult_instruction(stream->used, INST_PRINT(BYTE));
- perr = parse_type_inst(stream, &ret->instruction);
- break;
- case TOKEN_JUMP_ABS:
- *ret = presult_instruction(stream->used, INST_JUMP_ABS(0));
- ++stream->used; // move onto the operand token
- if (stream->used >= stream->available)
- return PERR_EXPECTED_OPERAND; // the jump was the final token
- return parse_word_label_or_relative(stream, ret); // helper name suggests a number, label or relative offset is accepted -- TODO confirm
- case TOKEN_JUMP_STACK:
- *ret = presult_instruction(stream->used, INST_JUMP_STACK);
- break;
- case TOKEN_JUMP_IF: {
- *ret = presult_instruction(stream->used, INST_JUMP_IF(BYTE, 0));
- return parse_jump_inst_operand(stream, ret);
- }
- case TOKEN_CALL:
- *ret = presult_instruction(stream->used, INST_CALL(0));
- ++stream->used; // move onto the operand token
- if (stream->used >= stream->available)
- return PERR_EXPECTED_OPERAND; // the call was the final token
- return parse_word_label_or_relative(stream, ret);
- case TOKEN_CALL_STACK:
- *ret = presult_instruction(stream->used, INST_CALL_STACK);
- break;
- case TOKEN_RET:
- *ret = presult_instruction(stream->used, INST_RET);
- break;
- case TOKEN_SYMBOL: { // a bare symbol is only valid as a label definition, written name:
- size_t label_size = strcspn(token.str, ":"); // index of the first ':' (the string length when there is none)
- if (label_size == token.str_size) // no ':' found (assumes str_size == strlen(str) -- TODO confirm)
- return PERR_UNKNOWN_OPERATOR;
- else if (label_size != token.str_size - 1) // ':' present but not as the final character
- return PERR_EXPECTED_LABEL;
- *ret = presult_label(stream->used, token.str, label_size, 0); // the name excludes the trailing ':'
- break;
- }
- case TOKEN_STAR:
- default:
- return PERR_UNKNOWN_OPERATOR;
- }
- return perr;
-}
-
-// Turns an array of parse results into a program.
-//
-// Pass 1 walks the results counting instructions: each label definition is
-// registered with the index of the instruction that follows it, relative
-// operands are rewritten in place (inside results) to absolute instruction
-// indices, and the global directive naming the start label is remembered.
-// Pass 2 emits the instructions, resolving label operands through the
-// registry.
-//
-// results        parse results; relative entries are modified in place
-// res_count      number of entries in results
-// result_reached must not be NULL.  On failure it receives the index of the
-//                result the error relates to; on success it is set to
-//                res_count.
-// program_ptr    on success receives a malloc'd program owned by the caller;
-//                not written on failure
-//
-// Returns PERR_OK, PERR_INVALID_RELATIVE_ADDRESS (a negative offset reaching
-// before the first instruction) or PERR_UNKNOWN_LABEL.
-perr_t process_presults(presult_t *results, size_t res_count,
-                        size_t *result_reached, prog_t **program_ptr)
-{
-  assert(result_reached && "process_presults: result_reached is NULL?!");
-  *result_reached = 0;
-  label_t start_label = {0};
-  // Index of the global directive, kept so an unknown start label is
-  // reported at the directive rather than at result 0
-  size_t start_index = 0;
-
-  darr_t label_registry = {0};
-  darr_init(&label_registry, sizeof(label_t));
-  word inst_count = 0;
-  for (size_t i = 0; i < res_count; ++i)
-  {
-    presult_t res = results[i];
-    switch (res.type)
-    {
-    case PRES_LABEL: {
-      // A label addresses the next instruction to be emitted
-      label_t label = {.name = res.label.name,
-                       .name_size = res.label.size,
-                       .addr = inst_count};
-      darr_append_bytes(&label_registry, (byte *)&label, sizeof(label));
-      break;
-    }
-    case PRES_RELATIVE_ADDRESS: {
-      s_word offset = res.address;
-      if (offset < 0 && ((word)(-offset)) > inst_count)
-      {
-        free(label_registry.data);
-        *result_reached = i;
-        return PERR_INVALID_RELATIVE_ADDRESS;
-      }
-      // Written to results[i] itself so that pass 2 sees the final operand
-      results[i].instruction.operand.as_word = ((s_word)inst_count) + offset;
-      inst_count++;
-      break;
-    }
-    case PRES_GLOBAL_LABEL: {
-      start_label = (label_t){.name = res.label.name,
-                              .name_size = res.label.size,
-                              .addr = (word)inst_count};
-      start_index = i;
-      break;
-    }
-    case PRES_LABEL_ADDRESS:
-    case PRES_COMPLETE_RESULT:
-      inst_count++;
-      break;
-    default:
-      break;
-    }
-  }
-
-  // The registry is filled with darr_append_bytes, so `used` is in bytes
-  const size_t label_count = label_registry.used / sizeof(label_t);
-
-  darr_t instr_darr = {0};
-  darr_init(&instr_darr, sizeof(inst_t));
-
-  prog_header_t header = {0};
-  if (start_label.name_size > 0)
-  {
-    label_t label = search_labels((label_t *)label_registry.data, label_count,
-                                  start_label.name, start_label.name_size);
-    if (!label.name)
-    {
-      free(instr_darr.data);
-      free(label_registry.data);
-      *result_reached = start_index;
-      return PERR_UNKNOWN_LABEL;
-    }
-    header.start_address = label.addr;
-  }
-
-  for (size_t i = 0; i < res_count; ++i)
-  {
-    presult_t res = results[i];
-    switch (res.type)
-    {
-    case PRES_LABEL_ADDRESS: {
-      inst_t inst = {0};
-      label_t label = search_labels((label_t *)label_registry.data,
-                                    label_count, res.label.name,
-                                    res.label.size);
-
-      if (!label.name)
-      {
-        free(instr_darr.data);
-        free(label_registry.data);
-        *result_reached = i;
-        return PERR_UNKNOWN_LABEL;
-      }
-
-      inst.opcode = res.instruction.opcode;
-      inst.operand = DWORD(label.addr);
-      darr_append_bytes(&instr_darr, (byte *)&inst, sizeof(inst));
-      break;
-    }
-    case PRES_RELATIVE_ADDRESS:
-    case PRES_COMPLETE_RESULT:
-      darr_append_bytes(&instr_darr, (byte *)&res.instruction,
-                        sizeof(res.instruction));
-      break;
-    case PRES_GLOBAL_LABEL:
-    case PRES_LABEL:
-    default:
-      // Directives and label definitions emit no instruction
-      break;
-    }
-  }
-
-  free(label_registry.data);
-  // NOTE(review): the allocation is unchecked, as before; perr_t has no
-  // out-of-memory code to report a failure with
-  prog_t *program = malloc(sizeof(*program) + (sizeof(inst_t) * inst_count));
-  program->header = header;
-  program->count = inst_count;
-  memcpy(program->instructions, instr_darr.data, instr_darr.used);
-  free(instr_darr.data);
-  *program_ptr = program;
-  // Every result was consumed; callers compare this against res_count
-  *result_reached = res_count;
-  return PERR_OK;
-}
-
-// Preprocesses stream, parses it into parse results with parse_next and
-// assembles those into a program with process_presults.
-//
-// stream is replaced by its preprocessed form.  On success PERR_OK is
-// returned, *program_ptr receives the program and stream->used equals
-// stream->available.  On failure the error is returned and stream->used
-// indexes the token the error relates to, as far as the failing stage
-// reported one.
-perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
-{
-  // Preprocessor
-  perr_t perr = preprocessor(stream);
-  if (perr)
-    return perr;
-  darr_t presults = {0};
-  darr_init(&presults, sizeof(presult_t));
-  while (stream->used < stream->available)
-  {
-    presult_t pres = {0};
-    perr_t err = parse_next(stream, &pres);
-    if (err)
-    {
-      // `used` is still a byte count while the array is being built
-      presults_free((presult_t *)presults.data,
-                    presults.used / sizeof(presult_t));
-      free(presults.data);
-      return err;
-    }
-    darr_append_bytes(&presults, (byte *)&pres, sizeof(presult_t));
-    // parse_next stops on the last token of the unit; step past it
-    ++stream->used;
-  }
-
-  presults.available = presults.used / sizeof(presult_t);
-  presults.used = 0;
-
-#if VERBOSE >= 2
-  // size_t is printed with %zu; %lu is only correct where it matches
-  // unsigned long
-  printf("[%sPARSER%s]: %zu tokens -> %zu parse units\n", TERM_YELLOW,
-         TERM_RESET, stream->available, presults.available);
-  for (size_t i = 0; i < presults.available; ++i)
-  {
-    presult_t pres = DARR_AT(presult_t, presults.data, i);
-    switch (pres.type)
-    {
-    case PRES_LABEL:
-      printf("\tLABEL: label=%s\n", pres.label.name);
-      break;
-    case PRES_LABEL_ADDRESS:
-      printf("\tLABEL_CALL: label=%s, inst=", pres.label.name);
-      inst_print(pres.instruction, stdout);
-      printf("\n");
-      break;
-    case PRES_RELATIVE_ADDRESS:
-      printf("\tRELATIVE_CALL: addr=%ld, inst=", pres.address);
-      inst_print(pres.instruction, stdout);
-      printf("\n");
-      break;
-    case PRES_GLOBAL_LABEL:
-      printf("\tSET_GLOBAL_START: name=%s\n", pres.label.name);
-      break;
-    case PRES_COMPLETE_RESULT:
-      printf("\tCOMPLETE: inst=");
-      inst_print(pres.instruction, stdout);
-      printf("\n");
-      break;
-    }
-  }
-#endif
-
-  size_t results_processed = 0;
-  perr = process_presults((presult_t *)presults.data, presults.available,
-                          &results_processed, program_ptr);
-  // Only an error carries a position worth reporting.  On success the
-  // stream stays fully consumed instead of being rewound.
-  if (perr != PERR_OK && results_processed < presults.available)
-  {
-    presult_t pres = DARR_AT(presult_t, presults.data, results_processed);
-    stream->used = pres.stream_index;
-  }
-  presults_free((presult_t *)presults.data, presults.available);
-  free(presults.data);
-  return perr;
-}
diff --git a/asm/parser.h b/asm/parser.h
deleted file mode 100644
index 7e2d1b7..0000000
--- a/asm/parser.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-24
- * Author: Aryadev Chavali
- * Description: Parser for assembly language
- */
-
-#ifndef PARSER_H
-#define PARSER_H
-
-#include "./lexer.h"
-
-#include <lib/inst.h>
-
-typedef enum // status codes returned by the preprocessor and parser routines
-{
- PERR_OK = 0, // success; zero so callers can test with `if (perr)`
- PERR_INTEGER_OVERFLOW, // NOTE(review): raised outside the code reviewed here; meaning taken from the name only
- PERR_NOT_A_NUMBER, // NOTE(review): raised outside the code reviewed here
- PERR_EXPECTED_UTYPE, // NOTE(review): raised outside the code reviewed here
- PERR_EXPECTED_TYPE, // NOTE(review): raised outside the code reviewed here
- PERR_EXPECTED_SYMBOL, // a literal or preprocessor token appeared where a unit must begin
- PERR_EXPECTED_LABEL, // global directive without a following symbol, or a symbol whose ':' is not its last character
- PERR_EXPECTED_OPERAND, // an absolute jump or call was the final token
- PERR_PREPROCESSOR_EXPECTED_NAME, // block definition token lacks a parenthesised name
- PERR_PREPROCESSOR_EXPECTED_STRING, // %USE not followed by a string literal
- PERR_PREPROCESSOR_EXPECTED_END, // block definition never closed by an end token
- PERR_PREPROCESSOR_FILE_NONEXISTENT, // fopen failed on the file named by %USE
- PERR_PREPROCESSOR_FILE_PARSE_ERROR, // tokenising the file named by %USE failed
- PERR_PREPROCESSOR_UNKNOWN_NAME, // reference to a block that was never defined
- PERR_INVALID_RELATIVE_ADDRESS, // negative relative offset reaching before the first instruction
- PERR_UNKNOWN_OPERATOR, // symbol without ':' or a token type the parser does not handle
- PERR_UNKNOWN_LABEL, // label operand or global start label not found among the defined labels
-} perr_t;
-
-const char *perr_as_cstr(perr_t); // presumably a printable name for the code -- definition not reviewed here
-
-typedef struct // one parsed unit: a label, a directive or an instruction
-{
- size_t stream_index; // token index the unit came from; parse_stream uses it to position error reports
- inst_t instruction; // instruction to emit (for label-address results only its opcode is kept)
- s_word address; // signed offset in instructions, read for PRES_RELATIVE_ADDRESS
- struct PLabel
- {
- char *name; // released via presult_free/presults_free, presumably -- TODO confirm ownership
- size_t size; // length of name in characters
- } label;
- enum PResult_Type
- {
- PRES_LABEL = 0, // label definition; emits nothing, records the next instruction's index
- PRES_LABEL_ADDRESS, // instruction whose operand becomes the address of the named label
- PRES_GLOBAL_LABEL, // names the label whose address becomes the program's start address
- PRES_RELATIVE_ADDRESS, // instruction whose operand becomes (own instruction index + address)
- PRES_COMPLETE_RESULT, // instruction emitted exactly as stored
- } type;
-} presult_t;
-
-presult_t presult_label(size_t, const char *, size_t, s_word); // called as (stream index, name, name length, 0) by parse_next
-presult_t presult_label_ref(size_t, inst_t, const char *, size_t);
-presult_t presult_instruction(size_t, inst_t); // called as (stream index, instruction) by parse_next
-presult_t presult_relative(size_t, inst_t, s_word);
-presult_t presult_global(size_t, const char *, size_t, s_word); // called as (stream index, name, name length, 0) by parse_next
-void presult_free(presult_t);
-void presults_free(presult_t *, size_t); // second argument is an element count, as passed by parse_stream
-
-typedef struct // a defined label and the instruction it addresses
-{
- char *name; // borrowed from the parse result that defined the label
- size_t name_size; // length of name in characters
- word addr; // index of the first instruction following the label
-} label_t;
-
-label_t search_labels(label_t *, size_t, char *, size_t); // (labels, label count, name, name length); callers treat a NULL name in the result as "not found"
-
-typedef struct // a named run of tokens that references expand to
-{
- char *name; // points into the text of the token that opened the block; not a copy
- size_t name_size; // length of name in characters
- darr_t code; // view onto the defining stream's tokens (not owned); available/used hold token counts
-} block_t;
-
-block_t search_blocks(block_t *, size_t, char *, size_t); // (blocks, size, name, name length); callers treat a NULL name in the result as "not found"
-
-perr_t preprocess_use_blocks(token_stream_t *, token_stream_t *); // (input, output): replaces each %USE directive with the tokens of the named file
-perr_t preprocess_macro_blocks(token_stream_t *, token_stream_t *); // (input, output): removes block definitions and inlines each reference
-// Runs both passes above and, on success, replaces the stream's tokens with the result
-perr_t preprocessor(token_stream_t *);
-
-// Parses one "parse result" starting at the stream's current token (used)
-perr_t parse_next(token_stream_t *, presult_t *);
-// Constructs a program from an array of parse results (from repeatedly
-// calling parse_next); the size_t * must be non-NULL and locates errors
-perr_t process_presults(presult_t *, size_t, size_t *, prog_t **);
-
-// Preprocesses, generates results then constructs a program all in
-// one routine (thing to call in most use cases).
-perr_t parse_stream(token_stream_t *, prog_t **);
-
-#endif