about | summary | refs | log | tree | commit | diff
path: root/asm
diff options
context:
space:
mode:
Diffstat (limited to 'asm')
-rw-r--r-- asm/lexer.c 623
-rw-r--r-- asm/lexer.h 91
-rw-r--r-- asm/main.c 148
-rw-r--r-- asm/main.cpp 19
-rw-r--r-- asm/parser.c 935
-rw-r--r-- asm/parser.h 104
6 files changed, 19 insertions, 1901 deletions
diff --git a/asm/lexer.c b/asm/lexer.c
deleted file mode 100644
index a4905fb..0000000
--- a/asm/lexer.c
+++ /dev/null
@@ -1,623 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-24
- * Author: Aryadev Chavali
- * Description: Lexer for assembly language
- */
-
-#include <assert.h>
-#include <ctype.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <lib/inst.h>
-
-#include "./lexer.h"
-
-/**
- * Give the printable name of a token type.
- *
- * @param type token type to name
- * @return a static upper-case string, or "" when the value has no name
- */
-const char *token_type_as_cstr(token_type_t type)
-{
-  // Lookup table indexed by token type; gaps are left as NULL.
-  static const char *const names[] = {
-      [TOKEN_PP_USE]         = "PP_USE",
-      [TOKEN_PP_CONST]       = "PP_CONST",
-      [TOKEN_PP_END]         = "PP_END",
-      [TOKEN_PP_REFERENCE]   = "PP_REFERENCE",
-      [TOKEN_GLOBAL]         = "GLOBAL",
-      [TOKEN_STAR]           = "STAR",
-      [TOKEN_LITERAL_STRING] = "LITERAL_STRING",
-      [TOKEN_LITERAL_NUMBER] = "LITERAL_NUMBER",
-      [TOKEN_LITERAL_CHAR]   = "LITERAL_CHAR",
-      [TOKEN_NOOP]           = "NOOP",
-      [TOKEN_HALT]           = "HALT",
-      [TOKEN_PUSH]           = "PUSH",
-      [TOKEN_POP]            = "POP",
-      [TOKEN_PUSH_REG]       = "PUSH_REG",
-      [TOKEN_MOV]            = "MOV",
-      [TOKEN_DUP]            = "DUP",
-      [TOKEN_MALLOC]         = "MALLOC",
-      [TOKEN_MALLOC_STACK]   = "MALLOC_STACK",
-      [TOKEN_MSET]           = "MSET",
-      [TOKEN_MSET_STACK]     = "MSET_STACK",
-      [TOKEN_MGET]           = "MGET",
-      [TOKEN_MGET_STACK]     = "MGET_STACK",
-      [TOKEN_MDELETE]        = "MDELETE",
-      [TOKEN_MSIZE]          = "MSIZE",
-      [TOKEN_NOT]            = "NOT",
-      [TOKEN_OR]             = "OR",
-      [TOKEN_AND]            = "AND",
-      [TOKEN_XOR]            = "XOR",
-      [TOKEN_EQ]             = "EQ",
-      [TOKEN_LT]             = "LT",
-      [TOKEN_LTE]            = "LTE",
-      [TOKEN_GT]             = "GT",
-      [TOKEN_GTE]            = "GTE",
-      [TOKEN_PLUS]           = "PLUS",
-      [TOKEN_SUB]            = "SUB",
-      [TOKEN_MULT]           = "MULT",
-      [TOKEN_PRINT]          = "PRINT",
-      [TOKEN_JUMP_ABS]       = "JUMP_ABS",
-      [TOKEN_JUMP_STACK]     = "JUMP_STACK",
-      [TOKEN_JUMP_IF]        = "JUMP_IF",
-      [TOKEN_CALL]           = "CALL",
-      [TOKEN_CALL_STACK]     = "CALL_STACK",
-      [TOKEN_RET]            = "RET",
-      [TOKEN_SYMBOL]         = "SYMBOL",
-  };
-  const size_t known = sizeof(names) / sizeof(names[0]);
-
-  // The cast turns negative values into large indices, which are rejected.
-  if ((size_t)type >= known || !names[type])
-    return "";
-  return names[type];
-}
-
-/**
- * Give the printable name of a lexer error code.
- *
- * @param lerr error code to name
- * @return a static string, or "" for an unrecognised code
- */
-const char *lerr_as_cstr(lerr_t lerr)
-{
-  if (lerr == LERR_OK)
-    return "OK";
-  if (lerr == LERR_INVALID_CHAR_LITERAL)
-    return "INVALID_CHAR_LITERAL";
-  if (lerr == LERR_INVALID_PREPROCESSOR_DIRECTIVE)
-    return "INVALID_PREPROCESSOR_DIRECTIVE";
-  return "";
-}
-
-/**
- * Duplicate a token, giving the copy its own heap-allocated string.
- *
- * @param t token to copy; t.str must hold at least t.str_size bytes
- * @return a token equal to t whose str is a fresh NUL-terminated copy
- *         (the caller frees it)
- */
-token_t token_copy(token_t t)
-{
-  char *duplicate = malloc(t.str_size + 1);
-  memcpy(duplicate, t.str, t.str_size);
-  duplicate[t.str_size] = '\0';
-
-  token_t result = t;
-  result.str     = duplicate;
-  return result;
-}
-
-/**
- * Number of bytes the lexer may still consume from the buffer.
- *
- * The final byte of the buffer is never counted (hence the `- 1`).
- *
- * @param buffer buffer whose `used` field is the read cursor
- * @return 0 once the cursor has reached or passed `available`,
- *         otherwise `available - 1 - used`
- */
-size_t space_left(buffer_t *buffer)
-{
-  // `>=` rather than `==`: a cursor that overshot the end must not make the
-  // unsigned subtraction below wrap around to a huge value.
-  if (buffer->used >= buffer->available)
-    return 0;
-  return buffer->available - 1 - buffer->used;
-}
-
-/**
- * Map an ASCII lower-case letter to its upper-case form.
- *
- * @param c character to convert
- * @return the upper-case letter for 'a'..'z', otherwise c unchanged
- */
-char uppercase(char c)
-{
-  const int is_lower = 'a' <= c && c <= 'z';
-  return is_lower ? (char)('A' + (c - 'a')) : c;
-}
-
-/**
- * Decide whether a character may appear inside a symbol.
- *
- * @param c character to classify
- * @return true for letters, digits and any of - _ . : ( ) % $
- */
-bool is_symbol(char c)
-{
-  // <ctype.h> functions require a value representable as unsigned char;
-  // a negative plain char (any byte >= 0x80) would be undefined behaviour.
-  if (isalnum((unsigned char)c))
-    return true;
-  switch (c)
-  {
-  case '-':
-  case '_':
-  case '.':
-  case ':':
-  case '(':
-  case ')':
-  case '%':
-  case '$':
-    return true;
-  default:
-    return false;
-  }
-}
-
-/**
- * Decide whether a character is a hexadecimal digit.
- *
- * @param c character to classify
- * @return true for 0-9, a-f and A-F
- */
-bool is_valid_hex_char(char c)
-{
-  if ('0' <= c && c <= '9')
-    return true;
-  if ('a' <= c && c <= 'f')
-    return true;
-  return 'A' <= c && c <= 'F';
-}
-
-/**
- * Lex the symbol that starts at buffer->data[buffer->used].
- *
- * The symbol is the longest run of is_symbol() characters. It is
- * upper-cased in place inside the buffer and then classified against an
- * ordered keyword table. A keyword is either matched exactly (e.g. HALT)
- * or as a prefix that must be followed by at least one more character
- * (e.g. PUSH.<type>); for a prefix match the token string is the text
- * after the prefix. Anything unmatched becomes a TOKEN_SYMBOL carrying
- * the whole text.
- *
- * @param buffer source buffer; `used` is advanced past the symbol on success
- * @param column in/out column counter; advanced by the symbol size minus one
- * @param token  receives the token; its str is heap allocated (caller frees)
- * @return LERR_OK, or LERR_INVALID_PREPROCESSOR_DIRECTIVE when the symbol
- *         starts with '%', is longer than one character and is not a known
- *         directive (nothing is consumed or allocated in that case)
- */
-lerr_t tokenise_symbol(buffer_t *buffer, size_t *column, token_t *token)
-{
-  static_assert(NUMBER_OF_OPCODES == 98, "tokenise_buffer: Out of date!");
-
-#define KW_EXACT(TEXT, TYPE) {TEXT, sizeof(TEXT) - 1, true, TYPE}
-#define KW_PREFIX(TEXT, TYPE) {TEXT, sizeof(TEXT) - 1, false, TYPE}
-  // Scanned top to bottom, first match wins: longer prefixes must stay
-  // ahead of the shorter prefixes they contain (PUSH.REG. before PUSH.).
-  static const struct
-  {
-    const char *text;
-    size_t size;
-    bool exact;
-    token_type_t type;
-  } keywords[] = {
-      KW_PREFIX("%CONST", TOKEN_PP_CONST),
-      KW_EXACT("%USE", TOKEN_PP_USE),
-      KW_EXACT("%END", TOKEN_PP_END),
-      KW_PREFIX("$", TOKEN_PP_REFERENCE),
-      KW_EXACT("NOOP", TOKEN_NOOP),
-      KW_EXACT("HALT", TOKEN_HALT),
-      KW_PREFIX("PUSH.REG.", TOKEN_PUSH_REG),
-      KW_PREFIX("PUSH.", TOKEN_PUSH),
-      KW_PREFIX("POP.", TOKEN_POP),
-      KW_PREFIX("MOV.", TOKEN_MOV),
-      KW_PREFIX("DUP.", TOKEN_DUP),
-      KW_PREFIX("MALLOC.STACK.", TOKEN_MALLOC_STACK),
-      KW_PREFIX("MALLOC.", TOKEN_MALLOC),
-      KW_PREFIX("MSET.STACK.", TOKEN_MSET_STACK),
-      KW_PREFIX("MSET.", TOKEN_MSET),
-      KW_PREFIX("MGET.STACK.", TOKEN_MGET_STACK),
-      KW_PREFIX("MGET.", TOKEN_MGET),
-      KW_EXACT("MDELETE", TOKEN_MDELETE),
-      KW_EXACT("MSIZE", TOKEN_MSIZE),
-      KW_PREFIX("NOT.", TOKEN_NOT),
-      KW_PREFIX("OR.", TOKEN_OR),
-      KW_PREFIX("AND.", TOKEN_AND),
-      KW_PREFIX("XOR.", TOKEN_XOR),
-      // Previously accepted a bare "EQ." (size >= 3); now requires a type
-      // suffix like every other typed opcode.
-      KW_PREFIX("EQ.", TOKEN_EQ),
-      KW_PREFIX("LTE.", TOKEN_LTE),
-      KW_PREFIX("LT.", TOKEN_LT),
-      KW_PREFIX("GTE.", TOKEN_GTE),
-      KW_PREFIX("GT.", TOKEN_GT),
-      KW_PREFIX("SUB.", TOKEN_SUB),
-      KW_PREFIX("PLUS.", TOKEN_PLUS),
-      KW_PREFIX("MULT.", TOKEN_MULT),
-      KW_PREFIX("PRINT.", TOKEN_PRINT),
-      KW_EXACT("JUMP.ABS", TOKEN_JUMP_ABS),
-      KW_EXACT("JUMP.STACK", TOKEN_JUMP_STACK),
-      KW_PREFIX("JUMP.IF.", TOKEN_JUMP_IF),
-      KW_EXACT("CALL.STACK", TOKEN_CALL_STACK),
-      KW_EXACT("CALL", TOKEN_CALL),
-      KW_EXACT("RET", TOKEN_RET),
-      KW_EXACT("GLOBAL", TOKEN_GLOBAL),
-  };
-#undef KW_EXACT
-#undef KW_PREFIX
-  const size_t keyword_count = sizeof(keywords) / sizeof(keywords[0]);
-
-  // Measure the symbol, upper-casing it in place as we go.
-  size_t sym_size = 0;
-  for (; sym_size < space_left(buffer) &&
-         is_symbol(buffer->data[buffer->used + sym_size]);
-       ++sym_size)
-    buffer->data[buffer->used + sym_size] =
-        uppercase(buffer->data[buffer->used + sym_size]);
-
-  char *opcode = (char *)buffer->data + buffer->used;
-
-  // Defaults describe a plain symbol: whole text kept, nothing stripped.
-  token_type_t type = TOKEN_SYMBOL;
-  size_t offset     = 0;
-  bool matched      = false;
-  for (size_t i = 0; i < keyword_count && !matched; ++i)
-  {
-    const bool size_ok = keywords[i].exact ? sym_size == keywords[i].size
-                                           : sym_size > keywords[i].size;
-    if (size_ok && strncmp(opcode, keywords[i].text, keywords[i].size) == 0)
-    {
-      type    = keywords[i].type;
-      offset  = keywords[i].size;
-      matched = true;
-    }
-  }
-
-  // A multi-character '%' symbol must be one of the directives above.
-  if (!matched && sym_size > 1 && opcode[0] == '%')
-    return LERR_INVALID_PREPROCESSOR_DIRECTIVE;
-
-  token_t ret  = {0};
-  ret.type     = type;
-  ret.column   = *column;
-  ret.str_size = sym_size - offset;
-  // calloc zero-fills, so the copy is NUL terminated even when empty.
-  ret.str = calloc(ret.str_size + 1, 1);
-  memcpy(ret.str, opcode + offset, ret.str_size);
-
-  *column += sym_size - 1;
-  buffer->used += sym_size;
-  *token = ret;
-  return LERR_OK;
-}
-
-/**
- * Lex a decimal number literal, with an optional leading '-'.
- *
- * @param buffer source buffer; `used` is advanced past the literal
- * @param column in/out column counter; advanced by the literal's length
- * @return a TOKEN_LITERAL_NUMBER whose str is a heap-allocated copy of the
- *         literal text (caller frees)
- */
-token_t tokenise_number_literal(buffer_t *buffer, size_t *column)
-{
-  const size_t start = buffer->used;
-
-  // A leading minus sign counts as part of the literal.
-  size_t length = (buffer->data[start] == '-') ? 1 : 0;
-  while (length < space_left(buffer) && isdigit(buffer->data[start + length]))
-    ++length;
-
-  char *text = calloc(length + 1, 1);
-  memcpy(text, buffer->data + start, length);
-  text[length] = '\0';
-
-  token_t token = {.type     = TOKEN_LITERAL_NUMBER,
-                   .str      = text,
-                   .str_size = length,
-                   .column   = *column};
-  buffer->used += length;
-  *column += length;
-  return token;
-}
-
-/**
- * Lex a hex literal written as 'x' followed by hex digits.
- *
- * The token text is rewritten as a C-style "0x..." literal.
- *
- * @param buffer source buffer positioned on the 'x'; `used` is advanced
- *               past the 'x' and the digits
- * @param column in/out column counter; advanced by the digit count only
- * @return a TOKEN_LITERAL_NUMBER whose str is "0x" plus the digits (heap
- *         allocated, caller frees) and whose str_size includes the "0x"
- */
-token_t tokenise_hex_literal(buffer_t *buffer, size_t *column)
-{
-  // Step over the 'x' marker.
-  ++buffer->used;
-  const size_t start_column = *column;
-
-  size_t digits = 0;
-  while (digits < space_left(buffer) &&
-         is_valid_hex_char(buffer->data[buffer->used + digits]))
-    ++digits;
-
-  // Two extra bytes for the "0x" prefix, one for the terminator.
-  char *text = calloc(digits + 3, 1);
-  text[0]    = '0';
-  text[1]    = 'x';
-  memcpy(text + 2, buffer->data + buffer->used, digits);
-  text[digits + 2] = '\0';
-
-  buffer->used += digits;
-  *column += digits;
-
-  return (token_t){.type     = TOKEN_LITERAL_NUMBER,
-                   .str      = text,
-                   .str_size = digits + 2,
-                   .column   = start_column};
-}
-
-/**
- * Lex a plain (non-escaped) character literal of the form 'c'.
- *
- * No validation is done here: the character after the opening quote is
- * taken as the value and three bytes are consumed.
- *
- * @param buffer source buffer positioned on the opening quote
- * @param column in/out column counter; advanced by three
- * @return a TOKEN_LITERAL_CHAR whose str is the single character (heap
- *         allocated, caller frees)
- */
-token_t tokenise_char_literal(buffer_t *buffer, size_t *column)
-{
-  char *text = calloc(2, 1);
-  text[0]    = buffer->data[buffer->used + 1];
-  text[1]    = '\0';
-
-  const token_t token = {.type     = TOKEN_LITERAL_CHAR,
-                         .str      = text,
-                         .str_size = 1,
-                         .column   = *column};
-  buffer->used += 3;
-  *column += 3;
-  return token;
-}
-
-/**
- * Lex a string literal delimited by double quotes.
- *
- * The text runs up to the next '"' or, if there is none, to the end of
- * the buffer. No escape sequences are interpreted.
- *
- * @param buffer source buffer positioned on the opening quote; `used` is
- *               advanced past the text and, when present, the closing quote
- * @param column in/out column counter; advanced by the text length plus one
- * @return a TOKEN_LITERAL_STRING whose str is a heap-allocated copy of the
- *         text between the quotes (caller frees)
- */
-token_t tokenise_string_literal(buffer_t *buffer, size_t *column)
-{
-  // Skip the opening quote.
-  ++buffer->used;
-  size_t string_size;
-  for (string_size = 0; string_size + buffer->used < buffer->available &&
-                        buffer->data[buffer->used + string_size] != '\"';
-       ++string_size)
-    continue;
-  // False when the loop stopped at the end of the buffer instead of a quote.
-  const bool closed = buffer->used + string_size < buffer->available;
-
-  token_t t = {.type     = TOKEN_LITERAL_STRING,
-               .column   = *column,
-               .str      = malloc(string_size + 1),
-               .str_size = string_size};
-  memcpy(t.str, buffer->data + buffer->used, string_size);
-  t.str[string_size] = '\0';
-  *column += string_size + 1;
-  // Only step over a closing quote that exists: on an unterminated string
-  // the cursor must stop at `available`, not one byte beyond it.
-  buffer->used += string_size + (closed ? 1 : 0);
-  return t;
-}
-
-/**
- * Turn a whole source buffer into a stream of tokens.
- *
- * Whitespace, NUL bytes and ';' comments (to end of line) are skipped.
- * Each token's str is heap allocated; on success ownership of the stream
- * and every string passes to the caller.
- *
- * @param buffer     source text; `used` is the read cursor and is advanced
- * @param tokens_ptr receives the stream on success, with `available` set to
- *                   the token count and `used` reset to 0; untouched on error
- * @return LERR_OK on success. On failure everything lexed so far is freed
- *         and `buffer->used` is left at the offending position:
- *         - LERR_INVALID_PREPROCESSOR_DIRECTIVE from tokenise_symbol()
- *         - LERR_INVALID_CHAR_LITERAL for a malformed character literal,
- *           and also for a character that cannot start any token (lerr_t
- *           has no dedicated code for that case)
- */
-lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr)
-{
-  lerr_t lerr   = LERR_OK;
-  size_t column = 0, line = 1;
-  token_stream_t tokens = {0};
-  darr_init(&tokens, sizeof(token_t));
-  // The explicit cursor check keeps the loop safe even if a helper has left
-  // `used` beyond `available`.
-  while (buffer->used < buffer->available && space_left(buffer) != 0)
-  {
-    bool is_token = true;
-    token_t t     = {0};
-    char c        = buffer->data[buffer->used];
-    // <ctype.h> calls take unsigned char: plain char may be negative.
-    if (isspace((unsigned char)c) || c == '\0')
-    {
-      // Clean whitespace
-      for (; space_left(buffer) > 0 &&
-             (isspace((unsigned char)c) || c == '\0');
-           ++buffer->used, c = buffer->data[buffer->used])
-      {
-        ++column;
-        if (c == '\n')
-        {
-          column = 0;
-          ++line;
-        }
-      }
-      ++column;
-      is_token = false;
-    }
-    else if (c == ';')
-    {
-      // Start lexing at next line
-      for (; space_left(buffer) > 0 && c != '\n';
-           ++buffer->used, c = buffer->data[buffer->used])
-        continue;
-      column = 0;
-      ++line;
-      ++buffer->used;
-      is_token = false;
-    }
-    else if (c == '*')
-    {
-      t = (token_t){.type     = TOKEN_STAR,
-                    .column   = column,
-                    .str      = malloc(1),
-                    .str_size = 1};
-      t.str[0] = '\0';
-      ++buffer->used;
-    }
-    else if (c == '\"')
-      t = tokenise_string_literal(buffer, &column);
-    else if (isdigit((unsigned char)c) ||
-             (space_left(buffer) > 1 && c == '-' &&
-              isdigit((unsigned char)buffer->data[buffer->used + 1])))
-      t = tokenise_number_literal(buffer, &column);
-    else if (c == 'x' && space_left(buffer) > 1 &&
-             is_valid_hex_char(buffer->data[buffer->used + 1]))
-      t = tokenise_hex_literal(buffer, &column);
-    else if (is_symbol(c))
-    {
-      lerr = tokenise_symbol(buffer, &column, &t);
-      if (lerr)
-        goto fail;
-    }
-    else if (c == '\'')
-    {
-      if (space_left(buffer) < 2)
-      {
-        lerr = LERR_INVALID_CHAR_LITERAL;
-        goto fail;
-      }
-      else if (buffer->data[buffer->used + 1] == '\\')
-      {
-        char escape = '\0';
-        if (space_left(buffer) < 3 || buffer->data[buffer->used + 3] != '\'')
-        {
-          lerr = LERR_INVALID_CHAR_LITERAL;
-          goto fail;
-        }
-        switch (buffer->data[buffer->used + 2])
-        {
-        case 'n':
-          escape = '\n';
-          break;
-        case 't':
-          escape = '\t';
-          break;
-        case 'r':
-          escape = '\r';
-          break;
-        case '\\':
-          escape = '\\';
-          break;
-        default:
-          lerr = LERR_INVALID_CHAR_LITERAL;
-          goto fail;
-        }
-
-        t = (token_t){.type     = TOKEN_LITERAL_CHAR,
-                      .str      = malloc(2),
-                      .str_size = 1,
-                      .column   = column};
-        column += 2;
-        buffer->used += 4;
-        t.str[0] = escape;
-        t.str[1] = '\0';
-      }
-      else if (buffer->data[buffer->used + 2] != '\'')
-      {
-        // 'c must be closed by a quote; previously 'ab was accepted.
-        lerr = LERR_INVALID_CHAR_LITERAL;
-        goto fail;
-      }
-      else
-        t = tokenise_char_literal(buffer, &column);
-    }
-    else
-    {
-      // No rule consumes this character. Without an explicit failure the
-      // cursor would never advance and this loop would never terminate.
-      lerr = LERR_INVALID_CHAR_LITERAL;
-      goto fail;
-    }
-
-    if (is_token)
-    {
-      t.line = line;
-      darr_append_bytes(&tokens, (byte *)&t, sizeof(t));
-    }
-  }
-  tokens.available = tokens.used / sizeof(token_t);
-  tokens.used      = 0;
-  *tokens_ptr      = tokens;
-  return LERR_OK;
-
-fail:
-  // `used` counts bytes here; release every string lexed so far, not just
-  // the array that holds the tokens.
-  for (size_t i = 0; i < tokens.used / sizeof(token_t); ++i)
-    free(TOKEN_STREAM_AT(tokens.data, i).str);
-  free(tokens.data);
-  return lerr;
-}
diff --git a/asm/lexer.h b/asm/lexer.h
deleted file mode 100644
index 734ffa1..0000000
--- a/asm/lexer.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-24
- * Author: Aryadev Chavali
- * Description: Lexer for assembly language
- */
-
-#ifndef LEXER_H
-#define LEXER_H
-
-#include <lib/darr.h>
-
-/* Kinds of token produced by the lexer. The preprocessor entries carry a
- * comment showing the source syntax they come from; the entries from
- * TOKEN_NOOP to TOKEN_RET name instruction mnemonics. */
-typedef enum TokenType
-{
- TOKEN_PP_CONST, // %const(<symbol>)...
- TOKEN_PP_USE, // %use <string>
- TOKEN_PP_END, // %end
- TOKEN_PP_REFERENCE, // $<symbol>
- TOKEN_GLOBAL,
- TOKEN_STAR,
- TOKEN_LITERAL_NUMBER,
- TOKEN_LITERAL_CHAR,
- TOKEN_LITERAL_STRING,
- TOKEN_NOOP,
- TOKEN_HALT,
- TOKEN_PUSH,
- TOKEN_POP,
- TOKEN_PUSH_REG,
- TOKEN_MOV,
- TOKEN_DUP,
- TOKEN_MALLOC,
- TOKEN_MALLOC_STACK,
- TOKEN_MSET,
- TOKEN_MSET_STACK,
- TOKEN_MGET,
- TOKEN_MGET_STACK,
- TOKEN_MDELETE,
- TOKEN_MSIZE,
- TOKEN_NOT,
- TOKEN_OR,
- TOKEN_AND,
- TOKEN_XOR,
- TOKEN_EQ,
- TOKEN_LT,
- TOKEN_LTE,
- TOKEN_GT,
- TOKEN_GTE,
- TOKEN_PLUS,
- TOKEN_SUB,
- TOKEN_MULT,
- TOKEN_PRINT,
- TOKEN_JUMP_ABS,
- TOKEN_JUMP_STACK,
- TOKEN_JUMP_IF,
- TOKEN_CALL,
- TOKEN_CALL_STACK,
- TOKEN_RET,
- TOKEN_SYMBOL, // any symbol that is not a recognised keyword
-} token_type_t;
-
-/* One lexed token. */
-typedef struct
-{
- token_type_t type; // what kind of token this is
- size_t column, line; // position recorded by the lexer
- char *str; // heap-allocated, NUL-terminated text; freed by the owner
- size_t str_size; // size recorded for str (copied by token_copy)
-} token_t;
-
-token_t token_copy(token_t);
-
-/* Lexer result codes; LERR_OK is 0 so a result can be tested as a boolean. */
-typedef enum
-{
- LERR_OK = 0,
- LERR_INVALID_CHAR_LITERAL, // malformed '...' character literal
- LERR_INVALID_PREPROCESSOR_DIRECTIVE, // unknown %<name> directive
-} lerr_t;
-const char *lerr_as_cstr(lerr_t);
-
-typedef darr_t buffer_t;
-typedef darr_t token_stream_t;
-#define TOKEN_STREAM_AT(STREAM_DATA, INDEX) (((token_t *)(STREAM_DATA))[INDEX])
-
-const char *token_type_as_cstr(token_type_t type);
-lerr_t tokenise_buffer(buffer_t *, token_stream_t *);
-
-#endif
diff --git a/asm/main.c b/asm/main.c
deleted file mode 100644
index 32b8187..0000000
--- a/asm/main.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-23
- * Author: Aryadev Chavali
- * Description: Assembly source code compiler, targeting OVM
- */
-
-#include <lib/darr.h>
-
-#include "./lexer.h"
-#include "./parser.h"
-
-/**
- * Print the command-line usage summary.
- *
- * @param program_name name shown as the command (normally argv[0])
- * @param fp           stream to write to
- */
-void usage(const char *program_name, FILE *fp)
-{
-  fprintf(fp, "Usage: %s FILE OUT-FILE\n", program_name);
-  fputs("\tFILE: Source code to compile\n"
-        "\tOUT-FILE: Name of file to store bytecode\n",
-        fp);
-}
-
-/**
- * Assembler entry point: read FILE, lex it, parse it and write the
- * resulting bytecode to OUT-FILE.
- *
- * @return 0 on success; 1 on a usage error or when a file cannot be
- *         opened; 255 minus the error code on a lexer or parser error
- */
-int main(int argc, char *argv[])
-{
-  int ret           = 0;
-  char *source_file = "";
-  char *out_file    = "";
-  // Everything released at `end:` is initialised up front. A `goto end`
-  // that jumps over a later initialiser would otherwise leave the variable
-  // indeterminate when the cleanup code inspects it.
-  darr_t buffer         = {0};
-  token_stream_t tokens = {0};
-  prog_t *program       = NULL;
-  FILE *fp              = NULL;
-
-  if (argc < 3)
-  {
-    usage(argv[0], stderr);
-    return 1;
-  }
-
-  source_file = argv[1];
-  out_file    = argv[2];
-
-#if VERBOSE >= 1
-  printf("[%sASSEMBLER%s]: Assembling `%s` to `%s`\n", TERM_YELLOW, TERM_RESET,
-         source_file, out_file);
-#endif
-  fp = fopen(source_file, "rb");
-  if (!fp)
-  {
-    fprintf(stderr, "%s: could not open file for reading\n", source_file);
-    ret = 1;
-    goto end;
-  }
-  buffer = darr_read_file(fp);
-  fclose(fp);
-
-#if VERBOSE >= 1
-  printf("[%sASSEMBLER%s]: Read `%s` -> %lu bytes\n", TERM_YELLOW, TERM_RESET,
-         source_file, buffer.available);
-#endif
-
-  lerr_t lex_error = tokenise_buffer(&buffer, &tokens);
-  if (lex_error)
-  {
-    // Compute the line/newlines by hand
-    size_t column = 0, line = 1;
-    for (size_t i = 0; i < buffer.used; ++i)
-    {
-      if (buffer.data[i] == '\n')
-      {
-        column = 0;
-        ++line;
-      }
-      else
-        ++column;
-    }
-    fprintf(stderr, "%s:%zu:%zu: %s\n", source_file, line, column,
-            lerr_as_cstr(lex_error));
-    ret = 255 - lex_error;
-    goto end;
-  }
-#if VERBOSE >= 1
-  printf("[%sTOKENISER%s]: %lu bytes -> %lu tokens\n", TERM_GREEN, TERM_RESET,
-         buffer.available, tokens.available);
-#endif
-
-#if VERBOSE >= 2
-  printf("[%sTOKENISER%s]: Tokens parsed:\n", TERM_GREEN, TERM_RESET);
-  for (size_t i = 0; i < tokens.available; ++i)
-  {
-    token_t token = TOKEN_STREAM_AT(tokens.data, i);
-    printf("\t[%zu]: %s(`%s`)@%zu,%zu\n", i, token_type_as_cstr(token.type),
-           token.str, token.line, token.column);
-  }
-#endif
-
-  // The source text is no longer needed once it has been tokenised.
-  free(buffer.data);
-  buffer.data = NULL;
-
-#if VERBOSE >= 2
-  printf("[%sPARSER%s]: Beginning parse...\n", TERM_YELLOW, TERM_RESET);
-#endif
-  perr_t parse_error = parse_stream(&tokens, &program);
-  if (parse_error)
-  {
-    size_t column = 0;
-    size_t line   = 0;
-    if (tokens.used < tokens.available)
-    {
-      token_t t = TOKEN_STREAM_AT(tokens.data, tokens.used);
-      column    = t.column;
-      line      = t.line;
-    }
-    fprintf(stderr, "%s:%zu:%zu: %s\n", source_file, line, column,
-            perr_as_cstr(parse_error));
-    ret = 255 - parse_error;
-    goto end;
-  }
-#if VERBOSE >= 1
-  printf("[%sPARSER%s]: %lu tokens -> %lu instructions\n", TERM_GREEN,
-         TERM_RESET, tokens.available, program->count);
-#endif
-
-#if VERBOSE >= 2
-  printf("[%sPARSER%s]: Program parsed(COUNT=%lu, START=%lu):\n", TERM_GREEN,
-         TERM_RESET, program->count, program->header.start_address);
-  for (size_t i = 0; i < program->count; ++i)
-  {
-    printf("\t[%zu]: ", i);
-    inst_print(program->instructions[i], stdout);
-    printf("\n");
-  }
-#endif
-
-  fp = fopen(out_file, "wb");
-  if (!fp)
-  {
-    fprintf(stderr, "%s: could not open file for writing\n", out_file);
-    ret = 1;
-    goto end;
-  }
-  prog_write_file(program, fp);
-  fclose(fp);
-#if VERBOSE >= 1
-  printf("[%sASSEMBLER%s]: Wrote bytecode to `%s`\n", TERM_GREEN, TERM_RESET,
-         out_file);
-#endif
-end:
-  // free(NULL) is a no-op, so only the token strings need a guard.
-  free(buffer.data);
-  if (tokens.data)
-  {
-    for (size_t i = 0; i < tokens.available; ++i)
-      free(TOKEN_STREAM_AT(tokens.data, i).str);
-    free(tokens.data);
-  }
-  free(program);
-  return ret;
-}
diff --git a/asm/main.cpp b/asm/main.cpp
new file mode 100644
index 0000000..1ad17b1
--- /dev/null
+++ b/asm/main.cpp
@@ -0,0 +1,19 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license. You should have received a copy of the GPLv2
+ * license with this file. If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Entrypoint for assembly program
+ */
+
+#include <iostream>
+
+// Placeholder entry point: prints a greeting and exits successfully.
+int main(void)
+{
+  std::cout << "Hello, world!" << '\n' << std::flush;
+  return 0;
+}
diff --git a/asm/parser.c b/asm/parser.c
deleted file mode 100644
index d326d14..0000000
--- a/asm/parser.c
+++ /dev/null
@@ -1,935 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-24
- * Author: Aryadev Chavali
- * Description: Parser for assembly language
- */
-
-#include <assert.h>
-#include <errno.h>
-#include <stdbool.h>
-#include <string.h>
-
-#include "./parser.h"
-
-#define OPCODE_ON_TYPE(BASE_CODE, TYPE)
-
-/**
- * Give the printable name of a parser error code.
- *
- * @param perr error code to name
- * @return a static string, or "" for an unrecognised code
- */
-const char *perr_as_cstr(perr_t perr)
-{
-  // Lookup table indexed by error code; gaps are left as NULL.
-  static const char *const names[] = {
-      [PERR_OK]                = "OK",
-      [PERR_INTEGER_OVERFLOW]  = "INTEGER_OVERFLOW",
-      [PERR_NOT_A_NUMBER]      = "NOT_A_NUMBER",
-      [PERR_EXPECTED_TYPE]     = "EXPECTED_TYPE",
-      [PERR_EXPECTED_UTYPE]    = "EXPECTED_UTYPE",
-      [PERR_EXPECTED_SYMBOL]   = "EXPECTED_SYMBOL",
-      [PERR_EXPECTED_LABEL]    = "EXPECTED_LABEL",
-      [PERR_EXPECTED_OPERAND]  = "EXPECTED_OPERAND",
-      [PERR_PREPROCESSOR_EXPECTED_STRING]  = "PREPROCESSOR_EXPECTED_STRING",
-      [PERR_PREPROCESSOR_FILE_NONEXISTENT] = "PREPROCESSOR_FILE_NONEXISTENT",
-      [PERR_PREPROCESSOR_FILE_PARSE_ERROR] = "PREPROCESSOR_FILE_PARSE_ERROR",
-      [PERR_PREPROCESSOR_EXPECTED_END]     = "PREPROCESSOR_EXPECTED_END",
-      [PERR_PREPROCESSOR_EXPECTED_NAME]    = "PREPROCESSOR_EXPECTED_NAME",
-      [PERR_PREPROCESSOR_UNKNOWN_NAME]     = "PREPROCESSOR_UNKNOWN_NAME",
-      [PERR_INVALID_RELATIVE_ADDRESS]      = "INVALID_RELATIVE_ADDRESS",
-      [PERR_UNKNOWN_LABEL]     = "UNKNOWN_LABEL",
-      [PERR_UNKNOWN_OPERATOR]  = "UNKNOWN_OPERATOR",
-  };
-  const size_t known = sizeof(names) / sizeof(names[0]);
-
-  // The cast turns negative values into large indices, which are rejected.
-  if ((size_t)perr >= known || !names[perr])
-    return "";
-  return names[perr];
-}
-
-/**
- * Build a parse result that carries a label name and an address.
- *
- * @param stream_index index of the token the result came from
- * @param name         label text; `size` bytes are copied
- * @param size         number of bytes in name
- * @param addr         address stored in the result
- * @return a result owning a NUL-terminated copy of name (release it with
- *         presult_free()); the `type` field is left zero-initialised
- */
-presult_t presult_label(size_t stream_index, const char *name, size_t size,
-                        s_word addr)
-{
-  char *copy = malloc(size + 1);
-  memcpy(copy, name, size);
-  copy[size] = '\0';
-
-  return (presult_t){.stream_index = stream_index,
-                     .address      = addr,
-                     .label        = {.name = copy, .size = size}};
-}
-
-/**
- * Build a parse result for an instruction whose operand is a label that
- * still has to be resolved.
- *
- * @param stream_index index of the token the result came from
- * @param base         instruction awaiting the label's address
- * @param label        label text; `size` bytes are copied
- * @param size         number of bytes in label
- * @return a PRES_LABEL_ADDRESS result owning a copy of the label name
- */
-presult_t presult_label_ref(size_t stream_index, inst_t base, const char *label,
-                            size_t size)
-{
-  presult_t reference   = presult_label(stream_index, label, size, 0);
-  reference.type        = PRES_LABEL_ADDRESS;
-  reference.instruction = base;
-  return reference;
-}
-
-/**
- * Build a parse result for a fully resolved instruction.
- *
- * @param stream_index index of the token the result came from
- * @param inst         the complete instruction
- * @return a PRES_COMPLETE_RESULT result; all other fields are zero
- */
-presult_t presult_instruction(size_t stream_index, inst_t inst)
-{
-  presult_t complete    = {0};
-  complete.stream_index = stream_index;
-  complete.instruction  = inst;
-  complete.type         = PRES_COMPLETE_RESULT;
-  return complete;
-}
-
-/**
- * Build a parse result for an instruction with a relative address operand.
- *
- * @param stream_index index of the token the result came from
- * @param inst         instruction awaiting the resolved address
- * @param addr         relative address as written in the source
- * @return a PRES_RELATIVE_ADDRESS result; all other fields are zero
- */
-presult_t presult_relative(size_t stream_index, inst_t inst, s_word addr)
-{
-  presult_t relative    = {0};
-  relative.stream_index = stream_index;
-  relative.instruction  = inst;
-  relative.address      = addr;
-  relative.type         = PRES_RELATIVE_ADDRESS;
-  return relative;
-}
-
-/**
- * Build a parse result for a global label declaration.
- *
- * @param stream_index index of the token the result came from
- * @param name         label text; `size` bytes are copied
- * @param size         number of bytes in name
- * @param addr         address stored in the result
- * @return a PRES_GLOBAL_LABEL result owning a copy of the label name
- */
-presult_t presult_global(size_t stream_index, const char *name, size_t size,
-                         s_word addr)
-{
-  presult_t global = presult_label(stream_index, name, size, addr);
-  global.type      = PRES_GLOBAL_LABEL;
-  return global;
-}
-
-/**
- * Release the memory owned by one parse result.
- *
- * Only the label-carrying kinds own memory (the label name); the other
- * kinds need no cleanup.
- *
- * @param res result to release
- */
-void presult_free(presult_t res)
-{
-  const bool owns_name = res.type == PRES_LABEL_ADDRESS ||
-                         res.type == PRES_GLOBAL_LABEL ||
-                         res.type == PRES_LABEL;
-  if (owns_name)
-    free(res.label.name);
-}
-
-/**
- * Release the memory owned by each result in an array.
- *
- * The array itself is not freed.
- *
- * @param ptr    first element of the array
- * @param number how many elements to release
- */
-void presults_free(presult_t *ptr, size_t number)
-{
-  for (presult_t *cursor = ptr; number > 0; --number, ++cursor)
-    presult_free(*cursor);
-}
-
-/**
- * Parse a number or character literal token into an unsigned word.
- *
- * Number literals go through strtoll()/strtoull() with base 0, so a "0x"
- * prefix selects hexadecimal. A negative number is parsed as signed and
- * its bit pattern is stored unchanged.
- *
- * @param token token to convert
- * @param ret   receives the value on success
- * @return PERR_OK on success; PERR_NOT_A_NUMBER when the token is not a
- *         literal or its text is not entirely a number;
- *         PERR_INTEGER_OVERFLOW when the value is out of range
- */
-perr_t parse_word(token_t token, word *ret)
-{
-  if (token.type == TOKEN_LITERAL_CHAR)
-  {
-    *ret = token.str[0];
-    return PERR_OK;
-  }
-  if (token.type != TOKEN_LITERAL_NUMBER)
-    return PERR_NOT_A_NUMBER;
-
-  const bool is_negative = token.str_size > 1 && token.str[0] == '-';
-  char *end              = NULL;
-  word w                 = 0;
-
-  // The strto* functions never clear errno, so a stale ERANGE from earlier
-  // code would be misread as an overflow unless it is reset first.
-  errno = 0;
-  if (is_negative)
-  {
-    s_word i = strtoll(token.str, &end, 0);
-    // Copy bits, do not cast
-    memcpy(&w, &i, sizeof(w));
-  }
-  else
-    w = strtoull(token.str, &end, 0);
-
-  // Reject trailing text and text with no digits at all.
-  if (end == NULL || end == token.str || end[0] != '\0')
-    return PERR_NOT_A_NUMBER;
-  if (errno == ERANGE)
-  {
-    errno = 0;
-    return PERR_INTEGER_OVERFLOW;
-  }
-  *ret = w;
-  return PERR_OK;
-}
-
-/**
- * Parse a number or character literal token into a signed word.
- *
- * Number literals go through strtoll() with base 0, so a "0x" prefix
- * selects hexadecimal.
- *
- * @param token token to convert
- * @param ret   receives the value on success
- * @return PERR_OK on success; PERR_NOT_A_NUMBER when the token is not a
- *         literal or its text is not entirely a number;
- *         PERR_INTEGER_OVERFLOW when the value is out of range
- */
-perr_t parse_sword(token_t token, i64 *ret)
-{
-  if (token.type == TOKEN_LITERAL_CHAR)
-  {
-    *ret = token.str[0];
-    return PERR_OK;
-  }
-  if (token.type != TOKEN_LITERAL_NUMBER)
-    return PERR_NOT_A_NUMBER;
-
-  char *end = NULL;
-  // strtoll never clears errno, so a stale ERANGE from earlier code would
-  // be misread as an overflow unless it is reset first.
-  errno    = 0;
-  s_word i = strtoll(token.str, &end, 0);
-
-  // Reject trailing text and text with no digits at all.
-  if (end == NULL || end == token.str || end[0] != '\0')
-    return PERR_NOT_A_NUMBER;
-  if (errno == ERANGE)
-  {
-    errno = 0;
-    return PERR_INTEGER_OVERFLOW;
-  }
-  *ret = i;
-  return PERR_OK;
-}
-
-/**
- * Parse an address operand at the stream cursor.
- *
- * Three forms are accepted:
- *  - a symbol, recorded as a label reference to resolve later;
- *  - a number or character literal, stored as the instruction's operand;
- *  - '*' followed by a number, recorded as a relative address.
- *
- * @param stream token stream; `used` is the cursor and is advanced by one
- *               only for the '*' form
- * @param res    in/out result; res->instruction must already be set
- * @return PERR_OK on success; PERR_EXPECTED_OPERAND when the cursor is at
- *         the end of the stream, nothing follows a '*', or the token is
- *         not an operand; otherwise the error from the number parser
- */
-perr_t parse_word_label_or_relative(token_stream_t *stream, presult_t *res)
-{
-  // Callers advance the cursor before calling; make sure a token is
-  // actually there before reading it.
-  if (stream->used >= stream->available)
-    return PERR_EXPECTED_OPERAND;
-
-  token_t token = TOKEN_STREAM_AT(stream->data, stream->used);
-  if (token.type == TOKEN_SYMBOL)
-  {
-    *res = presult_label_ref(stream->used, res->instruction, token.str,
-                             token.str_size);
-    return PERR_OK;
-  }
-  else if (token.type == TOKEN_LITERAL_CHAR ||
-           token.type == TOKEN_LITERAL_NUMBER)
-  {
-    res->type = PRES_COMPLETE_RESULT;
-    return parse_word(token, &res->instruction.operand.as_word);
-  }
-  else if (token.type == TOKEN_STAR)
-  {
-    if (stream->used + 1 >= stream->available)
-      return PERR_EXPECTED_OPERAND;
-    res->type = PRES_RELATIVE_ADDRESS;
-    ++stream->used;
-    return parse_sword(TOKEN_STREAM_AT(stream->data, stream->used),
-                       &res->address);
-  }
-  return PERR_EXPECTED_OPERAND;
-}
-
-/**
- * Data types an instruction suffix can name, and the function that maps
- * a token's text (already upper-cased by the lexer) onto one of them.
- *
- * @param details token whose str/str_size hold the suffix text
- * @return the matching type, or T_NIL when the text names no type
- */
-enum Type
-{
-  T_NIL = -1,
-  T_BYTE,
-  T_CHAR,
-  T_HWORD,
-  T_INT,
-  T_LONG,
-  T_WORD,
-} parse_details_to_type(token_t details)
-{
-  static const struct
-  {
-    const char *name;
-    size_t size;
-    enum Type type;
-  } known[] = {
-      {"BYTE", 4, T_BYTE}, {"CHAR", 4, T_CHAR}, {"HWORD", 5, T_HWORD},
-      {"INT", 3, T_INT},   {"LONG", 4, T_LONG}, {"WORD", 4, T_WORD},
-  };
-
-  for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); ++i)
-  {
-    // Sizes must agree so that a longer text never matches by prefix.
-    if (details.str_size == known[i].size &&
-        strncmp(details.str, known[i].name, known[i].size) == 0)
-      return known[i].type;
-  }
-  return T_NIL;
-}
-
-/**
- * Unsigned-only data types, and the function that narrows a general type
- * to one of them.
- *
- * @param type type to convert
- * @return U_BYTE, U_HWORD or U_WORD for the matching unsigned type; U_NIL
- *         for the signed types, for T_NIL and for any unrecognised value
- */
-enum UType
-{
-  U_NIL = -1,
-  U_BYTE,
-  U_HWORD,
-  U_WORD,
-} convert_type_to_utype(enum Type type)
-{
-  switch (type)
-  {
-  case T_BYTE:
-    return U_BYTE;
-  case T_HWORD:
-    return U_HWORD;
-  case T_WORD:
-    return U_WORD;
-  case T_NIL:
-  case T_LONG:
-  case T_INT:
-  case T_CHAR:
-    return U_NIL;
-  }
-  // A value outside the enum is not a usable type. This used to return 0,
-  // which is U_BYTE.
-  return U_NIL;
-}
-
-/**
- * Resolve the unsigned type suffix of the token at the stream cursor and
- * add it to the instruction's opcode.
- *
- * @param stream token stream; the cursor is not moved
- * @param ret    instruction whose opcode already holds the base opcode
- * @return PERR_OK on success; PERR_EXPECTED_OPERAND when the cursor is at
- *         the end of the stream; PERR_EXPECTED_UTYPE when the suffix is
- *         not an unsigned type
- */
-perr_t parse_utype_inst(token_stream_t *stream, inst_t *ret)
-{
-  if (stream->used + 1 > stream->available)
-    return PERR_EXPECTED_OPERAND;
-
-  const token_t current     = TOKEN_STREAM_AT(stream->data, stream->used);
-  const enum Type full_type = parse_details_to_type(current);
-  const enum UType utype    = convert_type_to_utype(full_type);
-  if (utype == U_NIL)
-    return PERR_EXPECTED_UTYPE;
-
-  // Typed opcodes are offset from the base opcode by the type's index.
-  ret->opcode += utype;
-  return PERR_OK;
-}
-
-/**
- * Resolve the type suffix of the token at the stream cursor and add it
- * to the instruction's opcode.
- *
- * @param stream token stream; the cursor is not moved
- * @param ret    instruction whose opcode already holds the base opcode
- * @return PERR_OK on success; PERR_EXPECTED_OPERAND when the cursor is at
- *         the end of the stream; PERR_EXPECTED_TYPE when the suffix is
- *         not a known type
- */
-perr_t parse_type_inst(token_stream_t *stream, inst_t *ret)
-{
-  if (stream->used + 1 > stream->available)
-    return PERR_EXPECTED_OPERAND;
-
-  const token_t current = TOKEN_STREAM_AT(stream->data, stream->used);
-  const enum Type kind  = parse_details_to_type(current);
-  if (kind == T_NIL)
-    return PERR_EXPECTED_TYPE;
-
-  // Typed opcodes are offset from the base opcode by the type's index.
-  ret->opcode += kind;
-  return PERR_OK;
-}
-
-/**
- * Parse an unsigned-typed instruction followed by a literal operand.
- *
- * @param stream token stream; on success the cursor ends on the operand
- * @param ret    instruction whose opcode already holds the base opcode;
- *               receives the typed opcode and the operand
- * @return PERR_OK on success; the error from parse_utype_inst();
- *         PERR_EXPECTED_OPERAND when no token follows the instruction
- *         (the cursor stays on the instruction); or the error from
- *         parse_word()
- */
-perr_t parse_utype_inst_with_operand(token_stream_t *stream, inst_t *ret)
-{
-  perr_t inst_err = parse_utype_inst(stream, ret);
-  if (inst_err)
-    return inst_err;
-  // The operand must exist before it is read: an instruction at the very
-  // end of the stream would otherwise index past the last token.
-  if (stream->used + 1 >= stream->available)
-    return PERR_EXPECTED_OPERAND;
-  ++stream->used;
-  return parse_word(TOKEN_STREAM_AT(stream->data, stream->used),
-                    &ret->operand.as_word);
-}
-
-/**
- * Parse an unsigned-typed jump instruction followed by its target, which
- * may be a label, a literal or a relative address.
- *
- * @param stream token stream; on success the cursor ends on the last
- *               token of the operand
- * @param res    in/out result; res->instruction.opcode already holds the
- *               base opcode
- * @return PERR_OK on success; the error from parse_utype_inst();
- *         PERR_EXPECTED_OPERAND when no token follows the instruction
- *         (the cursor stays on the instruction); or the error from
- *         parse_word_label_or_relative()
- */
-perr_t parse_jump_inst_operand(token_stream_t *stream, presult_t *res)
-{
-  perr_t inst_err = parse_utype_inst(stream, &res->instruction);
-  if (inst_err)
-    return inst_err;
-  // The operand must exist before it is read: an instruction at the very
-  // end of the stream would otherwise index past the last token.
-  if (stream->used + 1 >= stream->available)
-    return PERR_EXPECTED_OPERAND;
-  ++stream->used;
-  return parse_word_label_or_relative(stream, res);
-}
-
-/**
- * Parse a typed instruction followed by a literal operand.
- *
- * @param stream token stream; on success the cursor ends on the operand
- * @param ret    instruction whose opcode already holds the base opcode;
- *               receives the typed opcode and the operand
- * @return PERR_OK on success; the error from parse_type_inst();
- *         PERR_EXPECTED_OPERAND when no token follows the instruction
- *         (the cursor stays on the instruction); or the error from
- *         parse_word()
- */
-perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret)
-{
-  perr_t inst_err = parse_type_inst(stream, ret);
-  if (inst_err)
-    return inst_err;
-  // The operand must exist before it is read: an instruction at the very
-  // end of the stream would otherwise index past the last token.
-  if (stream->used + 1 >= stream->available)
-    return PERR_EXPECTED_OPERAND;
-  ++stream->used;
-  return parse_word(TOKEN_STREAM_AT(stream->data, stream->used),
-                    &ret->operand.as_word);
-}
-
-/**
- * Find a label by name.
- *
- * @param labels    array to search
- * @param n         number of entries in labels
- * @param name      name to look for
- * @param name_size number of bytes in name
- * @return a copy of the first label with that name, or a zeroed label
- *         when there is none
- */
-label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size)
-{
-  const label_t *const end = labels + n;
-  for (const label_t *candidate = labels; candidate != end; ++candidate)
-  {
-    if (candidate->name_size != name_size)
-      continue;
-    if (strncmp(candidate->name, name, name_size) == 0)
-      return *candidate;
-  }
-
-  return (label_t){0};
-}
-
-/**
- * Find a preprocessor block by name.
- *
- * @param blocks    array to search
- * @param n         number of entries in blocks
- * @param name      name to look for
- * @param name_size number of bytes in name
- * @return a copy of the first block with that name, or a zeroed block
- *         when there is none
- */
-block_t search_blocks(block_t *blocks, size_t n, char *name, size_t name_size)
-{
-  const block_t *const end = blocks + n;
-  for (const block_t *candidate = blocks; candidate != end; ++candidate)
-  {
-    if (candidate->name_size != name_size)
-      continue;
-    if (strncmp(candidate->name, name, name_size) == 0)
-      return *candidate;
-  }
-
-  return (block_t){0};
-}
-
-// Release a token stream that is still being built: `used` counts bytes at
-// that stage, so it is converted to a token count before freeing each
-// token's string and then the array itself.
-static void free_partial_token_stream(token_stream_t *partial)
-{
-  const size_t count = partial->used / sizeof(token_t);
-  for (size_t k = 0; k < count; ++k)
-    free(TOKEN_STREAM_AT(partial->data, k).str);
-  free(partial->data);
-}
-
-/**
- * Expand every `%USE "file"` directive into the tokens of that file.
- *
- * All other tokens are deep-copied, so the output stream shares no memory
- * with the input. Directives inside an included file are not expanded by
- * this pass.
- *
- * @param stream input tokens; on error `used` is set to the index of the
- *               offending token
- * @param new    receives the expanded stream on success (caller frees the
- *               tokens' strings and the array); untouched on error
- * @return PERR_OK on success;
- *         PERR_PREPROCESSOR_EXPECTED_STRING when %USE is not followed by a
- *         string literal;
- *         PERR_PREPROCESSOR_FILE_NONEXISTENT when the file cannot be opened;
- *         PERR_PREPROCESSOR_FILE_PARSE_ERROR when the file fails to lex
- */
-perr_t preprocess_use_blocks(token_stream_t *stream, token_stream_t *new)
-{
-  token_stream_t new_stream = {0};
-  darr_init(&new_stream, sizeof(token_t));
-  // %USE <STRING FILENAME> -> #TOKENS_IN(FILENAME)
-  for (size_t i = 0; i < stream->available; ++i)
-  {
-    token_t t = DARR_AT(token_t, stream->data, i);
-    if (t.type != TOKEN_PP_USE)
-    {
-      token_t copy = token_copy(t);
-      darr_append_bytes(&new_stream, (byte *)&copy, sizeof(copy));
-      continue;
-    }
-
-    const bool has_next = i + 1 < stream->available;
-    if (!has_next ||
-        DARR_AT(token_t, stream->data, i + 1).type != TOKEN_LITERAL_STRING)
-    {
-      // Point at the unexpected token if there is one, else at the %USE.
-      stream->used = has_next ? i + 1 : i;
-      free_partial_token_stream(&new_stream);
-      return PERR_PREPROCESSOR_EXPECTED_STRING;
-    }
-
-    // Load and tokenise another file
-    ++i;
-    t        = DARR_AT(token_t, stream->data, i);
-    FILE *fp = fopen(t.str, "rb");
-    if (!fp)
-    {
-      free_partial_token_stream(&new_stream);
-      stream->used = i;
-      return PERR_PREPROCESSOR_FILE_NONEXISTENT;
-    }
-    buffer_t buffer = darr_read_file(fp);
-    fclose(fp);
-
-    token_stream_t fstream = {0};
-    lerr_t lerr            = tokenise_buffer(&buffer, &fstream);
-    free(buffer.data);
-    if (lerr)
-    {
-      if (fstream.data)
-      {
-        for (size_t k = 0; k < fstream.available; ++k)
-          free(TOKEN_STREAM_AT(fstream.data, k).str);
-        free(fstream.data);
-      }
-      free_partial_token_stream(&new_stream);
-      stream->used = i;
-      return PERR_PREPROCESSOR_FILE_PARSE_ERROR;
-    }
-
-    // The included tokens are moved, not copied: their strings now belong
-    // to new_stream, so only the included array is released here.
-    darr_append_bytes(&new_stream, fstream.data,
-                      sizeof(token_t) * fstream.available);
-    free(fstream.data);
-  }
-
-  new_stream.available = new_stream.used / sizeof(token_t);
-  new_stream.used      = 0;
-  *new                 = new_stream;
-
-  return PERR_OK;
-}
-
-/* Inline macro blocks.
- *
- * Pass 1 registers every TOKEN_PP_CONST ... TOKEN_PP_END block found in
- * `stream`.  Pass 2 writes into `new` a copy of the stream in which the
- * block definitions are dropped and every TOKEN_PP_REFERENCE is replaced
- * by copies of the referenced block's tokens.
- *
- * `stream` is only read (apart from `stream->used`, see below) and is not
- * freed.  Every token stored in `new` comes from token_copy().  `new` is
- * only written on success; its `available` field is then the token count
- * and `used` is 0.
- *
- * On failure `stream->used` is set to the index at which the error was
- * detected and one of these codes is returned:
- *   PERR_PREPROCESSOR_EXPECTED_NAME - the TOKEN_PP_CONST text has no
- *                                     non-empty "(name)" part
- *   PERR_PREPROCESSOR_EXPECTED_END  - a block is never terminated
- *   PERR_PREPROCESSOR_UNKNOWN_NAME  - a reference names no known block
- */
-perr_t preprocess_macro_blocks(token_stream_t *stream, token_stream_t *new)
-{
-  darr_t block_registry = {0};
-  darr_init(&block_registry, sizeof(block_t));
-
-  // Pass 1: register every macro block
-  for (size_t i = 0; i < stream->available; ++i)
-  {
-    token_t t = DARR_AT(token_t, stream->data, i);
-    if (t.type != TOKEN_PP_CONST)
-      continue;
-
-    char *sym    = t.str;
-    size_t start = strcspn(sym, "(");
-    size_t end   = strcspn(sym, ")");
-    // Require "(" before ")" with at least one character in between;
-    // otherwise the name is empty or `end - start - 1` wraps around.
-    if (end == t.str_size || start == t.str_size || end <= start + 1)
-    {
-      stream->used = i;
-      free(block_registry.data);
-      return PERR_PREPROCESSOR_EXPECTED_NAME;
-    }
-    block_t block = {.name = sym + start + 1, .name_size = end - start - 1};
-
-    // Look for the end marker, checking bounds before every read
-    const size_t body = i + 1;
-    size_t stop       = body;
-    while (stop < stream->available &&
-           DARR_AT(token_t, stream->data, stop).type != TOKEN_PP_END)
-      ++stop;
-    if (stop >= stream->available)
-    {
-      stream->used = stop;
-      free(block_registry.data);
-      return PERR_PREPROCESSOR_EXPECTED_END;
-    }
-
-    // Set the block's token DARR by hand: it borrows the tokens of
-    // `stream`, nothing is copied at this point.
-    block.code.data      = stream->data + (body * sizeof(token_t));
-    block.code.available = stop - body;
-    block.code.used      = block.code.available;
-    darr_append_bytes(&block_registry, (byte *)&block, sizeof(block));
-
-    // Resume after the end marker (the loop increment steps past it)
-    i = stop;
-  }
-
-  if (block_registry.used == 0)
-  {
-    // Nothing to preprocess so just copy wholesale
-    free(block_registry.data);
-    *new = (token_stream_t){0};
-    darr_init(new, sizeof(token_t));
-    for (size_t i = 0; i < stream->available; ++i)
-    {
-      token_t token = DARR_AT(token_t, stream->data, i);
-      token_t copy  = token_copy(token);
-      darr_append_bytes(new, (byte *)&copy, sizeof(copy));
-    }
-    new->available = new->used / sizeof(token_t);
-    new->used      = 0;
-    return PERR_OK;
-  }
-
-  // `used` counts bytes, so convert it to a number of blocks before
-  // searching (same convention as the search_labels call sites).
-  const size_t block_count = block_registry.used / sizeof(block_t);
-
-  // Pass 2: stream with blocks now inlined
-  token_stream_t new_stream = {0};
-  darr_init(&new_stream, sizeof(token_t));
-  for (size_t i = 0; i < stream->available; ++i)
-  {
-    token_t t = DARR_AT(token_t, stream->data, i);
-    if (t.type == TOKEN_PP_CONST)
-    {
-      // Definitions are dropped: skip to the end marker, whose presence
-      // pass 1 has already verified.
-      while (i < stream->available &&
-             DARR_AT(token_t, stream->data, i).type != TOKEN_PP_END)
-        ++i;
-    }
-    else if (t.type == TOKEN_PP_REFERENCE)
-    {
-      // Find the reference in the block registry
-      block_t block = search_blocks((block_t *)block_registry.data,
-                                    block_count, t.str, t.str_size);
-      if (!block.name)
-      {
-        // The copies made so far own their strings: release them too
-        const size_t copied = new_stream.used / sizeof(token_t);
-        for (size_t j = 0; j < copied; ++j)
-          free(TOKEN_STREAM_AT(new_stream.data, j).str);
-        free(new_stream.data);
-        free(block_registry.data);
-        stream->used = i;
-        return PERR_PREPROCESSOR_UNKNOWN_NAME;
-      }
-
-      // Inline the block found
-      for (size_t j = 0; j < block.code.used; j++)
-      {
-        token_t b_token = DARR_AT(token_t, block.code.data, j);
-        token_t copy    = token_copy(b_token);
-        darr_append_bytes(&new_stream, (byte *)&copy, sizeof(token_t));
-      }
-    }
-    else
-    {
-      // Insert into stream as is
-      token_t copy = token_copy(t);
-      darr_append_bytes(&new_stream, (byte *)&copy, sizeof(copy));
-    }
-  }
-
-  // Free block registry
-  free(block_registry.data);
-
-  new_stream.available = new_stream.used / sizeof(token_t);
-  new_stream.used      = 0;
-  *new                 = new_stream;
-
-  return PERR_OK;
-}
-
-/* Run both preprocessing passes over `stream`.
- *
- * The output of preprocess_use_blocks is fed to preprocess_macro_blocks.
- * On success the tokens originally held by `stream` are freed and
- * `*stream` is replaced by the fully preprocessed stream.  On failure the
- * error of the failing pass is returned and `*stream` keeps its tokens;
- * if the macro pass failed, `stream->used` is set to the `used` value of
- * the intermediate stream.
- */
-perr_t preprocessor(token_stream_t *stream)
-{
-  token_stream_t included = {0};
-  perr_t status           = preprocess_use_blocks(stream, &included);
-  if (status != PERR_OK)
-    return status;
-
-  token_stream_t expanded = {0};
-  status                  = preprocess_macro_blocks(&included, &expanded);
-  if (status != PERR_OK)
-    stream->used = included.used;
-
-  // The intermediate stream is no longer needed, whatever the outcome
-  for (size_t k = 0; k < included.available; ++k)
-    free(TOKEN_STREAM_AT(included.data, k).str);
-  free(included.data);
-
-  if (status != PERR_OK)
-    return status;
-
-  // Swap the caller's tokens for the preprocessed ones
-  for (size_t k = 0; k < stream->available; ++k)
-    free(TOKEN_STREAM_AT(stream->data, k).str);
-  free(stream->data);
-  *stream = expanded;
-
-  return PERR_OK;
-}
-
-/* Parse one parse result starting at token `stream->used`.
- *
- * The token is first classified: which instruction template it maps to
- * and what has to be parsed after it.  The template is then stored in
- * `*ret` and the matching operand parser is run.  Labels and the global
- * directive are handled directly.
- *
- * `stream->used` is advanced by the `global` directive, by jump/call
- * addresses and by whatever the operand parsers consume; the caller is
- * expected to step past the last token consumed.
- *
- * Returns PERR_OK or the error describing why the token could not start
- * (or complete) a parse result.
- */
-perr_t parse_next(token_stream_t *stream, presult_t *ret)
-{
-  // What follows the instruction token
-  enum
-  {
-    PN_PLAIN,         // nothing more to parse
-    PN_COMPLETE,      // nothing more; result type set explicitly
-    PN_UTYPE,         // parse_utype_inst
-    PN_UTYPE_OPERAND, // parse_utype_inst_with_operand
-    PN_TYPE,          // parse_type_inst
-    PN_JUMP,          // parse_jump_inst_operand
-    PN_ADDRESS,       // next token: parse_word_label_or_relative
-  } tail = PN_PLAIN;
-
-  const size_t index = stream->used;
-  token_t token      = TOKEN_STREAM_AT(stream->data, index);
-  inst_t inst        = {0};
-
-  switch (token.type)
-  {
-  case TOKEN_LITERAL_STRING:
-  case TOKEN_PP_CONST:
-  case TOKEN_PP_USE:
-  case TOKEN_PP_REFERENCE:
-  case TOKEN_PP_END:
-  case TOKEN_LITERAL_NUMBER:
-  case TOKEN_LITERAL_CHAR:
-    return PERR_EXPECTED_SYMBOL;
-  case TOKEN_GLOBAL: {
-    const size_t next = index + 1;
-    if (next >= stream->available ||
-        TOKEN_STREAM_AT(stream->data, next).type != TOKEN_SYMBOL)
-      return PERR_EXPECTED_LABEL;
-    stream->used  = next;
-    token_t label = TOKEN_STREAM_AT(stream->data, next);
-    *ret          = presult_global(next, label.str, label.str_size, 0);
-    return PERR_OK;
-  }
-  case TOKEN_SYMBOL: {
-    // A symbol is only valid here as "name:", colon last
-    size_t label_size = strcspn(token.str, ":");
-    if (label_size == token.str_size)
-      return PERR_UNKNOWN_OPERATOR;
-    if (label_size != token.str_size - 1)
-      return PERR_EXPECTED_LABEL;
-    *ret = presult_label(index, token.str, label_size, 0);
-    return PERR_OK;
-  }
-  case TOKEN_NOOP:
-    inst = INST_NOOP;
-    tail = PN_COMPLETE;
-    break;
-  case TOKEN_HALT:
-    inst = INST_HALT;
-    tail = PN_COMPLETE;
-    break;
-  case TOKEN_PUSH:
-    inst = INST_PUSH(BYTE, 0);
-    tail = PN_UTYPE_OPERAND;
-    break;
-  case TOKEN_POP:
-    inst = INST_POP(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_PUSH_REG:
-    inst = INST_PUSH_REG(BYTE, 0);
-    tail = PN_UTYPE_OPERAND;
-    break;
-  case TOKEN_MOV:
-    inst = INST_MOV(BYTE, 0);
-    tail = PN_UTYPE_OPERAND;
-    break;
-  case TOKEN_DUP:
-    inst = INST_DUP(BYTE, 0);
-    tail = PN_UTYPE_OPERAND;
-    break;
-  case TOKEN_MALLOC:
-    inst = INST_MALLOC(BYTE, 0);
-    tail = PN_UTYPE_OPERAND;
-    break;
-  case TOKEN_MSET:
-    inst = INST_MSET(BYTE, 0);
-    tail = PN_UTYPE_OPERAND;
-    break;
-  case TOKEN_MGET:
-    inst = INST_MGET(BYTE, 0);
-    tail = PN_UTYPE_OPERAND;
-    break;
-  case TOKEN_MALLOC_STACK:
-    inst = INST_MALLOC_STACK(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_MSET_STACK:
-    inst = INST_MSET_STACK(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_MGET_STACK:
-    inst = INST_MGET_STACK(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_MDELETE:
-    inst = INST_MDELETE;
-    break;
-  case TOKEN_MSIZE:
-    inst = INST_MSIZE;
-    break;
-  case TOKEN_NOT:
-    inst = INST_NOT(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_OR:
-    inst = INST_OR(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_AND:
-    inst = INST_AND(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_XOR:
-    inst = INST_XOR(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_EQ:
-    inst = INST_EQ(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_LT:
-    inst = INST_LT(BYTE);
-    tail = PN_TYPE;
-    break;
-  case TOKEN_LTE:
-    inst = INST_LTE(BYTE);
-    tail = PN_TYPE;
-    break;
-  case TOKEN_GT:
-    inst = INST_GT(BYTE);
-    tail = PN_TYPE;
-    break;
-  case TOKEN_GTE:
-    inst = INST_GTE(BYTE);
-    tail = PN_TYPE;
-    break;
-  case TOKEN_PLUS:
-    inst = INST_PLUS(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_SUB:
-    inst = INST_SUB(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_MULT:
-    inst = INST_MULT(BYTE);
-    tail = PN_UTYPE;
-    break;
-  case TOKEN_PRINT:
-    inst = INST_PRINT(BYTE);
-    tail = PN_TYPE;
-    break;
-  case TOKEN_JUMP_ABS:
-    inst = INST_JUMP_ABS(0);
-    tail = PN_ADDRESS;
-    break;
-  case TOKEN_JUMP_STACK:
-    inst = INST_JUMP_STACK;
-    break;
-  case TOKEN_JUMP_IF:
-    inst = INST_JUMP_IF(BYTE, 0);
-    tail = PN_JUMP;
-    break;
-  case TOKEN_CALL:
-    inst = INST_CALL(0);
-    tail = PN_ADDRESS;
-    break;
-  case TOKEN_CALL_STACK:
-    inst = INST_CALL_STACK;
-    break;
-  case TOKEN_RET:
-    inst = INST_RET;
-    break;
-  case TOKEN_STAR:
-  default:
-    return PERR_UNKNOWN_OPERATOR;
-  }
-
-  // Every path reaching this point describes an instruction
-  *ret = presult_instruction(index, inst);
-
-  switch (tail)
-  {
-  case PN_COMPLETE:
-    ret->type = PRES_COMPLETE_RESULT;
-    break;
-  case PN_UTYPE:
-    return parse_utype_inst(stream, &ret->instruction);
-  case PN_UTYPE_OPERAND:
-    return parse_utype_inst_with_operand(stream, &ret->instruction);
-  case PN_TYPE:
-    return parse_type_inst(stream, &ret->instruction);
-  case PN_JUMP:
-    return parse_jump_inst_operand(stream, ret);
-  case PN_ADDRESS:
-    // The address operand is the next token, which must exist
-    ++stream->used;
-    if (stream->used >= stream->available)
-      return PERR_EXPECTED_OPERAND;
-    return parse_word_label_or_relative(stream, ret);
-  case PN_PLAIN:
-  default:
-    break;
-  }
-  return PERR_OK;
-}
-
-/* Build a program from an array of parse results.
- *
- * Pass 1 walks `results`, records the instruction index of every label
- * definition, rewrites relative addresses into absolute ones (in place,
- * in `results`) and counts the instructions.  Pass 2 emits the
- * instructions, resolving label references through the registry built by
- * pass 1.  If a global label was requested, its address becomes the
- * start address stored in the program header.
- *
- * results        - parse results; relative-address entries are modified
- * res_count      - number of entries in `results`
- * result_reached - must not be NULL.  On failure: index of the result
- *                  that caused the error.  On success: `res_count`.
- * program_ptr    - on success receives a malloc'd program which the
- *                  caller must free; untouched on failure.
- *
- * Returns PERR_OK, PERR_INVALID_RELATIVE_ADDRESS (a negative offset that
- * reaches before the first instruction) or PERR_UNKNOWN_LABEL.
- */
-perr_t process_presults(presult_t *results, size_t res_count,
-                        size_t *result_reached, prog_t **program_ptr)
-{
-  assert(result_reached && "process_presults: result_reached is NULL?!");
-  *result_reached     = 0;
-  label_t start_label = {0};
-  // Result that requested the start label, kept for error reporting
-  size_t start_index = 0;
-
-  darr_t label_registry = {0};
-  darr_init(&label_registry, sizeof(label_t));
-  word inst_count = 0;
-  // Pass 1: collect labels, fix up relative addresses, count instructions
-  for (size_t i = 0; i < res_count; ++i)
-  {
-    presult_t res = results[i];
-    switch (res.type)
-    {
-    case PRES_LABEL: {
-      label_t label = {.name      = res.label.name,
-                       .name_size = res.label.size,
-                       .addr      = inst_count};
-      darr_append_bytes(&label_registry, (byte *)&label, sizeof(label));
-      break;
-    }
-    case PRES_RELATIVE_ADDRESS: {
-      s_word offset = res.address;
-      if (offset < 0 && ((word)(-offset)) > inst_count)
-      {
-        free(label_registry.data);
-        *result_reached = i;
-        return PERR_INVALID_RELATIVE_ADDRESS;
-      }
-      results[i].instruction.operand.as_word = ((s_word)inst_count) + offset;
-      inst_count++;
-      break;
-    }
-    case PRES_GLOBAL_LABEL: {
-      start_label = (label_t){.name      = res.label.name,
-                              .name_size = res.label.size,
-                              .addr      = (word)inst_count};
-      start_index = i;
-      break;
-    }
-    case PRES_LABEL_ADDRESS:
-    case PRES_COMPLETE_RESULT:
-      inst_count++;
-      break;
-    default:
-      break;
-    }
-  }
-
-  darr_t instr_darr = {0};
-  darr_init(&instr_darr, sizeof(inst_t));
-
-  prog_header_t header = {0};
-  if (start_label.name_size > 0)
-  {
-    label_t label = search_labels((label_t *)label_registry.data,
-                                  label_registry.used / sizeof(label_t),
-                                  start_label.name, start_label.name_size);
-    if (!label.name)
-    {
-      free(instr_darr.data);
-      free(label_registry.data);
-      // Point the caller at the directive naming the missing label
-      *result_reached = start_index;
-      return PERR_UNKNOWN_LABEL;
-    }
-    header.start_address = label.addr;
-  }
-
-  // Pass 2: emit instructions, resolving label references
-  for (size_t i = 0; i < res_count; ++i)
-  {
-    presult_t res = results[i];
-    switch (res.type)
-    {
-    case PRES_LABEL_ADDRESS: {
-      inst_t inst   = {0};
-      label_t label = search_labels((label_t *)label_registry.data,
-                                    label_registry.used / sizeof(label_t),
-                                    res.label.name, res.label.size);
-
-      if (!label.name)
-      {
-        free(instr_darr.data);
-        free(label_registry.data);
-        *result_reached = i;
-        return PERR_UNKNOWN_LABEL;
-      }
-
-      inst.opcode  = res.instruction.opcode;
-      inst.operand = DWORD(label.addr);
-      darr_append_bytes(&instr_darr, (byte *)&inst, sizeof(inst));
-      break;
-    }
-    case PRES_RELATIVE_ADDRESS:
-    case PRES_COMPLETE_RESULT:
-      darr_append_bytes(&instr_darr, (byte *)&res.instruction,
-                        sizeof(res.instruction));
-      break;
-    case PRES_GLOBAL_LABEL:
-    case PRES_LABEL:
-    default:
-      // Labels emit no instruction
-      break;
-    }
-  }
-
-  free(label_registry.data);
-  prog_t *program = malloc(sizeof(*program) + (sizeof(inst_t) * inst_count));
-  // No error code exists for allocation failure: fail loudly instead of
-  // writing through a NULL pointer.
-  assert(program && "process_presults: could not allocate program");
-  program->header = header;
-  program->count  = inst_count;
-  memcpy(program->instructions, instr_darr.data, instr_darr.used);
-  free(instr_darr.data);
-  *program_ptr = program;
-  // Everything was consumed
-  *result_reached = res_count;
-  return PERR_OK;
-}
-
-/* Preprocess `stream`, parse it into parse results and build a program.
- *
- * stream      - token stream; replaced by its preprocessed form.  When an
- *               error is returned by parsing or by program construction,
- *               `stream->used` is the index of the token at fault.
- * program_ptr - receives the program built by process_presults on
- *               success.
- *
- * Returns PERR_OK, or the first error reported by preprocessor,
- * parse_next or process_presults.
- */
-perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
-{
-  // Preprocessor
-  perr_t perr = preprocessor(stream);
-  if (perr)
-    return perr;
-
-  darr_t presults = {0};
-  darr_init(&presults, sizeof(presult_t));
-  while (stream->used < stream->available)
-  {
-    presult_t pres = {0};
-    perr           = parse_next(stream, &pres);
-    if (perr)
-    {
-      presults_free((presult_t *)presults.data,
-                    presults.used / sizeof(presult_t));
-      free(presults.data);
-      return perr;
-    }
-    darr_append_bytes(&presults, (byte *)&pres, sizeof(presult_t));
-    ++stream->used;
-  }
-
-  // From here on `available` is the number of results
-  presults.available = presults.used / sizeof(presult_t);
-  presults.used      = 0;
-
-#if VERBOSE >= 2
-  // Casts keep the arguments in step with the conversion specifiers
-  // whatever the underlying integer types are.
-  printf("[%sPARSER%s]: %zu tokens -> %zu parse units\n", TERM_YELLOW,
-         TERM_RESET, (size_t)stream->available, (size_t)presults.available);
-  for (size_t i = 0; i < presults.available; ++i)
-  {
-    presult_t pres = DARR_AT(presult_t, presults.data, i);
-    switch (pres.type)
-    {
-    case PRES_LABEL:
-      printf("\tLABEL: label=%s\n", pres.label.name);
-      break;
-    case PRES_LABEL_ADDRESS:
-      printf("\tLABEL_CALL: label=%s, inst=", pres.label.name);
-      inst_print(pres.instruction, stdout);
-      printf("\n");
-      break;
-    case PRES_RELATIVE_ADDRESS:
-      printf("\tRELATIVE_CALL: addr=%lld, inst=", (long long)pres.address);
-      inst_print(pres.instruction, stdout);
-      printf("\n");
-      break;
-    case PRES_GLOBAL_LABEL:
-      printf("\tSET_GLOBAL_START: name=%s\n", pres.label.name);
-      break;
-    case PRES_COMPLETE_RESULT:
-      printf("\tCOMPLETE: inst=");
-      inst_print(pres.instruction, stdout);
-      printf("\n");
-      break;
-    default:
-      break;
-    }
-  }
-#endif
-
-  size_t results_processed = 0;
-  perr = process_presults((presult_t *)presults.data, presults.available,
-                          &results_processed, program_ptr);
-  // Only on failure: rewind the stream to the token behind the result
-  // that could not be processed, so the caller can report its position.
-  if (perr != PERR_OK && results_processed < presults.available)
-  {
-    presult_t pres = DARR_AT(presult_t, presults.data, results_processed);
-    stream->used   = pres.stream_index;
-  }
-  presults_free((presult_t *)presults.data, presults.available);
-  free(presults.data);
-  return perr;
-}
diff --git a/asm/parser.h b/asm/parser.h
deleted file mode 100644
index 7e2d1b7..0000000
--- a/asm/parser.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Copyright (C) 2023 Aryadev Chavali
-
- * You may distribute and modify this code under the terms of the
- * GPLv2 license. You should have received a copy of the GPLv2
- * license with this file. If not, please write to:
- * aryadev@aryadevchavali.com.
-
- * Created: 2023-10-24
- * Author: Aryadev Chavali
- * Description: Parser for assembly language
- */
-
-#ifndef PARSER_H
-#define PARSER_H
-
-#include "./lexer.h"
-
-#include <lib/inst.h>
-
-typedef enum // Result codes returned by the preprocessor and parser routines
-{
- PERR_OK = 0, // success; being zero lets callers test `if (perr)`
- PERR_INTEGER_OVERFLOW,
- PERR_NOT_A_NUMBER,
- PERR_EXPECTED_UTYPE,
- PERR_EXPECTED_TYPE,
- PERR_EXPECTED_SYMBOL, // a literal or preprocessor token where an instruction was expected
- PERR_EXPECTED_LABEL, // `global` not followed by a symbol, or a symbol whose ':' is not its last character
- PERR_EXPECTED_OPERAND, // a jump/call token is the last token of the stream
- PERR_PREPROCESSOR_EXPECTED_NAME, // macro block definition without a parenthesised name
- PERR_PREPROCESSOR_EXPECTED_STRING,
- PERR_PREPROCESSOR_EXPECTED_END, // macro block definition without a closing end token
- PERR_PREPROCESSOR_FILE_NONEXISTENT, // a file to include could not be opened
- PERR_PREPROCESSOR_FILE_PARSE_ERROR, // an included file could not be tokenised
- PERR_PREPROCESSOR_UNKNOWN_NAME, // reference to a macro block that was never defined
- PERR_INVALID_RELATIVE_ADDRESS, // negative relative offset reaching before the first instruction
- PERR_UNKNOWN_OPERATOR, // token that starts neither an instruction nor a label
- PERR_UNKNOWN_LABEL, // reference to a label that was never defined
-} perr_t;
-
-const char *perr_as_cstr(perr_t); // presumably the printable name of an error code - confirm against the definition
-
-typedef struct // One parse unit: filled in by parse_next, consumed by process_presults
-{
- size_t stream_index; // token index; parse_stream restores stream->used from it on error
- inst_t instruction; // instruction carried by the result
- s_word address; // signed offset read for PRES_RELATIVE_ADDRESS results
- struct PLabel
- {
- char *name; // label text
- size_t size; // length of the label text
- } label;
- enum PResult_Type
- {
- PRES_LABEL = 0, // label definition: bound to the current instruction count
- PRES_LABEL_ADDRESS, // instruction whose operand becomes the address of label `name`
- PRES_GLOBAL_LABEL, // names the label whose address becomes the program start address
- PRES_RELATIVE_ADDRESS, // instruction whose operand becomes its own index plus `address`
- PRES_COMPLETE_RESULT, // instruction emitted as is
- } type;
-} presult_t;
-
-presult_t presult_label(size_t, const char *, size_t, s_word); // called as (token index, label text, label length, 0)
-presult_t presult_label_ref(size_t, inst_t, const char *, size_t);
-presult_t presult_instruction(size_t, inst_t); // called as (token index, instruction template)
-presult_t presult_relative(size_t, inst_t, s_word);
-presult_t presult_global(size_t, const char *, size_t, s_word); // called as (token index, label text, label length, 0)
-void presult_free(presult_t);
-void presults_free(presult_t *, size_t); // (array, element count); callers free the array itself afterwards
-
-typedef struct // A label definition as stored in the label registry
-{
- char *name; // label text
- size_t name_size; // length of the label text
- word addr; // number of instructions counted before the label
-} label_t;
-
-label_t search_labels(label_t *, size_t, char *, size_t); // (labels, label count, name, name length); a result with a NULL name means "not found"
-
-typedef struct // A macro block as stored in the block registry
-{
- char *name; // points at the name inside the defining token's text
- size_t name_size; // length of the name
- darr_t code; // tokens of the block body, borrowed from the stream being preprocessed
-} block_t;
-
-block_t search_blocks(block_t *, size_t, char *, size_t); // (blocks, size, name, name length); a result with a NULL name means "not found" - NOTE(review): confirm whether size is an element count
-
-perr_t preprocess_use_blocks(token_stream_t *, token_stream_t *); // (input stream, output stream)
-perr_t preprocess_macro_blocks(token_stream_t *, token_stream_t *); // (input stream, output stream with macro blocks inlined)
-// Analyses then inlines corresponding tokens into stream directly
-perr_t preprocessor(token_stream_t *); // on success the stream's old tokens are freed and replaced
-
-// Parses the next "parse result" from stream
-perr_t parse_next(token_stream_t *, presult_t *); // starts at token index stream->used
-// Constructs a program from the set of parse results (from repeatedly
-// calling parse_next)
-perr_t process_presults(presult_t *, size_t, size_t *, prog_t **); // (results, count, out: index of failing result, out: program)
-
-// Preprocesses, generates results then constructs a program all in
-// one routine (thing to call in most use cases).
-perr_t parse_stream(token_stream_t *, prog_t **);
-
-#endif