diff options
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | asm/lexer.c | 623 | ||||
-rw-r--r-- | asm/lexer.h | 91 | ||||
-rw-r--r-- | asm/main.c | 148 | ||||
-rw-r--r-- | asm/main.cpp | 19 | ||||
-rw-r--r-- | asm/parser.c | 935 | ||||
-rw-r--r-- | asm/parser.h | 104 |
7 files changed, 31 insertions, 1909 deletions
@@ -1,9 +1,13 @@ CC=gcc +CPP=g++ + VERBOSE=0 -GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -std=c11 -I. +GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I. DEBUG-FLAGS=-ggdb -fsanitize=address RELEASE-FLAGS=-O3 -CFLAGS:=$(GENERAL-FLAGS) $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE) +CFLAGS:=$(GENERAL-FLAGS) -std=c11 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE) +CPPFLAGS:=$(GENERAL-FLAGS) $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE) + LIBS=-lm DIST=build TERM_YELLOW:=$(shell echo -e "\e[0;33m") @@ -31,10 +35,10 @@ VM_OUT=$(DIST)/ovm.out ## ASSEMBLY setup ASM_DIST=$(DIST)/asm ASM_SRC=asm -ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.c parser.c) -ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.c=$(ASM_DIST)/%.o) +ASM_CODE:=$(addprefix $(ASM_SRC)/, ) +ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.cpp=$(ASM_DIST)/%.o) ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d -ASM_CFLAGS=$(CFLAGS) +ASM_CFLAGS=$(CPPFLAGS) ASM_OUT=$(DIST)/asm.out ## EXAMPLES setup @@ -71,13 +75,13 @@ $(VM_DIST)/%.o: $(VM_SRC)/%.c ## ASSEMBLY Recipes $(ASM_OUT): $(LIB_OBJECTS) $(ASM_OBJECTS) $(ASM_DIST)/main.o - @$(CC) $(ASM_CFLAGS) $^ -o $@ $(LIBS) + @$(CPP) $(ASM_CFLAGS) $^ -o $@ $(LIBS) @echo "$(TERM_GREEN)$@$(TERM_RESET): $^" -include $(ASM_DEPS) -$(ASM_DIST)/%.o: $(ASM_SRC)/%.c - @$(CC) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS) +$(ASM_DIST)/%.o: $(ASM_SRC)/%.cpp + @$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS) @echo "$(TERM_YELLOW)$@$(TERM_RESET): $<" ## EXAMPLES recipes diff --git a/asm/lexer.c b/asm/lexer.c deleted file mode 100644 index a4905fb..0000000 --- a/asm/lexer.c +++ /dev/null @@ -1,623 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. 
- - * Created: 2023-10-24 - * Author: Aryadev Chavali - * Description: Lexer for assembly language - */ - -#include <assert.h> -#include <ctype.h> -#include <stdbool.h> -#include <stdio.h> -#include <string.h> - -#include <lib/inst.h> - -#include "./lexer.h" - -const char *token_type_as_cstr(token_type_t type) -{ - switch (type) - { - case TOKEN_PP_USE: - return "PP_USE"; - case TOKEN_PP_CONST: - return "PP_CONST"; - case TOKEN_PP_END: - return "PP_END"; - case TOKEN_PP_REFERENCE: - return "PP_REFERENCE"; - case TOKEN_GLOBAL: - return "GLOBAL"; - case TOKEN_STAR: - return "STAR"; - case TOKEN_LITERAL_STRING: - return "LITERAL_STRING"; - case TOKEN_LITERAL_NUMBER: - return "LITERAL_NUMBER"; - case TOKEN_LITERAL_CHAR: - return "LITERAL_CHAR"; - case TOKEN_NOOP: - return "NOOP"; - case TOKEN_HALT: - return "HALT"; - case TOKEN_PUSH: - return "PUSH"; - case TOKEN_POP: - return "POP"; - case TOKEN_PUSH_REG: - return "PUSH_REG"; - case TOKEN_MOV: - return "MOV"; - case TOKEN_DUP: - return "DUP"; - case TOKEN_MALLOC: - return "MALLOC"; - case TOKEN_MALLOC_STACK: - return "MALLOC_STACK"; - case TOKEN_MSET: - return "MSET"; - case TOKEN_MSET_STACK: - return "MSET_STACK"; - case TOKEN_MGET: - return "MGET"; - case TOKEN_MGET_STACK: - return "MGET_STACK"; - case TOKEN_MDELETE: - return "MDELETE"; - case TOKEN_MSIZE: - return "MSIZE"; - case TOKEN_NOT: - return "NOT"; - case TOKEN_OR: - return "OR"; - case TOKEN_AND: - return "AND"; - case TOKEN_XOR: - return "XOR"; - case TOKEN_EQ: - return "EQ"; - case TOKEN_LT: - return "LT"; - case TOKEN_LTE: - return "LTE"; - case TOKEN_GT: - return "GT"; - case TOKEN_GTE: - return "GTE"; - case TOKEN_PLUS: - return "PLUS"; - case TOKEN_SUB: - return "SUB"; - case TOKEN_MULT: - return "MULT"; - case TOKEN_PRINT: - return "PRINT"; - case TOKEN_JUMP_ABS: - return "JUMP_ABS"; - case TOKEN_JUMP_STACK: - return "JUMP_STACK"; - case TOKEN_JUMP_IF: - return "JUMP_IF"; - case TOKEN_CALL: - return "CALL"; - case TOKEN_CALL_STACK: - return 
"CALL_STACK"; - case TOKEN_RET: - return "RET"; - case TOKEN_SYMBOL: - return "SYMBOL"; - } - return ""; -} - -const char *lerr_as_cstr(lerr_t lerr) -{ - switch (lerr) - { - case LERR_OK: - return "OK"; - case LERR_INVALID_CHAR_LITERAL: - return "INVALID_CHAR_LITERAL"; - case LERR_INVALID_PREPROCESSOR_DIRECTIVE: - return "INVALID_PREPROCESSOR_DIRECTIVE"; - } - return ""; -} - -token_t token_copy(token_t t) -{ - token_t new = t; - new.str = malloc(t.str_size + 1); - memcpy(new.str, t.str, t.str_size); - new.str[t.str_size] = '\0'; - return new; -} - -size_t space_left(buffer_t *buffer) -{ - if (buffer->available == buffer->used) - return 0; - return buffer->available - 1 - buffer->used; -} - -char uppercase(char c) -{ - if (c >= 'a' && c <= 'z') - return (c - 'a') + 'A'; - return c; -} - -bool is_symbol(char c) -{ - return isalpha(c) || isdigit(c) || c == '-' || c == '_' || c == '.' || - c == ':' || c == '(' || c == ')' || c == '%' || c == '$'; -} - -bool is_valid_hex_char(char c) -{ - return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F'); -} - -lerr_t tokenise_symbol(buffer_t *buffer, size_t *column, token_t *token) -{ - static_assert(NUMBER_OF_OPCODES == 98, "tokenise_buffer: Out of date!"); - - size_t sym_size = 0; - for (; sym_size < space_left(buffer) && - is_symbol(buffer->data[buffer->used + sym_size]); - ++sym_size) - buffer->data[buffer->used + sym_size] = - uppercase(buffer->data[buffer->used + sym_size]); - - token_t ret = {0}; - char *opcode = (char *)buffer->data + buffer->used; - - bool is_opcode = true; - token_type_t type = 0; - size_t offset = 0; - - if (sym_size > 1 && strncmp(opcode, "%", 1) == 0) - { - // Some preprocessing directive - if (sym_size > 6 && strncmp(opcode + 1, "CONST", 5) == 0) - { - type = TOKEN_PP_CONST; - offset = 6; - } - else if (sym_size == 4 && strncmp(opcode + 1, "USE", 3) == 0) - { - type = TOKEN_PP_USE; - offset = 4; - } - else if (sym_size == 4 && strncmp(opcode + 1, "END", 3) == 0) - { - 
type = TOKEN_PP_END; - offset = 4; - } - else - return LERR_INVALID_PREPROCESSOR_DIRECTIVE; - } - else if (sym_size > 1 && strncmp(opcode, "$", 1) == 0) - { - // A reference to a preprocessing constant - offset = 1; - type = TOKEN_PP_REFERENCE; - } - else if (sym_size == 4 && strncmp(opcode, "NOOP", 4) == 0) - { - offset = 4; - type = TOKEN_NOOP; - } - else if (sym_size == 4 && strncmp(opcode, "HALT", 4) == 0) - { - offset = 4; - type = TOKEN_HALT; - } - else if (sym_size > 9 && strncmp(opcode, "PUSH.REG.", 9) == 0) - { - offset = 9; - type = TOKEN_PUSH_REG; - } - else if (sym_size > 5 && strncmp(opcode, "PUSH.", 5) == 0) - { - offset = 5; - type = TOKEN_PUSH; - } - else if (sym_size > 4 && strncmp(opcode, "POP.", 4) == 0) - { - offset = 4; - type = TOKEN_POP; - } - else if (sym_size > 4 && strncmp(opcode, "MOV.", 4) == 0) - { - offset = 4; - type = TOKEN_MOV; - } - else if (sym_size > 4 && strncmp(opcode, "DUP.", 4) == 0) - { - offset = 4; - type = TOKEN_DUP; - } - else if (sym_size > 13 && strncmp(opcode, "MALLOC.STACK.", 13) == 0) - { - offset = 13; - type = TOKEN_MALLOC_STACK; - } - else if (sym_size > 7 && strncmp(opcode, "MALLOC.", 7) == 0) - { - offset = 7; - type = TOKEN_MALLOC; - } - else if (sym_size > 11 && strncmp(opcode, "MSET.STACK.", 11) == 0) - { - offset = 11; - type = TOKEN_MSET_STACK; - } - else if (sym_size > 5 && strncmp(opcode, "MSET.", 5) == 0) - { - offset = 5; - type = TOKEN_MSET; - } - else if (sym_size > 11 && strncmp(opcode, "MGET.STACK.", 11) == 0) - { - offset = 11; - type = TOKEN_MGET_STACK; - } - else if (sym_size > 5 && strncmp(opcode, "MGET.", 5) == 0) - { - offset = 5; - type = TOKEN_MGET; - } - else if (sym_size == 7 && strncmp(opcode, "MDELETE", 7) == 0) - { - offset = 7; - type = TOKEN_MDELETE; - } - else if (sym_size == 5 && strncmp(opcode, "MSIZE", 5) == 0) - { - offset = 5; - type = TOKEN_MSIZE; - } - else if (sym_size > 4 && strncmp(opcode, "NOT.", 4) == 0) - { - offset = 4; - type = TOKEN_NOT; - } - else if (sym_size > 3 
&& strncmp(opcode, "OR.", 3) == 0) - { - offset = 3; - type = TOKEN_OR; - } - else if (sym_size > 4 && strncmp(opcode, "AND.", 4) == 0) - { - offset = 4; - type = TOKEN_AND; - } - else if (sym_size > 4 && strncmp(opcode, "XOR.", 4) == 0) - { - offset = 4; - type = TOKEN_XOR; - } - else if (sym_size >= 3 && strncmp(opcode, "EQ.", 3) == 0) - { - offset = 3; - type = TOKEN_EQ; - } - else if (sym_size > 4 && strncmp(opcode, "LTE.", 4) == 0) - { - offset = 4; - type = TOKEN_LTE; - } - else if (sym_size > 3 && strncmp(opcode, "LT.", 3) == 0) - { - offset = 3; - type = TOKEN_LT; - } - else if (sym_size > 4 && strncmp(opcode, "GTE.", 4) == 0) - { - offset = 4; - type = TOKEN_GTE; - } - else if (sym_size > 3 && strncmp(opcode, "GT.", 3) == 0) - { - offset = 3; - type = TOKEN_GT; - } - else if (sym_size > 4 && strncmp(opcode, "SUB.", 4) == 0) - { - offset = 4; - type = TOKEN_SUB; - } - else if (sym_size > 5 && strncmp(opcode, "PLUS.", 5) == 0) - { - offset = 5; - type = TOKEN_PLUS; - } - else if (sym_size > 5 && strncmp(opcode, "MULT.", 5) == 0) - { - offset = 5; - type = TOKEN_MULT; - } - else if (sym_size > 6 && strncmp(opcode, "PRINT.", 6) == 0) - { - offset = 6; - type = TOKEN_PRINT; - } - else if (sym_size == 8 && strncmp(opcode, "JUMP.ABS", 8) == 0) - { - offset = 8; - type = TOKEN_JUMP_ABS; - } - else if (sym_size == 10 && strncmp(opcode, "JUMP.STACK", 10) == 0) - { - offset = 10; - type = TOKEN_JUMP_STACK; - } - else if (sym_size > 8 && strncmp(opcode, "JUMP.IF.", 8) == 0) - { - offset = 8; - type = TOKEN_JUMP_IF; - } - else if (sym_size == 10 && strncmp(opcode, "CALL.STACK", 10) == 0) - { - offset = 10; - type = TOKEN_CALL_STACK; - } - else if (sym_size == 4 && strncmp(opcode, "CALL", 4) == 0) - { - offset = 4; - type = TOKEN_CALL; - } - else if (sym_size == 3 && strncmp(opcode, "RET", 3) == 0) - { - offset = 3; - type = TOKEN_RET; - } - else if (sym_size == 6 && strncmp(opcode, "GLOBAL", 6) == 0) - { - offset = 6; - type = TOKEN_GLOBAL; - } - else - is_opcode = 
false; - - if (!is_opcode) - { - // Just a symbol, so no further manipulation - char *sym = malloc(sym_size + 1); - memcpy(sym, opcode, sym_size); - sym[sym_size] = '\0'; - ret = (token_t){.type = TOKEN_SYMBOL, - .str = sym, - .column = *column, - .str_size = sym_size}; - } - else - { - ret.type = type; - ret.column = *column; - if (offset == sym_size) - { - // There's no more to the string - ret.str = malloc(1); - ret.str[0] = '\0'; - } - else - { - // t.str is the remaining part of the string after the - // opcode - ret.str = calloc(sym_size - offset + 1, 1); - memcpy(ret.str, opcode + offset, sym_size - offset); - ret.str[sym_size - offset] = '\0'; - } - ret.str_size = sym_size - offset; - } - *column += sym_size - 1; - buffer->used += sym_size; - *token = ret; - return LERR_OK; -} - -token_t tokenise_number_literal(buffer_t *buffer, size_t *column) -{ - token_t token = { - .type = TOKEN_LITERAL_NUMBER, .str_size = 0, .column = *column}; - if (buffer->data[buffer->used] == '-') - ++token.str_size; - for (; token.str_size < space_left(buffer) && - isdigit(buffer->data[buffer->used + token.str_size]); - ++token.str_size) - continue; - token.str = calloc(token.str_size + 1, 1); - memcpy(token.str, buffer->data + buffer->used, token.str_size); - token.str[token.str_size] = '\0'; - buffer->used += token.str_size; - *column += token.str_size; - return token; -} - -token_t tokenise_hex_literal(buffer_t *buffer, size_t *column) -{ - // For the x part of the literal - ++buffer->used; - token_t token = { - .type = TOKEN_LITERAL_NUMBER, .str_size = 0, .column = *column}; - for (; token.str_size < space_left(buffer) && - is_valid_hex_char(buffer->data[buffer->used + token.str_size]); - ++token.str_size) - continue; - // Setup a proper C hex literal - token.str = calloc(token.str_size + 3, 1); - token.str[0] = '0'; - token.str[1] = 'x'; - memcpy(token.str + 2, buffer->data + buffer->used, token.str_size); - token.str[token.str_size + 2] = '\0'; - buffer->used += 
token.str_size; - *column += token.str_size; - - // Setup the first two characters - token.str_size += 2; - return token; -} - -token_t tokenise_char_literal(buffer_t *buffer, size_t *column) -{ - token_t token = { - .type = TOKEN_LITERAL_CHAR, .str_size = 1, .column = *column}; - token.str = calloc(2, 1); - token.str[0] = buffer->data[buffer->used + 1]; - token.str[1] = '\0'; - buffer->used += 3; - *column += 3; - return token; -} - -token_t tokenise_string_literal(buffer_t *buffer, size_t *column) -{ - ++buffer->used; - size_t string_size; - for (string_size = 0; string_size + buffer->used < buffer->available && - buffer->data[buffer->used + string_size] != '\"'; - ++string_size) - continue; - token_t t = {.type = TOKEN_LITERAL_STRING, - .column = *column, - .str = malloc(string_size + 1), - .str_size = string_size}; - memcpy(t.str, buffer->data + buffer->used, string_size); - t.str[string_size] = '\0'; - *column += string_size + 1; - buffer->used += string_size + 1; - return t; -} - -lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr) -{ - size_t column = 0, line = 1; - token_stream_t tokens = {0}; - darr_init(&tokens, sizeof(token_t)); - while (space_left(buffer) != 0) - { - bool is_token = true; - token_t t = {0}; - char c = buffer->data[buffer->used]; - if (isspace(c) || c == '\0') - { - // Clean whitespace - for (; space_left(buffer) > 0 && (isspace(c) || c == '\0'); - ++buffer->used, c = buffer->data[buffer->used]) - { - ++column; - if (c == '\n') - { - column = 0; - ++line; - } - } - ++column; - is_token = false; - } - else if (c == ';') - { - // Start lexing at next line - for (; space_left(buffer) > 0 && c != '\n'; - ++buffer->used, c = buffer->data[buffer->used]) - continue; - column = 0; - ++line; - ++buffer->used; - is_token = false; - } - else if (c == '*') - { - t = (token_t){.type = TOKEN_STAR, - .column = column, - .str = malloc(1), - .str_size = 1}; - t.str[0] = '\0'; - ++buffer->used; - } - else if (c == '\"') - t = 
tokenise_string_literal(buffer, &column); - else if (isdigit(c) || (space_left(buffer) > 1 && c == '-' && - isdigit(buffer->data[buffer->used + 1]))) - t = tokenise_number_literal(buffer, &column); - else if (c == 'x' && space_left(buffer) > 1 && - is_valid_hex_char(buffer->data[buffer->used + 1])) - t = tokenise_hex_literal(buffer, &column); - else if (is_symbol(c)) - { - lerr_t lerr = tokenise_symbol(buffer, &column, &t); - if (lerr) - { - free(tokens.data); - return lerr; - } - } - else if (c == '\'') - { - if (space_left(buffer) < 2) - { - free(tokens.data); - return LERR_INVALID_CHAR_LITERAL; - } - else if (buffer->data[buffer->used + 1] == '\\') - { - char escape = '\0'; - if (space_left(buffer) < 3 || buffer->data[buffer->used + 3] != '\'') - { - free(tokens.data); - return LERR_INVALID_CHAR_LITERAL; - } - switch (buffer->data[buffer->used + 2]) - { - case 'n': - escape = '\n'; - break; - case 't': - escape = '\t'; - break; - case 'r': - escape = '\r'; - break; - case '\\': - escape = '\\'; - break; - default: - column += 2; - free(tokens.data); - return LERR_INVALID_CHAR_LITERAL; - break; - } - - t = (token_t){.type = TOKEN_LITERAL_CHAR, - .str = malloc(2), - .str_size = 1, - .column = column}; - column += 2; - buffer->used += 4; - t.str[0] = escape; - t.str[1] = '\0'; - } - else - t = tokenise_char_literal(buffer, &column); - } - - if (is_token) - { - t.line = line; - darr_append_bytes(&tokens, (byte *)&t, sizeof(t)); - } - } - tokens.available = tokens.used / sizeof(token_t); - tokens.used = 0; - *tokens_ptr = tokens; - return LERR_OK; -} diff --git a/asm/lexer.h b/asm/lexer.h deleted file mode 100644 index 734ffa1..0000000 --- a/asm/lexer.h +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. 
- - * Created: 2023-10-24 - * Author: Aryadev Chavali - * Description: Lexer for assembly language - */ - -#ifndef LEXER_H -#define LEXER_H - -#include <lib/darr.h> - -typedef enum TokenType -{ - TOKEN_PP_CONST, // %const(<symbol>)... - TOKEN_PP_USE, // %use <string> - TOKEN_PP_END, // %end - TOKEN_PP_REFERENCE, // $<symbol> - TOKEN_GLOBAL, - TOKEN_STAR, - TOKEN_LITERAL_NUMBER, - TOKEN_LITERAL_CHAR, - TOKEN_LITERAL_STRING, - TOKEN_NOOP, - TOKEN_HALT, - TOKEN_PUSH, - TOKEN_POP, - TOKEN_PUSH_REG, - TOKEN_MOV, - TOKEN_DUP, - TOKEN_MALLOC, - TOKEN_MALLOC_STACK, - TOKEN_MSET, - TOKEN_MSET_STACK, - TOKEN_MGET, - TOKEN_MGET_STACK, - TOKEN_MDELETE, - TOKEN_MSIZE, - TOKEN_NOT, - TOKEN_OR, - TOKEN_AND, - TOKEN_XOR, - TOKEN_EQ, - TOKEN_LT, - TOKEN_LTE, - TOKEN_GT, - TOKEN_GTE, - TOKEN_PLUS, - TOKEN_SUB, - TOKEN_MULT, - TOKEN_PRINT, - TOKEN_JUMP_ABS, - TOKEN_JUMP_STACK, - TOKEN_JUMP_IF, - TOKEN_CALL, - TOKEN_CALL_STACK, - TOKEN_RET, - TOKEN_SYMBOL, -} token_type_t; - -typedef struct -{ - token_type_t type; - size_t column, line; - char *str; - size_t str_size; -} token_t; - -token_t token_copy(token_t); - -typedef enum -{ - LERR_OK = 0, - LERR_INVALID_CHAR_LITERAL, - LERR_INVALID_PREPROCESSOR_DIRECTIVE, -} lerr_t; -const char *lerr_as_cstr(lerr_t); - -typedef darr_t buffer_t; -typedef darr_t token_stream_t; -#define TOKEN_STREAM_AT(STREAM_DATA, INDEX) (((token_t *)(STREAM_DATA))[INDEX]) - -const char *token_type_as_cstr(token_type_t type); -lerr_t tokenise_buffer(buffer_t *, token_stream_t *); - -#endif diff --git a/asm/main.c b/asm/main.c deleted file mode 100644 index 32b8187..0000000 --- a/asm/main.c +++ /dev/null @@ -1,148 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. 
- - * Created: 2023-10-23 - * Author: Aryadev Chavali - * Description: Assembly source code compiler, targeting OVM - */ - -#include <lib/darr.h> - -#include "./lexer.h" -#include "./parser.h" - -void usage(const char *program_name, FILE *fp) -{ - fprintf(fp, - "Usage: %s FILE OUT-FILE\n" - "\tFILE: Source code to compile\n" - "\tOUT-FILE: Name of file to store bytecode\n", - program_name); -} - -int main(int argc, char *argv[]) -{ - int ret = 0; - char *source_file = ""; - char *out_file = ""; - if (argc < 3) - { - usage(argv[0], stderr); - return 1; - } - - source_file = argv[1]; - out_file = argv[2]; - -#if VERBOSE >= 1 - printf("[%sASSEMBLER%s]: Assembling `%s` to `%s`\n", TERM_YELLOW, TERM_RESET, - source_file, out_file); -#endif - FILE *fp = fopen(source_file, "rb"); - darr_t buffer = darr_read_file(fp); - fclose(fp); - -#if VERBOSE >= 1 - printf("[%sASSEMBLER%s]: Read `%s` -> %lu bytes\n", TERM_YELLOW, TERM_RESET, - source_file, buffer.available); -#endif - - token_stream_t tokens = {0}; - lerr_t lex_error = tokenise_buffer(&buffer, &tokens); - if (lex_error) - { - // Compute the line/newlines by hand - size_t column = 0, line = 1; - for (size_t i = 0; i < buffer.used; ++i) - { - if (buffer.data[i] == '\n') - { - column = 0; - ++line; - } - else - ++column; - } - fprintf(stderr, "%s:%lu:%lu: %s\n", source_file, line, column, - lerr_as_cstr(lex_error)); - ret = 255 - lex_error; - goto end; - } -#if VERBOSE >= 1 - printf("[%sTOKENISER%s]: %lu bytes -> %lu tokens\n", TERM_GREEN, TERM_RESET, - buffer.available, tokens.available); -#endif - -#if VERBOSE >= 2 - printf("[%sTOKENISER%s]: Tokens parsed:\n", TERM_GREEN, TERM_RESET); - for (size_t i = 0; i < tokens.available; ++i) - { - token_t token = TOKEN_STREAM_AT(tokens.data, i); - printf("\t[%lu]: %s(`%s`)@%lu,%lu\n", i, token_type_as_cstr(token.type), - token.str, token.line, token.column); - } -#endif - - free(buffer.data); - buffer.data = NULL; - -#if VERBOSE >= 2 - printf("[%sPARSER%s]: Beginning parse...\n", 
TERM_YELLOW, TERM_RESET); -#endif - prog_t *program = NULL; - perr_t parse_error = parse_stream(&tokens, &program); - if (parse_error) - { - size_t column = 0; - size_t line = 0; - if (tokens.used < tokens.available) - { - token_t t = TOKEN_STREAM_AT(tokens.data, tokens.used); - column = t.column; - line = t.line; - } - fprintf(stderr, "%s:%lu:%lu: %s\n", source_file, line, column, - perr_as_cstr(parse_error)); - ret = 255 - parse_error; - goto end; - } -#if VERBOSE >= 1 - printf("[%sPARSER%s]: %lu tokens -> %lu instructions\n", TERM_GREEN, - TERM_RESET, tokens.available, program->count); -#endif - -#if VERBOSE >= 2 - printf("[%sPARSER%s]: Program parsed(COUNT=%lu, START=%lu):\n", TERM_GREEN, - TERM_RESET, program->count, program->header.start_address); - for (size_t i = 0; i < program->count; ++i) - { - printf("\t[%lu]: ", i); - inst_print(program->instructions[i], stdout); - printf("\n"); - } -#endif - - fp = fopen(out_file, "wb"); - prog_write_file(program, fp); - fclose(fp); -#if VERBOSE >= 1 - printf("[%sASSEMBLER%s]: Wrote bytecode to `%s`\n", TERM_GREEN, TERM_RESET, - out_file); -#endif -end: - if (buffer.data) - free(buffer.data); - if (tokens.data) - { - for (size_t i = 0; i < tokens.available; ++i) - free(TOKEN_STREAM_AT(tokens.data, i).str); - free(tokens.data); - } - if (program) - free(program); - return ret; -} diff --git a/asm/main.cpp b/asm/main.cpp new file mode 100644 index 0000000..1ad17b1 --- /dev/null +++ b/asm/main.cpp @@ -0,0 +1,19 @@ +/* Copyright (C) 2024 Aryadev Chavali + + * You may distribute and modify this code under the terms of the + * GPLv2 license. You should have received a copy of the GPLv2 + * license with this file. If not, please write to: + * aryadev@aryadevchavali.com. + + * Created: 2024-04-14 + * Author: Aryadev Chavali + * Description: Entrypoint for assembly program + */ + +#include <iostream> + +int main(void) +{ + std::cout << "Hello, world!" 
<< std::endl; + return 0; +} diff --git a/asm/parser.c b/asm/parser.c deleted file mode 100644 index d326d14..0000000 --- a/asm/parser.c +++ /dev/null @@ -1,935 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-10-24 - * Author: Aryadev Chavali - * Description: Parser for assembly language - */ - -#include <assert.h> -#include <errno.h> -#include <stdbool.h> -#include <string.h> - -#include "./parser.h" - -#define OPCODE_ON_TYPE(BASE_CODE, TYPE) - -const char *perr_as_cstr(perr_t perr) -{ - switch (perr) - { - case PERR_OK: - return "OK"; - case PERR_INTEGER_OVERFLOW: - return "INTEGER_OVERFLOW"; - case PERR_NOT_A_NUMBER: - return "NOT_A_NUMBER"; - case PERR_EXPECTED_TYPE: - return "EXPECTED_TYPE"; - case PERR_EXPECTED_UTYPE: - return "EXPECTED_UTYPE"; - case PERR_EXPECTED_SYMBOL: - return "EXPECTED_SYMBOL"; - case PERR_EXPECTED_LABEL: - return "EXPECTED_LABEL"; - case PERR_EXPECTED_OPERAND: - return "EXPECTED_OPERAND"; - case PERR_PREPROCESSOR_EXPECTED_STRING: - return "PREPROCESSOR_EXPECTED_STRING"; - case PERR_PREPROCESSOR_FILE_NONEXISTENT: - return "PREPROCESSOR_FILE_NONEXISTENT"; - case PERR_PREPROCESSOR_FILE_PARSE_ERROR: - return "PREPROCESSOR_FILE_PARSE_ERROR"; - case PERR_PREPROCESSOR_EXPECTED_END: - return "PREPROCESSOR_EXPECTED_END"; - case PERR_PREPROCESSOR_EXPECTED_NAME: - return "PREPROCESSOR_EXPECTED_NAME"; - case PERR_PREPROCESSOR_UNKNOWN_NAME: - return "PREPROCESSOR_UNKNOWN_NAME"; - case PERR_INVALID_RELATIVE_ADDRESS: - return "INVALID_RELATIVE_ADDRESS"; - case PERR_UNKNOWN_LABEL: - return "UNKNOWN_LABEL"; - case PERR_UNKNOWN_OPERATOR: - return "UNKNOWN_OPERATOR"; - default: - return ""; - } -} - -presult_t presult_label(size_t stream_index, const char *name, size_t size, - s_word addr) -{ - presult_t res = 
{.stream_index = stream_index, - .address = addr, - .label = {.name = malloc(size + 1), .size = size}}; - memcpy(res.label.name, name, size); - res.label.name[size] = '\0'; - return res; -} - -presult_t presult_label_ref(size_t stream_index, inst_t base, const char *label, - size_t size) -{ - presult_t pres = presult_label(stream_index, label, size, 0); - pres.instruction = base; - pres.type = PRES_LABEL_ADDRESS; - return pres; -} - -presult_t presult_instruction(size_t stream_index, inst_t inst) -{ - return (presult_t){.stream_index = stream_index, - .instruction = inst, - .type = PRES_COMPLETE_RESULT}; -} - -presult_t presult_relative(size_t stream_index, inst_t inst, s_word addr) -{ - return (presult_t){.stream_index = stream_index, - .instruction = inst, - .address = addr, - .type = PRES_RELATIVE_ADDRESS}; -} - -presult_t presult_global(size_t stream_index, const char *name, size_t size, - s_word addr) -{ - presult_t res = presult_label(stream_index, name, size, addr); - res.type = PRES_GLOBAL_LABEL; - return res; -} - -void presult_free(presult_t res) -{ - switch (res.type) - { - case PRES_LABEL_ADDRESS: - case PRES_GLOBAL_LABEL: - case PRES_LABEL: - free(res.label.name); - break; - case PRES_RELATIVE_ADDRESS: - case PRES_COMPLETE_RESULT: - break; - } -} - -void presults_free(presult_t *ptr, size_t number) -{ - for (size_t i = 0; i < number; ++i) - presult_free(ptr[i]); -} - -perr_t parse_word(token_t token, word *ret) -{ - if (token.type == TOKEN_LITERAL_NUMBER) - { - bool is_negative = token.str_size > 1 && token.str[0] == '-'; - word w = 0; - if (is_negative) - { - char *end = NULL; - s_word i = strtoll(token.str, &end, 0); - if (!(end && end[0] == '\0')) - return PERR_NOT_A_NUMBER; - else if (errno == ERANGE) - { - errno = 0; - return PERR_INTEGER_OVERFLOW; - } - // Copy bits, do not cast - memcpy(&w, &i, sizeof(w)); - } - else - { - char *end = NULL; - w = strtoull(token.str, &end, 0); - if (!(end && end[0] == '\0')) - return PERR_NOT_A_NUMBER; - else if 
(errno == ERANGE) - { - errno = 0; - return PERR_INTEGER_OVERFLOW; - } - } - *ret = w; - return PERR_OK; - } - else if (token.type == TOKEN_LITERAL_CHAR) - { - *ret = token.str[0]; - return PERR_OK; - } - else - return PERR_NOT_A_NUMBER; -} - -perr_t parse_sword(token_t token, i64 *ret) -{ - if (token.type == TOKEN_LITERAL_NUMBER) - { - char *end = NULL; - s_word i = strtoll(token.str, &end, 0); - if (!(end && end[0] == '\0')) - return PERR_NOT_A_NUMBER; - else if (errno == ERANGE) - { - errno = 0; - return PERR_INTEGER_OVERFLOW; - } - *ret = i; - return PERR_OK; - } - else if (token.type == TOKEN_LITERAL_CHAR) - { - *ret = token.str[0]; - return PERR_OK; - } - else - return PERR_NOT_A_NUMBER; -} - -perr_t parse_word_label_or_relative(token_stream_t *stream, presult_t *res) -{ - token_t token = TOKEN_STREAM_AT(stream->data, stream->used); - if (token.type == TOKEN_SYMBOL) - { - *res = presult_label_ref(stream->used, res->instruction, token.str, - token.str_size); - return PERR_OK; - } - else if (token.type == TOKEN_LITERAL_CHAR || - token.type == TOKEN_LITERAL_NUMBER) - { - res->type = PRES_COMPLETE_RESULT; - return parse_word(token, &res->instruction.operand.as_word); - } - else if (token.type == TOKEN_STAR) - { - if (stream->used + 1 >= stream->available) - return PERR_EXPECTED_OPERAND; - res->type = PRES_RELATIVE_ADDRESS; - ++stream->used; - return parse_sword(TOKEN_STREAM_AT(stream->data, stream->used), - &res->address); - } - return PERR_EXPECTED_OPERAND; -} - -enum Type -{ - T_NIL = -1, - T_BYTE, - T_CHAR, - T_HWORD, - T_INT, - T_LONG, - T_WORD, -} parse_details_to_type(token_t details) -{ - if (details.str_size == 4 && strncmp(details.str, "BYTE", 4) == 0) - return T_BYTE; - else if (details.str_size == 4 && strncmp(details.str, "CHAR", 4) == 0) - return T_CHAR; - else if (details.str_size == 5 && strncmp(details.str, "HWORD", 5) == 0) - return T_HWORD; - else if (details.str_size == 3 && strncmp(details.str, "INT", 3) == 0) - return T_INT; - else if 
(details.str_size == 4 && strncmp(details.str, "LONG", 4) == 0) - return T_LONG; - else if (details.str_size == 4 && strncmp(details.str, "WORD", 4) == 0) - return T_WORD; - else - return T_NIL; -} - -enum UType -{ - U_NIL = -1, - U_BYTE, - U_HWORD, - U_WORD, -} convert_type_to_utype(enum Type type) -{ - if (type == T_CHAR || type == T_INT || type == T_LONG) - return U_NIL; - switch (type) - { - case T_NIL: - case T_LONG: - case T_INT: - case T_CHAR: - return U_NIL; - case T_BYTE: - return U_BYTE; - case T_HWORD: - return U_HWORD; - case T_WORD: - return U_WORD; - } - return 0; -} - -perr_t parse_utype_inst(token_stream_t *stream, inst_t *ret) -{ - if (stream->used + 1 > stream->available) - return PERR_EXPECTED_OPERAND; - enum UType type = convert_type_to_utype( - parse_details_to_type(TOKEN_STREAM_AT(stream->data, stream->used))); - if (type == U_NIL) - return PERR_EXPECTED_UTYPE; - ret->opcode += type; - return PERR_OK; -} - -perr_t parse_type_inst(token_stream_t *stream, inst_t *ret) -{ - if (stream->used + 1 > stream->available) - return PERR_EXPECTED_OPERAND; - enum Type type = - parse_details_to_type(TOKEN_STREAM_AT(stream->data, stream->used)); - if (type == T_NIL) - return PERR_EXPECTED_TYPE; - ret->opcode += type; - return PERR_OK; -} - -perr_t parse_utype_inst_with_operand(token_stream_t *stream, inst_t *ret) -{ - perr_t inst_err = parse_utype_inst(stream, ret); - if (inst_err) - return inst_err; - ++stream->used; - perr_t word_err = parse_word(TOKEN_STREAM_AT(stream->data, stream->used), - &ret->operand.as_word); - if (word_err) - return word_err; - return PERR_OK; -} - -perr_t parse_jump_inst_operand(token_stream_t *stream, presult_t *res) -{ - perr_t inst_err = parse_utype_inst(stream, &res->instruction); - - if (inst_err) - return inst_err; - ++stream->used; - perr_t op_err = parse_word_label_or_relative(stream, res); - if (op_err) - return op_err; - return PERR_OK; -} - -perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret) -{ - 
perr_t inst_err = parse_type_inst(stream, ret); - if (inst_err) - return inst_err; - ++stream->used; - perr_t word_err = parse_word(TOKEN_STREAM_AT(stream->data, stream->used), - &ret->operand.as_word); - if (word_err) - return word_err; - return PERR_OK; -} - -label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size) -{ - for (size_t i = 0; i < n; ++i) - { - label_t label = labels[i]; - if (label.name_size == name_size && - strncmp(label.name, name, name_size) == 0) - return label; - } - - return (label_t){0}; -} - -block_t search_blocks(block_t *blocks, size_t n, char *name, size_t name_size) -{ - for (size_t i = 0; i < n; ++i) - { - block_t block = blocks[i]; - if (block.name_size == name_size && - strncmp(block.name, name, name_size) == 0) - return block; - } - - return (block_t){0}; -} - -perr_t preprocess_use_blocks(token_stream_t *stream, token_stream_t *new) -{ - token_stream_t new_stream = {0}; - darr_init(&new_stream, sizeof(token_t)); - // %USE <STRING FILENAME> -> #TOKENS_IN(FILENAME) - for (size_t i = 0; i < stream->available; ++i) - { - token_t t = DARR_AT(token_t, stream->data, i); - if (t.type == TOKEN_PP_USE) - { - if (i + 1 >= stream->available || - DARR_AT(token_t, stream->data, i + 1).type != TOKEN_LITERAL_STRING) - { - stream->used = i + 1 >= stream->available ? 
i : i + 1; - for (size_t i = 0; i < (new_stream.used / sizeof(token_t)); ++i) - free(TOKEN_STREAM_AT(new_stream.data, i).str); - free(new_stream.data); - return PERR_PREPROCESSOR_EXPECTED_STRING; - } - // Load and tokenise another file - ++i; - t = DARR_AT(token_t, stream->data, i); - FILE *fp = fopen(t.str, "rb"); - if (!fp) - { - for (size_t i = 0; i < (new_stream.used / sizeof(token_t)); ++i) - free(TOKEN_STREAM_AT(new_stream.data, i).str); - free(new_stream.data); - stream->used = i; - return PERR_PREPROCESSOR_FILE_NONEXISTENT; - } - buffer_t buffer = darr_read_file(fp); - fclose(fp); - - token_stream_t fstream = {0}; - lerr_t lerr = tokenise_buffer(&buffer, &fstream); - free(buffer.data); - if (lerr) - { - if (fstream.data) - { - for (size_t i = 0; i < fstream.available; ++i) - free(TOKEN_STREAM_AT(fstream.data, i).str); - free(fstream.data); - } - for (size_t i = 0; i < (new_stream.used / sizeof(token_t)); ++i) - free(TOKEN_STREAM_AT(new_stream.data, i).str); - free(new_stream.data); - stream->used = i; - return PERR_PREPROCESSOR_FILE_PARSE_ERROR; - } - darr_append_bytes(&new_stream, fstream.data, - sizeof(token_t) * fstream.available); - free(fstream.data); - } - else - { - token_t copy = token_copy(t); - darr_append_bytes(&new_stream, (byte *)&copy, sizeof(copy)); - } - } - - new_stream.available = new_stream.used / sizeof(token_t); - new_stream.used = 0; - *new = new_stream; - - return PERR_OK; -} - -perr_t preprocess_macro_blocks(token_stream_t *stream, token_stream_t *new) -{ - darr_t block_registry = {0}; - darr_init(&block_registry, sizeof(block_t)); - - for (size_t i = 0; i < stream->available; ++i) - { - token_t t = DARR_AT(token_t, stream->data, i); - if (t.type == TOKEN_PP_CONST) - { - char *sym = t.str; - size_t start = strcspn(sym, "("); - size_t end = strcspn(sym, ")"); - if (end == t.str_size || start == t.str_size || start == end + 1) - { - free(block_registry.data); - return PERR_PREPROCESSOR_EXPECTED_NAME; - } - block_t block = {.name = sym + 
start + 1, .name_size = end - start - 1}; - ++i; - size_t prev = i; - token_t t = {0}; - for (t = DARR_AT(token_t, stream->data, i); - i < stream->available && t.type != TOKEN_PP_END; - ++i, t = DARR_AT(token_t, stream->data, i)) - continue; - if (t.type != TOKEN_PP_END) - { - stream->used = i; - free(block_registry.data); - return PERR_PREPROCESSOR_EXPECTED_END; - } - - // Set the block's token DARR by hand - block.code.data = stream->data + (prev * sizeof(token_t)); - block.code.available = i - prev; - block.code.used = block.code.available; - darr_append_bytes(&block_registry, (byte *)&block, sizeof(block)); - } - } - - if (block_registry.used == 0) - { - // Nothing to preprocess so just copywholesale - free(block_registry.data); - *new = (token_stream_t){0}; - darr_init(new, sizeof(token_t)); - for (size_t i = 0; i < stream->available; ++i) - { - token_t token = DARR_AT(token_t, stream->data, i); - token_t copy = token_copy(token); - darr_append_bytes(new, (byte *)&copy, sizeof(copy)); - } - new->available = new->used / sizeof(token_t); - new->used = 0; - return PERR_OK; - } - - // Stream with blocks now inlined - token_stream_t new_stream = {0}; - darr_init(&new_stream, sizeof(token_t)); - for (size_t i = 0; i < stream->available; ++i) - { - token_t t = DARR_AT(token_t, stream->data, i); - if (t.type == TOKEN_PP_CONST) - { - // Skip till after end - for (; i < stream->available && t.type != TOKEN_PP_END; - ++i, t = DARR_AT(token_t, stream->data, i)) - continue; - } - else if (t.type == TOKEN_PP_REFERENCE) - { - // Find the reference in the block registry - block_t block = search_blocks((block_t *)block_registry.data, - block_registry.used, t.str, t.str_size); - if (!block.name) - { - free(new_stream.data); - free(block_registry.data); - stream->used = i; - return PERR_PREPROCESSOR_UNKNOWN_NAME; - } - - // Inline the block found - for (size_t j = 0; j < block.code.used; j++) - { - token_t b_token = DARR_AT(token_t, block.code.data, j); - token_t copy = 
token_copy(b_token); - darr_append_bytes(&new_stream, (byte *)&copy, sizeof(token_t)); - } - } - else - { - // Insert into stream as is - token_t copy = token_copy(t); - darr_append_bytes(&new_stream, (byte *)&copy, sizeof(copy)); - } - } - - // Free block registry - free(block_registry.data); - - new_stream.available = new_stream.used / sizeof(token_t); - new_stream.used = 0; - *new = new_stream; - - return PERR_OK; -} - -perr_t preprocessor(token_stream_t *stream) -{ - token_stream_t use_blocks = {0}; - perr_t perr = preprocess_use_blocks(stream, &use_blocks); - if (perr) - return perr; - - token_stream_t macro_blocks = {0}; - perr = preprocess_macro_blocks(&use_blocks, &macro_blocks); - if (perr) - { - stream->used = use_blocks.used; - for (size_t i = 0; i < use_blocks.available; ++i) - free(TOKEN_STREAM_AT(use_blocks.data, i).str); - free(use_blocks.data); - return perr; - } - - for (size_t i = 0; i < use_blocks.available; ++i) - free(TOKEN_STREAM_AT(use_blocks.data, i).str); - free(use_blocks.data); - - for (size_t i = 0; i < stream->available; ++i) - free(TOKEN_STREAM_AT(stream->data, i).str); - free(stream->data); - - *stream = macro_blocks; - - return PERR_OK; -} - -perr_t parse_next(token_stream_t *stream, presult_t *ret) -{ - token_t token = TOKEN_STREAM_AT(stream->data, stream->used); - perr_t perr = PERR_OK; - switch (token.type) - { - case TOKEN_LITERAL_STRING: - case TOKEN_PP_CONST: - case TOKEN_PP_USE: - case TOKEN_PP_REFERENCE: - case TOKEN_PP_END: - case TOKEN_LITERAL_NUMBER: - case TOKEN_LITERAL_CHAR: - return PERR_EXPECTED_SYMBOL; - case TOKEN_GLOBAL: { - if (stream->used + 1 >= stream->available || - TOKEN_STREAM_AT(stream->data, stream->used + 1).type != TOKEN_SYMBOL) - return PERR_EXPECTED_LABEL; - ++stream->used; - token_t label = TOKEN_STREAM_AT(stream->data, stream->used); - *ret = presult_global(stream->used, label.str, label.str_size, 0); - return PERR_OK; - } - case TOKEN_NOOP: - *ret = presult_instruction(stream->used, INST_NOOP); - ret->type = 
PRES_COMPLETE_RESULT; - break; - case TOKEN_HALT: - *ret = presult_instruction(stream->used, INST_HALT); - ret->type = PRES_COMPLETE_RESULT; - break; - case TOKEN_PUSH: - *ret = presult_instruction(stream->used, INST_PUSH(BYTE, 0)); - perr = parse_utype_inst_with_operand(stream, &ret->instruction); - break; - case TOKEN_POP: - *ret = presult_instruction(stream->used, INST_POP(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_PUSH_REG: - *ret = presult_instruction(stream->used, INST_PUSH_REG(BYTE, 0)); - perr = parse_utype_inst_with_operand(stream, &ret->instruction); - break; - case TOKEN_MOV: - *ret = presult_instruction(stream->used, INST_MOV(BYTE, 0)); - perr = parse_utype_inst_with_operand(stream, &ret->instruction); - break; - case TOKEN_DUP: - *ret = presult_instruction(stream->used, INST_DUP(BYTE, 0)); - perr = parse_utype_inst_with_operand(stream, &ret->instruction); - break; - case TOKEN_MALLOC: - *ret = presult_instruction(stream->used, INST_MALLOC(BYTE, 0)); - perr = parse_utype_inst_with_operand(stream, &ret->instruction); - break; - case TOKEN_MSET: - *ret = presult_instruction(stream->used, INST_MSET(BYTE, 0)); - perr = parse_utype_inst_with_operand(stream, &ret->instruction); - break; - case TOKEN_MGET: - *ret = presult_instruction(stream->used, INST_MGET(BYTE, 0)); - perr = parse_utype_inst_with_operand(stream, &ret->instruction); - break; - case TOKEN_MALLOC_STACK: - *ret = presult_instruction(stream->used, INST_MALLOC_STACK(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_MSET_STACK: - *ret = presult_instruction(stream->used, INST_MSET_STACK(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_MGET_STACK: - *ret = presult_instruction(stream->used, INST_MGET_STACK(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_MDELETE: - *ret = presult_instruction(stream->used, INST_MDELETE); - break; - case TOKEN_MSIZE: - *ret = 
presult_instruction(stream->used, INST_MSIZE); - break; - case TOKEN_NOT: - *ret = presult_instruction(stream->used, INST_NOT(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_OR: - *ret = presult_instruction(stream->used, INST_OR(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_AND: - *ret = presult_instruction(stream->used, INST_AND(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_XOR: - *ret = presult_instruction(stream->used, INST_XOR(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_EQ: - *ret = presult_instruction(stream->used, INST_EQ(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_LT: - *ret = presult_instruction(stream->used, INST_LT(BYTE)); - perr = parse_type_inst(stream, &ret->instruction); - break; - case TOKEN_LTE: - *ret = presult_instruction(stream->used, INST_LTE(BYTE)); - perr = parse_type_inst(stream, &ret->instruction); - break; - case TOKEN_GT: - *ret = presult_instruction(stream->used, INST_GT(BYTE)); - perr = parse_type_inst(stream, &ret->instruction); - break; - case TOKEN_GTE: - *ret = presult_instruction(stream->used, INST_GTE(BYTE)); - perr = parse_type_inst(stream, &ret->instruction); - break; - case TOKEN_PLUS: - *ret = presult_instruction(stream->used, INST_PLUS(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_SUB: - *ret = presult_instruction(stream->used, INST_SUB(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_MULT: - *ret = presult_instruction(stream->used, INST_MULT(BYTE)); - perr = parse_utype_inst(stream, &ret->instruction); - break; - case TOKEN_PRINT: - *ret = presult_instruction(stream->used, INST_PRINT(BYTE)); - perr = parse_type_inst(stream, &ret->instruction); - break; - case TOKEN_JUMP_ABS: - *ret = presult_instruction(stream->used, INST_JUMP_ABS(0)); - ++stream->used; - if 
(stream->used >= stream->available) - return PERR_EXPECTED_OPERAND; - return parse_word_label_or_relative(stream, ret); - case TOKEN_JUMP_STACK: - *ret = presult_instruction(stream->used, INST_JUMP_STACK); - break; - case TOKEN_JUMP_IF: { - *ret = presult_instruction(stream->used, INST_JUMP_IF(BYTE, 0)); - return parse_jump_inst_operand(stream, ret); - } - case TOKEN_CALL: - *ret = presult_instruction(stream->used, INST_CALL(0)); - ++stream->used; - if (stream->used >= stream->available) - return PERR_EXPECTED_OPERAND; - return parse_word_label_or_relative(stream, ret); - case TOKEN_CALL_STACK: - *ret = presult_instruction(stream->used, INST_CALL_STACK); - break; - case TOKEN_RET: - *ret = presult_instruction(stream->used, INST_RET); - break; - case TOKEN_SYMBOL: { - size_t label_size = strcspn(token.str, ":"); - if (label_size == token.str_size) - return PERR_UNKNOWN_OPERATOR; - else if (label_size != token.str_size - 1) - return PERR_EXPECTED_LABEL; - *ret = presult_label(stream->used, token.str, label_size, 0); - break; - } - case TOKEN_STAR: - default: - return PERR_UNKNOWN_OPERATOR; - } - return perr; -} - -perr_t process_presults(presult_t *results, size_t res_count, - size_t *result_reached, prog_t **program_ptr) -{ - assert(result_reached && "process_presults: result_reached is NULL?!"); - *result_reached = 0; - label_t start_label = {0}; - - darr_t label_registry = {0}; - darr_init(&label_registry, sizeof(label_t)); - word inst_count = 0; - for (size_t i = 0; i < res_count; ++i) - { - presult_t res = results[i]; - switch (res.type) - { - case PRES_LABEL: { - label_t label = {.name = res.label.name, - .name_size = res.label.size, - .addr = inst_count}; - darr_append_bytes(&label_registry, (byte *)&label, sizeof(label)); - break; - } - case PRES_RELATIVE_ADDRESS: { - s_word offset = res.address; - if (offset < 0 && ((word)(-offset)) > inst_count) - { - free(label_registry.data); - *result_reached = i; - return PERR_INVALID_RELATIVE_ADDRESS; - } - 
results[i].instruction.operand.as_word = ((s_word)inst_count) + offset; - inst_count++; - break; - } - case PRES_GLOBAL_LABEL: { - start_label = (label_t){.name = res.label.name, - .name_size = res.label.size, - .addr = (word)inst_count}; - break; - } - case PRES_LABEL_ADDRESS: - case PRES_COMPLETE_RESULT: - inst_count++; - break; - default: - break; - } - } - - darr_t instr_darr = {0}; - darr_init(&instr_darr, sizeof(inst_t)); - - prog_header_t header = {0}; - if (start_label.name_size > 0) - { - label_t label = search_labels((label_t *)label_registry.data, - label_registry.used / sizeof(label_t), - start_label.name, start_label.name_size); - if (!label.name) - { - free(instr_darr.data); - free(label_registry.data); - return PERR_UNKNOWN_LABEL; - } - header.start_address = label.addr; - } - - for (size_t i = 0; i < res_count; ++i) - { - presult_t res = results[i]; - switch (res.type) - { - case PRES_LABEL_ADDRESS: { - inst_t inst = {0}; - label_t label = search_labels((label_t *)label_registry.data, - label_registry.used / sizeof(label_t), - res.label.name, res.label.size); - - if (!label.name) - { - free(instr_darr.data); - free(label_registry.data); - *result_reached = i; - return PERR_UNKNOWN_LABEL; - } - - inst.opcode = res.instruction.opcode; - inst.operand = DWORD(label.addr); - darr_append_bytes(&instr_darr, (byte *)&inst, sizeof(inst)); - break; - } - case PRES_RELATIVE_ADDRESS: - case PRES_COMPLETE_RESULT: { - darr_append_bytes(&instr_darr, (byte *)&res.instruction, - sizeof(res.instruction)); - } - case PRES_GLOBAL_LABEL: - case PRES_LABEL: - break; - } - } - - free(label_registry.data); - prog_t *program = - malloc(sizeof(**program_ptr) + (sizeof(inst_t) * inst_count)); - program->header = header; - program->count = inst_count; - memcpy(program->instructions, instr_darr.data, instr_darr.used); - free(instr_darr.data); - *program_ptr = program; - return PERR_OK; -} - -perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr) -{ - // Preprocessor 
- perr_t perr = preprocessor(stream); - if (perr) - return perr; - darr_t presults = {0}; - darr_init(&presults, sizeof(presult_t)); - while (stream->used < stream->available) - { - presult_t pres = {0}; - perr_t err = parse_next(stream, &pres); - if (err) - { - presults_free((presult_t *)presults.data, - presults.used / sizeof(presult_t)); - free(presults.data); - return err; - } - darr_append_bytes(&presults, (byte *)&pres, sizeof(presult_t)); - ++stream->used; - } - - presults.available = presults.used / sizeof(presult_t); - presults.used = 0; - -#if VERBOSE >= 2 - printf("[%sPARSER%s]: %lu tokens -> %lu parse units\n", TERM_YELLOW, - TERM_RESET, stream->available, presults.available); - for (size_t i = 0; i < presults.available; ++i) - { - presult_t pres = DARR_AT(presult_t, presults.data, i); - switch (pres.type) - { - case PRES_LABEL: - printf("\tLABEL: label=%s\n", pres.label.name); - break; - case PRES_LABEL_ADDRESS: - printf("\tLABEL_CALL: label=%s, inst=", pres.label.name); - inst_print(pres.instruction, stdout); - printf("\n"); - break; - case PRES_RELATIVE_ADDRESS: - printf("\tRELATIVE_CALL: addr=%ld, inst=", pres.address); - inst_print(pres.instruction, stdout); - printf("\n"); - break; - case PRES_GLOBAL_LABEL: - printf("\tSET_GLOBAL_START: name=%s\n", pres.label.name); - break; - case PRES_COMPLETE_RESULT: - printf("\tCOMPLETE: inst="); - inst_print(pres.instruction, stdout); - printf("\n"); - break; - } - } -#endif - - size_t results_processed = 0; - perr = process_presults((presult_t *)presults.data, presults.available, - &results_processed, program_ptr); - if (results_processed != presults.available) - { - presult_t pres = DARR_AT(presult_t, presults.data, results_processed); - stream->used = pres.stream_index; - } - presults_free((presult_t *)presults.data, presults.available); - free(presults.data); - return perr; -} diff --git a/asm/parser.h b/asm/parser.h deleted file mode 100644 index 7e2d1b7..0000000 --- a/asm/parser.h +++ /dev/null @@ 
-1,104 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-10-24 - * Author: Aryadev Chavali - * Description: Parser for assembly language - */ - -#ifndef PARSER_H -#define PARSER_H - -#include "./lexer.h" - -#include <lib/inst.h> - -typedef enum -{ - PERR_OK = 0, - PERR_INTEGER_OVERFLOW, - PERR_NOT_A_NUMBER, - PERR_EXPECTED_UTYPE, - PERR_EXPECTED_TYPE, - PERR_EXPECTED_SYMBOL, - PERR_EXPECTED_LABEL, - PERR_EXPECTED_OPERAND, - PERR_PREPROCESSOR_EXPECTED_NAME, - PERR_PREPROCESSOR_EXPECTED_STRING, - PERR_PREPROCESSOR_EXPECTED_END, - PERR_PREPROCESSOR_FILE_NONEXISTENT, - PERR_PREPROCESSOR_FILE_PARSE_ERROR, - PERR_PREPROCESSOR_UNKNOWN_NAME, - PERR_INVALID_RELATIVE_ADDRESS, - PERR_UNKNOWN_OPERATOR, - PERR_UNKNOWN_LABEL, -} perr_t; - -const char *perr_as_cstr(perr_t); - -typedef struct -{ - size_t stream_index; - inst_t instruction; - s_word address; - struct PLabel - { - char *name; - size_t size; - } label; - enum PResult_Type - { - PRES_LABEL = 0, - PRES_LABEL_ADDRESS, - PRES_GLOBAL_LABEL, - PRES_RELATIVE_ADDRESS, - PRES_COMPLETE_RESULT, - } type; -} presult_t; - -presult_t presult_label(size_t, const char *, size_t, s_word); -presult_t presult_label_ref(size_t, inst_t, const char *, size_t); -presult_t presult_instruction(size_t, inst_t); -presult_t presult_relative(size_t, inst_t, s_word); -presult_t presult_global(size_t, const char *, size_t, s_word); -void presult_free(presult_t); -void presults_free(presult_t *, size_t); - -typedef struct -{ - char *name; - size_t name_size; - word addr; -} label_t; - -label_t search_labels(label_t *, size_t, char *, size_t); - -typedef struct -{ - char *name; - size_t name_size; - darr_t code; -} block_t; - -block_t search_blocks(block_t *, size_t, char *, size_t); - -perr_t 
preprocess_use_blocks(token_stream_t *, token_stream_t *); -perr_t preprocess_macro_blocks(token_stream_t *, token_stream_t *); -// Analyses then inlines corresponding tokens into stream directly -perr_t preprocessor(token_stream_t *); - -// Parses the next "parse result" from stream -perr_t parse_next(token_stream_t *, presult_t *); -// Constructs a program from the set of parse results (from repeatedly -// calling parse_next) -perr_t process_presults(presult_t *, size_t, size_t *, prog_t **); - -// Preprocesses, generates results then constructs a program all in -// one routine (thing to call in most use cases). -perr_t parse_stream(token_stream_t *, prog_t **); - -#endif |