From 9d72c9177da190b8e720c25ad56a041d562f2b95 Mon Sep 17 00:00:00 2001 From: Aryadev Chavali Date: Tue, 16 Apr 2024 19:14:24 +0630 Subject: [PATCH] Clean up work tree for making assembler --- README.org | 116 ++---- lib/base.c | 107 ----- lib/base.h | 148 ------- lib/darr.c | 77 ---- lib/darr.h | 88 ----- lib/heap.c | 101 ----- lib/heap.h | 42 -- lib/inst.c | 516 ------------------------ lib/inst.h | 108 ----- lib/prog.h | 176 --------- spec.org | 88 ----- todo.org | 87 ---- vm/main.c | 92 ----- vm/runtime.c | 1070 -------------------------------------------------- vm/runtime.h | 301 -------------- 15 files changed, 25 insertions(+), 3092 deletions(-) delete mode 100644 lib/base.c delete mode 100644 lib/base.h delete mode 100644 lib/darr.c delete mode 100644 lib/darr.h delete mode 100644 lib/heap.c delete mode 100644 lib/heap.h delete mode 100644 lib/inst.c delete mode 100644 lib/inst.h delete mode 100644 lib/prog.h delete mode 100644 spec.org delete mode 100644 vm/main.c delete mode 100644 vm/runtime.c delete mode 100644 vm/runtime.h diff --git a/README.org b/README.org index 68642f7..d638ab0 100644 --- a/README.org +++ b/README.org @@ -1,110 +1,44 @@ -#+title: Oreo's Virtual Machine (OVM) +#+title: Aryadev's Assembly Language (AAL) #+author: Aryadev Chavali #+date: 2023-10-15 -A stack based virtual machine in C11, with a dynamic register setup -which acts as variable space. Deals primarily in bytes, doesn't make -assertions about typing and is very simple to target. +A compiler for Aryadev's Assembly Language, an assembly-like +programming language, which targets the +[[https://github.com/aryadev-software/avm/][AVM]]. -2024-04-16: Project will now be split into two components -1) The runtime + base library -2) The assembler - -This will focus each repository on separate issues and make it easier -to organize. They will both derive from the same repositories -i.e. 
I'm not making fresh repositories -in but rather branching this repository into two different versions. - -The two versions will be hosted at: -1) [[https://github.com/aryadev-software/avm]] -1) [[https://github.com/aryadev-software/aal]] * How to build -Requires =GNU make= and a compliant C11 compiler. Code base has been -tested against =gcc= and =clang=, but given how the project has been -written without use of GNU'isms (that I'm aware of) it shouldn't be an -issue to compile using something like =tcc= or another compiler (look -at [[file:Makefile::CC=gcc][here]] to change the compiler). +Requires =GNU make= and a compliant C++17 compiler. Code base has +been tested against =g++= and =clang=, but given how the project has +been written without use of GNU'isms (that I'm aware of) it shouldn't +be an issue to compile using another conforming C++17 compiler +(look at [[file:Makefile::CPP=g++][here]] to change the compiler). To build everything simply run ~make~. This will build: -+ [[file:lib/inst.c][instruction bytecode system]] which provides - object files to target the VM -+ [[file:vm/main.c][VM executable]] which executes bytecode -+ [[file:asm/main.c][Assembler executable]] which assembles compliant - assembly code to VM bytecode ++ [[file:asm/main.cpp][Assembler executable]] which assembles + compliant assembly code to VM bytecode + [[file:examples/][Assembly examples]] which provide some source code examples on common programs one may write. Use this to figure out - how to write compliant assembly. Also a good test of both the VM - and assembler. + how to write compliant AAL. Also a good test of both the VM and + assembler. 
You may also build each component individually through the corresponding recipe: -+ ~make lib~ -+ ~make vm~ + ~make asm~ + ~make examples~ -* Instructions to target the virtual machine -You need to link with the object files for -[[file:lib/base.c][base.c]], [[file:lib/darr.c][darr.c]] and -[[file:lib/inst.c][inst.c]] to be able to properly target the OVM. -The basic idea is to create some instructions via ~inst_t~, -instantiating a ~prog_t~ structure which wraps those instructions -(includes a header and other useful things for the runtime), then -using ~prog_write_file~ to serialise and write bytecode to a file -pointer. - -To execute directly compiled bytecode use the ~ovm.out~ executable on -the bytecode file. - -For clarity, one may build ~lib~ (~make lib~) then use the resulting -object files to link and create bytecode for the virtual machine. -** In memory virtual machine -Instead of serialising and writing bytecode to a file, one may instead -serialise bytecode in memory using ~prog_write_bytecode~ which writes -bytecode to a dynamic byte buffer, so called *in memory compilation*. -To execute this bytecode, deserialise the bytecode into a program then -load it into a complete ~vm_t~ structure (linking with -[[file:vm/runtime.c][runtime.c]]). - -In fact, you may skip the process of serialising entirely. You can -emit a ~prog_t~ structure corresponding to source code, load it -directly into the ~vm_t~ structure, then execute. To do so is a bit -involved, so I recommend looking at [[file:vm/main.c]]. In rough -steps: -+ Create a virtual machine "from scratch" (load the necessary - components (the stack, heap and call stack) by hand) -+ Load program into VM (~vm_load_program~) -+ Run ~vm_execute_all~ - -This is recommended if writing an interpreted language such as a Lisp, -where on demand execution of code is more suitable. 
* Lines of code #+begin_src sh :results table :exports results -wc -lwc $(find -regex ".*\.[ch]\(pp\)?") +wc -lwc $(find -regex ".*\.[ch]\(pp\)?" -maxdepth 2) #+end_src #+RESULTS: -| Files | Lines | Words | Bytes | -|------------------------+-------+-------+--------| -| ./lib/heap.h | 42 | 111 | 801 | -| ./lib/inst.c | 516 | 1315 | 13982 | -| ./lib/darr.c | 77 | 225 | 1757 | -| ./lib/base.c | 107 | 306 | 2002 | -| ./lib/inst.h | 108 | 426 | 4067 | -| ./lib/prog.h | 176 | 247 | 2616 | -| ./lib/base.h | 148 | 626 | 3915 | -| ./lib/darr.h | 88 | 465 | 2697 | -| ./lib/heap.c | 101 | 270 | 1910 | -| ./vm/runtime.h | 301 | 780 | 7965 | -| ./vm/runtime.c | 1070 | 3097 | 30010 | -| ./vm/main.c | 92 | 265 | 2243 | -| ./asm/base.hpp | 21 | 68 | 472 | -| ./asm/lexer.cpp | 565 | 1448 | 14067 | -| ./asm/base.cpp | 33 | 89 | 705 | -| ./asm/parser.hpp | 82 | 199 | 1656 | -| ./asm/parser.cpp | 42 | 129 | 1294 | -| ./asm/lexer.hpp | 106 | 204 | 1757 | -| ./asm/preprocesser.cpp | 218 | 574 | 5800 | -| ./asm/preprocesser.hpp | 62 | 147 | 1360 | -| ./asm/main.cpp | 148 | 414 | 3791 | -|------------------------+-------+-------+--------| -| total | 4103 | 11405 | 104867 | +| Files | Lines | Words | Bytes | +|------------------------+-------+-------+-------| +| ./asm/base.hpp | 21 | 68 | 472 | +| ./asm/lexer.cpp | 565 | 1448 | 14067 | +| ./asm/base.cpp | 33 | 89 | 705 | +| ./asm/lexer.hpp | 106 | 204 | 1757 | +| ./asm/preprocesser.cpp | 218 | 574 | 5800 | +| ./asm/preprocesser.hpp | 62 | 147 | 1360 | +| ./asm/main.cpp | 148 | 414 | 3791 | +|------------------------+-------+-------+-------| +| total | 1153 | 2944 | 27952 | diff --git a/lib/base.c b/lib/base.c deleted file mode 100644 index caa76ae..0000000 --- a/lib/base.c +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. 
If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-10-26 - * Author: Aryadev Chavali - * Description: Implementation of basic library functions - */ - -#include "./base.h" - -#include - -union hword_pun -{ - hword h; - byte bytes[HWORD_SIZE]; -}; - -union word_pun -{ - word h; - byte bytes[WORD_SIZE]; -}; - -hword hword_htobc(hword w) -{ -#if __LITTLE_ENDIAN__ - return w; -#else - union hword_pun x = {w}; - union hword_pun y = {0}; - for (size_t i = 0, j = HWORD_SIZE; i < HWORD_SIZE; ++i, --j) - y.bytes[j - 1] = x.bytes[i]; - return y.h; -#endif -} - -hword hword_bctoh(hword w) -{ -#if __LITTLE_ENDIAN__ - return w; -#else - union hword_pun x = {w}; - union hword_pun y = {0}; - for (size_t i = 0, j = HWORD_SIZE; i < HWORD_SIZE; ++i, --j) - y.bytes[j - 1] = x.bytes[i]; - return y.h; -#endif -} - -word word_htobc(word w) -{ -#if __LITTLE_ENDIAN__ - return w; -#else - union word_pun x = {w}; - union word_pun y = {0}; - for (size_t i = 0, j = WORD_SIZE; i < WORD_SIZE; ++i, --j) - y.bytes[j - 1] = x.bytes[i]; - return y.h; -#endif -} - -word word_bctoh(word w) -{ -#if __LITTLE_ENDIAN__ - return w; -#else - union word_pun x = {w}; - union word_pun y = {0}; - for (size_t i = 0, j = WORD_SIZE; i < WORD_SIZE; ++i, --j) - y.bytes[j - 1] = x.bytes[i]; - return y.h; -#endif -} - -hword convert_bytes_to_hword(byte *bytes) -{ - hword be_h = 0; - memcpy(&be_h, bytes, HWORD_SIZE); - hword h = hword_bctoh(be_h); - return h; -} - -void convert_hword_to_bytes(hword w, byte *bytes) -{ - hword be_h = hword_htobc(w); - memcpy(bytes, &be_h, HWORD_SIZE); -} - -void convert_word_to_bytes(word w, byte *bytes) -{ - word be_w = word_htobc(w); - memcpy(bytes, &be_w, WORD_SIZE); -} - -word convert_bytes_to_word(byte *bytes) -{ - word be_w = 0; - memcpy(&be_w, bytes, WORD_SIZE); - word w = word_bctoh(be_w); - return w; -} diff --git a/lib/base.h b/lib/base.h deleted file mode 100644 index 8ce3510..0000000 --- a/lib/base.h +++ /dev/null @@ -1,148 +0,0 @@ -/* Copyright (C) 
2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-10-15 - * Author: Aryadev Chavali - * Description: Basic types and routines - */ - -#ifndef BASE_H -#define BASE_H - -#include - -/* Basic macros for a variety of uses. Quite self explanatory. */ -#define ARR_SIZE(xs) (sizeof(xs) / sizeof(xs[0])) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) > (b) ? (b) : (a)) -#define TERM_GREEN "\e[0;32m" -#define TERM_YELLOW "\e[0;33m" -#define TERM_RED "\e[0;31m" -#define TERM_RESET "\e[0;0m" - -// Flags for program behaviour (usually related to printing) -#ifndef VERBOSE -#define VERBOSE 0 -#endif -#ifndef PRINT_HEX -#define PRINT_HEX 0 -#endif - -/* Ease of use aliases for numeric types */ -typedef uint8_t u8; -typedef int8_t i8; -typedef uint32_t u32; -typedef int32_t i32; -typedef uint64_t u64; -typedef int64_t i64; - -typedef float f32; -typedef double f64; - -typedef u8 byte; -typedef i8 s_byte; -typedef u32 hword; -typedef i32 s_hword; -typedef u64 word; -typedef i64 s_word; - -/* Macros for the sizes of common base data types. */ -#define HWORD_SIZE sizeof(hword) -#define SHWORD_SIZE sizeof(s_hword) -#define WORD_SIZE sizeof(word) -#define SWORD_SIZE sizeof(s_word) - -/** Union for all basic data types in the virtual machine. - */ -typedef union -{ - byte as_byte; - s_byte as_char; - hword as_hword; - s_hword as_int; - word as_word; - s_word as_long; -} data_t; - -/** Enum of type tags for the data_t structure to provide context. - */ -typedef enum -{ - DATA_TYPE_NIL = 0, - DATA_TYPE_BYTE, - DATA_TYPE_HWORD, - DATA_TYPE_WORD, -} data_type_t; - -/* Some macros for constructing data_t instances quickly. 
*/ -#define DBYTE(BYTE) ((data_t){.as_byte = (BYTE)}) -#define DHWORD(HWORD) ((data_t){.as_hword = (HWORD)}) -#define DWORD(WORD) ((data_t){.as_word = (WORD)}) - -/** Safely subtract SUB from W, where both are words (64 bit integers). - * - * In case of underflow (i.e. where W - SUB < 0) returns 0 instead of - * the underflowed result. - */ -#define WORD_SAFE_SUB(W, SUB) ((W) > (SUB) ? ((W) - (SUB)) : 0) - -/** Return the Nth byte of WORD - * N should range from 0 to 7 as there are 8 bytes in a word. - */ -#define WORD_NTH_BYTE(WORD, N) (((WORD) >> ((N) * 8)) & 0xFF) - -/** Return the Nth half word of WORD - * N should range from 0 to 1 as there are 2 half words in a word - */ -#define WORD_NTH_HWORD(WORD, N) (((WORD) >> ((N) * 2)) & 0xFFFFFFFF) - -/** Convert a buffer of bytes to a half word - * We assume the buffer of bytes are in virtual machine byte code - * format (big endian) and that they are at least HWORD_SIZE in - * size. - */ -hword convert_bytes_to_hword(byte *buffer); - -/** Convert a half word into a VM byte code format bytes (big endian) - * @param h: Half word to convert - * @param buffer: Buffer to store into. We assume the buffer has at - * least HWORD_SIZE space. - */ -void convert_hword_to_bytes(hword h, byte *buffer); - -/** Convert a buffer of bytes to a word - * We assume the buffer of bytes are in virtual machine byte code - * format (big endian) and that they are at least WORD_SIZE in - * size. - */ -word convert_bytes_to_word(byte *); - -/** Convert a word into a VM byte code format bytes (big endian) - * @param w: Word to convert - * @param buffer: Buffer to store into. We assume the buffer has at - * least WORD_SIZE space. 
- */ -void convert_word_to_bytes(word w, byte *buffer); - -/** Convert a half word into bytecode format (little endian) - */ -hword hword_htobc(hword); - -/** Convert a half word in bytecode format (little endian) to host - * format - */ -hword hword_bctoh(hword); - -/** Convert a word into bytecode format (little endian) - */ -word word_htobc(word); - -/** Convert a word in bytecode format (little endian) to host format - */ -word word_bctoh(word); - -#endif diff --git a/lib/darr.c b/lib/darr.c deleted file mode 100644 index d9a8645..0000000 --- a/lib/darr.c +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-10-15 - * Author: Aryadev Chavali - * Description: Dynamically sized byte array - */ - -#include -#include -#include - -#include "./darr.h" - -void darr_init(darr_t *darr, size_t size) -{ - if (size == 0) - size = DARR_DEFAULT_SIZE; - *darr = (darr_t){ - .data = calloc(size, 1), - .used = 0, - .available = size, - }; -} - -void darr_ensure_capacity(darr_t *darr, size_t requested) -{ - if (darr->used + requested >= darr->available) - { - darr->available = - MAX(darr->used + requested, darr->available * DARR_REALLOC_MULT); - darr->data = realloc(darr->data, darr->available); - memset(darr->data + darr->used, 0, darr->available - darr->used); - } -} - -void darr_append_byte(darr_t *darr, byte byte) -{ - darr_ensure_capacity(darr, 1); - darr->data[darr->used++] = byte; -} - -void darr_append_bytes(darr_t *darr, byte *bytes, size_t n) -{ - darr_ensure_capacity(darr, n); - memcpy(darr->data + darr->used, bytes, n); - darr->used += n; -} - -byte darr_at(darr_t *darr, size_t index) -{ - if (index >= darr->used) - // TODO: Error (index is out of bounds) - return 0; - return darr->data[index]; -} - -void 
darr_write_file(darr_t *bytes, FILE *fp) -{ - size_t size = fwrite(bytes->data, bytes->used, 1, fp); - assert(size == 1); -} - -darr_t darr_read_file(FILE *fp) -{ - darr_t darr = {0}; - fseek(fp, 0, SEEK_END); - long size = ftell(fp); - darr_init(&darr, size); - fseek(fp, 0, SEEK_SET); - fread(darr.data, size, 1, fp); - return darr; -} diff --git a/lib/darr.h b/lib/darr.h deleted file mode 100644 index f36c034..0000000 --- a/lib/darr.h +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-10-15 - * Author: Aryadev Chavali - * Description: Dynamically sized byte array - */ - -#ifndef DARR_H -#define DARR_H - -#include -#include - -#include "./base.h" - -/** - * A dynamically sized buffer of bytes which may be used for a - * variety of purposes. - * @prop data: Buffer of bytes (may be reallocated) - * @prop used: Number of bytes currently used - * @prop available: Number of bytes currently allocated - */ -typedef struct -{ - byte *data; - size_t used, available; -} darr_t; - -/* Some useful constants for dynamic array work. */ -#define DARR_DEFAULT_SIZE 8 -#define DARR_REALLOC_MULT 1.5 - -/** Get the INDth item in a darr, where the buffer of bytes is - * considerd an array of type TYPE. - * Unsafe operation as safety checks are not done (in particular if - * the dynamic array has IND items or is big enough to store an - * element of TYPE) so it is presumed the caller will. - */ -#define DARR_AT(TYPE, DARR_DATA, IND) ((TYPE *)(DARR_DATA))[(IND)] - -/** Initialise a dynamic array (darr) with n elements. - * If n == 0 then initialise with DARR_DEFAULT_SIZE elements. - */ -void darr_init(darr_t *darr, size_t n); - -/** Ensure the dynamic array (darr) has at least n elements free. 
- * If the dynamic array has less than n elements free it will - * reallocate. - */ -void darr_ensure_capacity(darr_t *darr, size_t n); - -/** Append a byte (b) to the dynamic array (darr). - * If the dynamic array doesn't have enough space it will reallocate - * to ensure it can fit it in. - */ -void darr_append_byte(darr_t *darr, byte b); - -/** Append an array of n bytes (b) to the dynamic array (darr). - * If the dynamic array doesn't have enough space to fit all n bytes - * it will reallocate to ensure it can fit it in. - */ -void darr_append_bytes(darr_t *darr, byte *b, size_t n); - -/** Safely get the nth byte of the dynamic array (darr) - * If the dynamic array has less than n bytes used, it will return 0 - * as a default value. - */ -byte darr_at(darr_t *darr, size_t n); - -/** Write the dynamic array (darr) to the file pointer (fp) as a - * buffer of bytes. - * Assumes fp is a valid file pointer and in write mode. - */ -void darr_write_file(darr_t *, FILE *); - -/** Read a file pointer (fp) in its entirety, converting the bytes - * into a tightly fitted dynamic array. - * Say the file pointer is a file of n bytes. Then the dynamic array - * returned will have available set to n and used set to 0. - */ -darr_t darr_read_file(FILE *); - -#endif diff --git a/lib/heap.c b/lib/heap.c deleted file mode 100644 index 28cb06a..0000000 --- a/lib/heap.c +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. 
- - * Created: 2023-11-01 - * Author: Aryadev Chavali - * Description: Arena allocator - */ - -#include "./heap.h" - -#include - -page_t *page_create(size_t max, page_t *next) -{ - page_t *page = calloc(1, sizeof(*page) + max); - page->available = max; - page->next = next; - return page; -} - -void page_delete(page_t *page) -{ - free(page); -} - -void heap_create(heap_t *heap) -{ - heap->beg = heap->end = NULL; - heap->pages = 0; -} - -bool heap_free_page(heap_t *heap, page_t *page) -{ - if (!page || !heap) - return false; - - if (page == heap->beg) - { - heap->beg = heap->beg->next; - page_delete(page); - --heap->pages; - if (heap->pages == 0) - heap->end = NULL; - return true; - } - - page_t *prev = NULL, *next = NULL, *cur = NULL; - for (cur = heap->beg; cur; cur = cur->next) - { - next = cur->next; - if (cur == page) - break; - prev = cur; - } - - if (!cur) - // Couldn't find the page - return false; - // Page was found - prev->next = next; - if (!next) - // This means page == heap->end - heap->end = prev; - page_delete(page); - --heap->pages; - if (heap->pages == 0) - heap->beg = NULL; - - return true; -} - -page_t *heap_allocate(heap_t *heap, size_t requested) -{ - page_t *cur = page_create(requested, NULL); - if (heap->end) - heap->end->next = cur; - else - heap->beg = cur; - heap->end = cur; - heap->pages++; - return cur; -} - -void heap_stop(heap_t *heap) -{ - page_t *ptr = heap->beg; - for (size_t i = 0; i < heap->pages; ++i) - { - page_t *cur = ptr; - page_t *next = ptr->next; - page_delete(cur); - ptr = next; - } - heap->beg = NULL; - heap->end = NULL; - heap->pages = 0; -} diff --git a/lib/heap.h b/lib/heap.h deleted file mode 100644 index 486f28c..0000000 --- a/lib/heap.h +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. 
If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-11-01 - * Author: Aryadev Chavali - * Description: Arena allocator - */ - -#ifndef HEAP_H -#define HEAP_H - -#include "./base.h" - -#include -#include - -typedef struct Page -{ - struct Page *next; - size_t available; - byte data[]; -} page_t; - -page_t *page_create(size_t, page_t *); -void page_delete(page_t *); - -typedef struct -{ - page_t *beg, *end; - size_t pages; -} heap_t; - -void heap_create(heap_t *); -bool heap_free_page(heap_t *, page_t *); -page_t *heap_allocate(heap_t *, size_t); -void heap_stop(heap_t *); - -#endif diff --git a/lib/inst.c b/lib/inst.c deleted file mode 100644 index 644517e..0000000 --- a/lib/inst.c +++ /dev/null @@ -1,516 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. 
- - * Created: 2023-10-15 - * Author: Aryadev Chavali - * Description: Implementation of bytecode for instructions - */ - -#include "./inst.h" - -#include -#include -#include -#include - -const char *opcode_as_cstr(opcode_t code) -{ - switch (code) - { - case OP_NOOP: - return "NOOP"; - case OP_PUSH_BYTE: - return "PUSH_BYTE"; - case OP_PUSH_WORD: - return "PUSH_WORD"; - case OP_PUSH_HWORD: - return "PUSH_HWORD"; - case OP_PUSH_REGISTER_BYTE: - return "PUSH_REGISTER_BYTE"; - case OP_PUSH_REGISTER_WORD: - return "PUSH_REGISTER_WORD"; - case OP_PUSH_REGISTER_HWORD: - return "PUSH_REGISTER_HWORD"; - case OP_POP_BYTE: - return "POP_BYTE"; - case OP_POP_WORD: - return "POP_WORD"; - case OP_POP_HWORD: - return "POP_HWORD"; - case OP_MOV_BYTE: - return "MOV_BYTE"; - case OP_MOV_WORD: - return "MOV_WORD"; - case OP_MOV_HWORD: - return "MOV_HWORD"; - case OP_DUP_BYTE: - return "DUP_BYTE"; - case OP_DUP_HWORD: - return "DUP_HWORD"; - case OP_DUP_WORD: - return "DUP_WORD"; - case OP_MALLOC_BYTE: - return "MALLOC_BYTE"; - case OP_MALLOC_HWORD: - return "MALLOC_HWORD"; - case OP_MALLOC_WORD: - return "MALLOC_WORD"; - case OP_MALLOC_STACK_BYTE: - return "MALLOC_STACK_BYTE"; - case OP_MALLOC_STACK_HWORD: - return "MALLOC_STACK_HWORD"; - case OP_MALLOC_STACK_WORD: - return "MALLOC_STACK_WORD"; - case OP_MSET_BYTE: - return "MSET_BYTE"; - case OP_MSET_HWORD: - return "MSET_HWORD"; - case OP_MSET_WORD: - return "MSET_WORD"; - case OP_MSET_STACK_BYTE: - return "MSET_STACK_BYTE"; - case OP_MSET_STACK_HWORD: - return "MSET_STACK_HWORD"; - case OP_MSET_STACK_WORD: - return "MSET_STACK_WORD"; - case OP_MGET_BYTE: - return "MGET_BYTE"; - case OP_MGET_HWORD: - return "MGET_HWORD"; - case OP_MGET_WORD: - return "MGET_WORD"; - case OP_MGET_STACK_BYTE: - return "MGET_STACK_BYTE"; - case OP_MGET_STACK_HWORD: - return "MGET_STACK_HWORD"; - case OP_MGET_STACK_WORD: - return "MGET_STACK_WORD"; - case OP_MDELETE: - return "MDELETE"; - case OP_MSIZE: - return "MDELETE"; - case OP_NOT_BYTE: - return 
"NOT_BYTE"; - case OP_NOT_HWORD: - return "NOT_HWORD"; - case OP_NOT_WORD: - return "NOT_WORD"; - case OP_OR_BYTE: - return "OR_BYTE"; - case OP_OR_HWORD: - return "OR_HWORD"; - case OP_OR_WORD: - return "OR_WORD"; - case OP_AND_BYTE: - return "AND_BYTE"; - case OP_AND_HWORD: - return "AND_HWORD"; - case OP_AND_WORD: - return "AND_WORD"; - case OP_XOR_BYTE: - return "XOR_BYTE"; - case OP_XOR_HWORD: - return "XOR_HWORD"; - case OP_XOR_WORD: - return "XOR_WORD"; - case OP_EQ_BYTE: - return "EQ_BYTE"; - case OP_EQ_HWORD: - return "EQ_HWORD"; - case OP_EQ_WORD: - return "EQ_WORD"; - case OP_LT_BYTE: - return "LT_BYTE"; - case OP_LT_CHAR: - return "LT_CHAR"; - case OP_LT_HWORD: - return "LT_HWORD"; - case OP_LT_INT: - return "LT_INT"; - case OP_LT_LONG: - return "LT_LONG"; - case OP_LT_WORD: - return "LT_WORD"; - case OP_LTE_BYTE: - return "LTE_BYTE"; - case OP_LTE_CHAR: - return "LTE_CHAR"; - case OP_LTE_HWORD: - return "LTE_HWORD"; - case OP_LTE_INT: - return "LTE_INT"; - case OP_LTE_LONG: - return "LTE_LONG"; - case OP_LTE_WORD: - return "LTE_WORD"; - case OP_GT_BYTE: - return "GT_BYTE"; - case OP_GT_CHAR: - return "GT_CHAR"; - case OP_GT_HWORD: - return "GT_HWORD"; - case OP_GT_INT: - return "GT_INT"; - case OP_GT_LONG: - return "GT_LONG"; - case OP_GT_WORD: - return "GT_WORD"; - case OP_GTE_BYTE: - return "GTE_BYTE"; - case OP_GTE_CHAR: - return "GTE_CHAR"; - case OP_GTE_HWORD: - return "GTE_HWORD"; - case OP_GTE_INT: - return "GTE_INT"; - case OP_GTE_LONG: - return "GTE_LONG"; - case OP_GTE_WORD: - return "GTE_WORD"; - case OP_PLUS_BYTE: - return "PLUS_BYTE"; - case OP_PLUS_HWORD: - return "PLUS_HWORD"; - case OP_PLUS_WORD: - return "PLUS_WORD"; - case OP_SUB_BYTE: - return "SUB_BYTE"; - case OP_SUB_HWORD: - return "SUB_HWORD"; - case OP_SUB_WORD: - return "SUB_WORD"; - case OP_MULT_BYTE: - return "MULT_BYTE"; - case OP_MULT_HWORD: - return "MULT_HWORD"; - case OP_MULT_WORD: - return "MULT_WORD"; - case OP_JUMP_ABS: - return "JUMP_ABS"; - case OP_JUMP_STACK: - 
return "JUMP_STACK"; - case OP_JUMP_IF_BYTE: - return "JUMP_IF_BYTE"; - case OP_JUMP_IF_HWORD: - return "JUMP_IF_HWORD"; - case OP_JUMP_IF_WORD: - return "JUMP_IF_WORD"; - case OP_CALL: - return "CALL"; - case OP_CALL_STACK: - return "CALL_STACK"; - case OP_RET: - return "RET"; - case OP_PRINT_CHAR: - return "PRINT_CHAR"; - case OP_PRINT_BYTE: - return "PRINT_BYTE"; - case OP_PRINT_INT: - return "PRINT_INT"; - case OP_PRINT_HWORD: - return "PRINT_HWORD"; - case OP_PRINT_LONG: - return "PRINT_LONG"; - case OP_PRINT_WORD: - return "PRINT_WORD"; - case OP_HALT: - return "HALT"; - case NUMBER_OF_OPCODES: - return ""; - } - return ""; -} - -void data_print(data_t datum, data_type_t type, FILE *fp) -{ - switch (type) - { - case DATA_TYPE_NIL: - break; - case DATA_TYPE_BYTE: - fprintf(fp, "%X", datum.as_byte); - break; - case DATA_TYPE_HWORD: - fprintf(fp, "%X", datum.as_hword); - break; - case DATA_TYPE_WORD: - fprintf(fp, "%lX", datum.as_word); - break; - } -} - -void inst_print(inst_t instruction, FILE *fp) -{ - static_assert(NUMBER_OF_OPCODES == 98, "inst_bytecode_size: Out of date"); - fprintf(fp, "%s(", opcode_as_cstr(instruction.opcode)); - if (OPCODE_IS_TYPE(instruction.opcode, OP_PUSH)) - { - data_type_t type = (data_type_t)instruction.opcode; - fprintf(fp, "datum=0x"); - data_print(instruction.operand, type, fp); - } - else if (OPCODE_IS_TYPE(instruction.opcode, OP_PUSH_REGISTER) || - OPCODE_IS_TYPE(instruction.opcode, OP_MOV)) - { - fprintf(fp, "reg=0x"); - data_print(instruction.operand, DATA_TYPE_BYTE, fp); - } - else if (OPCODE_IS_TYPE(instruction.opcode, OP_DUP) || - OPCODE_IS_TYPE(instruction.opcode, OP_MALLOC) || - OPCODE_IS_TYPE(instruction.opcode, OP_MSET) || - OPCODE_IS_TYPE(instruction.opcode, OP_MGET)) - { - fprintf(fp, "n=%lu", instruction.operand.as_word); - } - else if (instruction.opcode == OP_JUMP_ABS || - OPCODE_IS_TYPE(instruction.opcode, OP_JUMP_IF) || - instruction.opcode == OP_CALL) - { - fprintf(fp, "address=0x"); - 
data_print(instruction.operand, DATA_TYPE_WORD, fp); - } - fprintf(fp, ")"); -} - -size_t inst_bytecode_size(inst_t inst) -{ - static_assert(NUMBER_OF_OPCODES == 98, "inst_bytecode_size: Out of date"); - size_t size = 1; // for opcode - if (OPCODE_IS_TYPE(inst.opcode, OP_PUSH)) - { - if (inst.opcode == OP_PUSH_BYTE) - ++size; - else if (inst.opcode == OP_PUSH_HWORD) - size += HWORD_SIZE; - else if (inst.opcode == OP_PUSH_WORD) - size += WORD_SIZE; - } - else if (OPCODE_IS_TYPE(inst.opcode, OP_PUSH_REGISTER) || - OPCODE_IS_TYPE(inst.opcode, OP_MOV) || - OPCODE_IS_TYPE(inst.opcode, OP_DUP) || - OPCODE_IS_TYPE(inst.opcode, OP_MALLOC) || - OPCODE_IS_TYPE(inst.opcode, OP_MSET) || - OPCODE_IS_TYPE(inst.opcode, OP_MGET) || inst.opcode == OP_JUMP_ABS || - OPCODE_IS_TYPE(inst.opcode, OP_JUMP_IF) || inst.opcode == OP_CALL) - size += WORD_SIZE; - return size; -} - -void inst_write_bytecode(inst_t inst, darr_t *darr) -{ - static_assert(NUMBER_OF_OPCODES == 98, "inst_write_bytecode: Out of date"); - // Append opcode - darr_append_byte(darr, inst.opcode); - // Then append 0 or more operands - data_type_t to_append = DATA_TYPE_NIL; - if (OPCODE_IS_TYPE(inst.opcode, OP_PUSH)) - to_append = (data_type_t)inst.opcode; - else if (OPCODE_IS_TYPE(inst.opcode, OP_PUSH_REGISTER) || - OPCODE_IS_TYPE(inst.opcode, OP_MOV) || - OPCODE_IS_TYPE(inst.opcode, OP_DUP) || - OPCODE_IS_TYPE(inst.opcode, OP_MALLOC) || - OPCODE_IS_TYPE(inst.opcode, OP_MSET) || - OPCODE_IS_TYPE(inst.opcode, OP_MGET) || inst.opcode == OP_JUMP_ABS || - OPCODE_IS_TYPE(inst.opcode, OP_JUMP_IF) || inst.opcode == OP_CALL) - to_append = DATA_TYPE_WORD; - - switch (to_append) - { - case DATA_TYPE_NIL: - break; - case DATA_TYPE_BYTE: - darr_append_byte(darr, inst.operand.as_byte); - break; - case DATA_TYPE_HWORD: - darr_ensure_capacity(darr, HWORD_SIZE); - convert_hword_to_bytes(inst.operand.as_hword, darr->data + darr->used); - darr->used += HWORD_SIZE; - break; - case DATA_TYPE_WORD: - darr_ensure_capacity(darr, WORD_SIZE); - 
convert_word_to_bytes(inst.operand.as_word, darr->data + darr->used); - darr->used += WORD_SIZE; - break; - } -} - -void insts_write_bytecode(inst_t *insts, size_t size, darr_t *darr) -{ - for (size_t i = 0; i < size; ++i) - inst_write_bytecode(insts[i], darr); -} - -data_t read_type_from_darr(darr_t *darr, data_type_t type) -{ - switch (type) - { - case DATA_TYPE_NIL: - break; - case DATA_TYPE_BYTE: - if (darr->used > darr->available) - // TODO: Error (darr has no space left) - return DBYTE(0); - return DBYTE(darr->data[darr->used++]); - break; - case DATA_TYPE_HWORD: - if (darr->used + HWORD_SIZE > darr->available) - // TODO: Error (darr has no space left) - return DWORD(0); - hword u = convert_bytes_to_hword(darr->data + darr->used); - darr->used += HWORD_SIZE; - return DHWORD(u); - break; - case DATA_TYPE_WORD: - if (darr->used + WORD_SIZE > darr->available) - // TODO: Error (darr has no space left) - return DWORD(0); - word w = convert_bytes_to_word(darr->data + darr->used); - darr->used += WORD_SIZE; - return DWORD(w); - break; - } - // TODO: Error (unrecognised type) - return DBYTE(0); -} - -inst_t inst_read_bytecode(darr_t *darr) -{ - static_assert(NUMBER_OF_OPCODES == 98, "inst_read_bytecode: Out of date"); - if (darr->used >= darr->available) - return (inst_t){0}; - inst_t inst = {0}; - opcode_t opcode = darr->data[darr->used++]; - if (opcode > OP_HALT || opcode == NUMBER_OF_OPCODES || opcode < OP_NOOP) - return INST_NOOP; - // Read operands - if (OPCODE_IS_TYPE(opcode, OP_PUSH)) - inst.operand = read_type_from_darr(darr, (data_type_t)opcode); - // Read register (as a byte) - else if (OPCODE_IS_TYPE(opcode, OP_PUSH_REGISTER) || - OPCODE_IS_TYPE(opcode, OP_MOV) || OPCODE_IS_TYPE(opcode, OP_DUP) || - OPCODE_IS_TYPE(opcode, OP_MALLOC) || - OPCODE_IS_TYPE(opcode, OP_MSET) || OPCODE_IS_TYPE(opcode, OP_MGET) || - opcode == OP_JUMP_ABS || OPCODE_IS_TYPE(opcode, OP_JUMP_IF) || - opcode == OP_CALL) - inst.operand = read_type_from_darr(darr, DATA_TYPE_WORD); - // 
Otherwise opcode doesn't take operands - - inst.opcode = opcode; - - return inst; -} - -inst_t *insts_read_bytecode(darr_t *bytes, size_t *ret_size) -{ - *ret_size = 0; - // NOTE: Here we use the darr as a dynamic array of inst_t. - darr_t instructions = {0}; - darr_init(&instructions, sizeof(inst_t)); - while (bytes->used < bytes->available) - { - inst_t instruction = inst_read_bytecode(bytes); - darr_append_bytes(&instructions, (byte *)&instruction, sizeof(instruction)); - } - *ret_size = instructions.used / sizeof(inst_t); - return (inst_t *)instructions.data; -} - -inst_t *insts_read_bytecode_file(FILE *fp, size_t *ret) -{ - darr_t darr = darr_read_file(fp); - inst_t *instructions = insts_read_bytecode(&darr, ret); - free(darr.data); - return instructions; -} - -void insts_write_bytecode_file(inst_t *instructions, size_t size, FILE *fp) -{ - darr_t darr = {0}; - darr_init(&darr, 0); - insts_write_bytecode(instructions, size, &darr); - darr_write_file(&darr, fp); - free(darr.data); -} - -void prog_header_write_bytecode(prog_header_t header, darr_t *buffer) -{ - word start = word_htobc(header.start_address); - darr_append_bytes(buffer, (byte *)&start, sizeof(start)); -} - -void prog_write_bytecode(prog_t *program, darr_t *buffer) -{ - // Write program header - prog_header_write_bytecode(program->header, buffer); - // Write instruction count - word pcount = word_htobc(program->count); - darr_append_bytes(buffer, (byte *)&pcount, sizeof(pcount)); - // Write instructions - insts_write_bytecode(program->instructions, program->count, buffer); -} - -void prog_append_bytecode(prog_t *program, darr_t *buffer) -{ - insts_write_bytecode(program->instructions, program->count, buffer); -} - -prog_header_t prog_header_read_bytecode(darr_t *buffer) -{ - prog_header_t header = {0}; - header.start_address = convert_bytes_to_word(buffer->data + buffer->used); - buffer->used += sizeof(header.start_address); - return header; -} - -prog_t *prog_read_bytecode(darr_t *buffer) -{ - // 
TODO: Error (not enough space for program header) - if ((buffer->available - buffer->used) < sizeof(prog_header_t)) - return NULL; - // Read program header - prog_header_t header = prog_header_read_bytecode(buffer); - // TODO: Error (not enough space for program instruction count) - if ((buffer->available - buffer->used) < WORD_SIZE) - return NULL; - - // Read instruction count - word count = convert_bytes_to_word(buffer->data + buffer->used); - buffer->used += sizeof(count); - - prog_t *program = malloc(sizeof(*program) + (sizeof(inst_t) * count)); - size_t i; - for (i = 0; i < count && (buffer->used < buffer->available); ++i) - program->instructions[i] = inst_read_bytecode(buffer); - - // TODO: Error (Expected more instructions) - if (i < count - 1) - { - free(program); - return NULL; - } - - program->header = header; - program->count = count; - - return program; -} - -void prog_write_file(prog_t *program, FILE *fp) -{ - darr_t bytecode = {0}; - prog_write_bytecode(program, &bytecode); - fwrite(bytecode.data, bytecode.used, 1, fp); - free(bytecode.data); -} - -prog_t *prog_read_file(FILE *fp) -{ - darr_t buffer = darr_read_file(fp); - prog_t *p = prog_read_bytecode(&buffer); - free(buffer.data); - return p; -} diff --git a/lib/inst.h b/lib/inst.h deleted file mode 100644 index fd694a9..0000000 --- a/lib/inst.h +++ /dev/null @@ -1,108 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. 
- - * Created: 2023-10-15 - * Author: Aryadev Chavali - * Description: Instructions and opcodes - */ - -#ifndef INST_H -#define INST_H - -#include -#include - -#include -#include - -const char *opcode_as_cstr(opcode_t); - -#define OPCODE_IS_TYPE(OPCODE, OP_TYPE) \ - (((OPCODE) >= OP_TYPE##_BYTE) && ((OPCODE) <= OP_TYPE##_WORD)) - -#define OPCODE_DATA_TYPE(OPCODE, OP_TYPE) \ - ((OPCODE) == OP_TYPE##_BYTE ? DATA_TYPE_BYTE \ - : ((OPCODE) == OP_TYPE##_HWORD) ? DATA_TYPE_HWORD \ - : DATA_TYPE_WORD) - -void inst_print(inst_t, FILE *); - -size_t inst_bytecode_size(inst_t); -void inst_write_bytecode(inst_t, darr_t *); -void insts_write_bytecode(inst_t *, size_t, darr_t *); -// Here the dynamic array is a preloaded buffer of bytes, where -// darr.available is the number of overall bytes and used is the -// cursor (where we are in the buffer). -inst_t inst_read_bytecode(darr_t *); -inst_t *insts_read_bytecode(darr_t *, size_t *); - -void insts_write_bytecode_file(inst_t *, size_t, FILE *); -inst_t *insts_read_bytecode_file(FILE *, size_t *); - -// Write the entire program as bytecode -void prog_write_bytecode(prog_t *, darr_t *); -// Only append the instructions as bytecode -void prog_append_bytecode(prog_t *, darr_t *); -// Read an entire program as bytecode -prog_t *prog_read_bytecode(darr_t *); - -void prog_write_file(prog_t *, FILE *); -prog_t *prog_read_file(FILE *); - -#define INST_NOOP ((inst_t){0}) -#define INST_HALT ((inst_t){.opcode = OP_HALT}) - -#define INST_PUSH(TYPE, OP) \ - ((inst_t){.opcode = OP_PUSH_##TYPE, .operand = D##TYPE(OP)}) - -#define INST_MOV(TYPE, OP) \ - ((inst_t){.opcode = OP_MOV_##TYPE, .operand = D##TYPE(OP)}) - -#define INST_POP(TYPE) ((inst_t){.opcode = OP_POP_##TYPE}) - -#define INST_PUSH_REG(TYPE, REG) \ - ((inst_t){.opcode = OP_PUSH_REGISTER_##TYPE, .operand = D##TYPE(REG)}) - -#define INST_DUP(TYPE, OP) \ - ((inst_t){.opcode = OP_DUP_##TYPE, .operand = DWORD(OP)}) - -#define INST_MALLOC(TYPE, OP) \ - ((inst_t){.opcode = OP_MALLOC_##TYPE, 
.operand = DWORD(OP)}) -#define INST_MALLOC_STACK(TYPE) ((inst_t){.opcode = OP_MALLOC_STACK_##TYPE}) -#define INST_MSET(TYPE, OP) \ - ((inst_t){.opcode = OP_MSET_##TYPE, .operand = DWORD(OP)}) -#define INST_MSET_STACK(TYPE) ((inst_t){.opcode = OP_MSET_STACK_##TYPE}) -#define INST_MGET(TYPE, OP) \ - ((inst_t){.opcode = OP_MGET_##TYPE, .operand = DWORD(OP)}) -#define INST_MGET_STACK(TYPE) ((inst_t){.opcode = OP_MGET_STACK_##TYPE}) -#define INST_MDELETE ((inst_t){.opcode = OP_MDELETE}) -#define INST_MSIZE ((inst_t){.opcode = OP_MSIZE}) - -#define INST_NOT(TYPE) ((inst_t){.opcode = OP_NOT_##TYPE}) -#define INST_OR(TYPE) ((inst_t){.opcode = OP_OR_##TYPE}) -#define INST_AND(TYPE) ((inst_t){.opcode = OP_AND_##TYPE}) -#define INST_XOR(TYPE) ((inst_t){.opcode = OP_XOR_##TYPE}) -#define INST_EQ(TYPE) ((inst_t){.opcode = OP_EQ_##TYPE}) -#define INST_LT(TYPE) ((inst_t){.opcode = OP_LT_##TYPE}) -#define INST_LTE(TYPE) ((inst_t){.opcode = OP_LTE_##TYPE}) -#define INST_GT(TYPE) ((inst_t){.opcode = OP_GT_##TYPE}) -#define INST_GTE(TYPE) ((inst_t){.opcode = OP_GTE_##TYPE}) -#define INST_PLUS(TYPE) ((inst_t){.opcode = OP_PLUS_##TYPE}) -#define INST_SUB(TYPE) ((inst_t){.opcode = OP_SUB_##TYPE}) -#define INST_MULT(TYPE) ((inst_t){.opcode = OP_MULT_##TYPE}) - -#define INST_JUMP_ABS(OP) \ - ((inst_t){.opcode = OP_JUMP_ABS, .operand = DWORD(OP)}) -#define INST_JUMP_STACK ((inst_t){.opcode = OP_JUMP_STACK}) -#define INST_JUMP_IF(TYPE, OP) \ - ((inst_t){.opcode = OP_JUMP_IF_##TYPE, .operand = DWORD(OP)}) -#define INST_CALL(OP) ((inst_t){.opcode = OP_CALL, .operand = DWORD(OP)}) -#define INST_CALL_STACK ((inst_t){.opcode = OP_CALL_STACK}) -#define INST_RET ((inst_t){.opcode = OP_RET}) - -#define INST_PRINT(TYPE) ((inst_t){.opcode = OP_PRINT_##TYPE}) -#endif diff --git a/lib/prog.h b/lib/prog.h deleted file mode 100644 index 4548f08..0000000 --- a/lib/prog.h +++ /dev/null @@ -1,176 +0,0 @@ -/* Copyright (C) 2024 Aryadev Chavali - - * You may distribute and modify this code under the terms of 
the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2024-04-14 - * Author: Aryadev Chavali - * Description: Structures for both instructions and programs for the - * virtual machine - */ - -#ifndef PROG_H -#define PROG_H - -#include - -typedef enum -{ - OP_NOOP = 0, - - // Dealing with data and registers - OP_PUSH_BYTE, - OP_PUSH_HWORD, - OP_PUSH_WORD, - - OP_POP_BYTE, - OP_POP_HWORD, - OP_POP_WORD, - - OP_PUSH_REGISTER_BYTE, - OP_PUSH_REGISTER_HWORD, - OP_PUSH_REGISTER_WORD, - - OP_MOV_BYTE, - OP_MOV_HWORD, - OP_MOV_WORD, - - OP_DUP_BYTE, - OP_DUP_HWORD, - OP_DUP_WORD, - - // Dealing with the heap - OP_MALLOC_BYTE, - OP_MALLOC_HWORD, - OP_MALLOC_WORD, - - OP_MALLOC_STACK_BYTE, - OP_MALLOC_STACK_HWORD, - OP_MALLOC_STACK_WORD, - - OP_MSET_BYTE, - OP_MSET_HWORD, - OP_MSET_WORD, - - OP_MSET_STACK_BYTE, - OP_MSET_STACK_HWORD, - OP_MSET_STACK_WORD, - - OP_MGET_BYTE, - OP_MGET_HWORD, - OP_MGET_WORD, - - OP_MGET_STACK_BYTE, - OP_MGET_STACK_HWORD, - OP_MGET_STACK_WORD, - - OP_MDELETE, - OP_MSIZE, - - // Boolean operations - OP_NOT_BYTE, - OP_NOT_HWORD, - OP_NOT_WORD, - - OP_OR_BYTE, - OP_OR_HWORD, - OP_OR_WORD, - - OP_AND_BYTE, - OP_AND_HWORD, - OP_AND_WORD, - - OP_XOR_BYTE, - OP_XOR_HWORD, - OP_XOR_WORD, - - OP_EQ_BYTE, - OP_EQ_HWORD, - OP_EQ_WORD, - - // Mathematical operations - OP_LT_BYTE, - OP_LT_CHAR, - OP_LT_HWORD, - OP_LT_INT, - OP_LT_LONG, - OP_LT_WORD, - - OP_LTE_BYTE, - OP_LTE_CHAR, - OP_LTE_HWORD, - OP_LTE_INT, - OP_LTE_LONG, - OP_LTE_WORD, - - OP_GT_BYTE, - OP_GT_CHAR, - OP_GT_HWORD, - OP_GT_INT, - OP_GT_LONG, - OP_GT_WORD, - - OP_GTE_BYTE, - OP_GTE_CHAR, - OP_GTE_HWORD, - OP_GTE_INT, - OP_GTE_LONG, - OP_GTE_WORD, - - OP_PLUS_BYTE, - OP_PLUS_HWORD, - OP_PLUS_WORD, - - OP_SUB_BYTE, - OP_SUB_HWORD, - OP_SUB_WORD, - - OP_MULT_BYTE, - OP_MULT_HWORD, - OP_MULT_WORD, - - // Simple I/O - OP_PRINT_BYTE, - OP_PRINT_CHAR, - OP_PRINT_HWORD, - OP_PRINT_INT, 
- OP_PRINT_LONG, - OP_PRINT_WORD, - - // Program control flow - OP_JUMP_ABS, - OP_JUMP_STACK, - OP_JUMP_IF_BYTE, - OP_JUMP_IF_HWORD, - OP_JUMP_IF_WORD, - // Subroutines - OP_CALL, - OP_CALL_STACK, - OP_RET, - - // Should not be an opcode - NUMBER_OF_OPCODES, - OP_HALT = 0b11111111, // top of the byte is a HALT -} opcode_t; - -typedef struct -{ - opcode_t opcode; - data_t operand; -} inst_t; - -typedef struct -{ - word start_address; -} prog_header_t; - -typedef struct -{ - prog_header_t header; - word count; - inst_t instructions[]; -} prog_t; - -#endif diff --git a/spec.org b/spec.org deleted file mode 100644 index 4a995d2..0000000 --- a/spec.org +++ /dev/null @@ -1,88 +0,0 @@ -#+title: VM Specification -#+author: Aryadev Chavali -#+description: A specification of instructions for the virtual machine -#+date: 2023-11-02 - -* WIP Data types -There are 3 main data types of the virtual machine. They are all -unsigned. There exist signed versions of these data types, though -there is no difference (internally) between them. For an unsigned -type the signed version is simply S_. -|-------+------| -| Name | Bits | -|-------+------| -| Byte | 8 | -| HWord | 32 | -| Word | 64 | -|-------+------| - -Generally, the abbreviations B, H and W are used for Byte, HWord and -Word respectively. The following table shows a comparison between the -data types where an entry (row and column) $A\times{B}$ refers to "How -many of A can I fit in B". -|-------+------+-------+------| -| | Byte | Hword | Word | -|-------+------+-------+------| -| Byte | 1 | 4 | 8 | -| HWord | 1/4 | 1 | 2 | -| Word | 1/8 | 1/2 | 1 | -|-------+------+-------+------| -* WIP Instructions -An instruction for the virtual machine is composed of an *opcode* and, -potentially, an *operand*. The /opcode/ represents the behaviour of -the instruction i.e. what _is_ the instruction. The /operand/ is an -element of one of the /data types/ described previously. - -Some instructions do have /operands/ while others do not. 
The former -type of instructions are called *UNIT* instructions while the latter -type are called *MULTI* instructions[fn:1]. - -All /opcodes/ (with very few exceptions[fn:2]) have two components: -the *root* and the *type specifier*. The /root/ represents the -general behaviour of the instruction: ~PUSH~, ~POP~, ~MOV~, etc. The -/type specifier/ specifies what /data type/ it manipulates. A -complete opcode will be a combination of these two e.g. ~PUSH_BYTE~, -~POP_WORD~, etc. Some /opcodes/ may have more /type specifiers/ than -others. -* TODO Bytecode format -Bytecode files are byte sequence which encode instructions for the -virtual machine. Any instruction (even with an operand) has one and -only one byte sequence associated with it. -* TODO Storage -Two types of storage: -+ Data stack which all core VM routines manipulate and work on (FILO) - + ~DS~ in shorthand, with indexing from 0 (referring to the top of the - stack) up to n (referring to the bottom of the stack). B(DS) - refers to the bytes in the stack (the default). -+ Register space which is generally reserved for user space code - i.e. other than ~mov~ no other core VM routine manipulates the - registers - + ~R~ in shorthand, with indexing from 0 to $\infty$. -* TODO Standard library -Standard library subroutines reserve the first 16 words (128 bytes) of -register space (W(R)[0] to W(R)[15]). The first 8 words (W(R)[0] to -W(R)[7]) are generally considered "arguments" to the subroutine while -the remaining 8 words (W(R)[8] to W(R)[15]) are considered additional -space that the subroutine may access and mutate for internal purposes. - -The stack may have additional bytes pushed, which act as the "return -value" of the subroutine, but no bytes will be popped off (*Stack -Preservation*). - -If a subroutine requires more than 8 words for its arguments, then it -will use the stack. 
This is the only case where the stack is mutated -due to a subroutine call, as those arguments will always be popped off -the stack. - -Subroutines must always end in ~RET~. Therefore, they must always be -called via ~CALL~, never by ~JUMP~ (which will always cause error -prone behaviour). -* Footnotes -[fn:2] ~NOOP~, ~HALT~, ~MDELETE~, ~MSIZE~, ~JUMP_*~ - -[fn:1] /UNIT/ refers to the fact that the internal representation of -these instructions are singular: two instances of the same /UNIT/ -instruction will be identical in terms of their binary. On the other -hand, two instances of the same /MULTI/ instruction may not be -equivalent due to the operand they take. Crucially, most if not all -/MULTI/ instructions have different versions for each /data type/. diff --git a/todo.org b/todo.org index 4dc64e2..747eb1a 100644 --- a/todo.org +++ b/todo.org @@ -5,17 +5,9 @@ * TODO Better documentation [0%] :DOC: ** TODO Comment coverage [0%] -*** WIP Lib [50%] -**** DONE lib/base.h -**** DONE lib/darr.h -**** TODO lib/heap.h -**** TODO lib/inst.h *** TODO ASM [0%] **** TODO asm/lexer.h **** TODO asm/parser.h -*** TODO VM [0%] -**** TODO vm/runtime.h -** TODO Specification * TODO Preprocessing directives :ASM: Like in FASM or NASM where we can give certain helpful instructions to the assembler. I'd use the ~%~ symbol to designate preprocessor @@ -200,85 +192,6 @@ process_const(V: Vector[Unit]) -> v = v_x[0] for v_x in V] #+end_src -* TODO Introduce error handling in base library :LIB: -There is a large variety of TODOs about errors. Let's fix them! -8 TODOs currently present. -* TODO Standard library :ASM:VM: -I should start considering this and how a user may use it. Should it -be an option in the VM and/or assembler binaries (i.e. a flag) or -something the user has to specify in their source files? 
- -Something to consider is /static/ and /dynamic/ "linking" i.e.: -+ Static linking: assembler inserts all used library definitions into - the bytecode output directly - + We could insert all of it at the start of the bytecode file, and - with [[*Start points][Start points]] this won't interfere with - user code - + 2023-11-03: Finishing the Start point feature has made these - features more tenable. A program header which is compiled and - interpreted in bytecode works wonders. - + Furthermore library code will have fixed program addresses (always - at the start) so we'll know at start of assembler runtime where to - resolve standard library subroutine calls - + Virtual machine needs no changes to do this -** TODO Consider dynamic Linking -+ Dynamic linking: virtual machine has fixed program storage for - library code (a ROM), and assembler makes jump references - specifically for this program storage - + When assembling subroutine calls, just need to put references to - this library storage (some kind of shared state between VM and - assembler to know what these references are) - + VM needs to manage a ROM of some kind for library code - + How do we ensure assembled links to subroutine calls don't - conflict with user code jumps? - -What follows is a possible dynamic linking strategy. It requires -quite a few moving parts: - -The address operand of every program control instruction (~CALL~, -~JUMP~, ~JUMP.IF~) has a specific encoding if the standard library is -dynamically linked: -+ If the most significant bit is 0, the remaining 63 bits encode an - absolute address within the program -+ Otherwise, the address encodes a standard library subroutine. 
The - bits within the address follow this schema: - + The next 30 bits represent the specific module where the - subroutine is defined (over 1.07 *billion* possible library values) - + The remaining 33 bits (4 bytes + 1 bit) encode the absolute - program address in the bytecode of that specific module for the - start of the subroutine (over 8.60 *billion* values) - -The assembler will automatically encode this based on "%USE" calls and -the name of the subroutines called. On the virtual machine, there is -a storage location (similar to the ROM of real machines) which stores -the bytecode for modules of the standard library, indexed by the -module number. This means, on deserialising the address into the -proper components, the VM can refer to the module bytecode then jump -to the correct address. - -2023-11-09: I'll need a way to run library code in the current program -system in the runtime. It currently doesn't support jumps or work in -programs outside of the main one unfortunately. Any proper work done -in this area requires some proper refactoring. - -2023-11-09: Constants or inline macros need to be reconfigured for -this to work: at parse time, we work out the inlines directly which -means compiling bytecode with "standard library" macros will not work -as they won't be in the token stream. Either we don't allow -preprocessor work in the standard library at all (which is bad cos we -can't then set standard limits or other useful things) or we insert -them into the registries at parse time for use in program parsing -(which not only requires assembler refactoring to figure out what -libraries are used (to pull definitions from) but also requires making -macros "recognisable" in bytecode because they're essentially -invisible). - -2024-04-15: Perhaps we could insert the linking information into the -program header? 
-1) A table which states the load order of certain modules would allow - the runtime to selectively spin up and properly delegate module - jumps to the right bytecode -2) * Completed ** DONE Write a label/jump system :ASM: Essentially a user should be able to write arbitrary labels (maybe diff --git a/vm/main.c b/vm/main.c deleted file mode 100644 index f014325..0000000 --- a/vm/main.c +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-10-15 - * Author: Aryadev Chavali - * Description: Entrypoint to program - */ - -#include -#include -#include - -#include "./runtime.h" -#include - -void usage(const char *program_name, FILE *out) -{ - fprintf(out, - "Usage: %s [OPTIONS] FILE\n" - "\t FILE: Bytecode file to execute\n" - "\tOptions:\n" - "\t\t To be developed...\n", - program_name); -} - -int main(int argc, char *argv[]) -{ - if (argc == 1) - { - usage(argv[0], stderr); - return 1; - } - const char *filename = argv[1]; - -#if VERBOSE >= 1 - printf("[" TERM_YELLOW "INTERPRETER" TERM_RESET "]: `%s`\n", filename); -#endif - - FILE *fp = fopen(filename, "rb"); - prog_t *program = prog_read_file(fp); - fclose(fp); - -#if VERBOSE >= 1 - printf("\t[" TERM_GREEN "SETUP" TERM_RESET "]: Read %lu instructions\n", - program->count); -#endif - - size_t stack_size = 256; - byte *stack = calloc(stack_size, 1); - registers_t registers = {0}; - darr_init(®isters, 8 * WORD_SIZE); - heap_t heap = {0}; - heap_create(&heap); - size_t call_stack_size = 256; - word *call_stack = calloc(call_stack_size, sizeof(call_stack)); - - vm_t vm = {0}; - vm_load_stack(&vm, stack, stack_size); - vm_load_program(&vm, program); - vm_load_registers(&vm, registers); - vm_load_heap(&vm, heap); - vm_load_call_stack(&vm, call_stack, call_stack_size); - -#if 
VERBOSE >= 1 - printf("\t[" TERM_GREEN "SETUP" TERM_RESET - "]: Loaded stack and program into VM\n"); -#endif -#if VERBOSE >= 1 - printf("[" TERM_YELLOW "INTERPRETER" TERM_RESET "]: Beginning execution\n"); -#endif - err_t err = vm_execute_all(&vm); - - int ret = 0; - if (err) - { - const char *error_str = err_as_cstr(err); - fprintf(stderr, "[ERROR]: %s\n", error_str); - vm_print_all(&vm, stderr); - ret = 255 - err; - } - - vm_stop(&vm); - -#if VERBOSE >= 1 - printf("[%sINTERPRETER%s]: Finished execution\n", TERM_GREEN, TERM_RESET); -#endif - return ret; -} diff --git a/vm/runtime.c b/vm/runtime.c deleted file mode 100644 index a005f9e..0000000 --- a/vm/runtime.c +++ /dev/null @@ -1,1070 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. - - * Created: 2023-10-15 - * Author: Aryadev Chavali - * Description: Virtual machine implementation - */ - -#include -#include -#include -#include -#include -#include - -#include "./runtime.h" - -const char *err_as_cstr(err_t err) -{ - switch (err) - { - case ERR_OK: - return "OK"; - case ERR_STACK_UNDERFLOW: - return "STACK_UNDERFLOW"; - case ERR_STACK_OVERFLOW: - return "STACK_OVERFLOW"; - case ERR_CALL_STACK_UNDERFLOW: - return "CALL_STACK_UNDERFLOW"; - case ERR_CALL_STACK_OVERFLOW: - return "CALL_STACK_OVERFLOW"; - case ERR_INVALID_OPCODE: - return "INVALID_OPCODE"; - case ERR_INVALID_REGISTER_BYTE: - return "INVALID_REGISTER_BYTE"; - case ERR_INVALID_REGISTER_HWORD: - return "INVALID_REGISTER_HWORD"; - case ERR_INVALID_REGISTER_WORD: - return "INVALID_REGISTER_WORD"; - case ERR_INVALID_PROGRAM_ADDRESS: - return "INVALID_PROGRAM_ADDRESS"; - case ERR_INVALID_PAGE_ADDRESS: - return "INVALID_PAGE_ADDRESS"; - case ERR_OUT_OF_BOUNDS: - return "OUT_OF_BOUNDS"; - case ERR_END_OF_PROGRAM: - return "END_OF_PROGRAM"; - 
default: - return ""; - } -} - -err_t vm_execute(vm_t *vm) -{ - static_assert(NUMBER_OF_OPCODES == 98, "vm_execute: Out of date"); - struct Program *prog = &vm->program; - if (prog->ptr >= prog->data->count) - return ERR_END_OF_PROGRAM; - inst_t instruction = prog->data->instructions[prog->ptr]; - - if (OPCODE_IS_TYPE(instruction.opcode, OP_PUSH)) - { - prog->ptr++; - return PUSH_ROUTINES[instruction.opcode](vm, instruction.operand); - } - else if (OPCODE_IS_TYPE(instruction.opcode, OP_MOV) || - OPCODE_IS_TYPE(instruction.opcode, OP_PUSH_REGISTER) || - OPCODE_IS_TYPE(instruction.opcode, OP_DUP) || - OPCODE_IS_TYPE(instruction.opcode, OP_MALLOC) || - OPCODE_IS_TYPE(instruction.opcode, OP_MSET) || - OPCODE_IS_TYPE(instruction.opcode, OP_MGET)) - { - err_t err = - WORD_ROUTINES[instruction.opcode](vm, instruction.operand.as_word); - if (err) - return err; - prog->ptr++; - } - else if (OPCODE_IS_TYPE(instruction.opcode, OP_POP)) - { - // NOTE: We use the first register to hold the result of this pop - data_type_t type = OPCODE_DATA_TYPE(instruction.opcode, OP_POP); - err_t err = ERR_OK; - switch (type) - { - case DATA_TYPE_NIL: - break; - case DATA_TYPE_BYTE: - err = vm_mov_byte(vm, 0); - break; - case DATA_TYPE_HWORD: - err = vm_mov_hword(vm, 0); - break; - case DATA_TYPE_WORD: - err = vm_mov_word(vm, 0); - break; - } - if (err) - return err; - prog->ptr++; - } - else if (OPCODE_IS_TYPE(instruction.opcode, OP_NOT) || - OPCODE_IS_TYPE(instruction.opcode, OP_OR) || - OPCODE_IS_TYPE(instruction.opcode, OP_AND) || - OPCODE_IS_TYPE(instruction.opcode, OP_XOR) || - OPCODE_IS_TYPE(instruction.opcode, OP_EQ) || - OPCODE_IS_TYPE(instruction.opcode, OP_LT) || - OPCODE_IS_TYPE(instruction.opcode, OP_LTE) || - OPCODE_IS_TYPE(instruction.opcode, OP_GT) || - OPCODE_IS_TYPE(instruction.opcode, OP_GTE) || - OPCODE_IS_TYPE(instruction.opcode, OP_PLUS) || - OPCODE_IS_TYPE(instruction.opcode, OP_SUB) || - OPCODE_IS_TYPE(instruction.opcode, OP_MULT) || - 
OPCODE_IS_TYPE(instruction.opcode, OP_MALLOC_STACK) || - OPCODE_IS_TYPE(instruction.opcode, OP_MSET_STACK) || - OPCODE_IS_TYPE(instruction.opcode, OP_MGET_STACK) || - instruction.opcode == OP_MDELETE || instruction.opcode == OP_MSIZE) - { - err_t err = STACK_ROUTINES[instruction.opcode](vm); - prog->ptr++; - if (err) - return err; - } - else if (instruction.opcode == OP_JUMP_ABS) - return vm_jump(vm, instruction.operand.as_word); - else if (instruction.opcode == OP_JUMP_STACK) - { - data_t ret = {0}; - // Set prog->ptr to the word on top of the stack - err_t err = vm_pop_word(vm, &ret); - if (err) - return err; - return vm_jump(vm, ret.as_word); - } - else if (OPCODE_IS_TYPE(instruction.opcode, OP_JUMP_IF)) - { - data_t datum = {0}; - err_t err = ERR_OK; - if (instruction.opcode == OP_JUMP_IF_BYTE) - err = vm_pop_byte(vm, &datum); - else if (instruction.opcode == OP_JUMP_IF_HWORD) - err = vm_pop_hword(vm, &datum); - else if (instruction.opcode == OP_JUMP_IF_WORD) - err = vm_pop_word(vm, &datum); - - if (err) - return err; - - // If datum != 0 then jump, else go to the next instruction - if (datum.as_word != 0) - return vm_jump(vm, instruction.operand.as_word); - else - ++prog->ptr; - } - else if (instruction.opcode == OP_CALL) - { - if (vm->call_stack.ptr >= vm->call_stack.max) - return ERR_CALL_STACK_OVERFLOW; - vm->call_stack.address_pointers[vm->call_stack.ptr++] = vm->program.ptr + 1; - return vm_jump(vm, instruction.operand.as_word); - } - else if (instruction.opcode == OP_CALL_STACK) - { - if (vm->call_stack.ptr >= vm->call_stack.max) - return ERR_CALL_STACK_OVERFLOW; - vm->call_stack.address_pointers[vm->call_stack.ptr++] = vm->program.ptr + 1; - data_t ret = {0}; - err_t err = vm_pop_word(vm, &ret); - if (err) - return err; - return vm_jump(vm, ret.as_word); - } - else if (instruction.opcode == OP_RET) - { - if (vm->call_stack.ptr == 0) - return ERR_CALL_STACK_UNDERFLOW; - return vm_jump(vm, vm->call_stack.address_pointers[--vm->call_stack.ptr]); - } - else 
if (OPCODE_IS_TYPE(instruction.opcode, OP_PRINT)) - { - data_t datum = {0}; - enum - { - TYPE_BYTE, - TYPE_CHAR, - TYPE_INT, - TYPE_HWORD, - TYPE_LONG, - TYPE_WORD - } print_type; - err_t err = ERR_OK; - if (instruction.opcode == OP_PRINT_BYTE || - instruction.opcode == OP_PRINT_CHAR) - { - print_type = instruction.opcode == OP_PRINT_BYTE ? TYPE_BYTE : TYPE_CHAR; - err = vm_pop_byte(vm, &datum); - } - else if (instruction.opcode == OP_PRINT_HWORD || - instruction.opcode == OP_PRINT_INT) - { - print_type = instruction.opcode == OP_PRINT_HWORD ? TYPE_HWORD : TYPE_INT; - err = vm_pop_hword(vm, &datum); - } - else if (instruction.opcode == OP_PRINT_WORD || - instruction.opcode == OP_PRINT_LONG) - { - print_type = instruction.opcode == OP_PRINT_WORD ? TYPE_WORD : TYPE_LONG; - err = vm_pop_word(vm, &datum); - } - - if (err) - return err; - - switch (print_type) - { - case TYPE_CHAR: { - printf("%c", datum.as_char); - break; - } - case TYPE_BYTE: - printf("0x%x", datum.as_byte); - break; - case TYPE_INT: { - printf( -#if PRINT_HEX == 1 - "0x%X", -#else - "%" PRId32, -#endif - datum.as_int); - break; - } - case TYPE_HWORD: - printf( -#if PRINT_HEX == 1 - "0x%X", -#else - "%" PRIu32, -#endif - datum.as_hword); - break; - case TYPE_LONG: { - printf( -#if PRINT_HEX == 1 - "0x%dX", -#else - "%" PRId64, -#endif - datum.as_long); - break; - } - case TYPE_WORD: - printf( -#if PRINT_HEX == 1 - "0x%lX", -#else - "%" PRIu64, -#endif - datum.as_word); - break; - } - - prog->ptr++; - } - else if (instruction.opcode == OP_HALT) - { - // Do nothing here. 
Should be caught by callers of vm_execute - } - else - return ERR_INVALID_OPCODE; - return ERR_OK; -} - -err_t vm_execute_all(vm_t *vm) -{ - struct Program *program = &vm->program; - err_t err = ERR_OK; - // Setup the initial address according to the program - program->ptr = program->data->header.start_address; -#if VERBOSE >= 1 - size_t cycles = 0; -#endif -#if VERBOSE >= 2 - registers_t prev_registers = vm->registers; - size_t prev_sptr = 0; - size_t prev_pages = 0; - size_t prev_cptr = 0; -#endif - while (program->ptr < program->data->count && - program->data->instructions[program->ptr].opcode != OP_HALT) - { -#if VERBOSE >= 2 - fprintf(stdout, "[vm_execute_all]: Trace(Cycle %lu)\n", cycles); - fputs( - "----------------------------------------------------------------------" - "----------\n", - stdout); - vm_print_program(vm, stdout); - fputs( - "----------------------------------------------------------------------" - "----------\n", - stdout); - if (prev_cptr != vm->call_stack.ptr) - { - vm_print_call_stack(vm, stdout); - prev_cptr = vm->call_stack.ptr; - fputs("------------------------------------------------------------------" - "----" - "----------\n", - stdout); - } - if (prev_pages != vm->heap.pages) - { - vm_print_heap(vm, stdout); - prev_pages = vm->heap.pages; - fputs("------------------------------------------------------------------" - "----" - "----------\n", - stdout); - } - if (memcmp(&prev_registers, &vm->registers, sizeof(darr_t)) != 0) - { - vm_print_registers(vm, stdout); - prev_registers = vm->registers; - fputs("------------------------------------------------------------------" - "----" - "----------\n", - stdout); - } - if (prev_sptr != vm->stack.ptr) - { - vm_print_stack(vm, stdout); - prev_sptr = vm->stack.ptr; - fputs("------------------------------------------------------------------" - "----" - "----------\n", - stdout); - } -#endif -#if VERBOSE >= 1 - ++cycles; -#endif - err = vm_execute(vm); - if (err) - return err; - } - -#if 
VERBOSE >= 1 - fprintf(stdout, "[%svm_execute_all%s]: Final VM state(Cycle %lu)\n", - TERM_YELLOW, TERM_RESET, cycles); - vm_print_all(vm, stdout); -#endif - return err; -} - -void vm_load_stack(vm_t *vm, byte *bytes, size_t size) -{ - vm->stack.data = bytes; - vm->stack.max = size; - vm->stack.ptr = 0; -} - -void vm_load_program(vm_t *vm, prog_t *program) -{ - vm->program.ptr = 0; - vm->program.data = program; -} - -void vm_load_registers(vm_t *vm, registers_t registers) -{ - vm->registers = registers; -} - -void vm_load_heap(vm_t *vm, heap_t heap) -{ - vm->heap = heap; -} - -void vm_load_call_stack(vm_t *vm, word *buffer, size_t size) -{ - vm->call_stack = - (struct CallStack){.address_pointers = buffer, .ptr = 0, .max = size}; -} - -void vm_stop(vm_t *vm) -{ -#if VERBOSE >= 1 - bool leaks = false; - printf("[" TERM_YELLOW "DATA" TERM_RESET "]: Checking for leaks...\n"); - if (vm->call_stack.ptr > 0) - { - leaks = true; - printf("\t[" TERM_RED "DATA" TERM_RESET "]: Call stack at %lu\n\t[" TERM_RED - "DATA" TERM_RESET "]\n\t[" TERM_RED "DATA" TERM_RESET "]: Call " - "stack trace:", - vm->call_stack.ptr); - for (size_t i = vm->call_stack.ptr; i > 0; --i) - { - word w = vm->call_stack.address_pointers[i - 1]; - printf("\t\t%lu: %lX", vm->call_stack.ptr - i, w); - if (i != 1) - printf(", "); - printf("\n"); - } - } - if (vm->heap.pages > 0) - { - leaks = true; - page_t *cur = vm->heap.beg; - size_t capacities[vm->heap.pages], total_capacity = 0; - for (size_t i = 0; i < vm->heap.pages; ++i) - { - capacities[i] = cur->available; - total_capacity += capacities[i]; - } - printf("\t[" TERM_RED "DATA" TERM_RESET - "]: Heap: %luB (over %lu %s) not reclaimed\n", - total_capacity, vm->heap.pages, - vm->heap.pages == 1 ? 
"page" : "pages"); - for (size_t i = 0; i < vm->heap.pages; i++) - printf("\t\t[%lu]: %luB lost\n", i, capacities[i]); - } - if (vm->stack.ptr > 0) - { - leaks = true; - printf("\t[" TERM_RED "DATA" TERM_RESET "]: Stack: %luB not reclaimed\n", - vm->stack.ptr); - } - if (leaks) - printf("[" TERM_RED "DATA" TERM_RESET "]: Leaks found\n"); - else - printf("[" TERM_GREEN "DATA" TERM_RESET "]: No leaks found\n"); -#endif - - free(vm->registers.data); - free(vm->program.data); - free(vm->stack.data); - heap_stop(&vm->heap); - free(vm->call_stack.address_pointers); - - vm->registers = (registers_t){0}; - vm->program = (struct Program){0}; - vm->stack = (struct Stack){0}; - vm->heap = (heap_t){0}; -} - -void vm_print_registers(vm_t *vm, FILE *fp) -{ - registers_t reg = vm->registers; - fprintf( - fp, - "Registers.used = %luB/%luH/%luW\nRegisters.available = %luB/%luH/%luW\n", - vm->registers.used, vm->registers.used / HWORD_SIZE, - vm->registers.used / WORD_SIZE, vm->registers.available, - vm->registers.available / HWORD_SIZE, - vm->registers.available / WORD_SIZE); - fprintf(fp, "Registers.reg = ["); - for (size_t i = 0; i < ceil((long double)reg.used / WORD_SIZE); ++i) - { - fprintf(fp, "{%lu:%lX}", i, VM_NTH_REGISTER(reg, i)); - if (i != reg.used - 1) - fprintf(fp, ", "); - } - fprintf(fp, "]\n"); -} - -void vm_print_stack(vm_t *vm, FILE *fp) -{ - struct Stack stack = vm->stack; - fprintf(fp, "Stack.max = %lu\nStack.ptr = %lu\nStack.data = [", stack.max, - stack.ptr); - if (stack.ptr == 0) - { - fprintf(fp, "]\n"); - return; - } - printf("\n"); - for (size_t i = stack.ptr; i > 0; --i) - { - byte b = stack.data[i - 1]; - fprintf(fp, "\t%lu: %X", stack.ptr - i, b); - if (i != 1) - fprintf(fp, ", "); - fprintf(fp, "\n"); - } - fprintf(fp, "]\n"); -} - -void vm_print_program(vm_t *vm, FILE *fp) -{ - struct Program program = vm->program; - fprintf(fp, - "Program.max = %lu\nProgram.ptr = " - "%lu\nProgram.instructions = [\n", - program.data->count, program.ptr); - size_t beg 
= 0; - if (program.ptr >= VM_PRINT_PROGRAM_EXCERPT) - { - fprintf(fp, "\t...\n"); - beg = program.ptr - VM_PRINT_PROGRAM_EXCERPT; - } - else - beg = 0; - size_t end = MIN(program.ptr + VM_PRINT_PROGRAM_EXCERPT, program.data->count); - for (size_t i = beg; i < end; ++i) - { - fprintf(fp, "\t%lu: ", i); - inst_print(program.data->instructions[i], fp); - if (i == program.ptr) - fprintf(fp, " <---"); - fprintf(fp, "\n"); - } - if (end != program.data->count) - fprintf(fp, "\t...\n"); - fprintf(fp, "]\n"); -} - -void vm_print_heap(vm_t *vm, FILE *fp) -{ - heap_t heap = vm->heap; - fprintf(fp, "Heap.pages = %lu\nHeap.data = [", heap.pages); - if (heap.pages == 0) - { - fprintf(fp, "]\n"); - return; - } - page_t *cur = heap.beg; - fprintf(fp, "\n"); - for (size_t i = 0; i < heap.pages; ++i) - { - fprintf(fp, "\t[%lu]@%p: ", i, cur); - if (!cur) - fprintf(fp, "\n"); - else - { - fprintf(fp, "{"); - for (size_t j = 0; j < cur->available; ++j) - { - if ((j % 8) == 0) - fprintf(fp, "\n\t\t"); - fprintf(fp, "%x", cur->data[j]); - if (j != cur->available - 1) - fprintf(fp, ",\t"); - } - fprintf(fp, "\n\t}\n"); - cur = cur->next; - } - } - fprintf(fp, "]\n"); -} - -void vm_print_call_stack(vm_t *vm, FILE *fp) -{ - struct CallStack cs = vm->call_stack; - fprintf(fp, "CallStack.max = %lu\nCallStack.ptr = %lu\nCallStack.data = [", - cs.max, cs.ptr); - if (cs.ptr == 0) - { - fprintf(fp, "]\n"); - return; - } - printf("\n"); - for (size_t i = cs.ptr; i > 0; --i) - { - word w = cs.address_pointers[i - 1]; - fprintf(fp, "\t%lu: %lX", cs.ptr - i, w); - if (i != 1) - fprintf(fp, ", "); - fprintf(fp, "\n"); - } - fprintf(fp, "]\n"); -} - -void vm_print_all(vm_t *vm, FILE *fp) -{ - fputs("----------------------------------------------------------------------" - "----------\n", - fp); - vm_print_program(vm, fp); - fputs("----------------------------------------------------------------------" - "----------\n", - fp); - vm_print_call_stack(vm, fp); - 
fputs("----------------------------------------------------------------------" - "----------\n", - fp); - vm_print_heap(vm, fp); - fputs("----------------------------------------------------------------------" - "----------\n", - fp); - vm_print_registers(vm, fp); - fputs("----------------------------------------------------------------------" - "----------\n", - fp); - vm_print_stack(vm, fp); - fputs("----------------------------------------------------------------------" - "----------\n", - fp); -} - -err_t vm_jump(vm_t *vm, word w) -{ - if (w >= vm->program.data->count) - return ERR_INVALID_PROGRAM_ADDRESS; - vm->program.ptr = w; - return ERR_OK; -} - -err_t vm_push_byte(vm_t *vm, data_t b) -{ - if (vm->stack.ptr >= vm->stack.max) - return ERR_STACK_OVERFLOW; - vm->stack.data[vm->stack.ptr++] = b.as_byte; - return ERR_OK; -} - -err_t vm_push_hword(vm_t *vm, data_t f) -{ - if (vm->stack.ptr + HWORD_SIZE >= vm->stack.max) - return ERR_STACK_OVERFLOW; - byte bytes[HWORD_SIZE] = {0}; - convert_hword_to_bytes(f.as_hword, bytes); - for (size_t i = 0; i < HWORD_SIZE; ++i) - { - byte b = bytes[HWORD_SIZE - i - 1]; - err_t err = vm_push_byte(vm, DBYTE(b)); - if (err) - return err; - } - return ERR_OK; -} - -err_t vm_push_word(vm_t *vm, data_t w) -{ - if (vm->stack.ptr + WORD_SIZE >= vm->stack.max) - return ERR_STACK_OVERFLOW; - byte bytes[WORD_SIZE] = {0}; - convert_word_to_bytes(w.as_word, bytes); - for (size_t i = 0; i < WORD_SIZE; ++i) - { - byte b = bytes[WORD_SIZE - i - 1]; - err_t err = vm_push_byte(vm, DBYTE(b)); - if (err) - return err; - } - return ERR_OK; -} - -err_t vm_push_byte_register(vm_t *vm, word reg) -{ - if (reg > vm->registers.used) - return ERR_INVALID_REGISTER_BYTE; - - // Interpret each word based register as 8 byte registers - byte b = vm->registers.data[reg]; - - return vm_push_byte(vm, DBYTE(b)); -} - -err_t vm_push_hword_register(vm_t *vm, word reg) -{ - if (reg > (vm->registers.used / HWORD_SIZE)) - return ERR_INVALID_REGISTER_HWORD; - // 
Interpret the bytes at point reg * HWORD_SIZE as an hword - hword hw = *(hword *)(vm->registers.data + (reg * HWORD_SIZE)); - return vm_push_hword(vm, DHWORD(hw)); -} - -err_t vm_push_word_register(vm_t *vm, word reg) -{ - if (reg > (vm->registers.used / WORD_SIZE)) - return ERR_INVALID_REGISTER_WORD; - return vm_push_word(vm, DWORD(VM_NTH_REGISTER(vm->registers, reg))); -} - -err_t vm_mov_byte(vm_t *vm, word reg) -{ - if (reg >= vm->registers.used) - { - // Expand capacity - darr_ensure_capacity(&vm->registers, reg - vm->registers.used); - vm->registers.used = MAX(vm->registers.used, reg + 1); - } - data_t ret = {0}; - err_t err = vm_pop_byte(vm, &ret); - if (err) - return err; - vm->registers.data[reg] = ret.as_byte; - return ERR_OK; -} - -err_t vm_mov_hword(vm_t *vm, word reg) -{ - if (reg >= (vm->registers.used / HWORD_SIZE)) - { - // Expand capacity till we can ensure that this is a valid - // register to use - - // Number of hwords needed ontop of what is allocated: - const size_t hwords = (reg - (vm->registers.used / HWORD_SIZE)); - // Number of bytes needed ontop of what is allocated - const size_t diff = (hwords + 1) * HWORD_SIZE; - - darr_ensure_capacity(&vm->registers, diff); - vm->registers.used = MAX(vm->registers.used, (reg + 1) * HWORD_SIZE); - } - data_t ret = {0}; - err_t err = vm_pop_hword(vm, &ret); - if (err) - return err; - // Here we treat vm->registers as a set of hwords - hword *hword_ptr = (hword *)(vm->registers.data + (reg * HWORD_SIZE)); - *hword_ptr = ret.as_hword; - return ERR_OK; -} - -err_t vm_mov_word(vm_t *vm, word reg) -{ - if (reg >= (vm->registers.used / WORD_SIZE)) - { - // Number of hwords needed ontop of what is allocated: - const size_t words = (reg - (vm->registers.used / WORD_SIZE)); - // Number of bytes needed ontop of what is allocated - const size_t diff = (words + 1) * WORD_SIZE; - - darr_ensure_capacity(&vm->registers, diff); - vm->registers.used = MAX(vm->registers.used, (reg + 1) * WORD_SIZE); - } - else if 
(vm->stack.ptr < sizeof(word)) - return ERR_STACK_UNDERFLOW; - data_t ret = {0}; - err_t err = vm_pop_word(vm, &ret); - if (err) - return err; - ((word *)(vm->registers.data))[reg] = ret.as_word; - return ERR_OK; -} - -err_t vm_dup_byte(vm_t *vm, word w) -{ - if (vm->stack.ptr < w + 1) - return ERR_STACK_UNDERFLOW; - return vm_push_byte(vm, DBYTE(vm->stack.data[vm->stack.ptr - 1 - w])); -} - -err_t vm_dup_hword(vm_t *vm, word w) -{ - if (vm->stack.ptr < HWORD_SIZE * (w + 1)) - return ERR_STACK_UNDERFLOW; - byte bytes[HWORD_SIZE] = {0}; - for (size_t i = 0; i < HWORD_SIZE; ++i) - bytes[HWORD_SIZE - i - 1] = - vm->stack.data[vm->stack.ptr - (HWORD_SIZE * (w + 1)) + i]; - return vm_push_hword(vm, DHWORD(convert_bytes_to_hword(bytes))); -} - -err_t vm_dup_word(vm_t *vm, word w) -{ - if (vm->stack.ptr < WORD_SIZE * (w + 1)) - return ERR_STACK_UNDERFLOW; - byte bytes[WORD_SIZE] = {0}; - for (size_t i = 0; i < WORD_SIZE; ++i) - bytes[WORD_SIZE - i - 1] = - vm->stack.data[vm->stack.ptr - (WORD_SIZE * (w + 1)) + i]; - return vm_push_word(vm, DWORD(convert_bytes_to_word(bytes))); -} - -err_t vm_malloc_byte(vm_t *vm, word n) -{ - page_t *page = heap_allocate(&vm->heap, n); - return vm_push_word(vm, DWORD((word)page)); -} - -err_t vm_malloc_hword(vm_t *vm, word n) -{ - page_t *page = heap_allocate(&vm->heap, n * HWORD_SIZE); - return vm_push_word(vm, DWORD((word)page)); -} - -err_t vm_malloc_word(vm_t *vm, word n) -{ - page_t *page = heap_allocate(&vm->heap, n * WORD_SIZE); - return vm_push_word(vm, DWORD((word)page)); -} - -err_t vm_mset_byte(vm_t *vm, word nth) -{ - // Stack layout should be [BYTE, PTR] - data_t byte = {0}; - err_t err = vm_pop_byte(vm, &byte); - if (err) - return err; - data_t ptr = {0}; - err = vm_pop_word(vm, &ptr); - if (err) - return err; - - page_t *page = (page_t *)ptr.as_word; - if (nth >= page->available) - return ERR_OUT_OF_BOUNDS; - page->data[nth] = byte.as_byte; - - return ERR_OK; -} - -err_t vm_mset_hword(vm_t *vm, word nth) -{ - // Stack 
layout should be [HWORD, PTR] - data_t byte = {0}; - err_t err = vm_pop_hword(vm, &byte); - if (err) - return err; - data_t ptr = {0}; - err = vm_pop_word(vm, &ptr); - if (err) - return err; - - page_t *page = (page_t *)ptr.as_word; - if (nth >= (page->available / HWORD_SIZE)) - return ERR_OUT_OF_BOUNDS; - ((hword *)page->data)[nth] = byte.as_hword; - - return ERR_OK; -} - -err_t vm_mset_word(vm_t *vm, word nth) -{ - // Stack layout should be [WORD, PTR] - data_t byte = {0}; - err_t err = vm_pop_word(vm, &byte); - if (err) - return err; - data_t ptr = {0}; - err = vm_pop_word(vm, &ptr); - if (err) - return err; - - page_t *page = (page_t *)ptr.as_word; - if (nth >= (page->available / WORD_SIZE)) - return ERR_OUT_OF_BOUNDS; - ((word *)page->data)[nth] = byte.as_word; - - return ERR_OK; -} - -err_t vm_mget_byte(vm_t *vm, word n) -{ - // Stack layout should be [PTR] - data_t ptr = {0}; - err_t err = vm_pop_word(vm, &ptr); - if (err) - return err; - page_t *page = (page_t *)ptr.as_word; - if (n >= page->available) - return ERR_OUT_OF_BOUNDS; - return vm_push_byte(vm, DBYTE(page->data[n])); -} - -err_t vm_mget_hword(vm_t *vm, word n) -{ - // Stack layout should be [PTR] - data_t ptr = {0}; - err_t err = vm_pop_word(vm, &ptr); - if (err) - return err; - page_t *page = (page_t *)ptr.as_word; - if (n >= (page->available / HWORD_SIZE)) - return ERR_OUT_OF_BOUNDS; - return vm_push_hword(vm, DHWORD(((hword *)page->data)[n])); -} - -err_t vm_mget_word(vm_t *vm, word n) -{ - // Stack layout should be [PTR] - data_t ptr = {0}; - err_t err = vm_pop_word(vm, &ptr); - if (err) - return err; - printf("%lx\n", ptr.as_word); - page_t *page = (page_t *)ptr.as_word; - if (n >= (page->available / WORD_SIZE)) - return ERR_OUT_OF_BOUNDS; - return vm_push_word(vm, DWORD(((word *)page->data)[n])); -} - -err_t vm_pop_byte(vm_t *vm, data_t *ret) -{ - if (vm->stack.ptr == 0) - return ERR_STACK_UNDERFLOW; - *ret = DBYTE(vm->stack.data[--vm->stack.ptr]); - return ERR_OK; -} - -err_t 
vm_pop_hword(vm_t *vm, data_t *ret) -{ - if (vm->stack.ptr < HWORD_SIZE) - return ERR_STACK_UNDERFLOW; - byte bytes[HWORD_SIZE] = {0}; - for (size_t i = 0; i < HWORD_SIZE; ++i) - { - data_t b = {0}; - vm_pop_byte(vm, &b); - bytes[i] = b.as_byte; - } - *ret = DHWORD(convert_bytes_to_hword(bytes)); - return ERR_OK; -} - -err_t vm_pop_word(vm_t *vm, data_t *ret) -{ - if (vm->stack.ptr < WORD_SIZE) - return ERR_STACK_UNDERFLOW; - byte bytes[WORD_SIZE] = {0}; - for (size_t i = 0; i < WORD_SIZE; ++i) - { - data_t b = {0}; - vm_pop_byte(vm, &b); - bytes[i] = b.as_byte; - } - *ret = DWORD(convert_bytes_to_word(bytes)); - return ERR_OK; -} - -// TODO: rename this to something more appropriate -#define VM_MEMORY_STACK_CONSTR(ACTION, TYPE) \ - err_t vm_##ACTION##_stack_##TYPE(vm_t *vm) \ - { \ - data_t n = {0}; \ - err_t err = vm_pop_word(vm, &n); \ - if (err) \ - return err; \ - return vm_##ACTION##_##TYPE(vm, n.as_word); \ - } - -VM_MEMORY_STACK_CONSTR(malloc, byte) -VM_MEMORY_STACK_CONSTR(malloc, hword) -VM_MEMORY_STACK_CONSTR(malloc, word) -VM_MEMORY_STACK_CONSTR(mset, byte) -VM_MEMORY_STACK_CONSTR(mset, hword) -VM_MEMORY_STACK_CONSTR(mset, word) -VM_MEMORY_STACK_CONSTR(mget, byte) -VM_MEMORY_STACK_CONSTR(mget, hword) -VM_MEMORY_STACK_CONSTR(mget, word) - -err_t vm_mdelete(vm_t *vm) -{ - data_t ptr = {0}; - err_t err = vm_pop_word(vm, &ptr); - if (err) - return err; - page_t *page = (page_t *)ptr.as_word; - bool done = heap_free_page(&vm->heap, page); - if (!done) - return ERR_INVALID_PAGE_ADDRESS; - return ERR_OK; -} - -err_t vm_msize(vm_t *vm) -{ - data_t ptr = {0}; - err_t err = vm_pop_word(vm, &ptr); - if (err) - return err; - page_t *page = (page_t *)ptr.as_word; - return vm_push_word(vm, DWORD(page->available)); -} - -// TODO: rename this to something more appropriate -#define VM_NOT_TYPE(TYPEL, TYPEU) \ - err_t vm_not_##TYPEL(vm_t *vm) \ - { \ - data_t a = {0}; \ - err_t err = vm_pop_##TYPEL(vm, &a); \ - if (err) \ - return err; \ - return vm_push_##TYPEL(vm, 
D##TYPEU(!a.as_##TYPEL)); \ - } - -VM_NOT_TYPE(byte, BYTE) -VM_NOT_TYPE(hword, HWORD) -VM_NOT_TYPE(word, WORD) - -// TODO: rename this to something more appropriate -#define VM_SAME_TYPE(COMPNAME, COMP, TYPEL, TYPEU) \ - err_t vm_##COMPNAME##_##TYPEL(vm_t *vm) \ - { \ - data_t a = {0}, b = {0}; \ - err_t err = vm_pop_##TYPEL(vm, &a); \ - if (err) \ - return err; \ - err = vm_pop_##TYPEL(vm, &b); \ - if (err) \ - return err; \ - return vm_push_##TYPEL(vm, D##TYPEU(a.as_##TYPEL COMP b.as_##TYPEL)); \ - } - -// TODO: rename this to something more appropriate -#define VM_COMPARATOR_TYPE(COMPNAME, COMP, TYPEL, GETL) \ - err_t vm_##COMPNAME##_##GETL(vm_t *vm) \ - { \ - data_t a = {0}, b = {0}; \ - err_t err = vm_pop_##TYPEL(vm, &a); \ - if (err) \ - return err; \ - err = vm_pop_##TYPEL(vm, &b); \ - if (err) \ - return err; \ - return vm_push_byte(vm, DBYTE(b.as_##GETL COMP a.as_##GETL)); \ - } - -VM_SAME_TYPE(or, |, byte, BYTE) -VM_SAME_TYPE(or, |, hword, HWORD) -VM_SAME_TYPE(or, |, word, WORD) -VM_SAME_TYPE(and, &, byte, BYTE) -VM_SAME_TYPE(and, &, hword, HWORD) -VM_SAME_TYPE(and, &, word, WORD) -VM_SAME_TYPE(xor, ^, byte, BYTE) -VM_SAME_TYPE(xor, ^, hword, HWORD) -VM_SAME_TYPE(xor, ^, word, WORD) - -VM_SAME_TYPE(plus, +, byte, BYTE) -VM_SAME_TYPE(plus, +, hword, HWORD) -VM_SAME_TYPE(plus, +, word, WORD) - -VM_SAME_TYPE(sub, -, byte, BYTE) -VM_SAME_TYPE(sub, -, hword, HWORD) -VM_SAME_TYPE(sub, -, word, WORD) - -VM_SAME_TYPE(mult, *, byte, BYTE) -VM_SAME_TYPE(mult, *, hword, HWORD) -VM_SAME_TYPE(mult, *, word, WORD) - -VM_COMPARATOR_TYPE(eq, ==, byte, byte) -VM_COMPARATOR_TYPE(eq, ==, byte, char) -VM_COMPARATOR_TYPE(eq, ==, hword, hword) -VM_COMPARATOR_TYPE(eq, ==, hword, int) -VM_COMPARATOR_TYPE(eq, ==, word, word) -VM_COMPARATOR_TYPE(eq, ==, word, long) - -VM_COMPARATOR_TYPE(lt, <, byte, byte) -VM_COMPARATOR_TYPE(lt, <, byte, char) -VM_COMPARATOR_TYPE(lt, <, hword, hword) -VM_COMPARATOR_TYPE(lt, <, hword, int) -VM_COMPARATOR_TYPE(lt, <, word, word) 
-VM_COMPARATOR_TYPE(lt, <, word, long) - -VM_COMPARATOR_TYPE(lte, <=, byte, byte) -VM_COMPARATOR_TYPE(lte, <=, byte, char) -VM_COMPARATOR_TYPE(lte, <=, hword, hword) -VM_COMPARATOR_TYPE(lte, <=, hword, int) -VM_COMPARATOR_TYPE(lte, <=, word, word) -VM_COMPARATOR_TYPE(lte, <=, word, long) - -VM_COMPARATOR_TYPE(gt, >, byte, byte) -VM_COMPARATOR_TYPE(gt, >, byte, char) -VM_COMPARATOR_TYPE(gt, >, hword, hword) -VM_COMPARATOR_TYPE(gt, >, hword, int) -VM_COMPARATOR_TYPE(gt, >, word, word) -VM_COMPARATOR_TYPE(gt, >, word, long) - -VM_COMPARATOR_TYPE(gte, >=, byte, byte) -VM_COMPARATOR_TYPE(gte, >=, byte, char) -VM_COMPARATOR_TYPE(gte, >=, hword, hword) -VM_COMPARATOR_TYPE(gte, >=, hword, int) -VM_COMPARATOR_TYPE(gte, >=, word, word) -VM_COMPARATOR_TYPE(gte, >=, word, long) diff --git a/vm/runtime.h b/vm/runtime.h deleted file mode 100644 index 7fd72b8..0000000 --- a/vm/runtime.h +++ /dev/null @@ -1,301 +0,0 @@ -/* Copyright (C) 2023 Aryadev Chavali - - * You may distribute and modify this code under the terms of the - * GPLv2 license. You should have received a copy of the GPLv2 - * license with this file. If not, please write to: - * aryadev@aryadevchavali.com. 
- - * Created: 2023-10-15 - * Author: Aryadev Chavali - * Description: Virtual machine implementation - */ - -#ifndef RUNTIME_H -#define RUNTIME_H - -#include -#include - -#include -#include - -typedef enum -{ - ERR_OK = 0, - ERR_STACK_UNDERFLOW, - ERR_STACK_OVERFLOW, - ERR_CALL_STACK_UNDERFLOW, - ERR_CALL_STACK_OVERFLOW, - ERR_INVALID_OPCODE, - ERR_INVALID_REGISTER_BYTE, - ERR_INVALID_REGISTER_HWORD, - ERR_INVALID_REGISTER_WORD, - ERR_INVALID_PROGRAM_ADDRESS, - ERR_INVALID_PAGE_ADDRESS, - ERR_OUT_OF_BOUNDS, - ERR_END_OF_PROGRAM, -} err_t; - -const char *err_as_cstr(err_t); - -typedef darr_t registers_t; -#define VM_NTH_REGISTER(REGISTERS, N) (((word *)((REGISTERS).data))[N]) -#define VM_REGISTERS_AVAILABLE(REGISTERS) (((REGISTERS).available) / WORD_SIZE) - -typedef struct -{ - registers_t registers; - struct Stack - { - byte *data; - size_t ptr, max; - } stack; - heap_t heap; - struct Program - { - prog_t *data; - word ptr; - } program; - struct CallStack - { - word *address_pointers; - size_t ptr, max; - } call_stack; -} vm_t; - -err_t vm_execute(vm_t *); -err_t vm_execute_all(vm_t *); - -void vm_load_stack(vm_t *, byte *, size_t); -void vm_load_registers(vm_t *, registers_t); -void vm_load_heap(vm_t *, heap_t); -void vm_load_program(vm_t *, prog_t *); -void vm_load_call_stack(vm_t *, word *, size_t); -void vm_stop(vm_t *); - -// Print routines -#define VM_PRINT_PROGRAM_EXCERPT 5 -void vm_print_registers(vm_t *, FILE *); -void vm_print_stack(vm_t *, FILE *); -void vm_print_program(vm_t *, FILE *); -void vm_print_heap(vm_t *, FILE *); -void vm_print_call_stack(vm_t *, FILE *); -void vm_print_all(vm_t *, FILE *); - -// Execution routines -err_t vm_jump(vm_t *, word); - -err_t vm_pop_byte(vm_t *, data_t *); -err_t vm_pop_hword(vm_t *, data_t *); -err_t vm_pop_word(vm_t *, data_t *); - -err_t vm_push_byte(vm_t *, data_t); -err_t vm_push_hword(vm_t *, data_t); -err_t vm_push_word(vm_t *, data_t); - -typedef err_t (*push_f)(vm_t *, data_t); -static const push_f 
PUSH_ROUTINES[] = { - [OP_PUSH_BYTE] = vm_push_byte, - [OP_PUSH_HWORD] = vm_push_hword, - [OP_PUSH_WORD] = vm_push_word, -}; - -err_t vm_push_byte_register(vm_t *, word); -err_t vm_push_hword_register(vm_t *, word); -err_t vm_push_word_register(vm_t *, word); - -err_t vm_mov_byte(vm_t *, word); -err_t vm_mov_hword(vm_t *, word); -err_t vm_mov_word(vm_t *, word); - -err_t vm_dup_byte(vm_t *, word); -err_t vm_dup_hword(vm_t *, word); -err_t vm_dup_word(vm_t *, word); - -err_t vm_malloc_byte(vm_t *, word); -err_t vm_malloc_hword(vm_t *, word); -err_t vm_malloc_word(vm_t *, word); - -err_t vm_mset_byte(vm_t *, word); -err_t vm_mset_hword(vm_t *, word); -err_t vm_mset_word(vm_t *, word); - -err_t vm_mget_byte(vm_t *, word); -err_t vm_mget_hword(vm_t *, word); -err_t vm_mget_word(vm_t *, word); - -typedef err_t (*word_f)(vm_t *, word); -static const word_f WORD_ROUTINES[] = { - [OP_PUSH_REGISTER_BYTE] = vm_push_byte_register, - [OP_PUSH_REGISTER_HWORD] = vm_push_hword_register, - [OP_PUSH_REGISTER_WORD] = vm_push_word_register, - [OP_MOV_BYTE] = vm_mov_byte, - [OP_MOV_HWORD] = vm_mov_hword, - [OP_MOV_WORD] = vm_mov_word, - [OP_DUP_BYTE] = vm_dup_byte, - [OP_DUP_HWORD] = vm_dup_hword, - [OP_DUP_WORD] = vm_dup_word, - [OP_MALLOC_BYTE] = vm_malloc_byte, - [OP_MALLOC_HWORD] = vm_malloc_hword, - [OP_MALLOC_WORD] = vm_malloc_word, - [OP_MGET_BYTE] = vm_mget_byte, - [OP_MGET_HWORD] = vm_mget_hword, - [OP_MGET_WORD] = vm_mget_word, - [OP_MSET_BYTE] = vm_mset_byte, - [OP_MSET_HWORD] = vm_mset_hword, - [OP_MSET_WORD] = vm_mset_word, -}; - -err_t vm_malloc_stack_byte(vm_t *); -err_t vm_malloc_stack_hword(vm_t *); -err_t vm_malloc_stack_word(vm_t *); - -err_t vm_mset_stack_byte(vm_t *); -err_t vm_mset_stack_hword(vm_t *); -err_t vm_mset_stack_word(vm_t *); - -err_t vm_mget_stack_byte(vm_t *); -err_t vm_mget_stack_hword(vm_t *); -err_t vm_mget_stack_word(vm_t *); - -err_t vm_mdelete(vm_t *); -err_t vm_msize(vm_t *); - -err_t vm_not_byte(vm_t *); -err_t vm_not_hword(vm_t *); -err_t 
vm_not_word(vm_t *); - -err_t vm_or_byte(vm_t *); -err_t vm_or_hword(vm_t *); -err_t vm_or_word(vm_t *); - -err_t vm_and_byte(vm_t *); -err_t vm_and_hword(vm_t *); -err_t vm_and_word(vm_t *); - -err_t vm_xor_byte(vm_t *); -err_t vm_xor_hword(vm_t *); -err_t vm_xor_word(vm_t *); - -err_t vm_eq_byte(vm_t *); -err_t vm_eq_char(vm_t *); -err_t vm_eq_int(vm_t *); -err_t vm_eq_hword(vm_t *); -err_t vm_eq_long(vm_t *); -err_t vm_eq_word(vm_t *); - -err_t vm_lt_byte(vm_t *); -err_t vm_lt_char(vm_t *); -err_t vm_lt_int(vm_t *); -err_t vm_lt_hword(vm_t *); -err_t vm_lt_long(vm_t *); -err_t vm_lt_word(vm_t *); - -err_t vm_lte_byte(vm_t *); -err_t vm_lte_char(vm_t *); -err_t vm_lte_int(vm_t *); -err_t vm_lte_hword(vm_t *); -err_t vm_lte_long(vm_t *); -err_t vm_lte_word(vm_t *); - -err_t vm_gt_byte(vm_t *); -err_t vm_gt_char(vm_t *); -err_t vm_gt_int(vm_t *); -err_t vm_gt_hword(vm_t *); -err_t vm_gt_long(vm_t *); -err_t vm_gt_word(vm_t *); - -err_t vm_gte_byte(vm_t *); -err_t vm_gte_char(vm_t *); -err_t vm_gte_int(vm_t *); -err_t vm_gte_hword(vm_t *); -err_t vm_gte_long(vm_t *); -err_t vm_gte_word(vm_t *); - -err_t vm_plus_byte(vm_t *); -err_t vm_plus_hword(vm_t *); -err_t vm_plus_word(vm_t *); - -err_t vm_sub_byte(vm_t *); -err_t vm_sub_hword(vm_t *); -err_t vm_sub_word(vm_t *); - -err_t vm_mult_byte(vm_t *); -err_t vm_mult_hword(vm_t *); -err_t vm_mult_word(vm_t *); - -typedef err_t (*stack_f)(vm_t *); -static const stack_f STACK_ROUTINES[] = { - [OP_MALLOC_STACK_BYTE] = vm_malloc_stack_byte, - [OP_MALLOC_STACK_HWORD] = vm_malloc_stack_hword, - [OP_MALLOC_STACK_WORD] = vm_malloc_stack_word, - [OP_MGET_STACK_BYTE] = vm_mget_stack_byte, - [OP_MGET_STACK_HWORD] = vm_mget_stack_hword, - [OP_MGET_STACK_WORD] = vm_mget_stack_word, - [OP_MSET_STACK_BYTE] = vm_mset_stack_byte, - [OP_MSET_STACK_HWORD] = vm_mset_stack_hword, - [OP_MSET_STACK_WORD] = vm_mset_stack_word, - [OP_MDELETE] = vm_mdelete, - [OP_MSIZE] = vm_msize, - - [OP_NOT_BYTE] = vm_not_byte, - [OP_NOT_HWORD] = 
vm_not_hword, - [OP_NOT_WORD] = vm_not_word, - - [OP_OR_BYTE] = vm_or_byte, - [OP_OR_HWORD] = vm_or_hword, - [OP_OR_WORD] = vm_or_word, - - [OP_AND_BYTE] = vm_and_byte, - [OP_AND_HWORD] = vm_and_hword, - [OP_AND_WORD] = vm_and_word, - - [OP_XOR_BYTE] = vm_xor_byte, - [OP_XOR_HWORD] = vm_xor_hword, - [OP_XOR_WORD] = vm_xor_word, - - [OP_EQ_BYTE] = vm_eq_byte, - [OP_EQ_HWORD] = vm_eq_hword, - [OP_EQ_WORD] = vm_eq_word, - - [OP_LT_BYTE] = vm_lt_byte, - [OP_LT_CHAR] = vm_lt_char, - [OP_LT_INT] = vm_lt_int, - [OP_LT_HWORD] = vm_lt_hword, - [OP_LT_LONG] = vm_lt_long, - [OP_LT_WORD] = vm_lt_word, - - [OP_LTE_BYTE] = vm_lte_byte, - [OP_LTE_CHAR] = vm_lte_char, - [OP_LTE_INT] = vm_lte_int, - [OP_LTE_HWORD] = vm_lte_hword, - [OP_LTE_LONG] = vm_lte_long, - [OP_LTE_WORD] = vm_lte_word, - - [OP_GT_BYTE] = vm_gt_byte, - [OP_GT_CHAR] = vm_gt_char, - [OP_GT_INT] = vm_gt_int, - [OP_GT_HWORD] = vm_gt_hword, - [OP_GT_LONG] = vm_gt_long, - [OP_GT_WORD] = vm_gt_word, - - [OP_GTE_BYTE] = vm_gte_byte, - [OP_GTE_CHAR] = vm_gte_char, - [OP_GTE_INT] = vm_gte_int, - [OP_GTE_HWORD] = vm_gte_hword, - [OP_GTE_LONG] = vm_gte_long, - [OP_GTE_WORD] = vm_gte_word, - - [OP_PLUS_BYTE] = vm_plus_byte, - [OP_PLUS_HWORD] = vm_plus_hword, - [OP_PLUS_WORD] = vm_plus_word, - [OP_SUB_BYTE] = vm_sub_byte, - [OP_SUB_HWORD] = vm_sub_hword, - [OP_SUB_WORD] = vm_sub_word, - - [OP_MULT_BYTE] = vm_mult_byte, - [OP_MULT_HWORD] = vm_mult_hword, - [OP_MULT_WORD] = vm_mult_word, -}; - -#endif