From 3cd2dbc2ace9791b62287ce7a43e31dae708a398 Mon Sep 17 00:00:00 2001 From: Aryadev Chavali Date: Mon, 24 Jun 2024 14:38:11 +0100 Subject: [PATCH] Registers are now fixed size todo.org contains an explanation for this --- todo.org | 33 +++++++++++++++++++++++++-- vm/main.c | 8 +++---- vm/runtime.c | 63 +++++++++++++++++++++++----------------------------- vm/struct.c | 27 +++++++--------------- vm/struct.h | 15 ++++++++----- 5 files changed, 81 insertions(+), 65 deletions(-) diff --git a/todo.org b/todo.org index 346907a..e94d6eb 100644 --- a/todo.org +++ b/todo.org @@ -15,14 +15,43 @@ **** TODO vm/struct.h **** TODO vm/main.c ** TODO Specification +* TODO Do not request for more memory in registers +The stack is a fixed size object allocated at the start of a program +and inserted onto the VM. The VM cannot request more memory for the +stack if it runs out, but this also ensures a very strict upper bound +on stack memory usage which can be profiled easily. Furthermore, the +code that interacts with the stack can use the strict sizing as an +invariant to simplify implementation (e.g. pushing to the stack when +the stack is full will trap the program). Also the stack cannot be +used to OOM attack the virtual machine. + +Registers are currently dynamic arrays. Say 8 word registers are +allocated at init time. If a user requests a 9th word register, +memory is requested from the operating system to increase register +space. This is unacceptable from both a profiling and an attack point +of view; it would be trivial to write a program which forced the +runtime to request ridiculous amounts of memory from the operating +system (for example, by ~mov.word N~ with a very large register index N). + +Registers should not be infinite; a standardised size (with a compile +time option to alter it) ensures the benefits stated above for the +stack. * TODO Introduce error handling in base library :LIB: There is a large variety of TODOs about errors. Let's fix them! 
#+begin_src sh :exports results :results output verbatim replace -echo "$(find -type 'f' -regex ".*\.[ch]\(pp\)?" -exec grep -nH TODO "{}" ";" | wc -l) TODOs currently" +find -type 'f' -regex ".*\.[ch]\(pp\)?" -exec grep -nH TODO "{}" ";" #+end_src #+RESULTS: -: 8 TODOs currently +: ./vm/runtime.c:228: // TODO: Figure out a way to ensure the ordering of OP_PRINT_* is +: ./vm/runtime.c:578:// TODO: rename this to something more appropriate +: ./vm/runtime.c:625:// TODO: rename this to something more appropriate +: ./vm/runtime.c:641:// TODO: rename this to something more appropriate +: ./vm/runtime.c:655:// TODO: rename this to something more appropriate +: ./lib/heap.c:59: // TODO: When does this fragmentation become a performance +: ./lib/base.c:19: // TODO: is there a faster way of doing this? +: ./lib/base.c:25: // TODO: is there a faster way of doing this? +: ./lib/base.c:32: // TODO: is there a faster way of doing this? * TODO Standard library :VM: I should start considering this and how a user may use it. Should it be an option in the VM and/or assembler binaries (i.e. 
a flag) or diff --git a/vm/main.c b/vm/main.c index 10cb40d..0ab4704 100644 --- a/vm/main.c +++ b/vm/main.c @@ -95,9 +95,9 @@ int main(int argc, char *argv[]) size_t stack_size = 256; byte_t *stack = calloc(stack_size, 1); - registers_t registers = {0}; - darr_init(&registers, 8 * WORD_SIZE); - heap_t heap = {0}; + size_t registers_size = 8 * WORD_SIZE; + byte_t *registers = calloc(registers_size, 1); + heap_t heap = {0}; heap_create(&heap); size_t call_stack_size = 256; word_t *call_stack = calloc(call_stack_size, sizeof(call_stack)); @@ -105,7 +105,7 @@ int main(int argc, char *argv[]) vm_t vm = {0}; vm_load_stack(&vm, stack, stack_size); vm_load_program(&vm, program); - vm_load_registers(&vm, registers); + vm_load_registers(&vm, registers, registers_size); vm_load_heap(&vm, heap); vm_load_call_stack(&vm, call_stack, call_stack_size); diff --git a/vm/runtime.c b/vm/runtime.c index 4ea706d..59144c7 100644 --- a/vm/runtime.c +++ b/vm/runtime.c @@ -273,10 +273,10 @@ err_t vm_execute_all(vm_t *vm) size_t cycles = 0; #endif #if VERBOSE >= 2 - registers_t prev_registers = vm->registers; - size_t prev_sptr = 0; - size_t prev_pages = 0; - size_t prev_cptr = 0; + struct Registers prev_registers = vm->registers; + size_t prev_sptr = 0; + size_t prev_pages = 0; + size_t prev_cptr = 0; #endif while (program->ptr < count && program->data.instructions[program->ptr].opcode != OP_HALT) @@ -310,7 +310,7 @@ err_t vm_execute_all(vm_t *vm) "----------\n", stdout); } - if (memcmp(&prev_registers, &vm->registers, sizeof(darr_t)) != 0) + if (memcmp(&prev_registers, &vm->registers, sizeof(vm->registers)) != 0) { vm_print_registers(vm, stdout); prev_registers = vm->registers; @@ -441,17 +441,17 @@ VM_POP_CONSTR(word, WORD) Note this means that we check for stack overflow here. 
*/ -#define VM_PUSH_REGISTER_CONSTR(TYPE, TYPE_CAP) \ - err_t vm_push_##TYPE##_register(vm_t *vm, word_t reg) \ - { \ - if (reg > (vm->registers.used / TYPE_CAP##_SIZE)) \ - return ERR_INVALID_REGISTER_##TYPE_CAP; \ - else if (vm->stack.ptr + TYPE_CAP##_SIZE >= vm->stack.max) \ - return ERR_STACK_OVERFLOW; \ - memcpy(vm->stack.data + vm->stack.ptr, \ - vm->registers.data + (reg * TYPE_CAP##_SIZE), TYPE_CAP##_SIZE); \ - vm->stack.ptr += TYPE_CAP##_SIZE; \ - return ERR_OK; \ +#define VM_PUSH_REGISTER_CONSTR(TYPE, TYPE_CAP) \ + err_t vm_push_##TYPE##_register(vm_t *vm, word_t reg) \ + { \ + if (reg > (vm->registers.size / TYPE_CAP##_SIZE)) \ + return ERR_INVALID_REGISTER_##TYPE_CAP; \ + else if (vm->stack.ptr + TYPE_CAP##_SIZE >= vm->stack.max) \ + return ERR_STACK_OVERFLOW; \ + memcpy(vm->stack.data + vm->stack.ptr, \ + vm->registers.bytes + (reg * TYPE_CAP##_SIZE), TYPE_CAP##_SIZE); \ + vm->stack.ptr += TYPE_CAP##_SIZE; \ + return ERR_OK; \ } VM_PUSH_REGISTER_CONSTR(byte, BYTE) @@ -466,25 +466,18 @@ VM_PUSH_REGISTER_CONSTR(word, WORD) a value of N bytes, the array stack[top - N:top] is copied into the register directly, we're done. 
*/ -#define VM_MOV_CONSTR(TYPE, TYPE_CAP) \ - err_t vm_mov_##TYPE(vm_t *vm, word_t reg) \ - { \ - if (reg >= (vm->registers.used / TYPE_CAP##_SIZE)) \ - { \ - const size_t diff = \ - ((reg - (vm->registers.used / TYPE_CAP##_SIZE)) + 1) * \ - TYPE_CAP##_SIZE; \ - darr_ensure_capacity(&vm->registers, diff); \ - vm->registers.used = \ - MAX(vm->registers.used, (reg + 1) * TYPE_CAP##_SIZE); \ - } \ - else if (vm->stack.ptr + TYPE_CAP##_SIZE >= vm->stack.max) \ - return ERR_STACK_OVERFLOW; \ - memcpy(vm->registers.data + (reg * TYPE_CAP##_SIZE), \ - vm->stack.data + vm->stack.ptr - (TYPE_CAP##_SIZE), \ - TYPE_CAP##_SIZE); \ - vm->stack.ptr -= TYPE_CAP##_SIZE; \ - return ERR_OK; \ +#define VM_MOV_CONSTR(TYPE, TYPE_CAP) \ + err_t vm_mov_##TYPE(vm_t *vm, word_t reg) \ + { \ + if (reg >= (vm->registers.size / TYPE_CAP##_SIZE)) \ + return ERR_INVALID_REGISTER_##TYPE_CAP; \ + else if (vm->stack.ptr + TYPE_CAP##_SIZE >= vm->stack.max) \ + return ERR_STACK_OVERFLOW; \ + memcpy(vm->registers.bytes + (reg * TYPE_CAP##_SIZE), \ + vm->stack.data + vm->stack.ptr - (TYPE_CAP##_SIZE), \ + TYPE_CAP##_SIZE); \ + vm->stack.ptr -= TYPE_CAP##_SIZE; \ + return ERR_OK; \ } VM_MOV_CONSTR(byte, BYTE) diff --git a/vm/struct.c b/vm/struct.c index 89ec6ff..d688a56 100644 --- a/vm/struct.c +++ b/vm/struct.c @@ -29,9 +29,9 @@ void vm_load_program(vm_t *vm, prog_t program) vm->program.data = program; } -void vm_load_registers(vm_t *vm, registers_t registers) +void vm_load_registers(vm_t *vm, byte_t *buffer, size_t size) { - vm->registers = registers; + vm->registers = (struct Registers){.size = size, .bytes = buffer}; } void vm_load_heap(vm_t *vm, heap_t heap) @@ -95,13 +95,7 @@ void vm_stop(vm_t *vm) printf("[" TERM_GREEN "DATA" TERM_RESET "]: No leaks found\n"); #endif - free(vm->registers.data); - free(vm->program.data.instructions); - free(vm->stack.data); - heap_stop(&vm->heap); - free(vm->call_stack.address_pointers); - - vm->registers = (registers_t){0}; + vm->registers = (struct 
Registers){0}; vm->program = (struct Program){0}; vm->stack = (struct Stack){0}; vm->heap = (heap_t){0}; @@ -109,19 +103,14 @@ void vm_stop(vm_t *vm) void vm_print_registers(vm_t *vm, FILE *fp) { - registers_t reg = vm->registers; - fprintf( - fp, - "Registers.used = %luB/%luH/%luW\nRegisters.available = %luB/%luH/%luW\n", - vm->registers.used, vm->registers.used / HWORD_SIZE, - vm->registers.used / WORD_SIZE, vm->registers.available, - vm->registers.available / HWORD_SIZE, - vm->registers.available / WORD_SIZE); + struct Registers reg = vm->registers; + fprintf(fp, "Registers.size = %luB/%luH/%luW\n", vm->registers.size, + vm->registers.size / HWORD_SIZE, vm->registers.size / WORD_SIZE); fprintf(fp, "Registers.reg = ["); - for (size_t i = 0; i < (reg.used / WORD_SIZE); ++i) + for (size_t i = 0; i < (reg.size / WORD_SIZE); ++i) { fprintf(fp, "{%lu:%lX}", i, VM_NTH_REGISTER(reg, i)); - if (i != reg.used - 1) + if (i != reg.size - 1) fprintf(fp, ", "); } fprintf(fp, "]\n"); diff --git a/vm/struct.h b/vm/struct.h index f4bbd58..5f62d5a 100644 --- a/vm/struct.h +++ b/vm/struct.h @@ -17,9 +17,11 @@ #include #include -typedef darr_t registers_t; -#define VM_NTH_REGISTER(REGISTERS, N) (((word_t *)((REGISTERS).data))[N]) -#define VM_REGISTERS_AVAILABLE(REGISTERS) (((REGISTERS).available) / WORD_SIZE) +struct Registers +{ + byte_t *bytes; + size_t size; +}; struct Stack { @@ -39,9 +41,12 @@ struct CallStack size_t ptr, max; }; +#define VM_NTH_REGISTER(REGISTERS, N) (((word_t *)((REGISTERS).bytes))[N]) +#define VM_REGISTERS_AVAILABLE(REGISTERS) (((REGISTERS).size) / WORD_SIZE) + typedef struct { - registers_t registers; + struct Registers registers; struct Stack stack; heap_t heap; @@ -51,7 +56,7 @@ typedef struct // Start and stop void vm_load_stack(vm_t *, byte_t *, size_t); -void vm_load_registers(vm_t *, registers_t); +void vm_load_registers(vm_t *, byte_t *, size_t); void vm_load_heap(vm_t *, heap_t); void vm_load_program(vm_t *, prog_t); void vm_load_call_stack(vm_t *, 
word_t *, size_t);