Reworked (de)serialising routines for instructions

No longer relying on darr_t or anything other than the C runtime and aliases. This means it should be *even easier* to target this via FFI from other languages without having to initialise my custom-made structures! Furthermore, I've removed every form of allocation from the library, so FFI callers don't need to manage memory in any way. Instead we rely on the caller allocating the correct amount of memory for the functions to work, with basic error handling if that doesn't happen. In the case of inst_read_bytecode, errors are reported through the function's integer return value. If the integer is positive, it is the number of bytes read from the buffer. If it is negative, it flags a possible error and is a member of read_err_t. prog_read_bytecode has been split into two functions: prog_read_header and prog_read_instructions. prog_read_instructions works under the assumption that the program's header has already been filled in, e.g. via prog_read_header. prog_read_header returns 0 if there's not enough space in the buffer or if the start_address is greater than the count. prog_read_instructions returns a custom structure which contains a byte position as well as an error enum, allowing for finer error reporting. In the case of inst_write_bytecode, the assumption that the caller allocated the correct amount of memory means there is no need for error reporting. For prog_write_bytecode, if an error occurs due to [the original message breaks off here and continues with the unfinished fragment:] In the case of inst_read_bytecode we return the number [message truncated]
2024-04-27 17:22:51 +05:30
parent b9c94d0725
commit 40907e5113
3 changed files with 333 additions and 393 deletions
--- a/lib/inst.h
+++ b/lib/inst.h
@@ -13,14 +13,10 @@
 #ifndef INST_H
 #define INST_H

-#include <lib/darr.h>
-#include <lib/prog.h>
-
+#include <lib/base.h>
 #include <stdio.h>
 #include <stdlib.h>

-const char *opcode_as_cstr(opcode_t);
-
 #define UNSIGNED_OPCODE_IS_TYPE(OPCODE, OP_TYPE) \
  (((OPCODE) >= OP_TYPE##_BYTE) && ((OPCODE) <= OP_TYPE##_WORD))

@@ -29,85 +25,224 @@ const char *opcode_as_cstr(opcode_t);

 #define OPCODE_DATA_TYPE(OPCODE, OP_TYPE) (OPCODE - OP_TYPE##_BYTE)

-// OPCODE_DATA_TYPE: opcode_t -> data_type_t.  data_type_t acts as
-// a map between types and their offsets from the first type of
-// instruction.  That means for opcode_type A and data_type u,
-// OP_<A>_BYTE + u = OP_<A>_<u>.
+typedef enum
+{
+  OP_NOOP = 0,
+
+  // Dealing with data and registers
+  OP_PUSH_BYTE,
+  OP_PUSH_HWORD,
+  OP_PUSH_WORD,
+
+  OP_POP_BYTE,
+  OP_POP_HWORD,
+  OP_POP_WORD,
+
+  OP_PUSH_REGISTER_BYTE,
+  OP_PUSH_REGISTER_HWORD,
+  OP_PUSH_REGISTER_WORD,
+
+  OP_MOV_BYTE,
+  OP_MOV_HWORD,
+  OP_MOV_WORD,
+
+  OP_DUP_BYTE,
+  OP_DUP_HWORD,
+  OP_DUP_WORD,
+
+  // Dealing with the heap
+  OP_MALLOC_BYTE,
+  OP_MALLOC_HWORD,
+  OP_MALLOC_WORD,
+
+  OP_MALLOC_STACK_BYTE,
+  OP_MALLOC_STACK_HWORD,
+  OP_MALLOC_STACK_WORD,
+
+  OP_MSET_BYTE,
+  OP_MSET_HWORD,
+  OP_MSET_WORD,
+
+  OP_MSET_STACK_BYTE,
+  OP_MSET_STACK_HWORD,
+  OP_MSET_STACK_WORD,
+
+  OP_MGET_BYTE,
+  OP_MGET_HWORD,
+  OP_MGET_WORD,
+
+  OP_MGET_STACK_BYTE,
+  OP_MGET_STACK_HWORD,
+  OP_MGET_STACK_WORD,
+
+  OP_MDELETE,
+  OP_MSIZE,
+
+  // Boolean operations
+  OP_NOT_BYTE,
+  OP_NOT_HWORD,
+  OP_NOT_WORD,
+
+  OP_OR_BYTE,
+  OP_OR_HWORD,
+  OP_OR_WORD,
+
+  OP_AND_BYTE,
+  OP_AND_HWORD,
+  OP_AND_WORD,
+
+  OP_XOR_BYTE,
+  OP_XOR_HWORD,
+  OP_XOR_WORD,
+
+  OP_EQ_BYTE,
+  OP_EQ_HWORD,
+  OP_EQ_WORD,
+
+  // Mathematical operations
+  OP_PLUS_BYTE,
+  OP_PLUS_HWORD,
+  OP_PLUS_WORD,
+
+  OP_SUB_BYTE,
+  OP_SUB_HWORD,
+  OP_SUB_WORD,
+
+  OP_MULT_BYTE,
+  OP_MULT_HWORD,
+  OP_MULT_WORD,
+
+  // Comparison operations
+  OP_LT_BYTE,
+  OP_LT_CHAR,
+  OP_LT_HWORD,
+  OP_LT_INT,
+  OP_LT_WORD,
+  OP_LT_LONG,
+
+  OP_LTE_BYTE,
+  OP_LTE_CHAR,
+  OP_LTE_HWORD,
+  OP_LTE_INT,
+  OP_LTE_WORD,
+  OP_LTE_LONG,
+
+  OP_GT_BYTE,
+  OP_GT_CHAR,
+  OP_GT_HWORD,
+  OP_GT_INT,
+  OP_GT_WORD,
+  OP_GT_LONG,
+
+  OP_GTE_BYTE,
+  OP_GTE_CHAR,
+  OP_GTE_HWORD,
+  OP_GTE_INT,
+  OP_GTE_WORD,
+  OP_GTE_LONG,
+
+  // Simple I/O
+  OP_PRINT_BYTE,
+  OP_PRINT_CHAR,
+  OP_PRINT_HWORD,
+  OP_PRINT_INT,
+  OP_PRINT_WORD,
+  OP_PRINT_LONG,
+
+  // Program control flow
+  OP_JUMP_ABS,
+  OP_JUMP_STACK,
+  OP_JUMP_IF_BYTE,
+  OP_JUMP_IF_HWORD,
+  OP_JUMP_IF_WORD,
+
+  // Subroutines
+  OP_CALL,
+  OP_CALL_STACK,
+  OP_RET,
+
+  // Should not be an opcode
+  NUMBER_OF_OPCODES,
+  OP_HALT = 0b11111111, // top of the byte is a HALT
+} opcode_t;
+
+size_t opcode_bytecode_size(opcode_t);
+const char *opcode_as_cstr(opcode_t);
+
+typedef struct
+{
+  opcode_t opcode;
+  data_t operand;
+} inst_t;
+
+/**
+   @brief Serialise an instruction into a byte buffer
+
+   @details Given an instruction and a suitably sized byte buffer,
+   write the bytecode for the instruction into the buffer.  NOTE: This
+   function does NOT check the bounds of `bytes` i.e. we assume the
+   caller has created a suitably sized buffer.
+
+   @param[inst] Instruction to serialise
+   @param[bytes] Buffer to write on
+
+   @return[size_t] Number of bytes written to `bytes`.
+ */
+size_t inst_write_bytecode(inst_t inst, byte_t *bytes);
+
+typedef enum
+{
+  READ_ERR_INVALID_OPCODE = -1,
+  READ_ERR_OPERAND_NO_FIT = -2,
+  READ_ERR_EXPECTED_MORE  = -3,
+  READ_ERR_END            = -4
+} read_err_t;
+
+/**
+   @brief Deserialise an instruction from a bytecode buffer
+
+   @details Given a buffer of bytes, deserialise an instruction,
+   storing the result in the pointer given.  The number of bytes read
+   in the buffer is returned, which should be opcode_bytecode_size().
+   NOTE: If bytes is not suitably sized for the instruction expected
+   or it is not well formed i.e. not the right schema then a negative
+   number is returned.
+
+   @param[inst] Pointer to instruction which will store result
+   @param[bytes] Bytecode buffer to deserialise
+   @param[size_bytes] Number of bytes in buffer
+
+   @return[int] Number of bytes read.  If negative then an error
+   occurred in deserialisation (either buffer was not suitably sized
+   or instruction was not well formed) so any result must be
+   considered invalid.
+ */
+int inst_read_bytecode(inst_t *inst, byte_t *bytes, size_t size_bytes);

 void inst_print(inst_t, FILE *);

-size_t inst_bytecode_size(inst_t);
-void inst_write_bytecode(inst_t, darr_t *);
-void insts_write_bytecode(inst_t *, size_t, darr_t *);
-// Here the dynamic array is a preloaded buffer of bytes, where
-// darr.available is the number of overall bytes and used is the
-// cursor (where we are in the buffer).
-inst_t inst_read_bytecode(darr_t *);
-inst_t *insts_read_bytecode(darr_t *, size_t *);
+typedef struct
+{
+  word_t start_address;
+  word_t count;
+  inst_t *instructions;
+} prog_t;

-void insts_write_bytecode_file(inst_t *, size_t, FILE *);
-inst_t *insts_read_bytecode_file(FILE *, size_t *);
+#define PROG_HEADER_SIZE (WORD_SIZE * 2)

-// Write the entire program as bytecode
-void prog_write_bytecode(prog_t *, darr_t *);
-// Only append the instructions as bytecode
-void prog_append_bytecode(prog_t *, darr_t *);
-// Read an entire program as bytecode
-prog_t *prog_read_bytecode(darr_t *);
+size_t prog_bytecode_size(prog_t);

-void prog_write_file(prog_t *, FILE *);
-prog_t *prog_read_file(FILE *);
+size_t prog_write_bytecode(prog_t program, byte_t *bytes, size_t size_bytes);

-#define INST_NOOP ((inst_t){0})
-#define INST_HALT ((inst_t){.opcode = OP_HALT})
+size_t prog_read_header(prog_t *program, byte_t *bytes, size_t size_bytes);

-#define INST_PUSH(TYPE, OP) \
-  ((inst_t){.opcode = OP_PUSH_##TYPE, .operand = D##TYPE(OP)})
+typedef struct
+{
+  read_err_t type;
+  size_t index;
+} read_err_prog_t;

-#define INST_MOV(TYPE, OP) \
-  ((inst_t){.opcode = OP_MOV_##TYPE, .operand = D##TYPE(OP)})
+read_err_prog_t prog_read_instructions(prog_t *program, size_t *size_bytes_read,
+                                       byte_t *bytes, size_t size_bytes);

-#define INST_POP(TYPE) ((inst_t){.opcode = OP_POP_##TYPE})
-
-#define INST_PUSH_REG(TYPE, REG) \
-  ((inst_t){.opcode = OP_PUSH_REGISTER_##TYPE, .operand = D##TYPE(REG)})
-
-#define INST_DUP(TYPE, OP) \
-  ((inst_t){.opcode = OP_DUP_##TYPE, .operand = DWORD(OP)})
-
-#define INST_MALLOC(TYPE, OP) \
-  ((inst_t){.opcode = OP_MALLOC_##TYPE, .operand = DWORD(OP)})
-#define INST_MALLOC_STACK(TYPE) ((inst_t){.opcode = OP_MALLOC_STACK_##TYPE})
-#define INST_MSET(TYPE, OP) \
-  ((inst_t){.opcode = OP_MSET_##TYPE, .operand = DWORD(OP)})
-#define INST_MSET_STACK(TYPE) ((inst_t){.opcode = OP_MSET_STACK_##TYPE})
-#define INST_MGET(TYPE, OP) \
-  ((inst_t){.opcode = OP_MGET_##TYPE, .operand = DWORD(OP)})
-#define INST_MGET_STACK(TYPE) ((inst_t){.opcode = OP_MGET_STACK_##TYPE})
-#define INST_MDELETE          ((inst_t){.opcode = OP_MDELETE})
-#define INST_MSIZE            ((inst_t){.opcode = OP_MSIZE})
-
-#define INST_NOT(TYPE)  ((inst_t){.opcode = OP_NOT_##TYPE})
-#define INST_OR(TYPE)   ((inst_t){.opcode = OP_OR_##TYPE})
-#define INST_AND(TYPE)  ((inst_t){.opcode = OP_AND_##TYPE})
-#define INST_XOR(TYPE)  ((inst_t){.opcode = OP_XOR_##TYPE})
-#define INST_EQ(TYPE)   ((inst_t){.opcode = OP_EQ_##TYPE})
-#define INST_LT(TYPE)   ((inst_t){.opcode = OP_LT_##TYPE})
-#define INST_LTE(TYPE)  ((inst_t){.opcode = OP_LTE_##TYPE})
-#define INST_GT(TYPE)   ((inst_t){.opcode = OP_GT_##TYPE})
-#define INST_GTE(TYPE)  ((inst_t){.opcode = OP_GTE_##TYPE})
-#define INST_PLUS(TYPE) ((inst_t){.opcode = OP_PLUS_##TYPE})
-#define INST_SUB(TYPE)  ((inst_t){.opcode = OP_SUB_##TYPE})
-#define INST_MULT(TYPE) ((inst_t){.opcode = OP_MULT_##TYPE})
-
-#define INST_JUMP_ABS(OP) \
-  ((inst_t){.opcode = OP_JUMP_ABS, .operand = DWORD(OP)})
-#define INST_JUMP_STACK ((inst_t){.opcode = OP_JUMP_STACK})
-#define INST_JUMP_IF(TYPE, OP) \
-  ((inst_t){.opcode = OP_JUMP_IF_##TYPE, .operand = DWORD(OP)})
-#define INST_CALL(OP)   ((inst_t){.opcode = OP_CALL, .operand = DWORD(OP)})
-#define INST_CALL_STACK ((inst_t){.opcode = OP_CALL_STACK})
-#define INST_RET        ((inst_t){.opcode = OP_RET})
-
-#define INST_PRINT(TYPE) ((inst_t){.opcode = OP_PRINT_##TYPE})
 #endif