Reworked (de)serialising routines for instructions

No longer relying on darr_t or anything other than the C runtime and
aliases.  This means it should be *even easier* to target this via FFI
from other languages without having to initialise my custom made
structures!  Furthermore I've removed any form of allocation in the
library so FFI callers don't need to manage memory in any way.
Instead we rely on the caller allocating the correct amount of memory
for the functions to work, with basic error handling if that doesn't
happen.

inst_read_bytecode reports errors through its integer return value.
If the integer is positive it is the number of bytes read from the
buffer.  If it is negative it flags an error, and the value is a
member of read_err_t.

prog_read_bytecode has been split into two functions: prog_read_header
and prog_read_instructions.  prog_read_instructions works under the
assumption that the program's header has been filled, e.g. via
prog_read_header.  prog_read_header returns 0 if there's not enough
space in the buffer or if the start_address is greater than the count.
prog_read_instructions returns a custom structure which contains a
byte position as well as an error enum, allowing for finer error
reporting.

In the case of inst_write_bytecode there is no need for error
reporting, as we assume the caller has allocated the correct memory.
For prog_write_bytecode if an error occurs due to

In the case of inst_read_bytecode we return the number
This commit is contained in:
2024-04-27 17:22:51 +05:30
parent b9c94d0725
commit 40907e5113
3 changed files with 333 additions and 393 deletions

View File

@@ -13,14 +13,10 @@
#ifndef INST_H
#define INST_H
#include <lib/darr.h>
#include <lib/prog.h>
#include <lib/base.h>
#include <stdio.h>
#include <stdlib.h>
const char *opcode_as_cstr(opcode_t);
// UNSIGNED_OPCODE_IS_TYPE: non-zero when OPCODE lies in the inclusive
// range [<OP_TYPE>_BYTE, <OP_TYPE>_WORD], e.g.
// UNSIGNED_OPCODE_IS_TYPE(op, OP_PUSH).  NOTE: OPCODE may be evaluated
// twice, so do not pass an expression with side effects.
#define UNSIGNED_OPCODE_IS_TYPE(OPCODE, OP_TYPE) \
  ((OP_TYPE##_BYTE <= (OPCODE)) && (OP_TYPE##_WORD >= (OPCODE)))
@@ -29,85 +25,224 @@ const char *opcode_as_cstr(opcode_t);
// OPCODE_DATA_TYPE: opcode_t -> data_type_t.  data_type_t acts as
// a map between types and their offsets from the first type of
// instruction.  That means for opcode_type A and data_type u,
// OP_<A>_BYTE + u = OP_<A>_<u>.
//
// OPCODE is parenthesised so that compound arguments (conditional or
// bitwise expressions, for example) are fully evaluated before the
// subtraction is applied.
#define OPCODE_DATA_TYPE(OPCODE, OP_TYPE) ((OPCODE) - OP_TYPE##_BYTE)
/**
   @brief Every operation the virtual machine understands.

   @details Opcodes are numbered consecutively from OP_NOOP (0).
   Families that come in several operand widths list their variants
   contiguously starting from the _BYTE variant, which is what the
   OPCODE_DATA_TYPE and UNSIGNED_OPCODE_IS_TYPE macros rely on.  Most
   families have BYTE/HWORD/WORD variants; the comparison and print
   families have six (BYTE, CHAR, HWORD, INT, WORD, LONG).  Do not
   reorder members: their numeric values follow from their position.

   NUMBER_OF_OPCODES is a count sentinel, not an opcode.  OP_HALT is
   pinned to 0xFF, outside the consecutive range.
 */
typedef enum
{
  OP_NOOP = 0,

  // Dealing with data and registers
  OP_PUSH_BYTE,
  OP_PUSH_HWORD,
  OP_PUSH_WORD,

  OP_POP_BYTE,
  OP_POP_HWORD,
  OP_POP_WORD,

  OP_PUSH_REGISTER_BYTE,
  OP_PUSH_REGISTER_HWORD,
  OP_PUSH_REGISTER_WORD,

  OP_MOV_BYTE,
  OP_MOV_HWORD,
  OP_MOV_WORD,

  OP_DUP_BYTE,
  OP_DUP_HWORD,
  OP_DUP_WORD,

  // Dealing with the heap
  OP_MALLOC_BYTE,
  OP_MALLOC_HWORD,
  OP_MALLOC_WORD,

  OP_MALLOC_STACK_BYTE,
  OP_MALLOC_STACK_HWORD,
  OP_MALLOC_STACK_WORD,

  OP_MSET_BYTE,
  OP_MSET_HWORD,
  OP_MSET_WORD,

  OP_MSET_STACK_BYTE,
  OP_MSET_STACK_HWORD,
  OP_MSET_STACK_WORD,

  OP_MGET_BYTE,
  OP_MGET_HWORD,
  OP_MGET_WORD,

  OP_MGET_STACK_BYTE,
  OP_MGET_STACK_HWORD,
  OP_MGET_STACK_WORD,

  OP_MDELETE,
  OP_MSIZE,

  // Boolean operations
  OP_NOT_BYTE,
  OP_NOT_HWORD,
  OP_NOT_WORD,

  OP_OR_BYTE,
  OP_OR_HWORD,
  OP_OR_WORD,

  OP_AND_BYTE,
  OP_AND_HWORD,
  OP_AND_WORD,

  OP_XOR_BYTE,
  OP_XOR_HWORD,
  OP_XOR_WORD,

  OP_EQ_BYTE,
  OP_EQ_HWORD,
  OP_EQ_WORD,

  // Mathematical operations
  OP_PLUS_BYTE,
  OP_PLUS_HWORD,
  OP_PLUS_WORD,

  OP_SUB_BYTE,
  OP_SUB_HWORD,
  OP_SUB_WORD,

  OP_MULT_BYTE,
  OP_MULT_HWORD,
  OP_MULT_WORD,

  // Comparison operations
  OP_LT_BYTE,
  OP_LT_CHAR,
  OP_LT_HWORD,
  OP_LT_INT,
  OP_LT_WORD,
  OP_LT_LONG,

  OP_LTE_BYTE,
  OP_LTE_CHAR,
  OP_LTE_HWORD,
  OP_LTE_INT,
  OP_LTE_WORD,
  OP_LTE_LONG,

  OP_GT_BYTE,
  OP_GT_CHAR,
  OP_GT_HWORD,
  OP_GT_INT,
  OP_GT_WORD,
  OP_GT_LONG,

  OP_GTE_BYTE,
  OP_GTE_CHAR,
  OP_GTE_HWORD,
  OP_GTE_INT,
  OP_GTE_WORD,
  OP_GTE_LONG,

  // Simple I/O
  OP_PRINT_BYTE,
  OP_PRINT_CHAR,
  OP_PRINT_HWORD,
  OP_PRINT_INT,
  OP_PRINT_WORD,
  OP_PRINT_LONG,

  // Program control flow
  OP_JUMP_ABS,
  OP_JUMP_STACK,
  OP_JUMP_IF_BYTE,
  OP_JUMP_IF_HWORD,
  OP_JUMP_IF_WORD,

  // Subroutines
  OP_CALL,
  OP_CALL_STACK,
  OP_RET,

  // Should not be an opcode
  NUMBER_OF_OPCODES,

  // Top of the byte is a HALT.  Written in hexadecimal because binary
  // literals (0b...) are a compiler extension prior to C23.
  OP_HALT = 0xFF,
} opcode_t;
size_t opcode_bytecode_size(opcode_t);
const char *opcode_as_cstr(opcode_t);
/* A single instruction: an opcode paired with one operand slot.  The
   INST_* constructor macros in this header build values of this type;
   those for operand-less opcodes (e.g. INST_POP) set only `opcode`, so
   the compound literal leaves `operand` zero-initialised. */
typedef struct
{
// Which operation this instruction performs.
opcode_t opcode;
// Operand payload; data_t is declared outside this header.
// NOTE(review): presumably ignored for opcodes that take no operand -
// confirm against the interpreter.
data_t operand;
} inst_t;
/**
@brief Serialise an instruction into a byte buffer
@details Given an instruction and a suitably sized byte buffer,
write the bytecode for the instruction into the buffer. NOTE: This
function does NOT check the bounds of `bytes` i.e. we assume the
caller has created a suitably sized buffer.
@param[inst] Instruction to serialise
@param[bytes] Buffer to write on
@return[size_t] Number of bytes written to `bytes`.
*/
size_t inst_write_bytecode(inst_t inst, byte_t *bytes);
/* Error codes for bytecode deserialisation.  Every value is negative,
   so a code can share an `int` return channel with a byte count: see
   inst_read_bytecode, whose documentation states that a negative
   return means an error occurred.  The per-member descriptions below
   are inferred from the names only - TODO confirm against the
   implementation. */
typedef enum
{
// Presumably: the opcode byte does not name a known opcode.
READ_ERR_INVALID_OPCODE = -1,
// Presumably: the buffer is too short to hold the operand.
READ_ERR_OPERAND_NO_FIT = -2,
// Presumably: more bytes/instructions were expected than were present.
READ_ERR_EXPECTED_MORE = -3,
// Presumably: end of the buffer was reached.
READ_ERR_END = -4
} read_err_t;
/**
@brief Deserialise an instruction from a bytecode buffer
@details Given a buffer of bytes, deserialise an instruction,
storing the result in the pointer given. The number of bytes read
in the buffer is returned, which should be opcode_bytecode_size().
NOTE: If bytes is not suitably sized for the instruction expected
or it is not well formed i.e. not the right schema then a negative
number is returned.
@param[inst] Pointer to instruction which will store result
@param[bytes] Bytecode buffer to deserialise
@param[size_bytes] Number of bytes in buffer
@return[int] Number of bytes read. If negative then an error
occurred in deserialisation (either buffer was not suitably sized
or instruction was not well formed) so any result must be
considered invalid.
*/
int inst_read_bytecode(inst_t *inst, byte_t *bytes, size_t size_bytes);
void inst_print(inst_t, FILE *);
size_t inst_bytecode_size(inst_t);
void inst_write_bytecode(inst_t, darr_t *);
void insts_write_bytecode(inst_t *, size_t, darr_t *);
// Here the dynamic array is a preloaded buffer of bytes, where
// darr.available is the number of overall bytes and used is the
// cursor (where we are in the buffer).
inst_t inst_read_bytecode(darr_t *);
inst_t *insts_read_bytecode(darr_t *, size_t *);
/* A program: a two-word header (start_address and count, matching
   PROG_HEADER_SIZE == WORD_SIZE * 2) plus a pointer to its
   instructions. */
typedef struct
{
// NOTE(review): presumably the index of the instruction where
// execution begins - confirm against the interpreter.
word_t start_address;
// NOTE(review): presumably the number of elements in `instructions`
// - confirm.
word_t count;
// Instruction storage; ownership is not established by this header.
inst_t *instructions;
} prog_t;
void insts_write_bytecode_file(inst_t *, size_t, FILE *);
inst_t *insts_read_bytecode_file(FILE *, size_t *);
#define PROG_HEADER_SIZE (WORD_SIZE * 2)
// Write the entire program as bytecode
void prog_write_bytecode(prog_t *, darr_t *);
// Only append the instructions as bytecode
void prog_append_bytecode(prog_t *, darr_t *);
// Read an entire program as bytecode
prog_t *prog_read_bytecode(darr_t *);
size_t prog_bytecode_size(prog_t);
void prog_write_file(prog_t *, FILE *);
prog_t *prog_read_file(FILE *);
size_t prog_write_bytecode(prog_t program, byte_t *bytes, size_t size_bytes);
#define INST_NOOP ((inst_t){0})
#define INST_HALT ((inst_t){.opcode = OP_HALT})
size_t prog_read_header(prog_t *program, byte_t *bytes, size_t size_bytes);
#define INST_PUSH(TYPE, OP) \
((inst_t){.opcode = OP_PUSH_##TYPE, .operand = D##TYPE(OP)})
/* Result of prog_read_instructions: an error code together with a
   position, allowing finer error reporting than a bare code. */
typedef struct
{
// The error classification, one of the read_err_t codes.
read_err_t type;
// NOTE(review): described by the commit message as a byte position;
// presumably the offset in the input buffer the error relates to -
// confirm against the implementation.
size_t index;
} read_err_prog_t;
#define INST_MOV(TYPE, OP) \
((inst_t){.opcode = OP_MOV_##TYPE, .operand = D##TYPE(OP)})
read_err_prog_t prog_read_instructions(prog_t *program, size_t *size_bytes_read,
byte_t *bytes, size_t size_bytes);
#define INST_POP(TYPE) ((inst_t){.opcode = OP_POP_##TYPE})
#define INST_PUSH_REG(TYPE, REG) \
((inst_t){.opcode = OP_PUSH_REGISTER_##TYPE, .operand = D##TYPE(REG)})
#define INST_DUP(TYPE, OP) \
((inst_t){.opcode = OP_DUP_##TYPE, .operand = DWORD(OP)})
#define INST_MALLOC(TYPE, OP) \
((inst_t){.opcode = OP_MALLOC_##TYPE, .operand = DWORD(OP)})
#define INST_MALLOC_STACK(TYPE) ((inst_t){.opcode = OP_MALLOC_STACK_##TYPE})
#define INST_MSET(TYPE, OP) \
((inst_t){.opcode = OP_MSET_##TYPE, .operand = DWORD(OP)})
#define INST_MSET_STACK(TYPE) ((inst_t){.opcode = OP_MSET_STACK_##TYPE})
#define INST_MGET(TYPE, OP) \
((inst_t){.opcode = OP_MGET_##TYPE, .operand = DWORD(OP)})
#define INST_MGET_STACK(TYPE) ((inst_t){.opcode = OP_MGET_STACK_##TYPE})
#define INST_MDELETE ((inst_t){.opcode = OP_MDELETE})
#define INST_MSIZE ((inst_t){.opcode = OP_MSIZE})
#define INST_NOT(TYPE) ((inst_t){.opcode = OP_NOT_##TYPE})
#define INST_OR(TYPE) ((inst_t){.opcode = OP_OR_##TYPE})
#define INST_AND(TYPE) ((inst_t){.opcode = OP_AND_##TYPE})
#define INST_XOR(TYPE) ((inst_t){.opcode = OP_XOR_##TYPE})
#define INST_EQ(TYPE) ((inst_t){.opcode = OP_EQ_##TYPE})
#define INST_LT(TYPE) ((inst_t){.opcode = OP_LT_##TYPE})
#define INST_LTE(TYPE) ((inst_t){.opcode = OP_LTE_##TYPE})
#define INST_GT(TYPE) ((inst_t){.opcode = OP_GT_##TYPE})
#define INST_GTE(TYPE) ((inst_t){.opcode = OP_GTE_##TYPE})
#define INST_PLUS(TYPE) ((inst_t){.opcode = OP_PLUS_##TYPE})
#define INST_SUB(TYPE) ((inst_t){.opcode = OP_SUB_##TYPE})
#define INST_MULT(TYPE) ((inst_t){.opcode = OP_MULT_##TYPE})
#define INST_JUMP_ABS(OP) \
((inst_t){.opcode = OP_JUMP_ABS, .operand = DWORD(OP)})
#define INST_JUMP_STACK ((inst_t){.opcode = OP_JUMP_STACK})
#define INST_JUMP_IF(TYPE, OP) \
((inst_t){.opcode = OP_JUMP_IF_##TYPE, .operand = DWORD(OP)})
#define INST_CALL(OP) ((inst_t){.opcode = OP_CALL, .operand = DWORD(OP)})
#define INST_CALL_STACK ((inst_t){.opcode = OP_CALL_STACK})
#define INST_RET ((inst_t){.opcode = OP_RET})
#define INST_PRINT(TYPE) ((inst_t){.opcode = OP_PRINT_##TYPE})
#endif