From a135262cd8eed85df769a7dbace73a886af7d6df Mon Sep 17 00:00:00 2001 From: Aryadev Chavali Date: Tue, 3 Dec 2024 03:38:01 +0000 Subject: Wrote an assembler (transpiles brainfuck into assembly code) Provides: + Transpilation from brainfuck code into intel style assembly + functions to assemble and link code --- assembler.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 assembler.c (limited to 'assembler.c') diff --git a/assembler.c b/assembler.c new file mode 100644 index 0000000..36cd2c0 --- /dev/null +++ b/assembler.c @@ -0,0 +1,203 @@ +/* assembler.c + * Created: 2024-12-03 + * Author: Aryadev Chavali + * Description: Compile brainfuck into native + */ + +#include "./assembler.h" + +#include + +// Structure representing a loop operative as indices into an ASM label array. +struct Label +{ + u64 cur, next; +}; + +/* First, the functions to call external utilities (badly written, I know). They + * presume a filled asm_buffer. */ + +void asm_write(buffer_t **asm_buffer) +{ + FILE *fp = fopen(asm_buffer[0]->name, "w"); + fwrite(asm_buffer[0]->data, asm_buffer[0]->size, 1, fp); + fclose(fp); +} + +int asm_assemble(const char *srcname, const char *objname) +{ +#ifdef DEBUG + char *format_str = "yasm -f elf64 -g dwarf2 -o %s %s"; +#else + char *format_str = "yasm -f elf64 -o %s %s"; +#endif + char command[snprintf(NULL, 0, format_str, objname, srcname) + 1]; + sprintf(command, format_str, objname, srcname); + return system(command); +} + +int asm_link(const char *objname, const char *outname) +{ + char *format_str = "ld -o %s %s"; + char command[snprintf(NULL, 0, format_str, outname, objname) + 1]; + sprintf(command, format_str, outname, objname); + return system(command); +} + +int asm_compile(buffer_t **asm_buffer, const char *objname, const char *outname) +{ + asm_write(asm_buffer); + asm_assemble(asm_buffer[0]->name, objname); + return asm_link(objname, outname); +} + +void asm_setup_buffer(buffer_t **asm_buffer, const char *outname) +{ + *asm_buffer = buffer_init_str(outname, NULL, 1024); + asm_buffer[0]->size = 0; +} + +// Write the initial boilerplate of the assembly file +void asm_write_init(buffer_t **asm_buffer) +{ + const char format_string[] = "section .bss\n" + " memory resb %d\n" + "section .text\n" + "global _start\n" + "_start:\n" + " mov r9, memory\n"; + char initial_assembly[snprintf(NULL, 0, format_string, MEMORY_DEFAULT) + 1]; + snprintf(initial_assembly, sizeof(initial_assembly), format_string, + MEMORY_DEFAULT); + buffer_append_bytes(asm_buffer, (u8 *)initial_assembly, + sizeof(initial_assembly) - 1); +} + +// Write the exit code for the assembly file +void asm_write_exit(buffer_t **asm_buffer) +{ + buffer_append_bytes(asm_buffer, + (u8 *)" mov rax, 60\n" + " mov rdi, 0\n" + " syscall\n", + 37); +} + +// A table which translates brainfuck operations into generic assembly code (I +// love C's array indexing) +static const struct +{ + char *str; + u64 len; +} table[] = { + [NEXT] = {.str = " add r9, 1\n", .len = 12}, + [PREV] = {.str = " sub r9, 1\n", .len = 12}, + [INC] = {.str = " add byte [r9], 1\n", .len = 19}, + [DEC] = {.str = " sub byte [r9], 1\n", .len = 19}, + [OUT] = {.str = " mov rax, 1\n" + " mov rdi, 1\n" + " mov rsi, r9\n" + " mov rdx, 1\n" + " syscall\n", + .len = 63}, + [READ] = {.str = " mov rax, 0\n" + " mov rdi, 0\n" + " mov rsi, r9\n" + " mov rdx, 1\n" + " syscall\n", + .len = 63}, +}; + +// Linear search in the ASM label array for a label with the current AST +// relative index +i64 get_abs_label(u64, struct Label *, u64); + +// Translate an AST relative jump to a specific ASM label, storing the current +// and next items in given pointers. +void ast_ref_to_asm_label(u64, struct Label *, u64, i64 *, i64 *); + +void asm_translate_nodes(buffer_t **asm_buffer, struct PResult nodes, + const char *src_name) +{ + // First pass: Setup the ASM label array + struct Label labels[nodes.labels * 2]; + u64 label_ptr = 0; + for (size_t i = 0; i < nodes.size; ++i) + if (nodes.nodes[i].type == LIN || nodes.nodes[i].type == LOUT) + labels[label_ptr++] = + (struct Label){.cur = i, .next = nodes.nodes[i].loop_ref}; + + // Second pass: Translating to assembly + for (size_t i = 0; i < nodes.size; ++i) + { + node_t node = nodes.nodes[i]; + if (node.type == LIN || node.type == LOUT) + { + // Translate the node-relative addresses to assembly-label-relative + // addresses + i64 cur_asm_label = 0, next_asm_label = 0; + ast_ref_to_asm_label(i, labels, nodes.labels * 2, &cur_asm_label, + &next_asm_label); + if (cur_asm_label == -1 || next_asm_label == -1) + { + print_error( + src_name, node.row, node.col, + "[ASSEMBLY ERROR]: Could not find label for current jump!\n"); + exit(1); + } + + // Format labels + char current_label[128], next_label[128]; + + sprintf(current_label, ".L%lu", cur_asm_label); + sprintf(next_label, ".L%lu", next_asm_label); + + // Setup format string for assembly jump code + char *format_string = NULL; + if (node.type == LIN) + { + format_string = "%s:\n" + " cmp byte [r9], 0\n" + " je %s\n"; + } + else + { + format_string = "%s:\n" + " cmp byte [r9], 0\n" + " jne %s\n"; + } + char formatted_string[snprintf(NULL, 0, format_string, current_label, + next_label) + + 1]; + snprintf(formatted_string, sizeof(formatted_string), format_string, + current_label, next_label); + buffer_append_bytes(asm_buffer, (u8 *)formatted_string, + sizeof(formatted_string) - 1); + } + else + { + // I love tables so goddamn much + buffer_append_bytes(asm_buffer, (u8 *)table[node.type].str, + table[node.type].len); + } + } +} + +/* Implementations for throwaway functions */ + +i64 get_abs_label(u64 ref, struct Label *labels, u64 size) +{ + for (u64 i = 0; i < size; ++i) + if (labels[i].cur == ref) + return (i64)i; + return -1; +} + +void ast_ref_to_asm_label(u64 ref, struct Label *labels, u64 size, i64 *cur, + i64 *next) +{ + *cur = get_abs_label(ref, labels, size); + if (*cur == -1) + return; + *next = get_abs_label(labels[*cur].next, labels, size); +} -- cgit v1.2.3-13-gbd6f