Fixed Makefile so it tracks dependencies better
It now tracks main.cpp's dependencies and rebuilds them as needed.
This commit is contained in:
33
src/base.cpp
Normal file
33
src/base.cpp
Normal file
@@ -0,0 +1,33 @@
|
||||
/* Copyright (C) 2024 Aryadev Chavali
|
||||
|
||||
* You may distribute and modify this code under the terms of the
|
||||
* GPLv2 license. You should have received a copy of the GPLv2
|
||||
* license with this file. If not, please write to:
|
||||
* aryadev@aryadevchavali.com.
|
||||
|
||||
* Created: 2024-04-14
|
||||
* Author: Aryadev Chavali
|
||||
* Description:
|
||||
*/
|
||||
|
||||
#include "./base.hpp"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
/* Read the whole of `filename` into memory as raw bytes (binary mode).
 *
 * Returns the file contents on success, or std::nullopt when `filename`
 * is null, the file cannot be opened, or a read error is reported by the
 * stream.
 */
std::optional<std::string> read_file(const char *filename)
{
  if (!filename)
    return std::nullopt;

  FILE *fp = fopen(filename, "rb");
  if (!fp)
    return std::nullopt;

  // Read in fixed-size chunks rather than trusting fseek/ftell: ftell can
  // fail (returning -1) or be meaningless for non-regular files, and a
  // short fread would otherwise leave part of the buffer unfilled.
  std::string contents;
  char chunk[4096];
  size_t got = 0;
  while ((got = fread(chunk, 1, sizeof(chunk), fp)) > 0)
    contents.append(chunk, got);

  // fread returns 0 for both EOF and failure; ferror tells them apart.
  const bool failed = ferror(fp) != 0;
  fclose(fp);

  if (failed)
    return std::nullopt;
  return contents;
}
|
||||
21
src/base.hpp
Normal file
21
src/base.hpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/* Copyright (C) 2024 Aryadev Chavali
|
||||
|
||||
* You may distribute and modify this code under the terms of the
|
||||
* GPLv2 license. You should have received a copy of the GPLv2
|
||||
* license with this file. If not, please write to:
|
||||
* aryadev@aryadevchavali.com.
|
||||
|
||||
* Created: 2024-04-14
|
||||
* Author: Aryadev Chavali
|
||||
* Description: Base library
|
||||
*/
|
||||
|
||||
#ifndef BASE_HPP
|
||||
#define BASE_HPP
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
/* Read the file at the given path into a string (defined in base.cpp).
 * Yields std::nullopt when the file cannot be opened. */
std::optional<std::string> read_file(const char *);
|
||||
|
||||
#endif
|
||||
565
src/lexer.cpp
Normal file
565
src/lexer.cpp
Normal file
@@ -0,0 +1,565 @@
|
||||
/* Copyright (C) 2024 Aryadev Chavali
|
||||
|
||||
* You may distribute and modify this code under the terms of the
|
||||
* GPLv2 license. You should have received a copy of the GPLv2
|
||||
* license with this file. If not, please write to:
|
||||
* aryadev@aryadevchavali.com.
|
||||
|
||||
* Created: 2024-04-14
|
||||
* Author: Aryadev Chavali
|
||||
* Description: Lexer for assembly language
|
||||
*/
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include <lib/inst.h>
|
||||
}
|
||||
|
||||
#include <algorithm>
|
||||
#include <tuple>
|
||||
|
||||
#include "./lexer.hpp"
|
||||
|
||||
// Compile-time tripwire: the build fails when NUMBER_OF_OPCODES (from
// lib/inst.h) is no longer 98, signalling that this lexer needs revisiting.
static_assert(NUMBER_OF_OPCODES == 98, "ERROR: Lexer is out of date");
|
||||
|
||||
using std::string, std::string_view, std::pair, std::make_pair;
|
||||
|
||||
// Character classes used by the lexer, each spelled out as a C string.
// Characters permitted inside a symbol.
const char *const VALID_SYMBOL =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV"
    "WXYZ0123456789-_.:%#$";
// Decimal digits.
const char *const VALID_DIGIT = "0123456789";
// Hexadecimal digits in either letter case.
const char *const VALID_HEX = "0123456789abcdefABCDEF";
|
||||
|
||||
/* True when `c` is one of the characters of the NUL-terminated string `s`.
 * The terminator itself never counts as a member, so '\0' is never found.
 */
bool is_char_in_s(char c, const char *s)
{
  for (const char *cursor = s; *cursor != '\0'; ++cursor)
  {
    if (*cursor == c)
      return true;
  }
  return false;
}
|
||||
|
||||
/* True when `src` begins with `match` AND is strictly longer than it, i.e.
 * there is at least one character left over after the prefix.
 */
bool initial_match(string_view src, string_view match)
{
  if (src.size() <= match.size())
    return false;
  return src.compare(0, match.size(), match) == 0;
}
|
||||
|
||||
pair<token_t, lerr_t> tokenise_symbol(string_view &source, size_t &column,
|
||||
size_t line)
|
||||
{
|
||||
auto end = source.find_first_not_of(VALID_SYMBOL);
|
||||
if (end == string::npos)
|
||||
end = source.size() - 1;
|
||||
string sym{source.substr(0, end)};
|
||||
source.remove_prefix(end);
|
||||
std::transform(sym.begin(), sym.end(), sym.begin(), ::toupper);
|
||||
|
||||
token_t t{};
|
||||
|
||||
if (sym == "%CONST")
|
||||
{
|
||||
t.type = token_type_t::PP_CONST;
|
||||
}
|
||||
else if (sym == "%USE")
|
||||
{
|
||||
t.type = token_type_t::PP_USE;
|
||||
}
|
||||
else if (sym == "%END")
|
||||
{
|
||||
t.type = token_type_t::PP_END;
|
||||
}
|
||||
else if (sym[0] == '%')
|
||||
{
|
||||
return make_pair(
|
||||
t, lerr_t(lerr_type_t::INVALID_PREPROCESSOR_DIRECTIVE, column, line));
|
||||
}
|
||||
else if (sym.size() > 1 && sym[0] == '$')
|
||||
{
|
||||
t = token_t(token_type_t::PP_REFERENCE, sym.substr(1));
|
||||
}
|
||||
else if (sym == "NOOP")
|
||||
{
|
||||
t.type = token_type_t::NOOP;
|
||||
}
|
||||
else if (sym == "HALT")
|
||||
{
|
||||
t.type = token_type_t::HALT;
|
||||
}
|
||||
else if (initial_match(sym, "PUSH.REG."))
|
||||
{
|
||||
t = token_t(token_type_t::PUSH_REG, sym.substr(9));
|
||||
}
|
||||
else if (initial_match(sym, "PUSH."))
|
||||
{
|
||||
t = token_t(token_type_t::PUSH, sym.substr(5));
|
||||
}
|
||||
else if (initial_match(sym, "POP."))
|
||||
{
|
||||
t = token_t(token_type_t::POP, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "MOV."))
|
||||
{
|
||||
t = token_t(token_type_t::MOV, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "DUP."))
|
||||
{
|
||||
t = token_t(token_type_t::DUP, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "MALLOC.STACK."))
|
||||
{
|
||||
t = token_t(token_type_t::MALLOC_STACK, sym.substr(13));
|
||||
}
|
||||
else if (initial_match(sym, "MALLOC."))
|
||||
{
|
||||
t = token_t(token_type_t::MALLOC, sym.substr(7));
|
||||
}
|
||||
else if (initial_match(sym, "MSET.STACK."))
|
||||
{
|
||||
t = token_t(token_type_t::MSET_STACK, sym.substr(11));
|
||||
}
|
||||
else if (initial_match(sym, "MSET."))
|
||||
{
|
||||
t = token_t(token_type_t::MSET, sym.substr(5));
|
||||
}
|
||||
else if (initial_match(sym, "MGET.STACK."))
|
||||
{
|
||||
t = token_t(token_type_t::MGET_STACK, sym.substr(11));
|
||||
}
|
||||
else if (initial_match(sym, "MGET."))
|
||||
{
|
||||
t = token_t(token_type_t::MGET, sym.substr(5));
|
||||
}
|
||||
else if (sym == "MDELETE")
|
||||
{
|
||||
t.type = token_type_t::MDELETE;
|
||||
}
|
||||
else if (sym == "MSIZE")
|
||||
{
|
||||
t.type = token_type_t::MSIZE;
|
||||
}
|
||||
else if (initial_match(sym, "NOT."))
|
||||
{
|
||||
t = token_t(token_type_t::NOT, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "OR."))
|
||||
{
|
||||
t = token_t(token_type_t::OR, sym.substr(3));
|
||||
}
|
||||
else if (initial_match(sym, "AND."))
|
||||
{
|
||||
t = token_t(token_type_t::AND, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "XOR."))
|
||||
{
|
||||
t = token_t(token_type_t::XOR, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "EQ."))
|
||||
{
|
||||
t = token_t(token_type_t::EQ, sym.substr(3));
|
||||
}
|
||||
else if (initial_match(sym, "LTE."))
|
||||
{
|
||||
t = token_t(token_type_t::LTE, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "LT."))
|
||||
{
|
||||
t = token_t(token_type_t::LT, sym.substr(3));
|
||||
}
|
||||
else if (initial_match(sym, "GTE."))
|
||||
{
|
||||
t = token_t(token_type_t::GTE, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "GT."))
|
||||
{
|
||||
t = token_t(token_type_t::GT, sym.substr(3));
|
||||
}
|
||||
else if (initial_match(sym, "SUB."))
|
||||
{
|
||||
t = token_t(token_type_t::SUB, sym.substr(4));
|
||||
}
|
||||
else if (initial_match(sym, "PLUS."))
|
||||
{
|
||||
t = token_t(token_type_t::PLUS, sym.substr(5));
|
||||
}
|
||||
else if (initial_match(sym, "MULT."))
|
||||
{
|
||||
t = token_t(token_type_t::MULT, sym.substr(5));
|
||||
}
|
||||
else if (initial_match(sym, "PRINT."))
|
||||
{
|
||||
t = token_t(token_type_t::PRINT, sym.substr(6));
|
||||
}
|
||||
else if (sym == "JUMP.ABS")
|
||||
{
|
||||
t.type = token_type_t::JUMP_ABS;
|
||||
}
|
||||
else if (sym == "JUMP.STACK")
|
||||
{
|
||||
t.type = token_type_t::JUMP_STACK;
|
||||
}
|
||||
else if (initial_match(sym, "JUMP.IF."))
|
||||
{
|
||||
t = token_t(token_type_t::JUMP_IF, sym.substr(8));
|
||||
}
|
||||
else if (sym == "CALL.STACK")
|
||||
{
|
||||
t.type = token_type_t::CALL_STACK;
|
||||
}
|
||||
else if (sym == "CALL")
|
||||
{
|
||||
t.type = token_type_t::CALL;
|
||||
}
|
||||
else if (sym == "RET")
|
||||
{
|
||||
t.type = token_type_t::RET;
|
||||
}
|
||||
else if (sym == "GLOBAL")
|
||||
{
|
||||
t.type = token_type_t::GLOBAL;
|
||||
}
|
||||
else
|
||||
{
|
||||
t.type = token_type_t::SYMBOL;
|
||||
}
|
||||
|
||||
if (t.content == "")
|
||||
t.content = sym;
|
||||
t.column = column;
|
||||
column += sym.size() - 1;
|
||||
return make_pair(t, lerr_t());
|
||||
}
|
||||
|
||||
/* Lex a decimal literal (optionally preceded by '-') from the front of
 * `source`.
 *
 * The consumed characters are removed from `source`; the returned
 * LITERAL_NUMBER token holds the sign and digits as text and records the
 * value of `column` on entry.  `column` is advanced by the number of
 * characters consumed.
 */
token_t tokenise_literal_number(string_view &source, size_t &column)
{
  bool is_negative = false;
  if (!source.empty() && source[0] == '-')
  {
    is_negative = true;
    source.remove_prefix(1);
  }

  auto end = source.find_first_not_of(VALID_DIGIT);
  // Digits run to the end of the input: take them all.  (size() - 1 dropped
  // the last digit, and consumed nothing for a single-digit literal.)
  if (end == string::npos)
    end = source.size();
  string digits{source.substr(0, end)};
  source.remove_prefix(end);

  token_t t{token_type_t::LITERAL_NUMBER, (is_negative ? "-" : "") + digits,
            column};

  column += digits.size() + (is_negative ? 1 : 0);

  return t;
}
|
||||
|
||||
/* Lex a hexadecimal literal from the front of `source`.
 *
 * `source` may be positioned either at the full `0x` prefix or at the `x`
 * alone; in both cases the prefix is removed, followed by the longest run
 * of VALID_HEX characters.  The returned LITERAL_NUMBER token has content
 * "0x<digits>" and records the value of `column` on entry.  `column` is
 * advanced by the number of characters consumed.
 */
token_t tokenise_literal_hex(string_view &source, size_t &column)
{
  // Work out how much prefix to strip.  Stripping a single character when
  // positioned at "0x" would leave the 'x' behind and yield no digits.
  size_t prefix_size = 1;
  if (source.size() >= 2 && source[0] == '0' && source[1] == 'x')
    prefix_size = 2;
  else if (source.empty())
    prefix_size = 0;
  source.remove_prefix(prefix_size);

  auto end = source.find_first_not_of(VALID_HEX);
  // Digits run to the end of the input: take them all rather than dropping
  // the final one.
  if (end == string::npos)
    end = source.size();
  string digits{source.substr(0, end)};
  source.remove_prefix(end);

  token_t t = {token_type_t::LITERAL_NUMBER, "0x" + digits, column};

  column += digits.size() + prefix_size;
  return t;
}
|
||||
|
||||
pair<token_t, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
|
||||
size_t &line)
|
||||
{
|
||||
token_t t{};
|
||||
auto end = source.find('\'', 1);
|
||||
if (source.size() < 3 || end == 1 || end > 3)
|
||||
return make_pair(t,
|
||||
lerr_t(lerr_type_t::INVALID_CHAR_LITERAL, column, line));
|
||||
else if (source[1] == '\\')
|
||||
{
|
||||
// Escape sequence
|
||||
char escape = '\0';
|
||||
if (source.size() < 4 || source[3] != '\'')
|
||||
return make_pair(t,
|
||||
lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
|
||||
column, line));
|
||||
switch (source[2])
|
||||
{
|
||||
case 'n':
|
||||
escape = '\n';
|
||||
break;
|
||||
case 't':
|
||||
escape = '\t';
|
||||
break;
|
||||
case 'r':
|
||||
escape = '\r';
|
||||
break;
|
||||
case '\\':
|
||||
escape = '\\';
|
||||
break;
|
||||
default:
|
||||
column += 2;
|
||||
return make_pair(t,
|
||||
lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
|
||||
column, line));
|
||||
break;
|
||||
}
|
||||
t = token_t{token_type_t::LITERAL_CHAR, std::to_string(escape), column};
|
||||
column += 4;
|
||||
source.remove_prefix(4);
|
||||
}
|
||||
else
|
||||
{
|
||||
t = token_t(token_type_t::LITERAL_CHAR, std::to_string(source[1]));
|
||||
column += 3;
|
||||
source.remove_prefix(3);
|
||||
}
|
||||
return make_pair(t, lerr_t());
|
||||
}
|
||||
|
||||
/* Lex a string literal from the front of `source`.
 *
 * `source` must begin with the opening double quote and `end` must be the
 * index of the closing double quote within `source`.  Both quotes and the
 * text between them are removed from `source`; the text between them
 * becomes the content of the returned LITERAL_STRING token, which records
 * the value of `column` on entry.  `column` is advanced by `end + 1`.
 */
token_t tokenise_literal_string(string_view &source, size_t &column, size_t end)
{
  const size_t start_column = column;

  // Skip the opening quote; the closing quote is now at index end - 1.
  source.remove_prefix(1);
  string body(source.substr(0, end - 1));

  // Drop the body together with the closing quote.
  source.remove_prefix(end);
  column += end + 1;

  return token_t{token_type_t::LITERAL_STRING, body, start_column};
}
|
||||
|
||||
/* Lex the whole of `source`, appending one heap-allocated token per lexeme
 * to `tokens`.
 *
 * Whitespace (and NUL bytes) separate lexemes; `;` starts a comment running
 * to the end of the line.  Recognised lexemes: `*`, string literals, char
 * literals, hexadecimal (`0x...`) and decimal numbers, and symbols.
 *
 * Returns an OK error on success.  On failure returns the first error
 * encountered; tokens appended before the failure are left in `tokens`.
 * Every appended token is allocated with `new`.
 */
lerr_t tokenise_buffer(string_view source, std::vector<token_t *> &tokens)
{
  // The <cctype> predicates have undefined behaviour for negative values,
  // which plain char can hold; go through unsigned char.
  const auto is_space = [](char c)
  { return isspace(static_cast<unsigned char>(c)) != 0; };
  const auto is_digit = [](char c)
  { return isdigit(static_cast<unsigned char>(c)) != 0; };

  size_t column = 0, line = 1;
  while (source.size() > 0)
  {
    const size_t remaining_before = source.size();
    bool is_token                 = true;
    char first                    = source[0];
    token_t t{};
    if (is_space(first) || first == '\0')
    {
      size_t i;
      for (i = 0;
           i < source.size() && (is_space(source[i]) || source[i] == '\0'); ++i)
      {
        ++column;
        if (source[i] == '\n')
        {
          column = 0;
          ++line;
        }
      }
      ++column;
      source.remove_prefix(i);
      is_token = false;
    }
    else if (first == ';')
    {
      // Comment: discard up to and including the newline.  A comment on the
      // last line may have no newline, in which case only what is left can
      // be removed (remove_prefix past the end is undefined behaviour).
      const auto newline = source.find('\n');
      column             = 0;
      ++line;
      source.remove_prefix(newline == string::npos ? source.size()
                                                   : newline + 1);
      is_token = false;
    }
    else if (first == '*')
    {
      t = token_t(token_type_t::STAR, "", column);
      source.remove_prefix(1);
    }
    else if (first == '\"')
    {
      auto end = source.find('\"', 1);
      if (end == string::npos)
        return lerr_t(lerr_type_t::INVALID_STRING_LITERAL, column, line);
      t = tokenise_literal_string(source, column, end);
    }
    else if (first == '\'')
    {
      lerr_t lerr;
      std::tie(t, lerr) = tokenise_literal_char(source, column, line);
      if (lerr.type != lerr_type_t::OK)
        return lerr;
    }
    // Hexadecimal must be tested before decimal: "0x.." also starts with a
    // digit, so the decimal branch would otherwise always claim it.
    else if (first == '0' && source.size() > 2 && source[1] == 'x' &&
             is_char_in_s(source[2], VALID_HEX))
    {
      // Validate the digits after the "0x" prefix (the 'x' itself is not a
      // hex digit, so the scan must start past it).
      auto end = source.find_first_not_of(VALID_HEX, 2);
      if (end != string::npos && !is_space(source[end]))
        return lerr_t(lerr_type_t::INVALID_NUMBER_LITERAL, column, line);
      // Consume the leading '0' here so the helper starts at the 'x', then
      // make the token point back at the start of the literal.
      const size_t literal_column = column;
      source.remove_prefix(1);
      ++column;
      t        = tokenise_literal_hex(source, column);
      t.column = literal_column;
    }
    else if (is_digit(first) ||
             (source.size() > 1 && first == '-' && is_digit(source[1])))
    {
      // A number must be followed by whitespace or the end of the input.
      auto end = source.find_first_not_of(VALID_DIGIT, first == '-' ? 1 : 0);
      if (end != string::npos && !is_space(source[end]))
        return lerr_t(lerr_type_t::INVALID_NUMBER_LITERAL, column, line);
      t = tokenise_literal_number(source, column);
    }
    else if (is_char_in_s(first, VALID_SYMBOL))
    {
      lerr_t lerr;
      std::tie(t, lerr) = tokenise_symbol(source, column, line);
      if (lerr.type != lerr_type_t::OK)
        return lerr;
    }
    else
    {
      ++column;
      return lerr_t{lerr_type_t::UNKNOWN_LEXEME, column, line};
    }

    // Safety net: every iteration must consume input, otherwise this loop
    // would spin forever allocating tokens.
    if (source.size() == remaining_before)
      return lerr_t{lerr_type_t::UNKNOWN_LEXEME, column, line};

    if (is_token)
    {
      t.line       = line;
      token_t *acc = new token_t(t);
      tokens.push_back(acc);
    }
  }
  return lerr_t{};
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, token_t &t)
|
||||
{
|
||||
return os << token_type_as_cstr(t.type) << "(`" << t.content << "`)@"
|
||||
<< t.line << ", " << t.column;
|
||||
}
|
||||
|
||||
token_t::token_t()
|
||||
{}
|
||||
|
||||
token_t::token_t(token_type_t type, string content, size_t col, size_t line)
|
||||
: type{type}, column{col}, line{line}, content{content}
|
||||
{}
|
||||
|
||||
/* Name of a token type as a C string; the name is the enumerator's
 * spelling.  Returns "" for a value that is not a listed enumerator.
 */
const char *token_type_as_cstr(token_type_t type)
{
  struct type_name_t
  {
    token_type_t type;
    const char *name;
  };
  static const type_name_t NAMES[] = {
      {token_type_t::PP_USE, "PP_USE"},
      {token_type_t::PP_CONST, "PP_CONST"},
      {token_type_t::PP_END, "PP_END"},
      {token_type_t::PP_REFERENCE, "PP_REFERENCE"},
      {token_type_t::GLOBAL, "GLOBAL"},
      {token_type_t::STAR, "STAR"},
      {token_type_t::LITERAL_STRING, "LITERAL_STRING"},
      {token_type_t::LITERAL_NUMBER, "LITERAL_NUMBER"},
      {token_type_t::LITERAL_CHAR, "LITERAL_CHAR"},
      {token_type_t::NOOP, "NOOP"},
      {token_type_t::HALT, "HALT"},
      {token_type_t::PUSH, "PUSH"},
      {token_type_t::POP, "POP"},
      {token_type_t::PUSH_REG, "PUSH_REG"},
      {token_type_t::MOV, "MOV"},
      {token_type_t::DUP, "DUP"},
      {token_type_t::MALLOC, "MALLOC"},
      {token_type_t::MALLOC_STACK, "MALLOC_STACK"},
      {token_type_t::MSET, "MSET"},
      {token_type_t::MSET_STACK, "MSET_STACK"},
      {token_type_t::MGET, "MGET"},
      {token_type_t::MGET_STACK, "MGET_STACK"},
      {token_type_t::MDELETE, "MDELETE"},
      {token_type_t::MSIZE, "MSIZE"},
      {token_type_t::NOT, "NOT"},
      {token_type_t::OR, "OR"},
      {token_type_t::AND, "AND"},
      {token_type_t::XOR, "XOR"},
      {token_type_t::EQ, "EQ"},
      {token_type_t::LT, "LT"},
      {token_type_t::LTE, "LTE"},
      {token_type_t::GT, "GT"},
      {token_type_t::GTE, "GTE"},
      {token_type_t::PLUS, "PLUS"},
      {token_type_t::SUB, "SUB"},
      {token_type_t::MULT, "MULT"},
      {token_type_t::PRINT, "PRINT"},
      {token_type_t::JUMP_ABS, "JUMP_ABS"},
      {token_type_t::JUMP_STACK, "JUMP_STACK"},
      {token_type_t::JUMP_IF, "JUMP_IF"},
      {token_type_t::CALL, "CALL"},
      {token_type_t::CALL_STACK, "CALL_STACK"},
      {token_type_t::RET, "RET"},
      {token_type_t::SYMBOL, "SYMBOL"},
  };

  for (const type_name_t &entry : NAMES)
  {
    if (entry.type == type)
      return entry.name;
  }
  return "";
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, lerr_t &lerr)
|
||||
{
|
||||
os << lerr.line << ":" << lerr.col << ": ";
|
||||
switch (lerr.type)
|
||||
{
|
||||
case lerr_type_t::OK:
|
||||
os << "OK";
|
||||
break;
|
||||
case lerr_type_t::INVALID_CHAR_LITERAL:
|
||||
os << "INVALID_CHAR_LITERAL";
|
||||
break;
|
||||
case lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE:
|
||||
os << "INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE";
|
||||
break;
|
||||
case lerr_type_t::INVALID_STRING_LITERAL:
|
||||
os << "INVALID_STRING_LITERAL";
|
||||
break;
|
||||
case lerr_type_t::INVALID_NUMBER_LITERAL:
|
||||
os << "INVALID_NUMBER_LITERAL";
|
||||
break;
|
||||
case lerr_type_t::INVALID_PREPROCESSOR_DIRECTIVE:
|
||||
os << "INVALID_PREPROCESSOR_DIRECTIVE";
|
||||
break;
|
||||
case lerr_type_t::UNKNOWN_LEXEME:
|
||||
os << "UNKNOWN_LEXEME";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
/* Build a lexer error of the given type located at (line, col). */
lerr_t::lerr_t(lerr_type_t type, size_t col, size_t line)
    : col(col), line(line), type(type)
{
}
|
||||
106
src/lexer.hpp
Normal file
106
src/lexer.hpp
Normal file
@@ -0,0 +1,106 @@
|
||||
/* Copyright (C) 2024 Aryadev Chavali
|
||||
|
||||
* You may distribute and modify this code under the terms of the
|
||||
* GPLv2 license. You should have received a copy of the GPLv2
|
||||
* license with this file. If not, please write to:
|
||||
* aryadev@aryadevchavali.com.
|
||||
|
||||
* Created: 2024-04-14
|
||||
* Author: Aryadev Chavali
|
||||
* Description: Lexer for assembly language
|
||||
*/
|
||||
|
||||
#ifndef LEXER_HPP
|
||||
#define LEXER_HPP
|
||||
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
/* Every kind of token the lexer can produce. */
enum class token_type_t
{
  // Preprocessor
  PP_CONST,     // %const(<symbol>)...
  PP_USE,       // %use <string>
  PP_END,       // %end
  PP_REFERENCE, // $<symbol>

  GLOBAL,
  STAR,

  // Literals
  LITERAL_NUMBER,
  LITERAL_CHAR,
  LITERAL_STRING,

  // Instructions
  NOOP,
  HALT,
  PUSH,
  POP,
  PUSH_REG,
  MOV,
  DUP,
  MALLOC,
  MALLOC_STACK,
  MSET,
  MSET_STACK,
  MGET,
  MGET_STACK,
  MDELETE,
  MSIZE,
  NOT,
  OR,
  AND,
  XOR,
  EQ,
  LT,
  LTE,
  GT,
  GTE,
  PLUS,
  SUB,
  MULT,
  PRINT,
  JUMP_ABS,
  JUMP_STACK,
  JUMP_IF,
  CALL,
  CALL_STACK,
  RET,

  // Anything that is not one of the above keywords
  SYMBOL,
};
|
||||
|
||||
// Name of a token type as a C string (defined in lexer.cpp).
const char *token_type_as_cstr(token_type_t type);
|
||||
|
||||
struct token_t
|
||||
{
|
||||
token_type_t type;
|
||||
size_t column, line;
|
||||
std::string content;
|
||||
|
||||
token_t();
|
||||
token_t(token_type_t, std::string, size_t col = 0, size_t line = 0);
|
||||
};
|
||||
|
||||
// Stream a token in human-readable form (defined in lexer.cpp).
std::ostream &operator<<(std::ostream &, token_t &);
|
||||
|
||||
/* Kinds of error the lexer can report; OK (zero) means no error. */
enum class lerr_type_t
{
  OK = 0,
  INVALID_CHAR_LITERAL,                 // malformed '...' literal
  INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE, // bad or unterminated '\x' escape
  INVALID_STRING_LITERAL,               // no closing double quote
  INVALID_NUMBER_LITERAL,               // number not followed by whitespace
  INVALID_PREPROCESSOR_DIRECTIVE,       // unrecognised %directive
  UNKNOWN_LEXEME,                       // character that starts no lexeme
};
|
||||
|
||||
struct lerr_t
|
||||
{
|
||||
size_t col, line;
|
||||
lerr_type_t type;
|
||||
|
||||
lerr_t(lerr_type_t type = lerr_type_t::OK, size_t col = 0, size_t line = 0);
|
||||
};
|
||||
|
||||
// Stream a lexer error as "line:col: NAME" (defined in lexer.cpp).
std::ostream &operator<<(std::ostream &, lerr_t &);
|
||||
|
||||
// Lex the given text, appending `new`-allocated tokens to the vector; the
// result is OK on success or the first error met (defined in lexer.cpp).
lerr_t tokenise_buffer(std::string_view, std::vector<token_t *> &);
|
||||
|
||||
#endif
|
||||
148
src/main.cpp
Normal file
148
src/main.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
/* Copyright (C) 2024 Aryadev Chavali
|
||||
|
||||
* You may distribute and modify this code under the terms of the
|
||||
* GPLv2 license. You should have received a copy of the GPLv2
|
||||
* license with this file. If not, please write to:
|
||||
* aryadev@aryadevchavali.com.
|
||||
|
||||
* Created: 2024-04-14
|
||||
* Author: Aryadev Chavali
|
||||
* Description: Entrypoint for assembly program
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include <lib/inst.h>
|
||||
}
|
||||
|
||||
#include "./base.hpp"
|
||||
#include "./lexer.hpp"
|
||||
#include "./preprocesser.hpp"
|
||||
|
||||
using std::cout, std::cerr, std::endl;
|
||||
using std::pair, std::string, std::string_view, std::vector;
|
||||
|
||||
/* Write the command-line synopsis for `program_name` to the stream `fp`. */
void usage(const char *program_name, FILE *fp)
{
  static const char HELP_FORMAT[] =
      "Usage: %s FILE OUT-FILE\n"
      "\tFILE: Source code to compile\n"
      "\tOUT-FILE: Name of file to store bytecode\n";
  fprintf(fp, HELP_FORMAT, program_name);
}
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
if (argc == 1 || argc > 3)
|
||||
{
|
||||
usage(argv[0], stderr);
|
||||
return -1;
|
||||
}
|
||||
int ret = 0;
|
||||
const char *source_name = argv[1];
|
||||
const char *out_name = argv[2];
|
||||
(void)out_name;
|
||||
|
||||
#if VERBOSE >= 1
|
||||
printf("[%sASSEMBLER%s]: Assembling `%s` to `%s`\n", TERM_YELLOW, TERM_RESET,
|
||||
source_name, out_name);
|
||||
#endif
|
||||
|
||||
auto file_source = read_file(source_name);
|
||||
|
||||
#if VERBOSE >= 1
|
||||
printf("[%sASSEMBLER%s]: `%s` -> %lu bytes\n", TERM_YELLOW, TERM_RESET,
|
||||
source_name, file_source.has_value() ? file_source.value().size() : 0);
|
||||
#endif
|
||||
|
||||
string source_str;
|
||||
string_view original;
|
||||
string_view src;
|
||||
vector<token_t *> tokens, preprocessed_tokens;
|
||||
lerr_t lerr;
|
||||
pp_err_t pp_err;
|
||||
|
||||
// Highest scoped variable cut off point
|
||||
|
||||
if (file_source.has_value())
|
||||
source_str = file_source.value();
|
||||
else
|
||||
{
|
||||
cerr << "ERROR: file `" << source_name << "` does not exist!" << endl;
|
||||
ret = -1;
|
||||
goto end;
|
||||
}
|
||||
original = string_view{source_str};
|
||||
src = string_view{source_str};
|
||||
lerr = tokenise_buffer(src, tokens);
|
||||
|
||||
if (lerr.type != lerr_type_t::OK)
|
||||
{
|
||||
cerr << source_name << ":" << lerr << endl;
|
||||
ret = 255 - static_cast<int>(lerr.type);
|
||||
goto end;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
#if VERBOSE >= 1
|
||||
printf("[%sLEXER%s]: %lu bytes -> %lu tokens\n", TERM_GREEN, TERM_RESET,
|
||||
source_str.size(), tokens.size());
|
||||
#endif
|
||||
|
||||
#if VERBOSE == 2
|
||||
printf("[%sLEXER%s]: Tokens "
|
||||
"parsed:\n----------------------------------------------------------"
|
||||
"----------------------\n",
|
||||
TERM_GREEN, TERM_RESET);
|
||||
for (auto token : tokens)
|
||||
cout << "\t" << *token << endl;
|
||||
printf("-------------------------------------------------------------"
|
||||
"-------------------\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
// preprocessing
|
||||
pp_err = preprocesser(tokens, preprocessed_tokens);
|
||||
if (pp_err.type != pp_err_type_t::OK)
|
||||
{
|
||||
cerr << source_name << ":" << pp_err.reference->line << ":"
|
||||
<< pp_err.reference->column << ": " << pp_err << endl;
|
||||
ret = 255 - static_cast<int>(pp_err.type);
|
||||
goto end;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
#if VERBOSE >= 1
|
||||
printf("[%sPREPROCESSOR%s]: %lu tokens -> %lu tokens\n", TERM_GREEN,
|
||||
TERM_RESET, tokens.size(), preprocessed_tokens.size());
|
||||
#endif
|
||||
#if VERBOSE == 2
|
||||
printf("[%sPREPROCESSOR%s]: Processed tokens: "
|
||||
"\n-----------------------------------------------------------------"
|
||||
"---------------\n",
|
||||
TERM_GREEN, TERM_RESET);
|
||||
for (auto token : preprocessed_tokens)
|
||||
cout << "\t" << *token << endl;
|
||||
printf("-------------------------------------------------------------"
|
||||
"-------------------\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
end:
|
||||
for (auto token : tokens)
|
||||
delete token;
|
||||
for (auto token : preprocessed_tokens)
|
||||
delete token;
|
||||
|
||||
return ret;
|
||||
}
|
||||
218
src/preprocesser.cpp
Normal file
218
src/preprocesser.cpp
Normal file
@@ -0,0 +1,218 @@
|
||||
/* Copyright (C) 2024 Aryadev Chavali
|
||||
|
||||
* You may distribute and modify this code under the terms of the
|
||||
* GPLv2 license. You should have received a copy of the GPLv2
|
||||
* license with this file. If not, please write to:
|
||||
* aryadev@aryadevchavali.com.
|
||||
|
||||
* Created: 2024-04-14
|
||||
* Author: Aryadev Chavali
|
||||
* Description: Preprocessor which occurs after lexing before parsing.
|
||||
*/
|
||||
|
||||
#include "./preprocesser.hpp"
|
||||
#include "./base.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
|
||||
using std::pair, std::vector, std::make_pair, std::string, std::string_view;
|
||||
|
||||
// Delete every token pointed to by the elements of container V.  The
// container itself is left untouched (its elements become dangling), so
// callers clear or discard it afterwards.
#define VCLEAR(V)                      \
  do                                   \
  {                                    \
    for (token_t * vclear_token_ : (V)) \
      delete vclear_token_;            \
  } while (0);
|
||||
|
||||
/* Expand `%use "<file>"` directives.
 *
 * Walks `tokens`; each PP_USE followed by a LITERAL_STRING is replaced by
 * the tokens lexed from the named file, and every other token is copied.
 * `vec_out` receives freshly allocated tokens which the caller owns;
 * `tokens` is not modified.
 *
 * On error every token placed in `vec_out` is deleted, `vec_out` is
 * emptied, and the returned error references a token of the input:
 *   EXPECTED_STRING  - PP_USE not followed by a string literal
 *   FILE_NONEXISTENT - the named file could not be read
 *   FILE_PARSE_ERROR - the named file failed to lex (lexer error attached)
 */
pp_err_t preprocess_use_blocks(const vector<token_t *> &tokens,
                               vector<token_t *> &vec_out)
{
  for (size_t i = 0; i < tokens.size(); ++i)
  {
    token_t *t = tokens[i];
    if (t->type == token_type_t::PP_USE)
    {
      if (i + 1 >= tokens.size() ||
          tokens[i + 1]->type != token_type_t::LITERAL_STRING)
      {
        VCLEAR(vec_out);
        vec_out.clear();
        return pp_err_t(pp_err_type_t::EXPECTED_STRING, t);
      }

      token_t *name = tokens[i + 1];
      auto source   = read_file(name->content.c_str());
      if (!source)
      {
        VCLEAR(vec_out);
        vec_out.clear();
        return pp_err_t(pp_err_type_t::FILE_NONEXISTENT, name);
      }

      std::vector<token_t *> ftokens;
      lerr_t lerr = tokenise_buffer(source.value(), ftokens);
      if (lerr.type != lerr_type_t::OK)
      {
        // The lexer leaves the tokens it produced before failing in
        // ftokens; release them as well, or they leak.
        VCLEAR(ftokens);
        VCLEAR(vec_out);
        vec_out.clear();
        return pp_err_t(pp_err_type_t::FILE_PARSE_ERROR, name, lerr);
      }

      // Ownership of the included file's tokens moves to vec_out.
      vec_out.insert(vec_out.end(), ftokens.begin(), ftokens.end());

      // Skip the file-name token as well.
      ++i;
    }
    else
      vec_out.push_back(new token_t{*t});
  }
  return pp_err_t();
}
|
||||
|
||||
/* Half-open range [start, end) of token indices forming the body of one
 * %const block.
 */
struct const_t
{
  size_t start;
  size_t end;
};
|
||||
|
||||
/* Expand `%const <name> ... %end` blocks and `$<name>` references.
 *
 * First pass: record, for each PP_CONST, the range of tokens between its
 * name and the matching PP_END.  Second pass: copy `tokens` into `vec_out`,
 * dropping the block definitions and replacing each PP_REFERENCE by copies
 * of the tokens of the block it names.  When no block is defined at all the
 * input is copied through unchanged.
 *
 * `vec_out` receives freshly allocated tokens which the caller owns;
 * `tokens` is not modified.  Errors always reference a token of `tokens`:
 *   EXPECTED_NAME - PP_CONST not followed by a SYMBOL
 *   EXPECTED_END  - PP_CONST without a closing PP_END
 *   UNKNOWN_NAME  - reference to an undefined block (vec_out is released
 *                   and emptied)
 */
pp_err_t preprocess_const_blocks(const vector<token_t *> &tokens,
                                 vector<token_t *> &vec_out)
{
  // Keys view the content of tokens in `tokens`, which outlive this map.
  std::unordered_map<string_view, const_t> blocks;
  for (size_t i = 0; i < tokens.size(); ++i)
  {
    token_t *t = tokens[i];
    if (t->type != token_type_t::PP_CONST)
      continue;

    // Attach the offending %const token so callers can report a position
    // (an error without a reference has nothing to point at).
    if (i + 1 >= tokens.size() || tokens[i + 1]->type != token_type_t::SYMBOL)
      return pp_err_t(pp_err_type_t::EXPECTED_NAME, t);

    string_view capture = tokens[++i]->content;

    ++i;
    const size_t block_start = i;
    for (; i < tokens.size() && tokens[i]->type != token_type_t::PP_END; ++i)
      continue;

    if (i == tokens.size())
      return pp_err_t(pp_err_type_t::EXPECTED_END, t);

    blocks[capture] = const_t{block_start, i};
  }

  if (blocks.size() == 0)
  {
    // Just construct a new vector and carry on
    for (token_t *token : tokens)
      vec_out.push_back(new token_t{*token});
  }
  else
  {
    for (size_t i = 0; i < tokens.size(); ++i)
    {
      token_t *token = tokens[i];
      // Skip the tokens that construct the const.  This stops on the
      // PP_END, which the outer ++i then steps over.
      if (token->type == token_type_t::PP_CONST)
        for (; i < tokens.size() && tokens[i]->type != token_type_t::PP_END;
             ++i)
          continue;
      else if (token->type == token_type_t::PP_REFERENCE)
      {
        auto it = blocks.find(token->content);
        if (it == blocks.end())
        {
          VCLEAR(vec_out);
          vec_out.clear();
          return pp_err_t(pp_err_type_t::UNKNOWN_NAME, token);
        }

        // Splice in copies of the block body.
        const const_t block = it->second;
        for (size_t j = block.start; j < block.end; ++j)
          vec_out.push_back(new token_t{*tokens[j]});
      }
      else
        vec_out.push_back(new token_t{*token});
    }
  }

  return pp_err_t();
}
|
||||
|
||||
/* Run every preprocessing stage over `tokens`: first %use expansion, then
 * %const expansion.
 *
 * `tokens` stays owned by the caller and is never freed or aliased here.
 * On success `vec_out` holds the fully preprocessed tokens, owned by the
 * caller.  On failure the stage's error is returned and:
 *   - if %use expansion failed, `vec_out` is left untouched (the error
 *     references a token of `tokens`);
 *   - if %const expansion failed, `vec_out` receives the %use-expanded
 *     tokens, because the error references one of them; the caller owns
 *     and must free them.
 */
pp_err_t preprocesser(const vector<token_t *> &tokens,
                      vector<token_t *> &vec_out)
{
  vector<token_t *> use_block_tokens;
  pp_err_t pperr = preprocess_use_blocks(tokens, use_block_tokens);
  if (pperr.type != pp_err_type_t::OK)
  {
    // Do NOT hand `tokens` back through vec_out: the caller already owns
    // those pointers and would free them twice.  Release anything the
    // failed stage may have left behind (normally nothing).
    VCLEAR(use_block_tokens);
    return pperr;
  }

  vector<token_t *> const_block_tokens;
  pperr = preprocess_const_blocks(use_block_tokens, const_block_tokens);
  if (pperr.type != pp_err_type_t::OK)
  {
    // The caller's `tokens` must not be deleted here (the caller frees
    // them).  Release any partial output of the failed stage and keep the
    // intermediate tokens alive for the error's reference.
    VCLEAR(const_block_tokens);
    vec_out = use_block_tokens;
    return pperr;
  }

  // The intermediate stage is no longer needed once fully expanded.
  VCLEAR(use_block_tokens);
  vec_out = const_block_tokens;

  return pp_err_t{pp_err_type_t::OK};
}
|
||||
|
||||
// TODO: Implement this
|
||||
pp_err_t preprocess_macro_blocks(const vector<token_t *> &,
|
||||
vector<token_t *> &);
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, pp_err_t &err)
|
||||
{
|
||||
os << "PREPROCESSING_";
|
||||
switch (err.type)
|
||||
{
|
||||
case OK:
|
||||
return os << "OK";
|
||||
case EXPECTED_NAME:
|
||||
return os << "EXPECTED_NAME";
|
||||
case EXPECTED_STRING:
|
||||
return os << "EXPECTED_STRING";
|
||||
case EXPECTED_END:
|
||||
return os << "EXPECTED_END";
|
||||
case FILE_NONEXISTENT:
|
||||
return os << "FILE_NONEXISTENT";
|
||||
case FILE_PARSE_ERROR:
|
||||
return os << "FILE_PARSE_ERROR -> \n\t[" << err.reference->content
|
||||
<< "]:" << err.lerr;
|
||||
case UNKNOWN_NAME:
|
||||
return os << "UNKNOWN_NAME";
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
pp_err_t::pp_err_t() : reference{nullptr}, type{pp_err_type_t::OK}, lerr{}
|
||||
{}
|
||||
|
||||
pp_err_t::pp_err_t(pp_err_type_t e) : reference{nullptr}, type{e}, lerr{}
|
||||
{}
|
||||
|
||||
pp_err_t::pp_err_t(pp_err_type_t err, const token_t *ref)
|
||||
: reference{ref}, type{err}
|
||||
{}
|
||||
|
||||
pp_err_t::pp_err_t(pp_err_type_t err, const token_t *ref, lerr_t lerr)
|
||||
: reference{ref}, type{err}, lerr{lerr}
|
||||
{}
|
||||
|
||||
// pp_unit_t::pp_unit_t(const token_t *const token) : resolved{false},
|
||||
// token{token}
|
||||
// {}
|
||||
|
||||
// pp_unit_t::pp_unit_t(std::string_view name, std::vector<pp_unit_t> elements)
|
||||
// : resolved{false}, token{nullptr}, container{name, elements}
|
||||
// {}
|
||||
62
src/preprocesser.hpp
Normal file
62
src/preprocesser.hpp
Normal file
@@ -0,0 +1,62 @@
|
||||
/* Copyright (C) 2024 Aryadev Chavali
|
||||
|
||||
* You may distribute and modify this code under the terms of the GPLv2
|
||||
* license. You should have received a copy of the GPLv2 license with
|
||||
* this file. If not, please write to: aryadev@aryadevchavali.com.
|
||||
|
||||
* Created: 2024-04-14
|
||||
* Author: Aryadev Chavali
|
||||
* Description: Preprocessor which occurs after lexing before parsing.
|
||||
*/
|
||||
|
||||
#ifndef PREPROCESSER_HPP
|
||||
#define PREPROCESSER_HPP
|
||||
|
||||
#include <ostream>
|
||||
#include <tuple>
|
||||
|
||||
#include "./lexer.hpp"
|
||||
|
||||
/* Kinds of error the preprocessor can report; OK (zero) means no error. */
enum pp_err_type_t
{
  OK = 0,
  EXPECTED_NAME,    // %const not followed by a symbol
  EXPECTED_STRING,  // %use not followed by a string literal
  EXPECTED_END,     // %const block without a closing %end
  FILE_NONEXISTENT, // %use names a file that could not be read
  FILE_PARSE_ERROR, // %use names a file that failed to lex
  UNKNOWN_NAME,     // $reference to an undefined %const
};
|
||||
|
||||
struct pp_err_t
|
||||
{
|
||||
const token_t *reference;
|
||||
pp_err_type_t type;
|
||||
lerr_t lerr;
|
||||
|
||||
pp_err_t();
|
||||
pp_err_t(pp_err_type_t);
|
||||
pp_err_t(pp_err_type_t, const token_t *);
|
||||
pp_err_t(pp_err_type_t, const token_t *, lerr_t);
|
||||
};
|
||||
|
||||
// Stream a preprocessor error as "PREPROCESSING_<NAME>" (defined in
// preprocesser.cpp).
std::ostream &operator<<(std::ostream &, pp_err_t &);
|
||||
|
||||
struct pp_unit_t
|
||||
{
|
||||
const token_t *const token;
|
||||
struct
|
||||
{
|
||||
std::string_view name;
|
||||
std::vector<pp_unit_t> elements;
|
||||
} container;
|
||||
|
||||
pp_unit_t(const token_t *const);
|
||||
pp_unit_t(std::string_view, std::vector<pp_unit_t>);
|
||||
};
|
||||
|
||||
// NOTE(review): declared only; no definition is visible alongside this
// header - confirm it exists before calling.
std::vector<pp_unit_t> tokens_to_units(const std::vector<token_t *> &);
|
||||
// NOTE(review): declared only; no definition is visible alongside this
// header - confirm it exists before calling.
pp_err_t preprocess_use(std::vector<pp_unit_t> &);
|
||||
// Run all preprocessing stages over the first vector, storing the resulting
// tokens in the second (defined in preprocesser.cpp).
pp_err_t preprocesser(const std::vector<token_t *> &, std::vector<token_t *> &);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user