Fixed Makefile so it tracks dependencies better

It now tracks main.cpp's dependencies and rebuilds them as needed.
2024-04-16 20:42:51 +06:30
parent 190bb766cb
commit f060a856d3
8 changed files with 26 additions and 26 deletions
--- a/src/base.cpp
+++ b/src/base.cpp
@@ -0,0 +1,33 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license.  You should have received a copy of the GPLv2
+ * license with this file.  If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description:
+ */
+
+#include "./base.hpp"
+
+#include <cstdio>
+
+/* Read the whole of FILENAME (opened in binary mode) into a string.
+ *
+ * Returns std::nullopt when the file cannot be opened, when its size
+ * cannot be determined (fseek/ftell failure) or when a read error is
+ * reported.  An empty file yields an empty string.
+ */
+std::optional<std::string> read_file(const char *filename)
+{
+  FILE *fp = fopen(filename, "rb");
+  if (!fp)
+    return std::nullopt;
+
+  // ftell reports -1L on failure; using that as a size would request
+  // an absurd allocation, so treat it as an unreadable file instead.
+  long size = -1;
+  if (fseek(fp, 0, SEEK_END) == 0)
+    size = ftell(fp);
+  if (size < 0)
+  {
+    fclose(fp);
+    return std::nullopt;
+  }
+  rewind(fp);
+
+  std::string contents(static_cast<std::string::size_type>(size), '\0');
+  std::string::size_type bytes_read = 0;
+  if (!contents.empty())
+    bytes_read = fread(&contents[0], 1, contents.size(), fp);
+  const bool failed = ferror(fp) != 0;
+  fclose(fp);
+
+  if (failed)
+    return std::nullopt;
+
+  // A short read without an error: keep only the bytes that arrived
+  // rather than handing back trailing NUL padding.
+  contents.resize(bytes_read);
+  return contents;
+}
--- a/src/base.hpp
+++ b/src/base.hpp
@@ -0,0 +1,21 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license.  You should have received a copy of the GPLv2
+ * license with this file.  If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Base library
+ */
+
+#ifndef BASE_HPP
+#define BASE_HPP
+
+#include <optional>
+#include <string>
+
+// Read the named file into a string; yields std::nullopt when the
+// file cannot be opened.
+std::optional<std::string> read_file(const char *);
+
+#endif
--- a/src/lexer.cpp
+++ b/src/lexer.cpp
@@ -0,0 +1,565 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license.  You should have received a copy of the GPLv2
+ * license with this file.  If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Lexer for assembly language
+ */
+
+extern "C"
+{
+#include <lib/inst.h>
+}
+
+#include <algorithm>
+#include <tuple>
+
+#include "./lexer.hpp"
+
+static_assert(NUMBER_OF_OPCODES == 98, "ERROR: Lexer is out of date");
+
+using std::string, std::string_view, std::pair, std::make_pair;
+
+// Character classes used by the lexer: the characters that may appear
+// in a symbol, in a decimal literal and in a hexadecimal literal.
+const char *const VALID_SYMBOL = "abcdefghijklmnopqrstuvwxyz"
+                                 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                 "0123456789-_.:%#$";
+const char *const VALID_DIGIT  = "0123456789";
+const char *const VALID_HEX    = "0123456789abcdefABCDEF";
+
+// True when C occurs in the NUL-terminated string S.  The terminator
+// itself is not part of the searched range, so '\0' is never found.
+bool is_char_in_s(char c, const char *s)
+{
+  const string_view haystack{s};
+  const auto position = haystack.find(c);
+  return position != string::npos;
+}
+
+// True when SRC begins with MATCH and has at least one further
+// character after it (an exact match is deliberately rejected).
+bool initial_match(string_view src, string_view match)
+{
+  if (src.size() <= match.size())
+    return false;
+  return src.compare(0, match.size(), match) == 0;
+}
+
+/* Lex one symbol-like lexeme from the front of SOURCE.
+ *
+ * The lexeme is the longest leading run of VALID_SYMBOL characters.
+ * It is upper-cased and then classified as a preprocessor directive,
+ * a `$` reference, an opcode or a plain SYMBOL.  For opcodes written
+ * as `<OPCODE>.<suffix>` and for `$` references only the text after
+ * the prefix is stored as the token content; every other token stores
+ * the whole upper-cased lexeme.
+ *
+ * SOURCE is advanced past the lexeme.  On success the token records
+ * COLUMN and COLUMN is then advanced by the lexeme length minus one.
+ *
+ * Returns the token paired with an OK error, or a default token
+ * paired with INVALID_PREPROCESSOR_DIRECTIVE for an unknown `%` word.
+ */
+pair<token_t, lerr_t> tokenise_symbol(string_view &source, size_t &column,
+                                      size_t line)
+{
+  struct spelling_t
+  {
+    string_view text;
+    token_type_t type;
+  };
+
+  // Lexemes that must match in full.
+  static const spelling_t WHOLE_WORDS[] = {
+      {"%CONST", token_type_t::PP_CONST},
+      {"%USE", token_type_t::PP_USE},
+      {"%END", token_type_t::PP_END},
+      {"NOOP", token_type_t::NOOP},
+      {"HALT", token_type_t::HALT},
+      {"MDELETE", token_type_t::MDELETE},
+      {"MSIZE", token_type_t::MSIZE},
+      {"JUMP.ABS", token_type_t::JUMP_ABS},
+      {"JUMP.STACK", token_type_t::JUMP_STACK},
+      {"CALL.STACK", token_type_t::CALL_STACK},
+      {"CALL", token_type_t::CALL},
+      {"RET", token_type_t::RET},
+      {"GLOBAL", token_type_t::GLOBAL},
+  };
+
+  // Opcodes followed by a non-empty suffix.  Tried in order, so a
+  // longer prefix must be listed before any shorter prefix it starts
+  // with (PUSH.REG. before PUSH., LTE. before LT., ...).
+  static const spelling_t PREFIXES[] = {
+      {"PUSH.REG.", token_type_t::PUSH_REG},
+      {"PUSH.", token_type_t::PUSH},
+      {"POP.", token_type_t::POP},
+      {"MOV.", token_type_t::MOV},
+      {"DUP.", token_type_t::DUP},
+      {"MALLOC.STACK.", token_type_t::MALLOC_STACK},
+      {"MALLOC.", token_type_t::MALLOC},
+      {"MSET.STACK.", token_type_t::MSET_STACK},
+      {"MSET.", token_type_t::MSET},
+      {"MGET.STACK.", token_type_t::MGET_STACK},
+      {"MGET.", token_type_t::MGET},
+      {"NOT.", token_type_t::NOT},
+      {"OR.", token_type_t::OR},
+      {"AND.", token_type_t::AND},
+      {"XOR.", token_type_t::XOR},
+      {"EQ.", token_type_t::EQ},
+      {"LTE.", token_type_t::LTE},
+      {"LT.", token_type_t::LT},
+      {"GTE.", token_type_t::GTE},
+      {"GT.", token_type_t::GT},
+      {"SUB.", token_type_t::SUB},
+      {"PLUS.", token_type_t::PLUS},
+      {"MULT.", token_type_t::MULT},
+      {"PRINT.", token_type_t::PRINT},
+      {"JUMP.IF.", token_type_t::JUMP_IF},
+  };
+
+  auto end = source.find_first_not_of(VALID_SYMBOL);
+  // No terminating character means the symbol runs to the end of the
+  // buffer: take all of it.  (Stopping one short would drop the last
+  // character and, for a one-character tail, consume nothing at all.)
+  if (end == string::npos)
+    end = source.size();
+  string sym{source.substr(0, end)};
+  source.remove_prefix(end);
+  std::transform(sym.begin(), sym.end(), sym.begin(), ::toupper);
+
+  token_t t{};
+  bool classified = false;
+
+  for (const spelling_t &word : WHOLE_WORDS)
+  {
+    if (word.text == sym)
+    {
+      t.type     = word.type;
+      classified = true;
+      break;
+    }
+  }
+
+  // Any other word starting with `%` is an unknown directive.
+  if (!classified && sym[0] == '%')
+  {
+    return make_pair(
+        t, lerr_t(lerr_type_t::INVALID_PREPROCESSOR_DIRECTIVE, column, line));
+  }
+
+  if (!classified && sym.size() > 1 && sym[0] == '$')
+  {
+    t          = token_t(token_type_t::PP_REFERENCE, sym.substr(1));
+    classified = true;
+  }
+
+  if (!classified)
+  {
+    for (const spelling_t &prefix : PREFIXES)
+    {
+      if (initial_match(sym, prefix.text))
+      {
+        t          = token_t(prefix.type, sym.substr(prefix.text.size()));
+        classified = true;
+        break;
+      }
+    }
+  }
+
+  if (!classified)
+    t.type = token_type_t::SYMBOL;
+
+  if (t.content == "")
+    t.content = sym;
+  t.column = column;
+  column += sym.size() - 1;
+  return make_pair(t, lerr_t());
+}
+
+/* Lex a decimal integer literal, with an optional leading `-`, from
+ * the front of SOURCE.
+ *
+ * SOURCE is advanced past the literal and COLUMN by its length (sign
+ * included).  The returned LITERAL_NUMBER token holds the literal's
+ * text and the column it started at.
+ */
+token_t tokenise_literal_number(string_view &source, size_t &column)
+{
+  bool is_negative = false;
+  if (!source.empty() && source[0] == '-')
+  {
+    is_negative = true;
+    source.remove_prefix(1);
+  }
+
+  auto end = source.find_first_not_of(VALID_DIGIT);
+  // Digits running to the end of the buffer: take every one of them
+  // rather than leaving the final digit behind.
+  if (end == string::npos)
+    end = source.size();
+  string digits{source.substr(0, end)};
+  source.remove_prefix(end);
+
+  token_t t{token_type_t::LITERAL_NUMBER, (is_negative ? "-" : "") + digits,
+            column};
+
+  column += digits.size() + (is_negative ? 1 : 0);
+
+  return t;
+}
+
+/* Lex the `x<hex digits>` tail of a hexadecimal literal.
+ *
+ * SOURCE must begin at the `x` character; the single character
+ * dropped here is that `x`, not a leading `0`.  SOURCE is advanced
+ * past the `x` and the digits, COLUMN by the number of digits plus
+ * one.  The LITERAL_NUMBER token's content is "0x" followed by the
+ * digits.
+ */
+token_t tokenise_literal_hex(string_view &source, size_t &column)
+{
+  // Remove x char from source
+  if (!source.empty())
+    source.remove_prefix(1);
+  auto end = source.find_first_not_of(VALID_HEX);
+  // Digits running to the end of the buffer: take every one of them
+  // rather than leaving the final digit behind.
+  if (end == string::npos)
+    end = source.size();
+  string digits{source.substr(0, end)};
+  source.remove_prefix(end);
+
+  token_t t = {token_type_t::LITERAL_NUMBER, "0x" + digits, column};
+
+  column += digits.size() + 1;
+  return t;
+}
+
+/* Lex a character literal (`'c'` or an escape such as `'\n'`) from
+ * the front of SOURCE, which must begin at the opening quote.
+ *
+ * Supported escapes are \n, \t, \r and \\.  The token content is the
+ * character's numeric value in decimal, because std::to_string is
+ * applied to a char.  On success SOURCE and COLUMN are advanced past
+ * the literal (3 characters, or 4 for an escape).
+ *
+ * Returns the token paired with an OK error, or a default token
+ * paired with INVALID_CHAR_LITERAL or
+ * INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE.
+ */
+pair<token_t, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
+                                            size_t &line)
+{
+  token_t t{};
+  auto end = source.find('\'', 1);
+  if (source.size() < 3 || end == 1 || end > 3)
+    return make_pair(t,
+                     lerr_t(lerr_type_t::INVALID_CHAR_LITERAL, column, line));
+  else if (source[1] == '\\')
+  {
+    // Escape sequence
+    char escape = '\0';
+    if (source.size() < 4 || source[3] != '\'')
+      return make_pair(t,
+                       lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
+                              column, line));
+    switch (source[2])
+    {
+    case 'n':
+      escape = '\n';
+      break;
+    case 't':
+      escape = '\t';
+      break;
+    case 'r':
+      escape = '\r';
+      break;
+    case '\\':
+      escape = '\\';
+      break;
+    default:
+      column += 2;
+      return make_pair(t,
+                       lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
+                              column, line));
+      break;
+    }
+    t = token_t{token_type_t::LITERAL_CHAR, std::to_string(escape), column};
+    column += 4;
+    source.remove_prefix(4);
+  }
+  else if (end != 2)
+  {
+    // Two characters between the quotes without a backslash (`'ab'`):
+    // reject it instead of lexing `'a` and leaving a stray quote.
+    return make_pair(t,
+                     lerr_t(lerr_type_t::INVALID_CHAR_LITERAL, column, line));
+  }
+  else
+  {
+    // Record the starting column, as the escape branch does.
+    t = token_t(token_type_t::LITERAL_CHAR, std::to_string(source[1]), column);
+    column += 3;
+    source.remove_prefix(3);
+  }
+  return make_pair(t, lerr_t());
+}
+
+/* Lex a string literal from the front of SOURCE.
+ *
+ * SOURCE begins at the opening quote and END is the index of the
+ * closing quote within SOURCE.  The token content is the text between
+ * the quotes; SOURCE and COLUMN are advanced by END + 1.
+ */
+token_t tokenise_literal_string(string_view &source, size_t &column, size_t end)
+{
+  const string body{source.substr(1, end - 1)};
+  token_t token{token_type_t::LITERAL_STRING, body, column};
+
+  // Opening quote, body and closing quote in one step.
+  const size_t consumed = end + 1;
+  source.remove_prefix(consumed);
+  column += consumed;
+  return token;
+}
+
+/* Lex SOURCE into TOKENS.
+ *
+ * Whitespace and NUL bytes are skipped, `;` starts a comment that
+ * runs to the end of the line, and every other lexeme becomes a
+ * heap-allocated token_t appended to TOKENS.  Tokens appended before
+ * an error stay in TOKENS.
+ *
+ * Returns a default (OK) lerr_t when the whole buffer was consumed,
+ * otherwise the first error with the line/column recorded for it.
+ */
+lerr_t tokenise_buffer(string_view source, std::vector<token_t *> &tokens)
+{
+  // The <cctype> classifiers require a value representable as
+  // unsigned char; a plain char may be negative.
+  const auto is_space = [](char c)
+  {
+    return isspace(static_cast<unsigned char>(c)) != 0;
+  };
+  const auto is_digit = [](char c)
+  {
+    return isdigit(static_cast<unsigned char>(c)) != 0;
+  };
+
+  size_t column = 0, line = 1;
+  while (source.size() > 0)
+  {
+    bool is_token = true;
+    char first    = source[0];
+    token_t t{};
+    if (is_space(first) || first == '\0')
+    {
+      size_t i;
+      for (i = 0;
+           i < source.size() && (is_space(source[i]) || source[i] == '\0'); ++i)
+      {
+        ++column;
+        if (source[i] == '\n')
+        {
+          column = 0;
+          ++line;
+        }
+      }
+      ++column;
+      source.remove_prefix(i);
+      is_token = false;
+    }
+    else if (first == ';')
+    {
+      size_t i;
+      for (i = 0; i < source.size() && source[i] != '\n'; ++i)
+        continue;
+      column = 0;
+      ++line;
+      // Drop the newline too, but only if there is one: a comment on
+      // the last line may run straight into the end of the buffer.
+      source.remove_prefix(i < source.size() ? i + 1 : i);
+      is_token = false;
+    }
+    else if (first == '*')
+    {
+      t = token_t(token_type_t::STAR, "", column);
+      source.remove_prefix(1);
+    }
+    else if (first == '\"')
+    {
+      auto end = source.find('\"', 1);
+      if (end == string::npos)
+        return lerr_t(lerr_type_t::INVALID_STRING_LITERAL, column, line);
+      t = tokenise_literal_string(source, column, end);
+    }
+    else if (first == '\'')
+    {
+      lerr_t lerr;
+      std::tie(t, lerr) = tokenise_literal_char(source, column, line);
+      if (lerr.type != lerr_type_t::OK)
+        return lerr;
+    }
+    // Hexadecimal must be tried before decimal: `0` is a digit, so
+    // the decimal branch would otherwise claim every `0x...` literal.
+    else if (first == '0' && source.size() > 2 && source[1] == 'x' &&
+             is_char_in_s(source[2], VALID_HEX))
+    {
+      // Validate the digits after the `0x` prefix.
+      auto end = source.find_first_not_of(VALID_HEX, 2);
+      if (end != string::npos && !is_space(source[end]))
+        return lerr_t(lerr_type_t::INVALID_NUMBER_LITERAL, column, line);
+      // tokenise_literal_hex starts at the `x`: consume the `0` here
+      // and account for it in the column once the token is built.
+      source.remove_prefix(1);
+      t = tokenise_literal_hex(source, column);
+      ++column;
+    }
+    else if (is_digit(first) ||
+             (source.size() > 1 && first == '-' && is_digit(source[1])))
+    {
+      // A number must be followed by whitespace or the end of input.
+      auto end = source.find_first_not_of(VALID_DIGIT, first == '-' ? 1 : 0);
+      if (end != string::npos && !is_space(source[end]))
+        return lerr_t(lerr_type_t::INVALID_NUMBER_LITERAL, column, line);
+      t = tokenise_literal_number(source, column);
+    }
+    else if (is_char_in_s(first, VALID_SYMBOL))
+    {
+      lerr_t lerr;
+      std::tie(t, lerr) = tokenise_symbol(source, column, line);
+      if (lerr.type != lerr_type_t::OK)
+        return lerr;
+    }
+    else
+    {
+      ++column;
+      return lerr_t{lerr_type_t::UNKNOWN_LEXEME, column, line};
+    }
+
+    if (is_token)
+    {
+      t.line       = line;
+      token_t *acc = new token_t(t);
+      tokens.push_back(acc);
+    }
+  }
+  return lerr_t{};
+}
+
+// Print a token as `TYPE(`content`)@line, column`.
+std::ostream &operator<<(std::ostream &os, token_t &t)
+{
+  os << token_type_as_cstr(t.type);
+  os << "(`" << t.content << "`)@";
+  os << t.line << ", " << t.column;
+  return os;
+}
+
+// Default token.  Every field gets a defined value (type is the zero
+// enumerator, PP_CONST) because default tokens are copied around
+// before being filled in, e.g. when returned alongside an error.
+token_t::token_t() : type{}, column{0}, line{0}, content{}
+{}
+
+// Token of the given type and content at the given position.
+token_t::token_t(token_type_t type, string content, size_t col, size_t line)
+    : type{type}, column{col}, line{line}, content{content}
+{}
+
+/* Spelling of TYPE's enumerator as a string literal, e.g. "PUSH_REG"
+ * for token_type_t::PUSH_REG.  Returns "" for a value that is not one
+ * of the enumerators handled below.
+ */
+const char *token_type_as_cstr(token_type_t type)
+{
+// One switch case per enumerator, returning the enumerator's own name.
+#define TOKEN_NAME_CASE(NAME) \
+  case token_type_t::NAME:    \
+    return #NAME;
+
+  switch (type)
+  {
+    TOKEN_NAME_CASE(PP_USE)
+    TOKEN_NAME_CASE(PP_CONST)
+    TOKEN_NAME_CASE(PP_END)
+    TOKEN_NAME_CASE(PP_REFERENCE)
+    TOKEN_NAME_CASE(GLOBAL)
+    TOKEN_NAME_CASE(STAR)
+    TOKEN_NAME_CASE(LITERAL_STRING)
+    TOKEN_NAME_CASE(LITERAL_NUMBER)
+    TOKEN_NAME_CASE(LITERAL_CHAR)
+    TOKEN_NAME_CASE(NOOP)
+    TOKEN_NAME_CASE(HALT)
+    TOKEN_NAME_CASE(PUSH)
+    TOKEN_NAME_CASE(POP)
+    TOKEN_NAME_CASE(PUSH_REG)
+    TOKEN_NAME_CASE(MOV)
+    TOKEN_NAME_CASE(DUP)
+    TOKEN_NAME_CASE(MALLOC)
+    TOKEN_NAME_CASE(MALLOC_STACK)
+    TOKEN_NAME_CASE(MSET)
+    TOKEN_NAME_CASE(MSET_STACK)
+    TOKEN_NAME_CASE(MGET)
+    TOKEN_NAME_CASE(MGET_STACK)
+    TOKEN_NAME_CASE(MDELETE)
+    TOKEN_NAME_CASE(MSIZE)
+    TOKEN_NAME_CASE(NOT)
+    TOKEN_NAME_CASE(OR)
+    TOKEN_NAME_CASE(AND)
+    TOKEN_NAME_CASE(XOR)
+    TOKEN_NAME_CASE(EQ)
+    TOKEN_NAME_CASE(LT)
+    TOKEN_NAME_CASE(LTE)
+    TOKEN_NAME_CASE(GT)
+    TOKEN_NAME_CASE(GTE)
+    TOKEN_NAME_CASE(PLUS)
+    TOKEN_NAME_CASE(SUB)
+    TOKEN_NAME_CASE(MULT)
+    TOKEN_NAME_CASE(PRINT)
+    TOKEN_NAME_CASE(JUMP_ABS)
+    TOKEN_NAME_CASE(JUMP_STACK)
+    TOKEN_NAME_CASE(JUMP_IF)
+    TOKEN_NAME_CASE(CALL)
+    TOKEN_NAME_CASE(CALL_STACK)
+    TOKEN_NAME_CASE(RET)
+    TOKEN_NAME_CASE(SYMBOL)
+  }
+
+#undef TOKEN_NAME_CASE
+  return "";
+}
+
+// Print a lexer error as `line:col: NAME`; an error type without a
+// name below prints only the position prefix.
+std::ostream &operator<<(std::ostream &os, lerr_t &lerr)
+{
+  const char *name = nullptr;
+  switch (lerr.type)
+  {
+  case lerr_type_t::OK:
+    name = "OK";
+    break;
+  case lerr_type_t::INVALID_CHAR_LITERAL:
+    name = "INVALID_CHAR_LITERAL";
+    break;
+  case lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE:
+    name = "INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE";
+    break;
+  case lerr_type_t::INVALID_STRING_LITERAL:
+    name = "INVALID_STRING_LITERAL";
+    break;
+  case lerr_type_t::INVALID_NUMBER_LITERAL:
+    name = "INVALID_NUMBER_LITERAL";
+    break;
+  case lerr_type_t::INVALID_PREPROCESSOR_DIRECTIVE:
+    name = "INVALID_PREPROCESSOR_DIRECTIVE";
+    break;
+  case lerr_type_t::UNKNOWN_LEXEME:
+    name = "UNKNOWN_LEXEME";
+    break;
+  default:
+    break;
+  }
+
+  os << lerr.line << ":" << lerr.col << ": ";
+  if (name != nullptr)
+    os << name;
+  return os;
+}
+
+// Lexer error of the given type at the given column and line.
+lerr_t::lerr_t(lerr_type_t type, size_t col, size_t line)
+    : col(col), line(line), type(type)
+{
+}
--- a/src/lexer.hpp
+++ b/src/lexer.hpp
@@ -0,0 +1,106 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license.  You should have received a copy of the GPLv2
+ * license with this file.  If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Lexer for assembly language
+ */
+
+#ifndef LEXER_HPP
+#define LEXER_HPP
+
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+// Kinds of token the lexer can produce: preprocessor directives,
+// literals, one entry per opcode family, and SYMBOL for any other
+// identifier.
+enum class token_type_t
+{
+  PP_CONST,     // %const(<symbol>)...
+  PP_USE,       // %use <string>
+  PP_END,       // %end
+  PP_REFERENCE, // $<symbol>
+  GLOBAL,
+  STAR, // `*`
+  LITERAL_NUMBER,
+  LITERAL_CHAR,
+  LITERAL_STRING,
+  NOOP,
+  HALT,
+  PUSH,
+  POP,
+  PUSH_REG,
+  MOV,
+  DUP,
+  MALLOC,
+  MALLOC_STACK,
+  MSET,
+  MSET_STACK,
+  MGET,
+  MGET_STACK,
+  MDELETE,
+  MSIZE,
+  NOT,
+  OR,
+  AND,
+  XOR,
+  EQ,
+  LT,
+  LTE,
+  GT,
+  GTE,
+  PLUS,
+  SUB,
+  MULT,
+  PRINT,
+  JUMP_ABS,
+  JUMP_STACK,
+  JUMP_IF,
+  CALL,
+  CALL_STACK,
+  RET,
+  SYMBOL, // any lexeme made of symbol characters that is not a keyword
+};
+
+// Name of the given token type as a C string.
+const char *token_type_as_cstr(token_type_t type);
+
+// A single lexed token: its kind, where it was found and its text.
+struct token_t
+{
+  token_type_t type;
+  size_t column, line; // position recorded by the lexer
+  std::string content; // lexeme text (for opcodes, the part after the
+                       // opcode prefix when there is one)
+
+  token_t();
+  token_t(token_type_t, std::string, size_t col = 0, size_t line = 0);
+};
+
+// Stream a token in human-readable form (type, content, position).
+std::ostream &operator<<(std::ostream &, token_t &);
+
+// Outcome of lexing; OK (0) means no error.
+enum class lerr_type_t
+{
+  OK = 0,
+  INVALID_CHAR_LITERAL,
+  INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
+  INVALID_STRING_LITERAL, // e.g. no closing quote
+  INVALID_NUMBER_LITERAL, // e.g. digits followed by a non-space character
+  INVALID_PREPROCESSOR_DIRECTIVE, // unknown word starting with `%`
+  UNKNOWN_LEXEME,
+};
+
+// A lexer result: the error type plus the position it refers to.
+// Default-constructed, it represents success (OK at 0, 0).
+struct lerr_t
+{
+  size_t col, line;
+  lerr_type_t type;
+
+  lerr_t(lerr_type_t type = lerr_type_t::OK, size_t col = 0, size_t line = 0);
+};
+
+// Stream a lexer error as `line:col: NAME`.
+std::ostream &operator<<(std::ostream &, lerr_t &);
+
+// Lex the given buffer, appending tokens allocated with `new` to the
+// vector.  The result's type is lerr_type_t::OK on success.
+lerr_t tokenise_buffer(std::string_view, std::vector<token_t *> &);
+
+#endif
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -0,0 +1,148 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license.  You should have received a copy of the GPLv2
+ * license with this file.  If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Entrypoint for assembly program
+ */
+
+#include <algorithm>
+#include <cstdio>
+#include <iostream>
+#include <optional>
+#include <string>
+#include <tuple>
+#include <vector>
+
+extern "C"
+{
+#include <lib/inst.h>
+}
+
+#include "./base.hpp"
+#include "./lexer.hpp"
+#include "./preprocesser.hpp"
+
+using std::cout, std::cerr, std::endl;
+using std::pair, std::string, std::string_view, std::vector;
+
+// Write the command-line synopsis for PROGRAM_NAME to FP.
+void usage(const char *program_name, FILE *fp)
+{
+  fprintf(fp, "Usage: %s FILE OUT-FILE\n", program_name);
+  fputs("\tFILE: Source code to compile\n", fp);
+  fputs("\tOUT-FILE: Name of file to store bytecode\n", fp);
+}
+
+/* Entry point: read FILE, lex it, run the preprocessor and report the
+ * first error encountered.
+ *
+ * Exit status is 0 on success, -1 for bad usage or an unreadable
+ * source file, and 255 minus the error code for lexer/preprocessor
+ * failures.  OUT-FILE is accepted but not written yet.
+ */
+int main(int argc, const char *argv[])
+{
+  // argc < 2 also covers an empty argument vector, where argv[1]
+  // would be out of range.
+  if (argc < 2 || argc > 3)
+  {
+    usage(argv[0], stderr);
+    return -1;
+  }
+  int ret                 = 0;
+  const char *source_name = argv[1];
+  // argv[2] is a null pointer when only FILE was given; substitute an
+  // empty name so it can never reach a `%s` conversion.
+  const char *out_name    = argc > 2 ? argv[2] : "";
+  (void)out_name;
+
+#if VERBOSE >= 1
+  printf("[%sASSEMBLER%s]: Assembling `%s` to `%s`\n", TERM_YELLOW, TERM_RESET,
+         source_name, out_name);
+#endif
+
+  auto file_source = read_file(source_name);
+
+#if VERBOSE >= 1
+  printf("[%sASSEMBLER%s]: `%s` -> %lu bytes\n", TERM_YELLOW, TERM_RESET,
+         source_name, file_source.has_value() ? file_source.value().size() : 0);
+#endif
+
+  string source_str;
+  string_view original;
+  string_view src;
+  vector<token_t *> tokens, preprocessed_tokens;
+  lerr_t lerr;
+  pp_err_t pp_err;
+
+  // Highest scoped variable cut off point
+
+  if (file_source.has_value())
+    source_str = file_source.value();
+  else
+  {
+    cerr << "ERROR: file `" << source_name << "` does not exist!" << endl;
+    ret = -1;
+    goto end;
+  }
+  original = string_view{source_str};
+  src      = string_view{source_str};
+  lerr     = tokenise_buffer(src, tokens);
+
+  if (lerr.type != lerr_type_t::OK)
+  {
+    cerr << source_name << ":" << lerr << endl;
+    ret = 255 - static_cast<int>(lerr.type);
+    goto end;
+  }
+  else
+  {
+
+#if VERBOSE >= 1
+    printf("[%sLEXER%s]: %lu bytes -> %lu tokens\n", TERM_GREEN, TERM_RESET,
+           source_str.size(), tokens.size());
+#endif
+
+#if VERBOSE == 2
+    printf("[%sLEXER%s]: Tokens "
+           "parsed:\n----------------------------------------------------------"
+           "----------------------\n",
+           TERM_GREEN, TERM_RESET);
+    for (auto token : tokens)
+      cout << "\t" << *token << endl;
+    printf("-------------------------------------------------------------"
+           "-------------------\n");
+#endif
+  }
+
+  // preprocessing
+  pp_err = preprocesser(tokens, preprocessed_tokens);
+  if (pp_err.type != pp_err_type_t::OK)
+  {
+    // Not every preprocessor error carries a token to point at; only
+    // print a position when one is available.
+    cerr << source_name << ":";
+    if (pp_err.reference != nullptr)
+      cerr << pp_err.reference->line << ":" << pp_err.reference->column << ":";
+    cerr << " " << pp_err << endl;
+    ret = 255 - static_cast<int>(pp_err.type);
+    goto end;
+  }
+  else
+  {
+
+#if VERBOSE >= 1
+    printf("[%sPREPROCESSOR%s]: %lu tokens -> %lu tokens\n", TERM_GREEN,
+           TERM_RESET, tokens.size(), preprocessed_tokens.size());
+#endif
+#if VERBOSE == 2
+    printf("[%sPREPROCESSOR%s]: Processed tokens: "
+           "\n-----------------------------------------------------------------"
+           "---------------\n",
+           TERM_GREEN, TERM_RESET);
+    for (auto token : preprocessed_tokens)
+      cout << "\t" << *token << endl;
+    printf("-------------------------------------------------------------"
+           "-------------------\n");
+#endif
+  }
+
+end:
+  for (auto token : tokens)
+    delete token;
+  for (auto token : preprocessed_tokens)
+    delete token;
+
+  return ret;
+}
--- a/src/preprocesser.cpp
+++ b/src/preprocesser.cpp
@@ -0,0 +1,218 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license.  You should have received a copy of the GPLv2
+ * license with this file.  If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Preprocessor which occurs after lexing before parsing.
+ */
+
+#include "./preprocesser.hpp"
+#include "./base.hpp"
+
+#include <algorithm>
+#include <unordered_map>
+
+using std::pair, std::vector, std::make_pair, std::string, std::string_view;
+
+// Delete every token_t pointed to by the elements of V.  The vector
+// itself is left untouched (same size, now-dangling pointers), so
+// callers must clear or discard it afterwards.
+#define VCLEAR(V)                       \
+  std::for_each((V).begin(), (V).end(), \
+                [](token_t *t)          \
+                {                       \
+                  delete t;             \
+                });
+
+/* Expand `%use "<file>"` directives.
+ *
+ * Each PP_USE token followed by a string literal is replaced in
+ * VEC_OUT by the tokens lexed from the named file; every other token
+ * of TOKENS is copied.  VEC_OUT therefore owns all of its tokens.
+ *
+ * On error VEC_OUT is freed and emptied, and the returned pp_err_t
+ * references the offending token in TOKENS.
+ */
+pp_err_t preprocess_use_blocks(const vector<token_t *> &tokens,
+                               vector<token_t *> &vec_out)
+{
+  for (size_t i = 0; i < tokens.size(); ++i)
+  {
+    token_t *t = tokens[i];
+    if (t->type == token_type_t::PP_USE)
+    {
+      if (i + 1 >= tokens.size() ||
+          tokens[i + 1]->type != token_type_t::LITERAL_STRING)
+      {
+        VCLEAR(vec_out);
+        vec_out.clear();
+        return pp_err_t(pp_err_type_t::EXPECTED_STRING, t);
+      }
+
+      token_t *name = tokens[i + 1];
+      auto source   = read_file(name->content.c_str());
+      if (!source)
+      {
+        VCLEAR(vec_out);
+        vec_out.clear();
+        return pp_err_t(pp_err_type_t::FILE_NONEXISTENT, name);
+      }
+
+      std::vector<token_t *> ftokens;
+      lerr_t lerr = tokenise_buffer(source.value(), ftokens);
+      if (lerr.type != lerr_type_t::OK)
+      {
+        // The lexer may have produced tokens before it failed; they
+        // are never handed to the caller, so free them here.
+        VCLEAR(ftokens);
+        VCLEAR(vec_out);
+        vec_out.clear();
+        return pp_err_t(pp_err_type_t::FILE_PARSE_ERROR, name, lerr);
+      }
+
+      vec_out.insert(vec_out.end(), ftokens.begin(), ftokens.end());
+
+      // Skip the file-name token as well.
+      ++i;
+    }
+    else
+      vec_out.push_back(new token_t{*t});
+  }
+  return pp_err_t();
+}
+
+// Body of a `%const` block as a half-open index range [start, end)
+// into the token vector it was found in.
+struct const_t
+{
+  size_t start, end;
+};
+
+/* Expand `%const <name> ... %end` blocks.
+ *
+ * First pass: record, for every PP_CONST token followed by a SYMBOL
+ * name, the token range up to the matching PP_END.  Second pass: copy
+ * TOKENS into VEC_OUT, dropping the block definitions and replacing
+ * each PP_REFERENCE by copies of the referenced block's tokens.  When
+ * no block is defined at all, TOKENS is copied unchanged.
+ *
+ * Errors reference the offending token of TOKENS: the `%const` token
+ * for EXPECTED_NAME / EXPECTED_END, the reference token for
+ * UNKNOWN_NAME (in which case VEC_OUT is freed and emptied).
+ */
+pp_err_t preprocess_const_blocks(const vector<token_t *> &tokens,
+                                 vector<token_t *> &vec_out)
+{
+  std::unordered_map<string_view, const_t> blocks;
+  for (size_t i = 0; i < tokens.size(); ++i)
+  {
+    token_t *t = tokens[i];
+    if (t->type != token_type_t::PP_CONST)
+      continue;
+
+    // Attach the directive token so the caller has a position to
+    // report instead of a null reference.
+    if (i + 1 >= tokens.size() || tokens[i + 1]->type != token_type_t::SYMBOL)
+      return pp_err_t(pp_err_type_t::EXPECTED_NAME, t);
+
+    string_view capture = tokens[++i]->content;
+
+    ++i;
+    const size_t block_start = i;
+    for (; i < tokens.size() && tokens[i]->type != token_type_t::PP_END; ++i)
+      continue;
+
+    if (i == tokens.size())
+      return pp_err_t(pp_err_type_t::EXPECTED_END, t);
+
+    blocks[capture] = const_t{block_start, i};
+  }
+
+  if (blocks.size() == 0)
+  {
+    // Just construct a new vector and carry on
+    for (token_t *token : tokens)
+      vec_out.push_back(new token_t{*token});
+    return pp_err_t();
+  }
+
+  for (size_t i = 0; i < tokens.size(); ++i)
+  {
+    token_t *token = tokens[i];
+    if (token->type == token_type_t::PP_CONST)
+    {
+      // Skip the tokens that construct the const; the loop increment
+      // then steps over the closing PP_END.
+      for (; i < tokens.size() && tokens[i]->type != token_type_t::PP_END; ++i)
+        continue;
+    }
+    else if (token->type == token_type_t::PP_REFERENCE)
+    {
+      auto it = blocks.find(token->content);
+      if (it == blocks.end())
+      {
+        VCLEAR(vec_out);
+        vec_out.clear();
+        return pp_err_t(pp_err_type_t::UNKNOWN_NAME, token);
+      }
+
+      const const_t block = it->second;
+      for (size_t j = block.start; j < block.end; ++j)
+        vec_out.push_back(new token_t{*tokens[j]});
+    }
+    else
+      vec_out.push_back(new token_t{*token});
+  }
+
+  return pp_err_t();
+}
+
+/* Run the preprocessing passes (`%use` expansion, then `%const`
+ * expansion) over TOKENS and store the result in VEC_OUT.
+ *
+ * TOKENS is only read: it is never freed and never aliased by
+ * VEC_OUT, so the caller can release both vectors independently.
+ * Every token placed in VEC_OUT is a fresh allocation owned by the
+ * caller.
+ *
+ * On a `%use` error VEC_OUT is left untouched and the error refers to
+ * a token of TOKENS.  On a `%const` error VEC_OUT receives the
+ * `%use`-expanded tokens, because the error may refer to one of them.
+ */
+pp_err_t preprocesser(const vector<token_t *> &tokens,
+                      vector<token_t *> &vec_out)
+{
+  vector<token_t *> use_block_tokens;
+  pp_err_t pperr = preprocess_use_blocks(tokens, use_block_tokens);
+  if (pperr.type != pp_err_type_t::OK)
+  {
+    // Handing TOKENS back through VEC_OUT would make the caller free
+    // every token twice; release any leftovers and return nothing.
+    VCLEAR(use_block_tokens);
+    return pperr;
+  }
+
+  vector<token_t *> const_block_tokens;
+  pperr = preprocess_const_blocks(use_block_tokens, const_block_tokens);
+  if (pperr.type != pp_err_type_t::OK)
+  {
+    // TOKENS belongs to the caller and must not be deleted here.
+    VCLEAR(const_block_tokens);
+    vec_out = use_block_tokens;
+    return pperr;
+  }
+
+  VCLEAR(use_block_tokens);
+  vec_out = const_block_tokens;
+
+  return pp_err_t{pp_err_type_t::OK};
+}
+
+// TODO: Implement this
+// Declared only; no definition exists yet.  Presumably a further
+// preprocessing pass with the same (input, output) shape as the
+// passes above -- confirm once implemented.
+pp_err_t preprocess_macro_blocks(const vector<token_t *> &,
+                                 vector<token_t *> &);
+
+// Print a preprocessor error as `PREPROCESSING_<NAME>`.  For
+// FILE_PARSE_ERROR the referenced token's content and the nested
+// lexer error are appended, so `reference` must be non-null there.
+std::ostream &operator<<(std::ostream &os, pp_err_t &err)
+{
+  os << "PREPROCESSING_";
+  switch (err.type)
+  {
+  case pp_err_type_t::OK:
+    os << "OK";
+    break;
+  case pp_err_type_t::EXPECTED_NAME:
+    os << "EXPECTED_NAME";
+    break;
+  case pp_err_type_t::EXPECTED_STRING:
+    os << "EXPECTED_STRING";
+    break;
+  case pp_err_type_t::EXPECTED_END:
+    os << "EXPECTED_END";
+    break;
+  case pp_err_type_t::FILE_NONEXISTENT:
+    os << "FILE_NONEXISTENT";
+    break;
+  case pp_err_type_t::FILE_PARSE_ERROR:
+    os << "FILE_PARSE_ERROR -> \n\t[" << err.reference->content
+       << "]:" << err.lerr;
+    break;
+  case pp_err_type_t::UNKNOWN_NAME:
+    os << "UNKNOWN_NAME";
+    break;
+  }
+  return os;
+}
+
+// The shorter constructors forward to the full one, supplying "no
+// reference token" and a default (OK) lexer error.
+pp_err_t::pp_err_t() : pp_err_t(pp_err_type_t::OK, nullptr, lerr_t{})
+{}
+
+pp_err_t::pp_err_t(pp_err_type_t e) : pp_err_t(e, nullptr, lerr_t{})
+{}
+
+pp_err_t::pp_err_t(pp_err_type_t err, const token_t *ref)
+    : pp_err_t(err, ref, lerr_t{})
+{}
+
+// Full form: error type, the token it refers to and a nested lexer
+// error.
+pp_err_t::pp_err_t(pp_err_type_t err, const token_t *ref, lerr_t lerr)
+    : reference{ref}, type{err}, lerr{lerr}
+{}
+
+// pp_unit_t::pp_unit_t(const token_t *const token) : resolved{false},
+// token{token}
+// {}
+
+// pp_unit_t::pp_unit_t(std::string_view name, std::vector<pp_unit_t> elements)
+//     : resolved{false}, token{nullptr}, container{name, elements}
+// {}
--- a/src/preprocesser.hpp
+++ b/src/preprocesser.hpp
@@ -0,0 +1,62 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the GPLv2
+ * license.  You should have received a copy of the GPLv2 license with
+ * this file.  If not, please write to: aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Preprocessor which occurs after lexing before parsing.
+ */
+
+#ifndef PREPROCESSER_HPP
+#define PREPROCESSER_HPP
+
+#include <ostream>
+#include <tuple>
+
+#include "./lexer.hpp"
+
+// Outcome of preprocessing; OK (0) means no error.  This is an
+// unscoped enum, so the enumerators are also visible unqualified.
+enum pp_err_type_t
+{
+  OK = 0,
+  EXPECTED_NAME,    // %const not followed by a symbol
+  EXPECTED_STRING,  // %use not followed by a string literal
+  EXPECTED_END,     // %const block without a closing %end
+  FILE_NONEXISTENT, // file named by %use could not be read
+  FILE_PARSE_ERROR, // file named by %use failed to lex
+  UNKNOWN_NAME,     // $reference to a name with no %const block
+};
+
+// A preprocessor result: the error type, the token it refers to (may
+// be null) and, for FILE_PARSE_ERROR, the lexer error from the
+// included file.
+struct pp_err_t
+{
+  const token_t *reference; // non-owning; null when no token applies
+  pp_err_type_t type;
+  lerr_t lerr;
+
+  pp_err_t();
+  pp_err_t(pp_err_type_t); // implicit: allows `return <error type>;`
+  pp_err_t(pp_err_type_t, const token_t *);
+  pp_err_t(pp_err_type_t, const token_t *, lerr_t);
+};
+
+// Stream a preprocessor error as `PREPROCESSING_<NAME>`.
+std::ostream &operator<<(std::ostream &, pp_err_t &);
+
+// NOTE(review): work in progress -- the constructor definitions in
+// preprocesser.cpp are commented out.  Appears to model either a
+// single token or a named container of nested units; confirm the
+// intended design before relying on it.
+struct pp_unit_t
+{
+  const token_t *const token;
+  struct
+  {
+    std::string_view name;
+    std::vector<pp_unit_t> elements;
+  } container;
+
+  pp_unit_t(const token_t *const);
+  pp_unit_t(std::string_view, std::vector<pp_unit_t>);
+};
+
+// NOTE(review): tokens_to_units and preprocess_use are declared here
+// but no definition is visible in this commit -- confirm before use.
+std::vector<pp_unit_t> tokens_to_units(const std::vector<token_t *> &);
+pp_err_t preprocess_use(std::vector<pp_unit_t> &);
+// Run all preprocessing passes over the first vector and store the
+// resulting tokens in the second; the result's type is OK on success.
+pp_err_t preprocesser(const std::vector<token_t *> &, std::vector<token_t *> &);
+
+#endif