diff --git a/Makefile b/Makefile index 7578e4b..e95ea69 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ TERM_RESET:=$(shell echo -e "\e[0;0m") # Setup variables for source code, output, etc ## ASSEMBLY setup SRC=src -CODE:=$(addprefix $(SRC)/, base.cpp lexer.cpp) +CODE:=$(addprefix $(SRC)/, base.cpp lexer.cpp preprocesser.cpp) OBJECTS:=$(CODE:$(SRC)/%.cpp=$(DIST)/%.o) OUT=$(DIST)/asm.out diff --git a/src/preprocesser.cpp b/src/preprocesser.cpp new file mode 100644 index 0000000..1de1ba4 --- /dev/null +++ b/src/preprocesser.cpp @@ -0,0 +1,273 @@ +/* Copyright (C) 2024 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. 
+ + * Created: 2024-07-05 + * Author: Aryadev Chavali + * Description: + */ + +#include +#include +#include + +#include +#include + +namespace Preprocesser +{ + using TT = Lexer::Token::Type; + using ET = Err::Type; + using LET = Lexer::Err::Type; + + Err *preprocess(std::vector tokens, std::vector &units, + std::vector &new_token_bag, Map &const_map, + Map &file_map, int depth) + { + // Stop preprocessing if we've smashed the preprocessing call stack + if (depth >= PREPROCESSER_MAX_DEPTH) + return new Err{ET::EXCEEDED_PREPROCESSER_DEPTH, tokens[0]}; + + for (size_t i = 0; i < tokens.size(); ++i) + { + const auto token = tokens[i]; + if (token->type == TT::PP_CONST) + { + if (i == tokens.size() - 1 || tokens[i + 1]->type != TT::SYMBOL) + return new Err{ET::EXPECTED_SYMBOL_FOR_NAME, token}; + const auto const_name = tokens[i + 1]->content; + + size_t end = 0; + for (end = i + 2; + end < tokens.size() && tokens[end]->type != TT::PP_END; ++end) + { + // TODO: Is there a better way to deal with preprocesser calls inside + // of a constant? + if (tokens[end]->type == TT::PP_CONST || + tokens[end]->type == TT::PP_USE) + return new Err{ET::DIRECTIVES_IN_CONST_BODY, tokens[end]}; + } + + if (end == tokens.size()) + return new Err{ET::EXPECTED_END, token}; + else if (end - i == 2) + return new Err{ET::EMPTY_CONST, token}; + + // If this content is actually being included (depth > 0) by another + // file, check if the constant is already defined. If so, stop what + // we're doing and continue because user has technically redefined the + // constant. Implements a #ifndef guard automatically on included + // constants. 
+ if (depth > 0 && const_map.find(const_name) != const_map.end()) + { + i = end; +#if VERBOSE >= 2 + std::cout << "[" TERM_YELLOW "PREPROCESSER" TERM_RESET "]: <" << depth + << "> [" << i << "]:\n\tPreserving definition of `" + << const_name << "` from outer scope\n"; +#endif + continue; + } + + std::vector body{end - i - 2}; + std::copy(std::begin(tokens) + i + 2, std::begin(tokens) + end, + std::begin(body)); + + const_map[const_name] = {token, body}; + i = end; + +#if VERBOSE >= 2 + std::cout << "[" TERM_YELLOW "PREPROCESSER" TERM_RESET "]: <" << depth + << "> [" << i << "]:\n\tConstant `" << const_name << "` {\n"; + + for (size_t j = 0; j < body.size(); ++j) + { + std::cout << "\t\t[" << j << "]: "; + if (body[j]) + std::cout << *body[j]; + else + std::cout << "[NULL]"; + std::cout << "\n"; + } + std::cout << "\t}\n"; +#endif + } + else if (token->type == TT::PP_USE) + { + // Ensure string in next token + if (i == tokens.size() - 1 || tokens[i + 1]->type != TT::LITERAL_STRING) + return new Err{ET::EXPECTED_FILE_NAME_AS_STRING, token}; + // Stops recursive calls on the file currently being preprocessed + if (file_map.find(token->source_name) == file_map.end()) + file_map[token->source_name] = {}; + + const auto name = tokens[i + 1]->content; +#if VERBOSE >= 2 + std::cout << "[" TERM_YELLOW "PREPROCESSER" TERM_RESET "]: <" << depth + << "> [" << i << "]: (" << *tokens[i] << "): FILENAME=`" + << name << "`\n"; +#endif + // If file has never been encountered, let's tokenise, preprocess then + // cache the result + if (file_map.find(name) == file_map.end()) + { + auto content = read_file(tokens[i + 1]->content.c_str()); + + if (!content.has_value()) + return new Err{ET::FILE_NON_EXISTENT, token}; + + std::vector body; + Lexer::Err lexer_err = Lexer::tokenise_buffer(tokens[i + 1]->content, + content.value(), body); + + if (lexer_err.type != LET::OK) + return new Err{ET::IN_FILE_LEXING, token, nullptr, lexer_err}; + + // Here we add the tokens, freshly allocated, to 
the bag so we can + // free it later + new_token_bag.insert(std::end(new_token_bag), std::begin(body), + std::end(body)); + + file_map[name].body = body; + std::vector body_units; + Err *err = preprocess(body, body_units, new_token_bag, const_map, + file_map, depth + 1); + // TODO: Introduce stack traces for this error (this error occurs in + // outside file that has use site in current file). + if (err) + return new Err{ET::IN_ERROR, token, err}; + units.push_back(Unit{token, body_units}); + ++i; + } + // Otherwise file must be part of the source tree already, so skip this + // call + else + i += 1; + } + else if (token->type == TT::PP_REFERENCE) + { + // Reference expansion based on latest constant + const auto found = const_map.find(token->content); + if (found == const_map.end()) + return new Err{ET::UNKNOWN_NAME_IN_REFERENCE, token}; + + std::vector preprocessed; + Err *err = preprocess(found->second.body, preprocessed, new_token_bag, + const_map, file_map, depth + 1); + if (err) + return new Err{ET::IN_ERROR, token, err}; + units.push_back(Unit{token, preprocessed}); + } + else if (token->type == TT::PP_END) + return new Err{ET::NO_CONST_AROUND, token}; + else + units.push_back(Unit{token, {}}); + } + return nullptr; + } + + std::string to_string(const Unit &unit, int depth) + { + std::stringstream ss; + for (int i = 0; i < depth; ++i) + ss << "\t"; + ss << Lexer::to_string(*unit.root) << " => {"; + if (unit.expansion.size() != 0) + { + ss << "\n"; + for (auto child : unit.expansion) + ss << to_string(child, depth + 1) << "\n"; + for (int i = 0; i < depth; ++i) + ss << "\t"; + } + ss << "}"; + return ss.str(); + } + + std::string to_string(const Err::Type &type) + { + switch (type) + { + case ET::EXPECTED_END: + return "EXPECTED_END"; + case ET::EMPTY_CONST: + return "EMPTY_CONST"; + case ET::NO_CONST_AROUND: + return "NO_CONST_AROUND"; + case ET::EXPECTED_SYMBOL_FOR_NAME: + return "EXPECTED_SYMBOL_FOR_NAME"; + case ET::DIRECTIVES_IN_CONST_BODY: + return 
"DIRECTIVES_IN_CONST_BODY"; + case ET::UNKNOWN_NAME_IN_REFERENCE: + return "UNKNOWN_NAME_IN_REFERENCE"; + case ET::EXPECTED_FILE_NAME_AS_STRING: + return "EXPECTED_FILE_NAME_AS_STRING"; + case ET::FILE_NON_EXISTENT: + return "FILE_NON_EXISTENT"; + case ET::IN_FILE_LEXING: + return "IN_FILE_LEXING"; + case ET::SELF_RECURSIVE_USE_CALL: + return "SELF_RECURSIVE_USE_CALL"; + case ET::IN_ERROR: + return "IN_ERROR"; + case ET::EXCEEDED_PREPROCESSER_DEPTH: + return "EXCEEDED_PREPROCESSER_DEPTH"; + default: + return ""; + } + } + + std::string to_string(const Err &err) + { + std::stringstream ss; + // Reverse traversal of err linked list + std::vector errors; + errors.push_back((Err *)&err); + for (Err *e = err.child_error; e; e = e->child_error) + errors.insert(errors.begin(), e); + for (size_t depth = 0; depth < errors.size(); ++depth) + { + // for (size_t i = 0; i < depth; ++i) + // ss << " "; + const Err &e = *errors[depth]; + ss << e.token->source_name << ":" << e.token->line << ":" + << e.token->column << ": " << to_string(e.type); + if (depth != errors.size() - 1) + ss << "\n"; + } + return ss.str(); + } + + std::ostream &operator<<(std::ostream &stream, const Unit &unit) + { + return stream << to_string(unit, 1); + } + + std::ostream &operator<<(std::ostream &stream, const Err &err) + { + return stream << to_string(err); + } + + Err::Err() + { + } + + Err::Err(Err::Type type, Lexer::Token *root, Err *child, Lexer::Err err) + : token{root}, child_error{child}, lexer_error{err}, type{type} + { + } + + Err::~Err(void) + { + if (this->child_error) + delete this->child_error; + } + +} // namespace Preprocesser diff --git a/src/preprocesser.hpp b/src/preprocesser.hpp new file mode 100644 index 0000000..3378428 --- /dev/null +++ b/src/preprocesser.hpp @@ -0,0 +1,80 @@ +/* Copyright (C) 2024 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * 
FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2024-07-03 + * Author: Aryadev Chavali + * Description: + */ + +#ifndef PREPROCESSER_HPP +#define PREPROCESSER_HPP + +#include +#include + +#include + +namespace Preprocesser +{ +#define PREPROCESSER_MAX_DEPTH 16 + struct Block + { + Lexer::Token *root; + std::vector body; + }; + + typedef std::unordered_map Map; + + struct Unit + { + Lexer::Token *const root; + std::vector expansion; + }; + + struct Err + { + Lexer::Token *token; + Err *child_error; + Lexer::Err lexer_error; + enum class Type + { + EXPECTED_END, + NO_CONST_AROUND, + EMPTY_CONST, + EXPECTED_SYMBOL_FOR_NAME, + DIRECTIVES_IN_CONST_BODY, + UNKNOWN_NAME_IN_REFERENCE, + + EXPECTED_FILE_NAME_AS_STRING, + FILE_NON_EXISTENT, + IN_FILE_LEXING, + SELF_RECURSIVE_USE_CALL, + + IN_ERROR, + EXCEEDED_PREPROCESSER_DEPTH, + } type; + + Err(); + Err(Err::Type, Lexer::Token *, Err *child = nullptr, Lexer::Err err = {}); + ~Err(void); + }; + + std::string to_string(const Unit &, int depth = 0); + std::string to_string(const Err::Type &); + std::string to_string(const Err &); + std::ostream &operator<<(std::ostream &, const Unit &); + std::ostream &operator<<(std::ostream &, const Err &); + + Err *preprocess(std::vector tokens, std::vector &units, + std::vector &new_token_bag, Map &const_map, + Map &file_map, int depth = 0); +}; // namespace Preprocesser +#endif