Namespace the lexer module
Future-proofing against any name collisions.
This commit is contained in:
102
src/lexer.cpp
102
src/lexer.cpp
@@ -24,23 +24,27 @@ static_assert(NUMBER_OF_OPCODES == 99, "ERROR: Lexer is out of date");
|
||||
|
||||
using std::string, std::string_view, std::pair, std::make_pair;
|
||||
|
||||
const auto VALID_SYMBOL = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV"
|
||||
namespace Lexer
|
||||
{
|
||||
constexpr auto VALID_SYMBOL =
|
||||
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV"
|
||||
"WXYZ0123456789-_.:%#$",
|
||||
VALID_DIGIT = "0123456789", VALID_HEX = "0123456789abcdefABCDEF";
|
||||
VALID_DIGIT = "0123456789",
|
||||
VALID_HEX = "0123456789abcdefABCDEF";
|
||||
|
||||
bool is_char_in_s(char c, const char *s)
|
||||
{
|
||||
bool is_char_in_s(char c, const char *s)
|
||||
{
|
||||
return string_view(s).find(c) != string::npos;
|
||||
}
|
||||
}
|
||||
|
||||
bool initial_match(string_view src, string_view match)
|
||||
{
|
||||
bool initial_match(string_view src, string_view match)
|
||||
{
|
||||
return (src.size() > match.size() && src.substr(0, match.size()) == match);
|
||||
}
|
||||
}
|
||||
|
||||
pair<Token, lerr_t> tokenise_symbol(string_view &source, size_t &column,
|
||||
pair<Token, lerr_t> tokenise_symbol(string_view &source, size_t &column,
|
||||
size_t line)
|
||||
{
|
||||
{
|
||||
auto end = source.find_first_not_of(VALID_SYMBOL);
|
||||
if (end == string::npos)
|
||||
end = source.size() - 1;
|
||||
@@ -221,10 +225,10 @@ pair<Token, lerr_t> tokenise_symbol(string_view &source, size_t &column,
|
||||
t.column = column;
|
||||
column += sym.size() - 1;
|
||||
return make_pair(t, lerr_t());
|
||||
}
|
||||
}
|
||||
|
||||
Token tokenise_literal_number(string_view &source, size_t &column)
|
||||
{
|
||||
Token tokenise_literal_number(string_view &source, size_t &column)
|
||||
{
|
||||
bool is_negative = false;
|
||||
if (source[0] == '-')
|
||||
{
|
||||
@@ -244,10 +248,10 @@ Token tokenise_literal_number(string_view &source, size_t &column)
|
||||
column += digits.size() + (is_negative ? 1 : 0);
|
||||
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
||||
Token tokenise_literal_hex(string_view &source, size_t &column)
|
||||
{
|
||||
Token tokenise_literal_hex(string_view &source, size_t &column)
|
||||
{
|
||||
// Remove x char from source
|
||||
source.remove_prefix(1);
|
||||
auto end = source.find_first_not_of(VALID_HEX);
|
||||
@@ -260,11 +264,11 @@ Token tokenise_literal_hex(string_view &source, size_t &column)
|
||||
|
||||
column += digits.size() + 1;
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
||||
pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
|
||||
pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
|
||||
size_t &line)
|
||||
{
|
||||
{
|
||||
Token t{};
|
||||
auto end = source.find('\'', 1);
|
||||
if (source.size() < 3 || end == 1 || end > 3)
|
||||
@@ -275,9 +279,9 @@ pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
|
||||
// Escape sequence
|
||||
char escape = '\0';
|
||||
if (source.size() < 4 || source[3] != '\'')
|
||||
return make_pair(t,
|
||||
lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
|
||||
column, line));
|
||||
return make_pair(
|
||||
t, lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE, column,
|
||||
line));
|
||||
switch (source[2])
|
||||
{
|
||||
case 'n':
|
||||
@@ -294,9 +298,9 @@ pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
|
||||
break;
|
||||
default:
|
||||
column += 2;
|
||||
return make_pair(t,
|
||||
lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
|
||||
column, line));
|
||||
return make_pair(
|
||||
t, lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE, column,
|
||||
line));
|
||||
break;
|
||||
}
|
||||
t = Token{Token::Type::LITERAL_CHAR, std::to_string(escape), column};
|
||||
@@ -310,20 +314,20 @@ pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
|
||||
source.remove_prefix(3);
|
||||
}
|
||||
return make_pair(t, lerr_t());
|
||||
}
|
||||
}
|
||||
|
||||
Token tokenise_literal_string(string_view &source, size_t &column, size_t end)
|
||||
{
|
||||
Token tokenise_literal_string(string_view &source, size_t &column, size_t end)
|
||||
{
|
||||
source.remove_prefix(1);
|
||||
Token token{Token::Type::LITERAL_STRING, string(source.substr(0, end - 1)),
|
||||
column};
|
||||
source.remove_prefix(end);
|
||||
column += end + 1;
|
||||
return token;
|
||||
}
|
||||
}
|
||||
|
||||
lerr_t tokenise_buffer(string_view source, std::vector<Token *> &tokens)
|
||||
{
|
||||
lerr_t tokenise_buffer(string_view source, std::vector<Token *> &tokens)
|
||||
{
|
||||
size_t column = 0, line = 1;
|
||||
while (source.size() > 0)
|
||||
{
|
||||
@@ -334,7 +338,8 @@ lerr_t tokenise_buffer(string_view source, std::vector<Token *> &tokens)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0;
|
||||
i < source.size() && (isspace(source[i]) || source[i] == '\0'); ++i)
|
||||
i < source.size() && (isspace(source[i]) || source[i] == '\0');
|
||||
++i)
|
||||
{
|
||||
++column;
|
||||
if (source[i] == '\n')
|
||||
@@ -417,23 +422,23 @@ lerr_t tokenise_buffer(string_view source, std::vector<Token *> &tokens)
|
||||
}
|
||||
}
|
||||
return lerr_t{};
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, Token &t)
|
||||
{
|
||||
std::ostream &operator<<(std::ostream &os, Token &t)
|
||||
{
|
||||
return os << token_type_as_cstr(t.type) << "(`" << t.content << "`)@"
|
||||
<< t.line << ", " << t.column;
|
||||
}
|
||||
}
|
||||
|
||||
Token::Token()
|
||||
{}
|
||||
Token::Token()
|
||||
{}
|
||||
|
||||
Token::Token(Token::Type type, string content, size_t col, size_t line)
|
||||
Token::Token(Token::Type type, string content, size_t col, size_t line)
|
||||
: type{type}, column{col}, line{line}, content{content}
|
||||
{}
|
||||
{}
|
||||
|
||||
const char *token_type_as_cstr(Token::Type type)
|
||||
{
|
||||
const char *token_type_as_cstr(Token::Type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case Token::Type::PP_USE:
|
||||
@@ -526,10 +531,10 @@ const char *token_type_as_cstr(Token::Type type)
|
||||
return "SYMBOL";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, lerr_t &lerr)
|
||||
{
|
||||
std::ostream &operator<<(std::ostream &os, lerr_t &lerr)
|
||||
{
|
||||
os << lerr.line << ":" << lerr.col << ": ";
|
||||
switch (lerr.type)
|
||||
{
|
||||
@@ -558,8 +563,9 @@ std::ostream &operator<<(std::ostream &os, lerr_t &lerr)
|
||||
break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
||||
lerr_t::lerr_t(lerr_type_t type, size_t col, size_t line)
|
||||
lerr_t::lerr_t(lerr_type_t type, size_t col, size_t line)
|
||||
: col{col}, line{line}, type{type}
|
||||
{}
|
||||
{}
|
||||
} // namespace Lexer
|
||||
|
||||
@@ -18,8 +18,10 @@
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
struct Token
|
||||
namespace Lexer
|
||||
{
|
||||
struct Token
|
||||
{
|
||||
enum class Type
|
||||
{
|
||||
PP_CONST, // %const(<symbol>)...
|
||||
@@ -72,14 +74,14 @@ struct Token
|
||||
|
||||
Token();
|
||||
Token(Token::Type, std::string, size_t col = 0, size_t line = 0);
|
||||
};
|
||||
};
|
||||
|
||||
const char *token_type_as_cstr(Token::Type type);
|
||||
const char *token_type_as_cstr(Token::Type type);
|
||||
|
||||
std::ostream &operator<<(std::ostream &, Token &);
|
||||
std::ostream &operator<<(std::ostream &, Token &);
|
||||
|
||||
enum class lerr_type_t
|
||||
{
|
||||
enum class lerr_type_t
|
||||
{
|
||||
OK = 0,
|
||||
INVALID_CHAR_LITERAL,
|
||||
INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
|
||||
@@ -87,18 +89,19 @@ enum class lerr_type_t
|
||||
INVALID_NUMBER_LITERAL,
|
||||
INVALID_PREPROCESSOR_DIRECTIVE,
|
||||
UNKNOWN_LEXEME,
|
||||
};
|
||||
};
|
||||
|
||||
struct lerr_t
|
||||
{
|
||||
struct lerr_t
|
||||
{
|
||||
size_t col, line;
|
||||
lerr_type_t type;
|
||||
|
||||
lerr_t(lerr_type_t type = lerr_type_t::OK, size_t col = 0, size_t line = 0);
|
||||
};
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &, lerr_t &);
|
||||
std::ostream &operator<<(std::ostream &, lerr_t &);
|
||||
|
||||
lerr_t tokenise_buffer(std::string_view, std::vector<Token *> &);
|
||||
lerr_t tokenise_buffer(std::string_view, std::vector<Token *> &);
|
||||
} // namespace Lexer
|
||||
|
||||
#endif
|
||||
|
||||
@@ -30,6 +30,8 @@ extern "C"
|
||||
using std::cout, std::cerr, std::endl;
|
||||
using std::pair, std::string, std::string_view, std::vector;
|
||||
|
||||
using Lexer::Token, Lexer::lerr_t, Lexer::lerr_type_t;
|
||||
|
||||
void usage(const char *program_name, FILE *fp)
|
||||
{
|
||||
fprintf(fp,
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
using std::pair, std::vector, std::make_pair, std::string, std::string_view;
|
||||
|
||||
using Lexer::Token, Lexer::lerr_t, Lexer::lerr_type_t;
|
||||
|
||||
#define VCLEAR(V) \
|
||||
std::for_each((V).begin(), (V).end(), \
|
||||
[](Token *t) \
|
||||
|
||||
@@ -30,33 +30,34 @@ enum pp_err_type_t
|
||||
|
||||
struct pp_err_t
|
||||
{
|
||||
const Token *reference;
|
||||
const Lexer::Token *reference;
|
||||
pp_err_type_t type;
|
||||
lerr_t lerr;
|
||||
Lexer::lerr_t lerr;
|
||||
|
||||
pp_err_t();
|
||||
pp_err_t(pp_err_type_t);
|
||||
pp_err_t(pp_err_type_t, const Token *);
|
||||
pp_err_t(pp_err_type_t, const Token *, lerr_t);
|
||||
pp_err_t(pp_err_type_t, const Lexer::Token *);
|
||||
pp_err_t(pp_err_type_t, const Lexer::Token *, Lexer::lerr_t);
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &, pp_err_t &);
|
||||
|
||||
struct pp_unit_t
|
||||
{
|
||||
const Token *const token;
|
||||
const Lexer::Token *const token;
|
||||
struct
|
||||
{
|
||||
std::string_view name;
|
||||
std::vector<pp_unit_t> elements;
|
||||
} container;
|
||||
|
||||
pp_unit_t(const Token *const);
|
||||
pp_unit_t(const Lexer::Token *const);
|
||||
pp_unit_t(std::string_view, std::vector<pp_unit_t>);
|
||||
};
|
||||
|
||||
std::vector<pp_unit_t> tokens_to_units(const std::vector<Token *> &);
|
||||
std::vector<pp_unit_t> tokens_to_units(const std::vector<Lexer::Token *> &);
|
||||
pp_err_t preprocess_use(std::vector<pp_unit_t> &);
|
||||
pp_err_t preprocesser(const std::vector<Token *> &, std::vector<Token *> &);
|
||||
pp_err_t preprocesser(const std::vector<Lexer::Token *> &,
|
||||
std::vector<Lexer::Token *> &);
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user