diff options
author | Aryadev Chavali <aryadev@aryadevchavali.com> | 2024-04-14 17:04:15 +0630 |
---|---|---|
committer | Aryadev Chavali <aryadev@aryadevchavali.com> | 2024-04-14 17:04:15 +0630 |
commit | a8f605c89b0e3d57aeb30b165733d95a11829f7b (patch) | |
tree | 2cc102c2ca86d98d32268008fdb9122f76846644 | |
parent | 7a9e646d396cf8c4abbadb6e6df208bb96cd070c (diff) | |
download | ovm-a8f605c89b0e3d57aeb30b165733d95a11829f7b.tar.gz ovm-a8f605c89b0e3d57aeb30b165733d95a11829f7b.tar.bz2 ovm-a8f605c89b0e3d57aeb30b165733d95a11829f7b.zip |
Implemented tokenise_buffer
Note that this is basically the same as the previous version,
excluding the fact that it uses C++ idioms more and does a bit better
in error checking.
-rw-r--r-- | asm/lexer.cpp | 90 | ||||
-rw-r--r-- | asm/lexer.hpp | 2 |
2 files changed, 91 insertions, 1 deletions
diff --git a/asm/lexer.cpp b/asm/lexer.cpp index 760f8db..8b30c55 100644 --- a/asm/lexer.cpp +++ b/asm/lexer.cpp @@ -306,3 +306,93 @@ token_t tokenise_literal_string(string_view &source, size_t &column, size_t end) column += end + 1; return token; } + +lerr_t tokenise_buffer(string_view source, std::vector<token_t> &tokens) +{ + size_t column = 0, line = 1; + while (source.size() > 0) + { + bool is_token = true; + char first = source[0]; + token_t t{}; + if (isspace(first) || first == '\0') + { + size_t i; + for (i = 0; + i < source.size() && (isspace(source[i]) || source[i] == '\0'); ++i) + { + ++column; + if (source[i] == '\n') + { + column = 0; + ++line; + } + } + ++column; + source.remove_prefix(i); + is_token = false; + } + else if (first == ';') + { + size_t i; + for (i = 0; i < source.size() && source[i] != '\n'; ++i) + continue; + column = 0; + ++line; + source.remove_prefix(i + 1); + is_token = false; + } + else if (first == '*') + { + t = token_t(token_type_t::STAR, "", column); + source.remove_prefix(1); + } + else if (first == '\"') + { + auto end = source.find('\"', 1); + if (end == string::npos) + return lerr_t::INVALID_STRING_LITERAL; + t = tokenise_literal_string(source, column, end); + } + else if (first == '\'') + { + lerr_t lerr; + std::tie(t, lerr) = tokenise_literal_char(source, column); + if (lerr != lerr_t::OK) + return lerr; + } + else if (isdigit(first) || + (source.size() > 1 && first == '-' && isdigit(source[1]))) + { + auto end = source.find_first_not_of(VALID_DIGIT, first == '-' ? 1 : 0); + if (end == string::npos) + end = source.size() - 1; + else if (end != string::npos && !(isspace(source[end]))) + return lerr_t::INVALID_NUMBER_LITERAL; + t = tokenise_literal_number(source, column); + } + else if (first == 'x' && source.size() > 1 && + is_char_in_s(source[1], VALID_HEX)) + { + auto end = source.find_first_not_of(VALID_HEX); + if (end == string::npos) + end = source.size() - 1; + else if (end != string::npos && !(isspace(source[end]))) + return lerr_t::INVALID_NUMBER_LITERAL; + t = tokenise_literal_hex(source, column); + } + else if (is_char_in_s(first, VALID_SYMBOL)) + { + lerr_t lerr; + std::tie(t, lerr) = tokenise_symbol(source, column); + if (lerr != lerr_t::OK) + return lerr; + } + if (is_token) + { + t.line = line; + tokens.push_back(t); + } + } + return lerr_t::OK; +} diff --git a/asm/lexer.hpp b/asm/lexer.hpp index 4e8439b..3b9243f 100644 --- a/asm/lexer.hpp +++ b/asm/lexer.hpp @@ -91,6 +91,6 @@ enum lerr_t }; const char *lerr_as_cstr(lerr_t); -lerr_t tokenise_string(std::string_view, std::vector<token_t> &); +lerr_t tokenise_buffer(std::string_view, std::vector<token_t> &); #endif |