From 3c46fde66ac71e4804cdd5fe076ed930db5d7122 Mon Sep 17 00:00:00 2001
From: Aryadev Chavali <aryadev@aryadevchavali.com>
Date: Sun, 14 Apr 2024 16:57:46 +0630
Subject: Implemented tokenise_literal_hex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Note the overall size of this function in comparison to the C version,
as well as its clarity.

Of course, it is doing allocations in the background through
std::string which requires more profiling if I want to make this super
efficient™ but honestly the assembler just needs to work, whereas the
runtime needs to be fast.
---
 asm/lexer.cpp | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'asm')

diff --git a/asm/lexer.cpp b/asm/lexer.cpp
index 189b330..f8abf0a 100644
--- a/asm/lexer.cpp
+++ b/asm/lexer.cpp
@@ -20,7 +20,7 @@ using std::string, std::string_view, std::pair, std::make_pair;
 
 constexpr auto VALID_SYMBOL = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV"
                               "WXYZ0123456789-_.:()%#$",
-               VALID_DIGIT  = "0123456789";
+               VALID_DIGIT = "0123456789", VALID_HEX = "0123456789abcdefABCDEF";
 
 bool is_char_in_s(char c, const char *s)
 {
@@ -237,3 +237,19 @@ token_t tokenise_literal_number(string_view &source, size_t &column)
 
   return t;
 }
+
+// Lex a hex literal whose 'x' marker is at the front of source; returns a
+// LITERAL_NUMBER token spelled "0x<digits>" and advances source and column.
+token_t tokenise_literal_hex(string_view &source, size_t &column)
+{
+  // Remove x char from source
+  source.remove_prefix(1);
+  auto end = source.find_first_not_of(VALID_HEX);
+  if (end == string::npos) // digits run to the end of input: take them all
+    end = source.size();   // (size() - 1 lost a digit, wrapped when empty)
+  string digits{source.substr(0, end)};
+  source.remove_prefix(end);
+
+  token_t t = {token_type_t::LITERAL_NUMBER, "0x" + digits, column};
+  column += digits.size() + 1; // +1: presumably for the 'x' removed above
+  return t;
+}
-- 
cgit v1.2.3-13-gbd6f