diff options
Diffstat (limited to 'asm')
| -rw-r--r-- | asm/lexer.c | 81 |
| -rw-r--r-- | asm/lexer.h | 8 |
| -rw-r--r-- | asm/main.c | 18 |
| -rw-r--r-- | asm/parser.c | 2 |
| -rw-r--r-- | asm/parser.h | 2 |
5 files changed, 33 insertions, 78 deletions
| diff --git a/asm/lexer.c b/asm/lexer.c index 03f7d05..2f64e80 100644 --- a/asm/lexer.c +++ b/asm/lexer.c @@ -20,12 +20,8 @@ const char *token_type_as_cstr(token_type_t type)  {    switch (type)    { -  case TOKEN_LITERAL_BYTE: -    return "LITERAL_BYTE"; -  case TOKEN_LITERAL_HWORD: -    return "LITERAL_HWORD"; -  case TOKEN_LITERAL_WORD: -    return "LITERAL_WORD"; +  case TOKEN_LITERAL_NUMBER: +    return "LITERAL_NUMBER";    case TOKEN_LITERAL_CHAR:      return "LITERAL_CHAR";    case TOKEN_SYMBOL: @@ -43,7 +39,7 @@ size_t space_left(buffer_t *buffer)  bool is_symbol(char c)  { -  return isalpha(c) || c == '-' || c == '_'; +  return isalpha(c) || c == '-' || c == '_' || c == '.';  }  token_t tokenise_symbol(buffer_t *buffer) @@ -60,37 +56,17 @@ token_t tokenise_symbol(buffer_t *buffer)    return token;  } -void tokenise_literal(buffer_t *buffer, token_t *token) +token_t tokenise_number_literal(buffer_t *buffer)  { -  token->str_size = 0; -  for (; token->str_size < space_left(buffer) && -         isdigit(buffer->data[buffer->used + token->str_size]); -       ++token->str_size) +  token_t token = {.type = TOKEN_LITERAL_NUMBER, .str_size = 0}; +  for (; token.str_size < space_left(buffer) && +         isdigit(buffer->data[buffer->used + token.str_size]); +       ++token.str_size)      continue; -  token->str = calloc(token->str_size + 1, 1); -  memcpy(token->str, buffer->data + buffer->used, token->str_size); -  token->str[token->str_size] = '\0'; -  buffer->used += token->str_size; -} - -token_t tokenise_byte_literal(buffer_t *buffer) -{ -  token_t token = {.type = TOKEN_LITERAL_BYTE}; -  tokenise_literal(buffer, &token); -  return token; -} - -token_t tokenise_hword_literal(buffer_t *buffer) -{ -  token_t token = {.type = TOKEN_LITERAL_HWORD}; -  tokenise_literal(buffer, &token); -  return token; -} - -token_t tokenise_word_literal(buffer_t *buffer) -{ -  token_t token = {.type = TOKEN_LITERAL_WORD}; -  tokenise_literal(buffer, &token); +  token.str = 
calloc(token.str_size + 1, 1); +  memcpy(token.str, buffer->data + buffer->used, token.str_size); +  token.str[token.str_size] = '\0'; +  buffer->used += token.str_size;    return token;  } @@ -103,7 +79,7 @@ token_t tokenise_char_literal(buffer_t *buffer)    return token;  } -token_t *tokenise_buffer(buffer_t *buffer, size_t *n_tokens) +token_stream_t tokenise_buffer(buffer_t *buffer)  {    buffer_t tokens = {0};    darr_init(&tokens, sizeof(token_t)); @@ -120,28 +96,8 @@ token_t *tokenise_buffer(buffer_t *buffer, size_t *n_tokens)          continue;        is_token = false;      } -    else if (space_left(buffer) > 1 && isdigit(buffer->data[buffer->used + 1])) -    { -      // Parsing numeric literals -      switch (c) -      { -      case 'b': -        buffer->used++; -        t = tokenise_byte_literal(buffer); -        break; -      case 'h': -        buffer->used++; -        t = tokenise_hword_literal(buffer); -        break; -      case 'w': -        buffer->used++; -        t = tokenise_word_literal(buffer); -        break; -      default: -        // TODO: Lex Error (INVALID_NUMERIC_LITERAL) -        exit(1); -      } -    } +    else if (isdigit(c)) +      t = tokenise_number_literal(buffer);      else if (is_symbol(c))        t = tokenise_symbol(buffer);      else if (c == '\'') @@ -151,9 +107,12 @@ token_t *tokenise_buffer(buffer_t *buffer, size_t *n_tokens)          exit(1);        t = tokenise_char_literal(buffer);      } +      if (is_token)        darr_append_bytes(&tokens, (byte *)&t, sizeof(t));    } -  *n_tokens = tokens.used / sizeof(token_t); -  return (token_t *)tokens.data; +  size_t n_tokens  = tokens.used / sizeof(token_t); +  tokens.available = n_tokens; +  tokens.used      = 0; +  return tokens;  } diff --git a/asm/lexer.h b/asm/lexer.h index 01badf4..bd9dfb2 100644 --- a/asm/lexer.h +++ b/asm/lexer.h @@ -16,10 +16,8 @@  typedef enum TokenType  { -  TOKEN_LITERAL_BYTE, +  TOKEN_LITERAL_NUMBER,    TOKEN_LITERAL_CHAR, -  TOKEN_LITERAL_HWORD, 
-  TOKEN_LITERAL_WORD,    TOKEN_SYMBOL,  } token_type_t; @@ -31,9 +29,9 @@ typedef struct  } token_t;  typedef darr_t buffer_t; +typedef darr_t token_stream_t;  const char *token_type_as_cstr(token_type_t type); - -token_t *tokenise_buffer(buffer_t *, size_t *); +token_stream_t tokenise_buffer(buffer_t *);  #endif @@ -20,18 +20,18 @@ int main(void)    darr_t buffer = darr_read_file(fp);    fclose(fp); -  size_t n        = 0; -  token_t *tokens = tokenise_buffer(&buffer, &n); -  printf("%lu bytes -> %lu tokens\n", buffer.used, n); +  token_stream_t tokens = tokenise_buffer(&buffer); +  printf("%lu bytes -> %lu tokens\n", buffer.used, tokens.available);    free(buffer.data); -  for (size_t i = 0; i < n; ++i) -    printf("%s(%.*s)\n", token_type_as_cstr(tokens[i].type), -           (int)tokens[i].str_size, tokens[i].str); +  for (size_t i = 0; i < tokens.available; ++i) +    printf("%s(%.*s)\n", token_type_as_cstr(((token_t *)tokens.data)[i].type), +           (int)((token_t *)tokens.data)[i].str_size, +           ((token_t *)tokens.data)[i].str);    // Free the tokens -  for (size_t i = 0; i < n; ++i) -    free(tokens[i].str); -  free(tokens); +  for (size_t i = 0; i < tokens.available; ++i) +    free(((token_t *)tokens.data)[i].str); +  free(tokens.data);    return 0;  } diff --git a/asm/parser.c b/asm/parser.c index f796dfc..5521585 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -7,5 +7,5 @@   * Created: 2023-10-24   * Author: Aryadev Chavali - * Description: + * Description: Parser for assembly language   */ diff --git a/asm/parser.h b/asm/parser.h index 699539b..88dc0e9 100644 --- a/asm/parser.h +++ b/asm/parser.h @@ -16,8 +16,6 @@  #include <vm/inst.h> -typedef darr_t token_stream_t; -  inst_t parse_next_inst(token_stream_t *);  inst_t *parse_stream(token_stream_t *, size_t *); | 
