This repository has been archived on 2025-11-10. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
ovm/asm/lexer.h
Aryadev Chavali 93d234cd48 Lexer now returns more descriptive tokens
More useful tokens, in particular one for each possible opcode.  This
makes parsing a simpler task to reason about, as now we're just checking
against an enum rather than doing a string check in linear time.

It makes more sense to do this in the tokeniser, as the local data from
the buffer will most likely be in the cache, since the buffer is
contiguously allocated.  While it will always be slow to do linear
time checks on strings, when doing it at the parser we're having to
check strings that may be allocated in a variety of different places.
This means caching becomes a harder task, but with this approach we're
less likely to have cache misses as long as the buffer stays there.
2023-11-01 15:09:47 +00:00

68 lines
1.2 KiB
C

/* Copyright (C) 2023 Aryadev Chavali
* You may distribute and modify this code under the terms of the
* GPLv2 license. You should have received a copy of the GPLv2
* license with this file. If not, please write to:
* aryadev@aryadevchavali.com.
* Created: 2023-10-24
* Author: Aryadev Chavali
* Description: Lexer for assembly language
*/
#ifndef LEXER_H
#define LEXER_H
#include <lib/darr.h>
/* Kinds of token the assembly lexer can produce.
 *
 * Enumerators are numbered consecutively from 0 in declaration order,
 * so inserting or reordering members changes their numeric values.
 */
typedef enum TokenType
{
  /* Literal values. */
  TOKEN_LITERAL_NUMBER = 0,
  TOKEN_LITERAL_CHAR,

  /* Opcode tokens: one enumerator per instruction, so the parser can
   * compare against an enum instead of comparing strings. */
  TOKEN_NOOP,
  TOKEN_HALT,
  TOKEN_PUSH,
  TOKEN_POP,
  TOKEN_PUSH_REG,
  TOKEN_MOV,
  TOKEN_DUP,
  TOKEN_NOT,
  TOKEN_OR,
  TOKEN_AND,
  TOKEN_XOR,
  TOKEN_EQ,
  TOKEN_LT,
  TOKEN_LTE,
  TOKEN_GT,
  TOKEN_GTE,
  TOKEN_PLUS,
  TOKEN_PRINT,
  TOKEN_JUMP,
  TOKEN_JUMP_IF,

  /* NOTE(review): presumably any other name that is not an opcode --
   * confirm against the lexer implementation. */
  TOKEN_SYMBOL,
} token_type_t;
/* A single token produced by the lexer.
 *
 * NOTE(review): this header does not show who owns `str` or whether it
 * is NUL-terminated; `str_size` is presumably its length -- confirm
 * against the lexer implementation before relying on either.
 */
typedef struct
{
  token_type_t type; /* which kind of token this is */
  size_t column;     /* presumably the source column of the token (base not shown here) */
  size_t line;       /* presumably the source line of the token (base not shown here) */
  char *str;         /* text associated with the token */
  size_t str_size;   /* size associated with `str` */
} token_t;
/* Error codes reported by the lexer.  LERR_OK is 0, so a result can be
 * tested directly for truthiness to detect a failure.
 */
typedef enum
{
  /* No error. */
  LERR_OK = 0,
  /* NOTE(review): presumably raised for a malformed character literal;
   * see the lexer implementation for the exact conditions. */
  LERR_INVALID_CHAR_LITERAL = 1,
} lerr_t;
const char *lerr_as_cstr(lerr_t);
/* Role-specific aliases for the dynamic array type `darr_t`:
 *   buffer_t       - the input handed to tokenise_buffer()
 *   token_stream_t - the output of tokenise_buffer(); its element
 *                    storage is read as `token_t` by TOKEN_STREAM_AT
 * Both names denote the same underlying type, so the compiler will not
 * catch one being passed where the other is expected.
 */
typedef darr_t buffer_t, token_stream_t;
/* Access the token at position IDX in a token stream's element storage.
 *
 * DATA must be a pointer to the first element (it is cast to
 * `token_t *`), not the stream object itself.  The expansion is an
 * lvalue, so it can be read or assigned.  No bounds checking is done.
 */
#define TOKEN_STREAM_AT(DATA, IDX) (((token_t *)(DATA))[(IDX)])
/* Return a C-string representation of the token type `type`.
 *
 * NOTE(review): the result is a `const char *`; presumably it points
 * to static storage and must not be freed or modified -- confirm in
 * the implementation.
 */
const char *token_type_as_cstr(token_type_t type);
/* Tokenise the contents of `buffer`, delivering the tokens via `tokens`.
 *
 * Returns a lexer error code (`lerr_t`).
 *
 * NOTE(review): presumably returns LERR_OK on success and `tokens`
 * then holds `token_t` elements readable with TOKEN_STREAM_AT; whether
 * `tokens` must be initialised by the caller, and who frees it, is not
 * visible in this header -- confirm in the implementation.
 */
lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens);
#endif