Compare commits
3 Commits
master
...
08c543d8aa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
08c543d8aa | ||
|
|
9a41b49e49 | ||
|
|
3f9a13798a |
@@ -1,6 +1,6 @@
|
|||||||
;;; Directory Local Variables -*- no-byte-compile: t -*-
|
;;; Directory Local Variables -*- no-byte-compile: t -*-
|
||||||
;;; For more information see (info "(emacs) Directory Variables")
|
;;; For more information see (info "(emacs) Directory Variables")
|
||||||
|
|
||||||
((nil . ((compile-command . "make -k MODE=debug examples")
|
((nil . ((compile-command . "make MODE=debug -k")
|
||||||
(+license/license-choice . "MIT License")))
|
(+license/license-choice . "MIT License")))
|
||||||
(c-mode . ((mode . clang-format))))
|
(c-mode . ((mode . clang-format))))
|
||||||
|
|||||||
20
Makefile
20
Makefile
@@ -3,13 +3,13 @@ CC=cc
|
|||||||
DIST=build
|
DIST=build
|
||||||
OUT=$(DIST)/arl.out
|
OUT=$(DIST)/arl.out
|
||||||
|
|
||||||
MODULES=$(shell cd include/arl; find . -type 'd' -printf "%f\n")
|
MODULES=. lib parser
|
||||||
UNITS=main cli lib/vec lib/sv lexer/token lexer/lexer
|
UNITS=main lib/vec lib/sv parser/ast parser/parser
|
||||||
OBJECTS:=$(patsubst %,$(DIST)/%.o, $(UNITS))
|
OBJECTS:=$(patsubst %,$(DIST)/%.o, $(UNITS))
|
||||||
|
|
||||||
LDFLAGS=
|
LDFLAGS=
|
||||||
GFLAGS=-Wall -Wextra -Wpedantic -std=c23 -I./include/
|
GFLAGS=-Wall -Wextra -Wpedantic -std=c23 -I./src/
|
||||||
DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined -DVERBOSE_LOGS=1
|
DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined
|
||||||
RFLAGS=-O3
|
RFLAGS=-O3
|
||||||
|
|
||||||
MODE=release
|
MODE=release
|
||||||
@@ -26,7 +26,7 @@ DEPDIR=$(DIST)/deps
|
|||||||
$(OUT): $(OBJECTS) | $(DIST)
|
$(OUT): $(OBJECTS) | $(DIST)
|
||||||
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
$(DIST)/%.o: src/%.c | $(DIST) $(DEPDIR)
|
$(DIST)/%.o: src/arl/%.c | $(DIST) $(DEPDIR)
|
||||||
$(CC) $(CFLAGS) $(DEPFLAGS) $(DEPDIR)/$*.d -c -o $@ $<
|
$(CC) $(CFLAGS) $(DEPFLAGS) $(DEPDIR)/$*.d -c -o $@ $<
|
||||||
|
|
||||||
$(DIST):
|
$(DIST):
|
||||||
@@ -35,11 +35,7 @@ $(DIST):
|
|||||||
$(DEPDIR):
|
$(DEPDIR):
|
||||||
mkdir -p $(patsubst %,$(DEPDIR)/%, $(MODULES))
|
mkdir -p $(patsubst %,$(DEPDIR)/%, $(MODULES))
|
||||||
|
|
||||||
clangd: compile_commands.json
|
.PHONY: run clean
|
||||||
compile_commands.json: Makefile
|
|
||||||
bear -- $(MAKE) -B MODE=debug
|
|
||||||
|
|
||||||
.PHONY: run clean examples
|
|
||||||
ARGS=
|
ARGS=
|
||||||
run: $(OUT)
|
run: $(OUT)
|
||||||
./$^ $(ARGS)
|
./$^ $(ARGS)
|
||||||
@@ -47,9 +43,5 @@ run: $(OUT)
|
|||||||
clean:
|
clean:
|
||||||
rm -rf $(DIST)
|
rm -rf $(DIST)
|
||||||
|
|
||||||
examples: $(OUT)
|
|
||||||
@echo "Example: Hello World"
|
|
||||||
./$^ examples/hello-world.arl
|
|
||||||
|
|
||||||
DEPS:=$(patsubst %,$(DEPDIR)/%.d, $(UNITS))
|
DEPS:=$(patsubst %,$(DEPDIR)/%.d, $(UNITS))
|
||||||
include $(wildcard $(DEPS))
|
include $(wildcard $(DEPS))
|
||||||
|
|||||||
14
README
14
README
@@ -6,12 +6,13 @@
|
|||||||
│ /_/ \_\_| \_\_____| │
|
│ /_/ \_\_| \_\_____| │
|
||||||
└───────────────────────┘
|
└───────────────────────┘
|
||||||
|
|
||||||
Similar to Forth.
|
Similar to Forth. Compiles to C.
|
||||||
|
Native speed with simple semantics.
|
||||||
|
|
||||||
-----
|
-----
|
||||||
Goals
|
Goals
|
||||||
-----
|
-----
|
||||||
- Complete operational transpiler, with C as a provisional working target
|
- Complete operational transpiler to C
|
||||||
- Ability to reuse compiled code (as object code) in top level ARL code.
|
- Ability to reuse compiled code (as object code) in top level ARL code.
|
||||||
- Static type system with informative errors
|
- Static type system with informative errors
|
||||||
|
|
||||||
@@ -43,12 +44,3 @@ $ make DIST=<folder>
|
|||||||
|
|
||||||
Similarly, the general flags used in the C compiler may be set via the CFLAGS
|
Similarly, the general flags used in the C compiler may be set via the CFLAGS
|
||||||
variable, with linking arguments set via the LDFLAGS variable.
|
variable, with linking arguments set via the LDFLAGS variable.
|
||||||
|
|
||||||
------------------
|
|
||||||
Usage instructions
|
|
||||||
------------------
|
|
||||||
Once built, simply use the built binary like so:
|
|
||||||
$ ./build/arl.out <filename>
|
|
||||||
|
|
||||||
Alternatively, you can run the examples automatically via the Makefile:
|
|
||||||
$ make examples
|
|
||||||
103
arl.org
103
arl.org
@@ -1,64 +1,69 @@
|
|||||||
#+title: ARL - Issue tracker
|
#+title: ARL - Issue tracker
|
||||||
#+date: 2026-01-23
|
#+date: 2026-01-23
|
||||||
#+filetags: arl
|
|
||||||
|
|
||||||
* TODO Write a minimum working transpiler
|
* TODO Write a minimum working transpiler
|
||||||
We need to be able to compile the following file:
|
We need to be able to compile the following file:
|
||||||
[[file:examples/hello-world.arl]]. All it does is print "Hello,
|
[[file:examples/hello-world.arl]]. All it does is print "Hello,
|
||||||
world!". Should be relatively straightforward.
|
world!". Should be relatively straightforward.
|
||||||
** Stages
|
|
||||||
We need the following stages in our MVP transpiler:
|
|
||||||
- Source code reading (read bytes from a file)
|
|
||||||
- Parse raw bytes into tokens (Lexer)
|
|
||||||
- Interpret tokens into a classical AST (Parser)
|
|
||||||
- Stack effect and type analysis of the AST for soundness
|
|
||||||
- Translate AST into C code (Codegen)
|
|
||||||
- Compile C code into native executable (Target)
|
|
||||||
|
|
||||||
It's a Eulerian Path from the source code to the native executable.
|
|
||||||
** DONE Read file
|
** DONE Read file
|
||||||
** DONE Lexer
|
** DONE Parser
|
||||||
[[file:src/lexer/]]
|
** TODO Intermediate representation (Virtual Machine)
|
||||||
[[file:include/arl/lexer/]]
|
[[file:src/arl/vm/]]
|
||||||
** WIP Parser
|
|
||||||
[[file:src/parser/]]
|
|
||||||
[[file:include/arl/parser/]]
|
|
||||||
|
|
||||||
We need to generate some form of AST from the token stream. This
|
Before we get into generating C code and then compiling it, it might
|
||||||
should be a little more advanced than our initial stream,
|
be worth translating the parsed ARL code into a generic IR.
|
||||||
distinguishing between
|
|
||||||
- Literal values
|
|
||||||
- Primitive calls
|
|
||||||
- References to otherwise undefined words (may be defined through
|
|
||||||
import or later on)
|
|
||||||
** TODO Stack effect/type analysis
|
|
||||||
[[file:src/analysis/]]
|
|
||||||
[[file:include/arl/analysis/]]
|
|
||||||
|
|
||||||
Given the AST, we need to verify the soundness of it with regards to
|
The IR should be much more primitive in its semantics, and force clear
|
||||||
types and the stack. We have this idea of "stack effects" attached to
|
requirements of the platform we're compiling to. This way, at the
|
||||||
every node in the AST; literals push values onto the stack and pop
|
code generator stage we can figure out:
|
||||||
nothing, while operations may pop some operands and push some values.
|
- what can we reasonably use from the target platform to satisfy
|
||||||
|
requirements?
|
||||||
|
- what do we need to hand-roll on the target in order to make this
|
||||||
|
work?
|
||||||
|
|
||||||
We need a way to:
|
Essentially, we want to write a virtual machine, and translate ARL
|
||||||
- Codify the stack effects of each type of AST node
|
code into bytecode for that VM. Goals:
|
||||||
- Infer the total stack effect from a sequence of nodes
|
- Easier to optimise IR bytecode than the AST of our original program
|
||||||
|
- Easier to imagine translations from that IR bytecode into target
|
||||||
|
platform code
|
||||||
|
*** TODO Minimal IR representation
|
||||||
|
We need the following clear items in our IR:
|
||||||
|
- Static type values
|
||||||
|
- Static type variables (possibly De Bruijn numbering or other such
|
||||||
|
mechanism to abstract naming away and leave it to the target to
|
||||||
|
generate effectively)
|
||||||
|
- Strongly typed primitive operators (numeric, strings, I/O) with
|
||||||
|
packed arguments
|
||||||
|
|
||||||
These stack effects work in tandem with our type analysis. Stack
|
Read about [[https://en.wikipedia.org/wiki/Three-address_code][TAC]].
|
||||||
shape analysis tells us what operands are being fed into primitives,
|
*** TODO IR Compiler
|
||||||
while the type analysis will tell us if the operands are well formed
|
We should have a rough grouping between AST objects and this IR. As
|
||||||
for the primitives.
|
ARL is Forth-like, we can use the stack semantics to generate this IR
|
||||||
|
as we walk the AST in a linear manner.
|
||||||
|
|
||||||
|
Consider the following ARL code:
|
||||||
|
#+begin_src text
|
||||||
|
34 35 +
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
When we walk through this code:
|
||||||
|
- 34 (an integer) is pushed onto the stack
|
||||||
|
- 35 (an integer) is pushed onto the stack
|
||||||
|
- + is encountered
|
||||||
|
- Pop two values off the stack and verify their type against the
|
||||||
|
contract for "+" (something like (-> i32 i32 i32))
|
||||||
|
- Generate IR, something like ~prim-add(34, 35)~
|
||||||
|
*** TODO Consider optimisers
|
||||||
|
Certainly we should perform optimisations on the IR itself before
|
||||||
|
passing it over to the code generator. Currently we haven't got much
|
||||||
|
in the way of optimisations to consider, but it may be worth
|
||||||
|
considering.
|
||||||
** TODO Code generator
|
** TODO Code generator
|
||||||
[[file:src/codegen/]]
|
[[file:src/arl/target-c/]]
|
||||||
[[file:include/arl/codegen/]]
|
|
||||||
|
|
||||||
This should take the AST generated by the parser (which should already
|
This should take the IR translated from the AST generated by the
|
||||||
have been analysed), and write equivalent C code.
|
parser, and write equivalent C code.
|
||||||
** TODO Target compilation
|
|
||||||
[[file:src/target/]]
|
|
||||||
[[file:include/arl/target/]]
|
|
||||||
|
|
||||||
=gcc= and =clang= take C code via /stdin/, so we don't need to write
|
After we've generated the C code, we need to call a C compiler on it
|
||||||
the C code to disk - we can just leave it as a buffer of bytes. So
|
to generate a binary. GCC and Clang allow passing source code through
|
||||||
we'll call the compilers and feed the generated code from the previous
|
stdin, so we don't even need to write to disk first which is nice.
|
||||||
stage into it via stdin.
|
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
"Hello, world!\n" puts
|
"Hello, world!" println
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
;;; arl-mode.el --- ARL mode for Emacs -*- lexical-binding: t; -*-
|
|
||||||
|
|
||||||
;; Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
;; Author: Aryadev Chavali <aryadev@aryadevchavali.com>
|
|
||||||
;; Keywords:
|
|
||||||
|
|
||||||
;; Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
;; This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
;; FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
;; You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
;; which you should have received a copy of along with this program. If not,
|
|
||||||
;; please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
;;; Commentary:
|
|
||||||
|
|
||||||
;;
|
|
||||||
|
|
||||||
;;; Code:
|
|
||||||
|
|
||||||
(defvar arl-mode-comments '(?\; ";;" ("#|" . "|#"))
  "Comment syntax for `arl-mode': `;' line comments and `#| ... |#' blocks.")

(defvar arl-mode-keywords '("if" "then" "else")
  "Words highlighted as keywords in `arl-mode'.")

(defvar arl-mode-expressions '(("\".*\"" . font-lock-string-face))
  "Extra font-lock rules for `arl-mode'; highlights double-quoted text.")

;; Anchored with \\' so that only names *ending* in .arl match.
(defvar arl-mode-automode-list '("\\.arl\\'")
  "Regexps of file names that should be opened in `arl-mode'.")

;; The argument list below (comment list, keyword list, font-lock list,
;; auto-mode list, function list) is the signature of `define-generic-mode'.
;; `define-derived-mode' expects (CHILD PARENT NAME ...) and would treat
;; `arl-mode-comments' as the parent mode.
(define-generic-mode arl-mode
  arl-mode-comments
  arl-mode-keywords
  arl-mode-expressions
  arl-mode-automode-list
  nil
  "Major mode for editing ARL source files.")
|
|
||||||
|
|
||||||
(provide 'arl-mode)
|
|
||||||
;;; arl-mode.el ends here
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
/* cli.h: CLI helpers
|
|
||||||
* Created: 2026-01-29
|
|
||||||
* Author: Aryadev Chavali
|
|
||||||
* License: See end of file
|
|
||||||
* Commentary:
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef CLI_H
|
|
||||||
#define CLI_H
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include <arl/lib/sv.h>
|
|
||||||
|
|
||||||
int read_file(const char *filename, sv_t *ret);
|
|
||||||
int read_pipe(FILE *pipe, sv_t *ret);
|
|
||||||
void usage(FILE *fp);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
* You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
* which you should have received a copy of along with this program. If not,
|
|
||||||
* please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
*/
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
/* token.h: General definition of tokens, and a sequence of them.
|
|
||||||
* Created: 2026-01-22
|
|
||||||
* Author: Aryadev Chavali
|
|
||||||
* License: See end of file
|
|
||||||
* Commentary:
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TOKEN_H
|
|
||||||
#define TOKEN_H
|
|
||||||
|
|
||||||
#include <arl/lib/base.h>
|
|
||||||
#include <arl/lib/sv.h>
|
|
||||||
#include <arl/lib/vec.h>
|
|
||||||
|
|
||||||
/// Types of tokens
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
TOKEN_TYPE_KNOWN = 0,
|
|
||||||
TOKEN_TYPE_SYMBOL,
|
|
||||||
TOKEN_TYPE_STRING,
|
|
||||||
|
|
||||||
NUM_TOKEN_TYPES,
|
|
||||||
} token_type_t;
|
|
||||||
|
|
||||||
/// Known symbols which later stages would benefit from.
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
TOKEN_KNOWN_PUTS,
|
|
||||||
NUM_TOKEN_KNOWNS,
|
|
||||||
} token_known_t;
|
|
||||||
|
|
||||||
const char *token_known_to_cstr(token_known_t);
|
|
||||||
|
|
||||||
/// Tokens are a tagged union
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
u64 byte_location;
|
|
||||||
token_type_t type;
|
|
||||||
union
|
|
||||||
{
|
|
||||||
token_known_t as_known;
|
|
||||||
sv_t as_symbol;
|
|
||||||
sv_t as_string;
|
|
||||||
};
|
|
||||||
} token_t;
|
|
||||||
|
|
||||||
token_t token_known(u64 byte, token_known_t known);
|
|
||||||
token_t token_symbol(u64 byte, sv_t symbol);
|
|
||||||
token_t token_string(u64 byte, sv_t string);
|
|
||||||
void token_print(FILE *fp, token_t *token);
|
|
||||||
|
|
||||||
/// Sequence of tokens
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
vec_t vec;
|
|
||||||
} token_stream_t;
|
|
||||||
|
|
||||||
void token_stream_free(token_stream_t *token);
|
|
||||||
void token_stream_print(FILE *fp, token_stream_t *token);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
* You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
* which you should have received a copy of along with this program. If not,
|
|
||||||
* please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
*/
|
|
||||||
@@ -33,35 +33,13 @@ typedef double f64;
|
|||||||
#define MIN(A, B) ((A) > (B) ? (B) : (A))
|
#define MIN(A, B) ((A) > (B) ? (B) : (A))
|
||||||
#define ARRSIZE(A) ((sizeof(A)) / sizeof((A)[0]))
|
#define ARRSIZE(A) ((sizeof(A)) / sizeof((A)[0]))
|
||||||
|
|
||||||
#ifndef VERBOSE_LOGS
|
#define FAIL(...) \
|
||||||
#define VERBOSE_LOGS 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if VERBOSE_LOGS
|
|
||||||
#define LOG(...) \
|
|
||||||
do \
|
|
||||||
{ \
|
|
||||||
fprintf(stdout, "LOG: "); \
|
|
||||||
fprintf(stdout, __VA_ARGS__); \
|
|
||||||
} while (0);
|
|
||||||
#else
|
|
||||||
#define LOG(...)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define LOG_ERR(...) \
|
|
||||||
do \
|
do \
|
||||||
{ \
|
{ \
|
||||||
|
fprintf(stderr, "FAIL: "); \
|
||||||
fprintf(stderr, __VA_ARGS__); \
|
fprintf(stderr, __VA_ARGS__); \
|
||||||
} while (0);
|
assert(0); \
|
||||||
|
|
||||||
#define FAIL(...) \
|
|
||||||
do \
|
|
||||||
{ \
|
|
||||||
LOG_ERR("FAIL: "); \
|
|
||||||
LOG_ERR(__VA_ARGS__); \
|
|
||||||
assert(0); \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
/* Copyright (C) 2026 Aryadev Chavali
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
* Created: 2026-01-22
|
* Created: 2026-01-22
|
||||||
* Author: Aryadev Chavali
|
* Author: Aryadev Chavali
|
||||||
* License: See end of file
|
* License: See end of file
|
||||||
* Commentary: See /include/arl/lib/sv.h
|
* Commentary: See src/arl/lib/sv.h
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
* Created: 2026-01-22
|
* Created: 2026-01-22
|
||||||
* Author: Aryadev Chavali
|
* Author: Aryadev Chavali
|
||||||
* License: See end of file
|
* License: See end of file
|
||||||
* Commentary: See /include/arl/lib/vec.h
|
* Commentary:
|
||||||
|
|
||||||
Taken from prick_vec.h: see https://github.com/oreodave/prick.
|
Taken from prick_vec.h: see https://github.com/oreodave/prick.
|
||||||
*/
|
*/
|
||||||
@@ -31,14 +31,6 @@ void vec_append_byte(vec_t *vec, u8 byte)
|
|||||||
++vec->size;
|
++vec->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 *vec_pop(vec_t *vec, u64 size)
|
|
||||||
{
|
|
||||||
if (!vec || vec->size < size)
|
|
||||||
return NULL;
|
|
||||||
vec->size -= size;
|
|
||||||
return (u8 *)vec_data(vec) + vec->size;
|
|
||||||
}
|
|
||||||
|
|
||||||
void *vec_data(vec_t *vec)
|
void *vec_data(vec_t *vec)
|
||||||
{
|
{
|
||||||
if (!vec)
|
if (!vec)
|
||||||
@@ -97,14 +89,6 @@ void vec_free(vec_t *vec)
|
|||||||
memset(vec, 1, sizeof(*vec));
|
memset(vec, 1, sizeof(*vec));
|
||||||
}
|
}
|
||||||
|
|
||||||
void vec_reset(vec_t *vec)
|
|
||||||
{
|
|
||||||
if (!vec)
|
|
||||||
return;
|
|
||||||
memset(vec_data(vec), 0, vec->capacity);
|
|
||||||
vec->size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void vec_clone(vec_t *v2, vec_t *v1)
|
void vec_clone(vec_t *v2, vec_t *v1)
|
||||||
{
|
{
|
||||||
if (!v1 || !v2)
|
if (!v1 || !v2)
|
||||||
@@ -31,13 +31,9 @@ typedef struct
|
|||||||
|
|
||||||
static_assert(sizeof(vec_t) == 64, "Expected sizeof(vec_t) to be 64");
|
static_assert(sizeof(vec_t) == 64, "Expected sizeof(vec_t) to be 64");
|
||||||
|
|
||||||
// standard old appending methods
|
|
||||||
void vec_append(vec_t *vec, const void *const ptr, u64 size);
|
void vec_append(vec_t *vec, const void *const ptr, u64 size);
|
||||||
void vec_append_byte(vec_t *vec, u8 byte);
|
void vec_append_byte(vec_t *vec, u8 byte);
|
||||||
|
|
||||||
// vector-as-a-stack
|
|
||||||
u8 *vec_pop(vec_t *vec, u64 size);
|
|
||||||
|
|
||||||
// Returns pointer to the start of the buffer VEC is currently using to store
|
// Returns pointer to the start of the buffer VEC is currently using to store
|
||||||
// data (either its inline buffer or the heap buffer).
|
// data (either its inline buffer or the heap buffer).
|
||||||
void *vec_data(vec_t *vec);
|
void *vec_data(vec_t *vec);
|
||||||
@@ -47,13 +43,8 @@ void vec_ensure_capacity(vec_t *vec, u64 capacity);
|
|||||||
|
|
||||||
// Ensure VEC has at least SIZE bytes free
|
// Ensure VEC has at least SIZE bytes free
|
||||||
void vec_ensure_free(vec_t *vec, u64 size);
|
void vec_ensure_free(vec_t *vec, u64 size);
|
||||||
|
|
||||||
// Free the memory associated with the vector
|
|
||||||
void vec_free(vec_t *vec);
|
void vec_free(vec_t *vec);
|
||||||
|
|
||||||
// Reset a vector while preserving any allocations
|
|
||||||
void vec_reset(vec_t *vec);
|
|
||||||
|
|
||||||
// Copy all data from V1 into V2.
|
// Copy all data from V1 into V2.
|
||||||
void vec_clone(vec_t *v2, vec_t *v1);
|
void vec_clone(vec_t *v2, vec_t *v1);
|
||||||
|
|
||||||
82
src/arl/main.c
Normal file
82
src/arl/main.c
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
/* main.c: Entry point; reads the hello-world example, parses it and prints the AST.
|
||||||
|
* Created: 2026-01-22
|
||||||
|
* Author: Aryadev Chavali
|
||||||
|
* License: See end of file
|
||||||
|
* Commentary:
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <arl/lib/base.h>
|
||||||
|
#include <arl/lib/sv.h>
|
||||||
|
#include <arl/lib/vec.h>
|
||||||
|
#include <arl/parser/ast.h>
|
||||||
|
#include <arl/parser/parser.h>
|
||||||
|
|
||||||
|
/// Parser
|
||||||
|
sv_t read_file(const char *filename)
|
||||||
|
{
|
||||||
|
FILE *fp = fopen(filename, "rb");
|
||||||
|
if (!fp)
|
||||||
|
FAIL("File `%s` does not exist\n", filename);
|
||||||
|
|
||||||
|
fseek(fp, 0, SEEK_END);
|
||||||
|
long size = ftell(fp);
|
||||||
|
fseek(fp, 0, SEEK_SET);
|
||||||
|
char *buffer = calloc(1, size + 1);
|
||||||
|
fread(buffer, size, 1, fp);
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
buffer[size] = '\0';
|
||||||
|
return SV(buffer, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
const char *filename = "./examples/hello-world.arl";
|
||||||
|
sv_t contents = read_file(filename);
|
||||||
|
printf("%s\n=> `" PR_SV "`\n", filename, SV_FMT(contents));
|
||||||
|
|
||||||
|
parse_stream_t stream = {.byte = 0, .contents = contents};
|
||||||
|
ast_t ast = {0};
|
||||||
|
parse_err_t perr = parse(&ast, &stream);
|
||||||
|
if (perr)
|
||||||
|
{
|
||||||
|
u64 line = 1, col = 0;
|
||||||
|
parse_stream_get_line_col(&stream, &line, &col);
|
||||||
|
|
||||||
|
fprintf(stderr, "%s:%lu:%lu: %s\n", filename, line, col,
|
||||||
|
parse_err_to_string(perr));
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
printf("=> Parsed %lu nodes\n", ast.nodes.size / sizeof(ast_node_t));
|
||||||
|
ast_print(stdout, &ast);
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
free(contents.data);
|
||||||
|
ast_free(&ast);
|
||||||
|
return 0;
|
||||||
|
fail:
|
||||||
|
if (contents.data)
|
||||||
|
free(contents.data);
|
||||||
|
if (ast.nodes.capacity > 0)
|
||||||
|
ast_free(&ast);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copyright (C) 2026 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the MIT License,
|
||||||
|
* which you should have received a copy of along with this program. If not,
|
||||||
|
* please go to <https://opensource.org/license/MIT>.
|
||||||
|
|
||||||
|
*/
|
||||||
88
src/arl/parser/ast.c
Normal file
88
src/arl/parser/ast.c
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
/* ast.c: Implementation of AST constructor/destructor functions
|
||||||
|
* Created: 2026-01-22
|
||||||
|
* Author: Aryadev Chavali
|
||||||
|
* License: See end of file
|
||||||
|
* Commentary: See ast.h.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <arl/lib/vec.h>
|
||||||
|
#include <arl/parser/ast.h>
|
||||||
|
|
||||||
|
// Build a string node carrying STRING, tagged with the location BYTE.
ast_node_t ast_node_string(u64 byte, sv_t string)
{
  ast_node_t node      = {0};
  node.byte_location   = byte;
  node.type            = AST_NODE_TYPE_STRING;
  node.value.as_string = string;
  return node;
}
|
||||||
|
|
||||||
|
// Build a symbol node carrying SYMBOL, tagged with the location BYTE.
ast_node_t ast_node_symbol(u64 byte, sv_t symbol)
{
  ast_node_t node      = {0};
  node.byte_location   = byte;
  node.type            = AST_NODE_TYPE_SYMBOL;
  // as_symbol and as_string are both sv_t and share the union's storage.
  node.value.as_symbol = symbol;
  return node;
}
|
||||||
|
|
||||||
|
// Write a short description of OBJ to FP: `NIL` for a null pointer, otherwise
// `SYMBOL(text)` or `STRING(text)` depending on the node's type tag.  A node
// with any other tag produces no output.
void ast_node_print(FILE *fp, ast_node_t *obj)
{
  if (!obj)
  {
    fprintf(fp, "NIL");
  }
  else if (obj->type == AST_NODE_TYPE_SYMBOL)
  {
    fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(obj->value.as_symbol));
  }
  else if (obj->type == AST_NODE_TYPE_STRING)
  {
    fprintf(fp, "STRING(" PR_SV ")", SV_FMT(obj->value.as_string));
  }
}
|
||||||
|
|
||||||
|
// Write AST to FP as a brace-delimited listing, one `\t[index]: node` line per
// node.  A null or empty AST is written as `{}`.  No trailing newline is
// emitted in any case; the caller decides how to terminate the line.
void ast_print(FILE *fp, ast_t *ast)
{
  if (!ast || ast->nodes.size == 0)
  {
    fprintf(fp, "{}");
    return;
  }

  // The vector stores raw bytes, so the node count is its size in bytes
  // divided by the size of one node.
  const u64 count = ast->nodes.size / sizeof(ast_node_t);

  fprintf(fp, "{\n");
  for (u64 i = 0; i < count; ++i)
  {
    ast_node_t item = VEC_GET(&ast->nodes, i, ast_node_t);
    // Widen explicitly: u64 need not be `unsigned long` on every platform.
    fprintf(fp, "\t[%llu]: ", (unsigned long long)i);
    ast_node_print(fp, &item);
    fprintf(fp, "\n");
  }
  fprintf(fp, "}");
}
|
||||||
|
|
||||||
|
// Release the storage owned by AST.  A null AST is ignored, matching
// ast_print's tolerance of null input.
void ast_free(ast_t *ast)
{
  if (!ast)
    return;
  // The node vector is the only thing the AST holds, so freeing it is enough.
  vec_free(&ast->nodes);
}
|
||||||
|
|
||||||
|
/* Copyright (C) 2026 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the MIT License,
|
||||||
|
* which you should have received a copy of along with this program. If not,
|
||||||
|
* please go to <https://opensource.org/license/MIT>.
|
||||||
|
|
||||||
|
*/
|
||||||
59
src/arl/parser/ast.h
Normal file
59
src/arl/parser/ast.h
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
/* ast.h: General definition of the AST and nodes within it.
|
||||||
|
* Created: 2026-01-22
|
||||||
|
* Author: Aryadev Chavali
|
||||||
|
* License: See end of file
|
||||||
|
* Commentary:
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AST_H
|
||||||
|
#define AST_H
|
||||||
|
|
||||||
|
#include <arl/lib/base.h>
|
||||||
|
#include <arl/lib/sv.h>
|
||||||
|
#include <arl/lib/vec.h>
|
||||||
|
|
||||||
|
/// Types the AST can encode.  The tag stored in ast_node_t::type tells readers
/// (e.g. ast_node_print) which member of the node's value union to use.
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
// Node built by ast_node_symbol.
AST_NODE_TYPE_SYMBOL = 0,
|
||||||
|
// Node built by ast_node_string.
AST_NODE_TYPE_STRING,
|
||||||
|
} ast_node_type_t;
|
||||||
|
|
||||||
|
/// Node of the AST as a tagged union: `type` selects the active member of
/// `value`.
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
// Location value handed to the ast_node_* constructors (their `byte`
// argument).
u64 byte_location;
|
||||||
|
// Discriminator for `value`.
ast_node_type_t type;
|
||||||
|
union
|
||||||
|
{
|
||||||
|
// Read when type == AST_NODE_TYPE_STRING.
sv_t as_string;
|
||||||
|
// Read when type == AST_NODE_TYPE_SYMBOL.
sv_t as_symbol;
|
||||||
|
} value;
|
||||||
|
} ast_node_t;
|
||||||
|
|
||||||
|
ast_node_t ast_node_string(u64 byte, sv_t string);
|
||||||
|
ast_node_t ast_node_symbol(u64 byte, sv_t symbol);
|
||||||
|
void ast_node_print(FILE *fp, ast_node_t *obj);
|
||||||
|
|
||||||
|
/// The AST as a flat collection of nodes.  Release with ast_free.
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
// Byte vector holding ast_node_t values back to back; ast_print derives the
// node count as nodes.size / sizeof(ast_node_t).
vec_t nodes;
|
||||||
|
} ast_t;
|
||||||
|
|
||||||
|
void ast_free(ast_t *ast);
|
||||||
|
void ast_print(FILE *fp, ast_t *ast);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Copyright (C) 2026 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the MIT License,
|
||||||
|
* which you should have received a copy of along with this program. If not,
|
||||||
|
* please go to <https://opensource.org/license/MIT>.
|
||||||
|
|
||||||
|
*/
|
||||||
165
src/arl/parser/parser.c
Normal file
165
src/arl/parser/parser.c
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
/* parser.c: Implementation of parser.
|
||||||
|
* Created: 2026-01-22
|
||||||
|
* Author: Aryadev Chavali
|
||||||
|
* License: See end of file
|
||||||
|
* Commentary: See parser.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "arl/lib/sv.h"
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <arl/parser/parser.h>
|
||||||
|
|
||||||
|
/// Characters that may appear in a symbol: ASCII letters, digits and most
/// punctuation.  Whitespace, the speech mark and the square brackets are
/// deliberately absent.  The pointer itself is const so the table cannot be
/// re-pointed by accident.
static const char *const SYMBOL_CHARS =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&'()*+,-./"
    ":;<=>?@\\^_`{|}~0123456789";
|
||||||
|
|
||||||
|
// Map a parse error code to a static, human readable name.  An out-of-range
// code is reported through FAIL; should FAIL not abort (assert compiled out),
// "UNKNOWN" is returned so callers never receive an indeterminate pointer.
const char *parse_err_to_string(parse_err_t err)
{
  switch (err)
  {
  case PARSE_ERR_OK:
    return "OK";
  case PARSE_ERR_EXPECTED_SPEECH_MARKS:
    return "EXPECTED_SPEECH_MARKS";
  case PARSE_ERR_UNKNOWN_CHAR:
    return "UNKNOWN_CHAR";
  default:
    break;
  }

  FAIL("Unexpected value for parse_err_t: %d\n", (int)err);
  return "UNKNOWN";
}
|
||||||
|
|
||||||
|
/// Prototypes for streams
|
||||||
|
bool stream_eos(parse_stream_t *stream);
|
||||||
|
char stream_peek(parse_stream_t *stream);
|
||||||
|
void stream_advance(parse_stream_t *stream, u64 size);
|
||||||
|
u64 stream_size(parse_stream_t *stream);
|
||||||
|
|
||||||
|
// Walk the bytes that precede the stream's cursor and update *LINE / *COL
// accordingly: a newline bumps *LINE and zeroes *COL, any other byte bumps
// *COL.  The counters are adjusted in place, not reset, so the caller chooses
// the starting values (main seeds them with line 1, column 0).
void parse_stream_get_line_col(parse_stream_t *stream, u64 *line, u64 *col)
{
  assert(stream && line && col && "Expected valid pointers.");

  u64 offset = 0;
  while (offset < stream->byte)
  {
    const bool is_newline = stream->contents.data[offset] == '\n';
    ++offset;

    if (!is_newline)
    {
      *col += 1;
      continue;
    }
    *line += 1;
    *col = 0;
  }
}
|
||||||
|
|
||||||
|
/// Prototypes for parsing subroutines
|
||||||
|
parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret);
|
||||||
|
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret);
|
||||||
|
|
||||||
|
// Parse the whole of STREAM, appending one node per string literal or symbol
// to OUT->nodes.  Whitespace between nodes is skipped.  Parsing stops at the
// first error, which is returned; nodes appended before the error stay in OUT
// and the stream's cursor is left where the sub-parser stopped.
parse_err_t parse(ast_t *out, parse_stream_t *stream)
{
  assert(out && stream && "Expected valid pointers");
  while (!stream_eos(stream))
  {
    const char cur = stream_peek(stream);
    // <ctype.h> classifiers are only defined for unsigned char values (and
    // EOF); a plain char may be negative for bytes >= 0x80.
    const unsigned char ucur = (unsigned char)cur;

    if (isspace(ucur))
    {
      // Consume the entire run of whitespace.
      do
      {
        stream_advance(stream, 1);
      } while (!stream_eos(stream) &&
               isspace((unsigned char)stream_peek(stream)));
      continue;
    }

    // Filled in by whichever sub-parser handles this byte.
    ast_node_t node  = {0};
    parse_err_t perr = PARSE_ERR_OK;

    if (cur == '"')
      perr = parse_string(stream, &node);
    // strchr also matches the terminating NUL of SYMBOL_CHARS, so a NUL byte
    // in the input must be excluded explicitly.  Symbols may contain digits
    // but may not start with one.
    else if (cur != '\0' && !isdigit(ucur) && strchr(SYMBOL_CHARS, cur))
      perr = parse_symbol(stream, &node);
    else
      return PARSE_ERR_UNKNOWN_CHAR;

    if (perr)
      return perr;
    vec_append(&out->nodes, &node, sizeof(node));
  }
  return PARSE_ERR_OK;
}
|
||||||
|
|
||||||
|
// Parse a string literal whose opening speech mark sits at the stream's
// cursor.  On success *RET holds a string node located at the opening mark and
// the cursor is moved past the closing mark.  If the remaining input is
// exhausted before a closing mark is seen, PARSE_ERR_EXPECTED_SPEECH_MARKS is
// returned and *RET is left untouched.
// NOTE(review): relies on sv_till returning the full length of its input when
// no speech mark is present - confirm against sv.h.
parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret)
{
  // Step over the opening speech mark.
  stream_advance(stream, 1);

  const sv_t rest  = sv_chop_left(stream->contents, stream->byte);
  const u64 length = sv_till(rest, "\"");

  if (stream->byte + length != stream->contents.size)
  {
    // The closing mark exists: emit the node (located at the opening mark, one
    // byte behind the cursor), then skip the body and the closing mark.
    *ret = ast_node_string(stream->byte - 1, SV(rest.data, length));
    stream_advance(stream, length + 1);
    return PARSE_ERR_OK;
  }

  return PARSE_ERR_EXPECTED_SPEECH_MARKS;
}
|
||||||
|
|
||||||
|
// Parses a symbol beginning at STREAM's cursor, storing a symbol node located
// at that cursor in RET and advancing the cursor by the symbol's length.
// Always returns PARSE_ERR_OK.
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret)
{
  const u64 start = stream->byte;
  sv_t rest       = sv_chop_left(stream->contents, start);

  // Length of the leading run accepted by sv_while against SYMBOL_CHARS.
  const u64 length = sv_while(rest, SYMBOL_CHARS);

  *ret = ast_node_symbol(start, SV(rest.data, length));
  stream_advance(stream, length);
  return PARSE_ERR_OK;
}
|
||||||
|
|
||||||
|
// True once STREAM's cursor is at or beyond the end of its contents.
bool stream_eos(parse_stream_t *stream)
{
  const u64 cursor = stream->byte;
  const u64 length = stream->contents.size;
  return !(cursor < length);
}
|
||||||
|
|
||||||
|
// Returns the byte under STREAM's cursor without consuming it, or '\0' when
// the stream is exhausted.
char stream_peek(parse_stream_t *stream)
{
  return stream_eos(stream) ? '\0' : stream->contents.data[stream->byte];
}
|
||||||
|
|
||||||
|
// Moves STREAM's cursor forward by SIZE bytes, clamping it to the end of the
// contents so it never points past the buffer.
void stream_advance(parse_stream_t *stream, u64 size)
{
  const u64 limit = stream->contents.size;

  // Compare SIZE against the remaining distance instead of computing
  // `byte + size`: that sum can wrap around for very large SIZE, which would
  // slip past the clamp and move the cursor backwards.
  if (stream->byte >= limit || size >= limit - stream->byte)
    stream->byte = limit;
  else
    stream->byte += size;
}
|
||||||
|
|
||||||
|
// Total number of bytes in STREAM's contents (independent of the cursor).
u64 stream_size(parse_stream_t *stream)
{
  const u64 total = stream->contents.size;
  return total;
}
|
||||||
|
|
||||||
|
/* Copyright (C) 2026 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the MIT License,
|
||||||
|
* which you should have received a copy of along with this program. If not,
|
||||||
|
* please go to <https://opensource.org/license/MIT>.
|
||||||
|
|
||||||
|
*/
|
||||||
@@ -1,38 +1,38 @@
|
|||||||
/* lexer.h: Lexer which takes character buffers and yields a sequence of tokens.
|
/* parser.h: Parser which takes character buffers and yields an AST
|
||||||
* Created: 2026-01-22
|
* Created: 2026-01-22
|
||||||
* Author: Aryadev Chavali
|
* Author: Aryadev Chavali
|
||||||
* License: See end of file
|
* License: See end of file
|
||||||
* Commentary:
|
* Commentary:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LEXER_H
|
#ifndef PARSER_H
|
||||||
#define LEXER_H
|
#define PARSER_H
|
||||||
|
|
||||||
#include <arl/lexer/token.h>
|
#include <arl/parser/ast.h>
|
||||||
|
|
||||||
/// Token streams, utilised when lexing.
|
/// Parser streams, utilised when generating an AST.
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
u64 byte;
|
u64 byte;
|
||||||
sv_t contents;
|
sv_t contents;
|
||||||
} lex_stream_t;
|
} parse_stream_t;
|
||||||
|
|
||||||
/// Types of errors that may occur during lexing
|
/// Types of errors that may occur during parsing
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
LEX_ERR_OK = 0,
|
PARSE_ERR_OK = 0,
|
||||||
LEX_ERR_EXPECTED_SPEECH_MARKS,
|
PARSE_ERR_EXPECTED_SPEECH_MARKS,
|
||||||
LEX_ERR_UNKNOWN_CHAR,
|
PARSE_ERR_UNKNOWN_CHAR,
|
||||||
} lex_err_t;
|
} parse_err_t;
|
||||||
const char *lex_err_to_string(lex_err_t err);
|
const char *parse_err_to_string(parse_err_t err);
|
||||||
|
|
||||||
// Generates a token stream from a lex_stream_t, storing it in OUT. Returns any
|
// Generates an AST from STREAM, storing it in OUT. Returns any errors it may
|
||||||
// errors it may generate.
|
// generate.
|
||||||
lex_err_t lex_stream(token_stream_t *out, lex_stream_t *stream);
|
parse_err_t parse(ast_t *out, parse_stream_t *stream);
|
||||||
|
|
||||||
// Computes the line and column that STREAM is currently pointing at in its
|
// Computes the line and column that STREAM is currently pointing at in its
|
||||||
// buffer, storing it in LINE and COL.
|
// buffer, storing it in LINE and COL.
|
||||||
void lex_stream_get_line_col(lex_stream_t *stream, u64 *line, u64 *col);
|
void parse_stream_get_line_col(parse_stream_t *stream, u64 *line, u64 *col);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
83
src/cli.c
83
src/cli.c
@@ -1,83 +0,0 @@
|
|||||||
/* cli.c:
|
|
||||||
* Created: 2026-01-29
|
|
||||||
* Author: Aryadev Chavali
|
|
||||||
* License: See end of file
|
|
||||||
* Commentary: See /include/arl/cli.h
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include <arl/cli.h>
|
|
||||||
#include <arl/lib/vec.h>
|
|
||||||
|
|
||||||
// Reads the whole of FILENAME into RET.  On success returns 0; RET->data is a
// calloc'd, NUL-terminated buffer the caller must free and RET->size is the
// number of bytes read (terminator excluded).  Returns 1 if the file cannot be
// opened, measured, allocated for or fully read; in that case no buffer is
// handed to the caller.
int read_file(const char *filename, sv_t *ret)
{
  // NOTE: Relies on fseek/ftell to size the file, so FILENAME must be a
  // seekable regular file rather than a pipe.
  FILE *fp = fopen(filename, "rb");
  if (!fp)
    return 1;

  int err       = 1;
  long length   = -1;
  size_t nbytes = 0;

  if (fseek(fp, 0, SEEK_END) == 0)
    length = ftell(fp);
  // ftell reports failure as -1; never let that become a huge unsigned size.
  if (length < 0 || fseek(fp, 0, SEEK_SET) != 0)
    goto done;

  nbytes = (size_t)length;
  // One extra zeroed byte from calloc serves as the NUL terminator.
  ret->data = calloc(1, nbytes + 1);
  if (!ret->data)
    goto done;

  if (fread(ret->data, 1, nbytes, fp) != nbytes)
  {
    // Short read: do not hand back a partially filled buffer.
    free(ret->data);
    ret->data = NULL;
    goto done;
  }

  ret->size = nbytes;
  err       = 0;

done:
  fclose(fp);
  return err;
}
|
|
||||||
|
|
||||||
// Reads PIPE until end of file into RET.  On success returns 0; RET->data is
// a heap buffer terminated by a NUL byte that the caller must free, and
// RET->size is the number of bytes read (terminator excluded).  Returns 1 on a
// read error or allocation failure.
int read_pipe(FILE *pipe, sv_t *ret)
{
  // NOTE: A pipe cannot be sized up front, so read fixed-size chunks and let
  // a vector keep them contiguous.
  vec_t contents = {0};
  char buffer[1024];
  size_t bytes_read = 0;

  // Loop on fread's result rather than feof(): after a read error feof()
  // stays false and the old loop would never terminate.
  while ((bytes_read = fread(buffer, 1, sizeof(buffer), pipe)) > 0)
    vec_append(&contents, buffer, bytes_read);

  if (ferror(pipe))
  {
    vec_free(&contents);
    return 1;
  }

  // Record the size before the terminator is appended so it is not counted.
  ret->size = contents.size;
  vec_append_byte(&contents, '\0');

  if (contents.not_inlined)
  {
    // The vector's storage is already on the heap: hand it to the caller.
    ret->data = vec_data(&contents);
    return 0;
  }

  // The vector's storage lives inside `contents` itself, which dies with this
  // function, so copy it out into a fresh heap buffer.
  ret->data = calloc(1, contents.size);
  if (!ret->data)
  {
    ret->size = 0;
    return 1;
  }
  memcpy(ret->data, vec_data(&contents), contents.size);
  return 0;
}
|
|
||||||
|
|
||||||
// Writes the command line help text to FP.
void usage(FILE *fp)
{
  static const char text[] = "Usage: arl [FILE]\n"
                             "Compiles [FILE] as ARL source code.\n"
                             " [FILE]: File to compile.\n"
                             "If FILE is \"--\", then read from stdin.\n";
  fputs(text, fp);
}
|
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
* You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
* which you should have received a copy of along with this program. If not,
|
|
||||||
* please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
*/
|
|
||||||
@@ -1,184 +0,0 @@
|
|||||||
/* lexer.c: Implementation of the lexer.
|
|
||||||
* Created: 2026-01-22
|
|
||||||
* Author: Aryadev Chavali
|
|
||||||
* License: See end of file
|
|
||||||
* Commentary: See /include/arl/lexer/lexer.h
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include <arl/lexer/lexer.h>
|
|
||||||
#include <arl/lexer/token.h>
|
|
||||||
#include <arl/lib/sv.h>
|
|
||||||
|
|
||||||
/// Expected characters in a symbol
|
|
||||||
static const char *SYMBOL_CHARS =
|
|
||||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&'()*+,-./"
|
|
||||||
":;<=>?@\\^_`{|}~0123456789";
|
|
||||||
|
|
||||||
// Maps ERR to a static, human readable name.  Values outside lex_err_t are
// reported through FAIL.
const char *lex_err_to_string(lex_err_t err)
{
  if (err == LEX_ERR_OK)
    return "OK";
  if (err == LEX_ERR_EXPECTED_SPEECH_MARKS)
    return "EXPECTED_SPEECH_MARKS";
  if (err == LEX_ERR_UNKNOWN_CHAR)
    return "UNKNOWN_CHAR";

  FAIL("Unexpected lex_err_t value: %d\n", err);
}
|
|
||||||
|
|
||||||
/// Prototypes for streams
|
|
||||||
bool stream_eos(lex_stream_t *stream);
|
|
||||||
char stream_peek(lex_stream_t *stream);
|
|
||||||
void stream_advance(lex_stream_t *stream, u64 size);
|
|
||||||
u64 stream_size(lex_stream_t *stream);
|
|
||||||
|
|
||||||
// Walks every byte before STREAM's cursor, bumping *LINE and resetting *COL
// on each newline and bumping *COL otherwise.  The counts are accumulated
// into whatever *LINE and *COL already hold; nothing is initialised here.
void lex_stream_get_line_col(lex_stream_t *stream, u64 *line, u64 *col)
{
  assert(stream && line && col && "Expected valid pointers.");

  u64 index = 0;
  while (index < stream->byte)
  {
    if (stream->contents.data[index++] != '\n')
    {
      *col += 1;
      continue;
    }
    // Newline: next line, back to the first column.
    *line += 1;
    *col = 0;
  }
}
|
|
||||||
|
|
||||||
/// Prototypes for lexing subroutines
|
|
||||||
lex_err_t lex_string(lex_stream_t *stream, token_t *ret);
|
|
||||||
lex_err_t lex_symbol(lex_stream_t *stream, token_t *ret);
|
|
||||||
|
|
||||||
// Lexes the whole of STREAM into OUT, appending one token to OUT->vec for
// every string literal or symbol found; whitespace between them is skipped.
// Returns LEX_ERR_OK once the end of the stream is reached, otherwise the
// first error encountered.  STREAM is not rewound when an error is returned.
lex_err_t lex_stream(token_stream_t *out, lex_stream_t *stream)
{
  assert(out && stream && "Expected valid pointers");
  while (!stream_eos(stream))
  {
    char cur = stream_peek(stream);
    // <ctype.h> classifiers are only defined for values representable as
    // unsigned char (or EOF); a plain char may be negative.
    unsigned char ucur = (unsigned char)cur;

    if (isspace(ucur))
    {
      // Skip one whitespace byte; the loop condition re-checks end of stream.
      stream_advance(stream, 1);
      continue;
    }

    token_t token  = {0};
    lex_err_t perr = LEX_ERR_OK;
    // strchr() also matches the terminating NUL of SYMBOL_CHARS, so an
    // embedded '\0' byte must be excluded explicitly or it would be handed to
    // lex_symbol as though it were a symbol character.
    bool starts_symbol =
        cur != '\0' && strchr(SYMBOL_CHARS, cur) && !isdigit(ucur);

    if (cur == '"')
    {
      perr = lex_string(stream, &token);
    }
    else if (starts_symbol)
    {
      perr = lex_symbol(stream, &token);
    }
    else
    {
      return LEX_ERR_UNKNOWN_CHAR;
    }

    if (perr)
      return perr;
    vec_append(&out->vec, &token, sizeof(token));
  }
  return LEX_ERR_OK;
}
|
|
||||||
|
|
||||||
// Lexes a string literal.  STREAM's cursor is expected to sit on the opening
// speech mark.  On success RET holds a string token whose location is that
// opening speech mark, and the cursor is moved past the closing one.
// Returns LEX_ERR_EXPECTED_SPEECH_MARKS if the search runs to the end of the
// stream.
lex_err_t lex_string(lex_stream_t *stream, token_t *ret)
{
  // Step over the opening speech mark so the view starts at the body.
  stream_advance(stream, 1);

  const u64 start  = stream->byte;
  sv_t body        = sv_chop_left(stream->contents, start);
  const u64 length = sv_till(body, "\"");

  // Reaching the very end of the stream means no closing speech mark was
  // found.
  if (start + length == stream_size(stream))
    return LEX_ERR_EXPECTED_SPEECH_MARKS;

  // Trim the view down to just the body; start - 1 is the opening speech
  // mark's offset.
  body.size = length;
  *ret      = token_string(start - 1, body);

  // Consume the body plus the closing speech mark.
  stream_advance(stream, length + 1);
  return LEX_ERR_OK;
}
|
|
||||||
|
|
||||||
// Lexes a symbol beginning at STREAM's cursor.  RET becomes a KNOWN token if
// the text exactly matches one of the known spellings, otherwise a SYMBOL
// token carrying the text.  The cursor is advanced by the symbol's length.
// Always returns LEX_ERR_OK.
lex_err_t lex_symbol(lex_stream_t *stream, token_t *ret)
{
  const u64 start = stream->byte;
  sv_t name       = sv_chop_left(stream->contents, start);
  name.size       = sv_while(name, SYMBOL_CHARS);

  static_assert(NUM_TOKEN_KNOWNS == 1, "Expected number of TOKEN_KNOWNs");

  // Start from the user-defined interpretation; a known spelling overrides it.
  token_t result = token_symbol(start, name);
  for (token_known_t known = 0; known < NUM_TOKEN_KNOWNS; ++known)
  {
    const char *spelling = token_known_to_cstr(known);
    // Lengths must agree first: strncmp alone would accept a mere prefix.
    if (strlen(spelling) != name.size)
      continue;
    if (strncmp(spelling, name.data, name.size) != 0)
      continue;
    result = token_known(start, known);
    break;
  }

  *ret = result;
  stream_advance(stream, name.size);
  return LEX_ERR_OK;
}
|
|
||||||
|
|
||||||
// True once STREAM's cursor is at or beyond the end of its contents.
bool stream_eos(lex_stream_t *stream)
{
  const u64 cursor = stream->byte;
  const u64 length = stream->contents.size;
  return !(cursor < length);
}
|
|
||||||
|
|
||||||
// Returns the byte under STREAM's cursor without consuming it, or '\0' when
// the stream is exhausted.
char stream_peek(lex_stream_t *stream)
{
  return stream_eos(stream) ? '\0' : stream->contents.data[stream->byte];
}
|
|
||||||
|
|
||||||
// Moves STREAM's cursor forward by SIZE bytes, clamping it to the end of the
// contents so it never points past the buffer.
void stream_advance(lex_stream_t *stream, u64 size)
{
  const u64 limit = stream->contents.size;

  // Compare SIZE against the remaining distance instead of computing
  // `byte + size`: that sum can wrap around for very large SIZE, which would
  // slip past the clamp and move the cursor backwards.
  if (stream->byte >= limit || size >= limit - stream->byte)
    stream->byte = limit;
  else
    stream->byte += size;
}
|
|
||||||
|
|
||||||
// Total number of bytes in STREAM's contents (independent of the cursor).
u64 stream_size(lex_stream_t *stream)
{
  const u64 total = stream->contents.size;
  return total;
}
|
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
* You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
* which you should have received a copy of along with this program. If not,
|
|
||||||
* please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
*/
|
|
||||||
@@ -1,115 +0,0 @@
|
|||||||
/* token.c: Implementation of TOKEN constructor/destructor functions
|
|
||||||
* Created: 2026-01-22
|
|
||||||
* Author: Aryadev Chavali
|
|
||||||
* License: See end of file
|
|
||||||
* Commentary: See /include/arl/lexer/token.h.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <arl/lexer/token.h>
|
|
||||||
#include <arl/lib/base.h>
|
|
||||||
#include <arl/lib/vec.h>
|
|
||||||
|
|
||||||
// Returns the source spelling of KNOWN as a static string.  Any value other
// than the known tokens handled here is reported through FAIL.
const char *token_known_to_cstr(token_known_t known)
{
  if (known == TOKEN_KNOWN_PUTS)
    return "puts";

  FAIL("Unexpected TOKEN_KNOWN value: %d\n", known);
}
|
|
||||||
|
|
||||||
// Builds a KNOWN token for KNOWN, located at byte offset BYTE.
token_t token_known(u64 byte, token_known_t known)
{
  token_t token       = {0};
  token.byte_location = byte;
  token.type          = TOKEN_TYPE_KNOWN;
  token.as_known      = known;
  return token;
}
|
|
||||||
|
|
||||||
// Builds a STRING token carrying the view STRING, located at byte offset
// BYTE.  The view is stored as given; nothing is copied here.
token_t token_string(u64 byte, sv_t string)
{
  token_t token       = {0};
  token.byte_location = byte;
  token.type          = TOKEN_TYPE_STRING;
  token.as_string     = string;
  return token;
}
|
|
||||||
|
|
||||||
// Builds a SYMBOL token carrying the view SYMBOL, located at byte offset
// BYTE.  The view is stored as given; nothing is copied here.
token_t token_symbol(u64 byte, sv_t symbol)
{
  token_t token       = {0};
  token.byte_location = byte;
  token.type          = TOKEN_TYPE_SYMBOL;
  token.as_symbol     = symbol;
  return token;
}
|
|
||||||
|
|
||||||
// Prints a one-word description of TOKEN to FP: "NIL" for a null pointer,
// otherwise KNOWN(..), SYMBOL(..) or STRING(..).  An unrecognised type is
// reported through FAIL.
void token_print(FILE *fp, token_t *token)
{
  if (!token)
  {
    fputs("NIL", fp);
    return;
  }

  if (token->type == TOKEN_TYPE_KNOWN)
  {
    fprintf(fp, "KNOWN(%s)", token_known_to_cstr(token->as_known));
  }
  else if (token->type == TOKEN_TYPE_SYMBOL)
  {
    fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(token->as_symbol));
  }
  else if (token->type == TOKEN_TYPE_STRING)
  {
    fprintf(fp, "STRING(" PR_SV ")", SV_FMT(token->as_string));
  }
  else
  {
    // Covers NUM_TOKEN_TYPES as well as any out-of-range value.
    FAIL("Unexpected token type: %d\n", token->type);
  }
}
|
|
||||||
|
|
||||||
// Prints every token of TOKEN to FP as an indexed, brace-delimited list.
// A null stream prints "{}" and an empty one prints "{}" followed by a
// newline.
void token_stream_print(FILE *fp, token_stream_t *token)
{
  if (!token)
  {
    fprintf(fp, "{}");
    return;
  }
  fprintf(fp, "{");
  if (token->vec.size == 0)
  {
    fprintf(fp, "}\n");
    return;
  }

  fprintf(fp, "\n");
  // vec.size is divided by the element size to get the number of tokens.
  const u64 count = token->vec.size / sizeof(token_t);
  for (u64 i = 0; i < count; ++i)
  {
    token_t item = VEC_GET(&token->vec, i, token_t);
    // Cast so the argument matches the conversion specifier whatever integer
    // type u64 is defined as on this platform.
    fprintf(fp, "\t[%llu]: ", (unsigned long long)i);
    token_print(fp, &item);
    fprintf(fp, "\n");
  }
  fprintf(fp, "}");
}
|
|
||||||
|
|
||||||
// Releases the storage behind STREAM's token vector.
void token_stream_free(token_stream_t *stream)
{
  // Freeing the vector is the only cleanup performed for a token stream.
  vec_free(&stream->vec);
}
|
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
* You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
* which you should have received a copy of along with this program. If not,
|
|
||||||
* please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
*/
|
|
||||||
96
src/main.c
96
src/main.c
@@ -1,96 +0,0 @@
|
|||||||
/* main.c:
|
|
||||||
* Created: 2026-01-22
|
|
||||||
* Author: Aryadev Chavali
|
|
||||||
* License: See end of file
|
|
||||||
* Commentary:
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include <arl/lexer/lexer.h>
|
|
||||||
#include <arl/lexer/token.h>
|
|
||||||
#include <arl/lib/base.h>
|
|
||||||
#include <arl/lib/sv.h>
|
|
||||||
#include <arl/lib/vec.h>
|
|
||||||
|
|
||||||
#include <arl/cli.h>
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
|
||||||
{
|
|
||||||
int ret = 0;
|
|
||||||
char *filename = "";
|
|
||||||
if (argc == 1)
|
|
||||||
{
|
|
||||||
usage(stderr);
|
|
||||||
ret = 1;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
filename = argv[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
int read_err = 0;
|
|
||||||
sv_t contents = {0};
|
|
||||||
if (strcmp(filename, "--") == 0)
|
|
||||||
{
|
|
||||||
filename = "stdin";
|
|
||||||
read_err = read_pipe(stdin, &contents);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
read_err = read_file(filename, &contents);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (read_err)
|
|
||||||
{
|
|
||||||
LOG_ERR("ERROR: Reading `%s`: ", filename);
|
|
||||||
perror("");
|
|
||||||
ret = 1;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG("%s => `" PR_SV "`\n", filename, SV_FMT(contents));
|
|
||||||
|
|
||||||
lex_stream_t stream = {.byte = 0, .contents = contents};
|
|
||||||
token_stream_t tokens = {0};
|
|
||||||
lex_err_t perr = lex_stream(&tokens, &stream);
|
|
||||||
if (perr)
|
|
||||||
{
|
|
||||||
u64 line = 1, col = 0;
|
|
||||||
lex_stream_get_line_col(&stream, &line, &col);
|
|
||||||
|
|
||||||
LOG_ERR("%s:%lu:%lu: %s\n", filename, line, col, lex_err_to_string(perr));
|
|
||||||
ret = 1;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if VERBOSE_LOGS
|
|
||||||
LOG("Lexed %lu tokens ", tokens.vec.size / sizeof(token_t));
|
|
||||||
token_stream_print(stdout, &tokens);
|
|
||||||
printf("\n");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
end:
|
|
||||||
if (contents.data)
|
|
||||||
free(contents.data);
|
|
||||||
token_stream_free(&tokens);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
* You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
* which you should have received a copy of along with this program. If not,
|
|
||||||
* please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
*/
|
|
||||||
Reference in New Issue
Block a user