Compare commits
8 Commits
6f6b747540
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00786d9cb9 | ||
|
|
b391fe9a74 | ||
|
|
12e82e64d0 | ||
|
|
9b55b9ec32 | ||
|
|
6545bd1302 | ||
|
|
c11b69092d | ||
|
|
82b96e23d5 | ||
|
|
0fdfbef1de |
@@ -1,6 +1,6 @@
|
|||||||
;;; Directory Local Variables -*- no-byte-compile: t -*-
|
;;; Directory Local Variables -*- no-byte-compile: t -*-
|
||||||
;;; For more information see (info "(emacs) Directory Variables")
|
;;; For more information see (info "(emacs) Directory Variables")
|
||||||
|
|
||||||
((nil . ((compile-command . "make MODE=debug -k")
|
((nil . ((compile-command . "make -k MODE=debug examples")
|
||||||
(+license/license-choice . "MIT License")))
|
(+license/license-choice . "MIT License")))
|
||||||
(c-mode . ((mode . clang-format))))
|
(c-mode . ((mode . clang-format))))
|
||||||
|
|||||||
6
Makefile
6
Makefile
@@ -39,7 +39,7 @@ clangd: compile_commands.json
|
|||||||
compile_commands.json: Makefile
|
compile_commands.json: Makefile
|
||||||
bear -- $(MAKE) -B MODE=debug
|
bear -- $(MAKE) -B MODE=debug
|
||||||
|
|
||||||
.PHONY: run clean
|
.PHONY: run clean examples
|
||||||
ARGS=
|
ARGS=
|
||||||
run: $(OUT)
|
run: $(OUT)
|
||||||
./$^ $(ARGS)
|
./$^ $(ARGS)
|
||||||
@@ -47,5 +47,9 @@ run: $(OUT)
|
|||||||
clean:
|
clean:
|
||||||
rm -rf $(DIST)
|
rm -rf $(DIST)
|
||||||
|
|
||||||
|
examples: $(OUT)
|
||||||
|
@echo "Example: Hello World"
|
||||||
|
./$^ examples/hello-world.arl
|
||||||
|
|
||||||
DEPS:=$(patsubst %,$(DEPDIR)/%.d, $(UNITS))
|
DEPS:=$(patsubst %,$(DEPDIR)/%.d, $(UNITS))
|
||||||
include $(wildcard $(DEPS))
|
include $(wildcard $(DEPS))
|
||||||
|
|||||||
14
README
14
README
@@ -6,13 +6,12 @@
|
|||||||
│ /_/ \_\_| \_\_____| │
|
│ /_/ \_\_| \_\_____| │
|
||||||
└───────────────────────┘
|
└───────────────────────┘
|
||||||
|
|
||||||
Similar to Forth. Compiles to C.
|
Similar to Forth.
|
||||||
Native speed with simple semantics.
|
|
||||||
|
|
||||||
-----
|
-----
|
||||||
Goals
|
Goals
|
||||||
-----
|
-----
|
||||||
- Complete operational transpiler to C
|
- Complete operational transpiler, with C as a provisional working target
|
||||||
- Ability to reuse compiled code (as object code) in top level ARL code.
|
- Ability to reuse compiled code (as object code) in top level ARL code.
|
||||||
- Static type system with informative errors
|
- Static type system with informative errors
|
||||||
|
|
||||||
@@ -44,3 +43,12 @@ $ make DIST=<folder>
|
|||||||
|
|
||||||
Similarly, the general flags used in the C compiler may be set via the CFLAGS
|
Similarly, the general flags used in the C compiler may be set via the CFLAGS
|
||||||
variable, with linking arguments set via the LDFLAGS variable.
|
variable, with linking arguments set via the LDFLAGS variable.
|
||||||
|
|
||||||
|
------------------
|
||||||
|
Usage instructions
|
||||||
|
------------------
|
||||||
|
Once built, simply use the built binary like so:
|
||||||
|
$ ./build/arl.out <filename>
|
||||||
|
|
||||||
|
Alternatively, you can run the examples automatically via the Makefile:
|
||||||
|
$ make examples
|
||||||
195
arl.org
195
arl.org
@@ -1,161 +1,64 @@
|
|||||||
#+title: ARL - Issue tracker
|
#+title: ARL - Issue tracker
|
||||||
#+date: 2026-01-23
|
#+date: 2026-01-23
|
||||||
|
#+filetags: arl
|
||||||
|
|
||||||
* TODO Write a minimum working transpiler
|
* TODO Write a minimum working transpiler
|
||||||
We need to be able to compile the following file:
|
We need to be able to compile the following file:
|
||||||
[[file:examples/hello-world.arl]]. All it does is print "Hello,
|
[[file:examples/hello-world.arl]]. All it does is print "Hello,
|
||||||
world!". Should be relatively straightforward.
|
world!". Should be relatively straightforward.
|
||||||
|
** Stages
|
||||||
|
We need the following stages in our MVP transpiler:
|
||||||
|
- Source code reading (read bytes from a file)
|
||||||
|
- Parse raw bytes into tokens (Lexer)
|
||||||
|
- Interpret tokens into a classical AST (Parser)
|
||||||
|
- Stack effect and type analysis of the AST for soundness
|
||||||
|
- Translate AST into C code (Codegen)
|
||||||
|
- Compile C code into native executable (Target)
|
||||||
|
|
||||||
|
It's an Eulerian path from the source code to the native executable.
|
||||||
** DONE Read file
|
** DONE Read file
|
||||||
** DONE Parser
|
** DONE Lexer
|
||||||
** TODO Intermediate representation (Virtual Machine)
|
[[file:src/lexer/]]
|
||||||
[[file:src/arl/vm/]]
|
[[file:include/arl/lexer/]]
|
||||||
|
** WIP Parser
|
||||||
|
[[file:src/parser/]]
|
||||||
|
[[file:include/arl/parser/]]
|
||||||
|
|
||||||
Before we get into generating C code and then compiling it, it might
|
We need to generate some form of AST from the token stream. This
|
||||||
be worth translating the parsed ARL code into a generic IR.
|
should be a little more advanced than our initial stream,
|
||||||
|
distinguishing between
|
||||||
|
- Literal values
|
||||||
|
- Primitive calls
|
||||||
|
- References to otherwise undefined words (may be defined through
|
||||||
|
import or later on)
|
||||||
|
** TODO Stack effect/type analysis
|
||||||
|
[[file:src/analysis/]]
|
||||||
|
[[file:include/arl/analysis/]]
|
||||||
|
|
||||||
The IR should be primitive in its semantics but should still
|
Given the AST, we need to verify its soundness with regard to
|
||||||
encapsulate the intention behind the original ARL code. This should
|
types and the stack. We have this idea of "stack effects" attached to
|
||||||
allow us to find a set of minimum requirements for target compilation:
|
every node in the AST; literals push values onto the stack and pop
|
||||||
- what can we reasonably use from the target platform to satisfy
|
nothing, while operations may pop some operands and push some values.
|
||||||
supporting the primitive IR?
|
|
||||||
- what do we need to hand-roll on the target in order to make this
|
|
||||||
work?
|
|
||||||
|
|
||||||
Essentially, we want to write a virtual machine, and translate ARL
|
We need a way to:
|
||||||
code into bytecode for that VM. Goals:
|
- Codify the stack effects of each type of AST node
|
||||||
- Type checking
|
- Infer the total stack effect from a sequence of nodes
|
||||||
- Optimiser (stretch)
|
|
||||||
|
|
||||||
We need the following clear items in our IR:
|
These stack effects work in tandem with our type analysis. Stack
|
||||||
- Static type values
|
shape analysis tells us what operands are being fed into primitives,
|
||||||
- Static type variables (possibly De Bruijn numbering or other such
|
while the type analysis will tell us if the operands are well formed
|
||||||
mechanism to abstract naming away and leave it to the target to
|
for the primitives.
|
||||||
generate effectively)
|
|
||||||
- Strongly typed primitive operators (numeric, strings, I/O) with
|
|
||||||
packed arguments
|
|
||||||
|
|
||||||
We should have a rough grouping between AST objects and this IR. As
|
|
||||||
ARL is Forth-like, we can use the stack semantics to generate this IR
|
|
||||||
as we walk the AST in a linear manner. In practice this should almost
|
|
||||||
look like emulating a really small subset of the ARL language itself
|
|
||||||
and executing the program in that small subset.
|
|
||||||
|
|
||||||
Looking at how
|
|
||||||
[[https://en.wikipedia.org/wiki/Three-address_code][TAC]] works, I
|
|
||||||
think it may be a good idea to do something like that for our IR.
|
|
||||||
Essentially we should translate our AST into a sequence of really simple
|
|
||||||
bindings, with the final expression being a reference to some binding.
|
|
||||||
|
|
||||||
This also simplifies type checking to just verifying each little
|
|
||||||
binding and operation.
|
|
||||||
|
|
||||||
*** Examples
|
|
||||||
**** Basic example
|
|
||||||
Consider the following ARL code:
|
|
||||||
#+begin_src text
|
|
||||||
34 35 +
|
|
||||||
#+end_src
|
|
||||||
|
|
||||||
When we walk through the above code:
|
|
||||||
- 34 (an integer) is pushed onto the stack
|
|
||||||
- 35 (an integer) is pushed onto the stack
|
|
||||||
- ~+~ primitive is encountered
|
|
||||||
- Type check the top two values of the stack; they should be
|
|
||||||
integral.
|
|
||||||
- ~a b +~ should correspond to ~a + b~ so the IR expression should
|
|
||||||
pack the arguments in that order: ~prim-add(34,35)~.
|
|
||||||
- Bind the generated IR expression to some unique name, say ~v1~.
|
|
||||||
- Ensure this works with type checking; looking up ~v1~'s type
|
|
||||||
should give you the output type of the "+" operator (integer).
|
|
||||||
- Push ~v1~ onto the stack.
|
|
||||||
|
|
||||||
The final state of the stack should be something like ~[v1]~ where
|
|
||||||
~v1=prim-add(34,35)~. The final state of the stack, along with the
|
|
||||||
bindings we form, is the IR, to pass over to the later stages of the
|
|
||||||
compiler.
|
|
||||||
**** Slightly more complex example
|
|
||||||
Let's look at a slightly more complex program:
|
|
||||||
#+begin_src text
|
|
||||||
34 35 + 70 swap -
|
|
||||||
#+end_src
|
|
||||||
- 34 (integer) pushed
|
|
||||||
- 35 (integer) pushed
|
|
||||||
- ~+~ primitive:
|
|
||||||
- As stated previously, the final state of this primitive gives us
|
|
||||||
the name ~v1~ on the stack with the association
|
|
||||||
~v1=prim-add(34,35)~.
|
|
||||||
- 70 (integer) pushed
|
|
||||||
- ~swap~ primitive:
|
|
||||||
- Requires two values on the stack, but we care little about their
|
|
||||||
types. Just swaps their order on the stack.
|
|
||||||
- We /could/ introduce generics here to make the input/output
|
|
||||||
relationship explicit (forall T, U swap:-(-> (T U) (U T))), but
|
|
||||||
at the same time we can just as easily get away with a type hole
|
|
||||||
(essentially some kind of ~any~). Up to debate.
|
|
||||||
- We do not generate IR for this primitive as it simply isn't
|
|
||||||
necessary. Instead we perform the swap on our IR stack and
|
|
||||||
continue. The ~swap~ primitive is "transparent" in the final IR.
|
|
||||||
- In this situation, the stack goes from ~[v1, 70]~ to
|
|
||||||
~[70, v1]~
|
|
||||||
- ~-~ primitive:
|
|
||||||
- Type checks the top two values of the stack (which are both
|
|
||||||
integers)
|
|
||||||
- ~a b -~ should correspond to ~a - b~, thus the corresponding IR
|
|
||||||
expression should be ~prim-sub(70,v1)~
|
|
||||||
- Associate IR expression with name ~v2~,
|
|
||||||
- Push ~v2~ onto the stack.
|
|
||||||
|
|
||||||
The final state of the IR should be:
|
|
||||||
- Stack: ~[v2]~
|
|
||||||
- Bindings:
|
|
||||||
- ~v1=prim-add(34,35)~
|
|
||||||
- ~v2=prim-sub(70,v1)~
|
|
||||||
|
|
||||||
Notice how some primitives generate IR, while others manipulate IR
|
|
||||||
themselves? They almost seem like macros!
|
|
||||||
|
|
||||||
Another thing of note is how the final state of the stack is a single
|
|
||||||
item in this case; an IR expression representing the entire program.
|
|
||||||
When we introduce code level bindings we won't have such nice outputs,
|
|
||||||
but it is certainly something to consider.
|
|
||||||
**** Hello world! example
|
|
||||||
For our hello world:
|
|
||||||
#+begin_src text
|
|
||||||
"Hello, world!\n" putstr
|
|
||||||
#+end_src
|
|
||||||
- "Hello, world!\n" (string) pushed
|
|
||||||
- "putstr" primitive:
|
|
||||||
- Type check the top of the stack (should be a string)
|
|
||||||
- Generate IR ~prim-putstr("Hello, world!\n")~
|
|
||||||
- Associate with name ~v1~ and push it onto the stack
|
|
||||||
|
|
||||||
Much simpler than our previous examples.
|
|
||||||
*** TODO IR level type checking
|
|
||||||
During IR compilation, the following should be type checked:
|
|
||||||
- use of callables (primitives, user defined when implemented)
|
|
||||||
- variable assignment (when implemented)
|
|
||||||
- variable use (when implemented)
|
|
||||||
- definition of callables (when implemented)
|
|
||||||
|
|
||||||
We want to ensure no statement is unsound.
|
|
||||||
**** TODO Primitive types
|
|
||||||
Define the primitive types of the IR. Remember, simplicity is key,
|
|
||||||
but we need to mirror what we're getting on the ARL side.
|
|
||||||
**** TODO Type contracts for callables
|
|
||||||
Define how we can type check arguments on the stack against the types
|
|
||||||
a callable expects for its inputs. In the same vein, we also need to
|
|
||||||
figure out the type of whatever is pushed onto the stack by the
|
|
||||||
callable.
|
|
||||||
*** TODO Use SSA for user level bindings
|
|
||||||
[[https://en.wikipedia.org/wiki/Static_single-assignment_form][Static
|
|
||||||
single-assignment form]] is something we should use when we introduce
|
|
||||||
user level bindings.
|
|
||||||
** TODO Code generator
|
** TODO Code generator
|
||||||
[[file:src/arl/target-c/]]
|
[[file:src/codegen/]]
|
||||||
|
[[file:include/arl/codegen/]]
|
||||||
|
|
||||||
This should take the IR translated from the AST generated by the
|
This should take the AST generated by the parser (which should already
|
||||||
parser, and write equivalent C code.
|
have been analysed), and write equivalent C code.
|
||||||
|
** TODO Target compilation
|
||||||
|
[[file:src/target/]]
|
||||||
|
[[file:include/arl/target/]]
|
||||||
|
|
||||||
After we've generated the C code, we need to call a C compiler on it
|
=gcc= and =clang= take C code via /stdin/, so we don't need to write
|
||||||
to generate a binary. GCC and Clang allow passing source code through
|
the C code to disk - we can just leave it as a buffer of bytes. So
|
||||||
stdin, so we don't even need to write to disk first which is nice.
|
we'll call the compilers and feed the generated code from the previous
|
||||||
|
stage into them via stdin.
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
"Hello, world!\n" putstr
|
"Hello, world!\n" puts
|
||||||
37
extensions/arl-mode.el
Normal file
37
extensions/arl-mode.el
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
;;; arl-mode.el --- ARL mode for Emacs -*- lexical-binding: t; -*-
|
||||||
|
|
||||||
|
;; Copyright (C) 2026 Aryadev Chavali
|
||||||
|
|
||||||
|
;; Author: Aryadev Chavali <aryadev@aryadevchavali.com>
|
||||||
|
;; Keywords:
|
||||||
|
|
||||||
|
;; Copyright (C) 2026 Aryadev Chavali
|
||||||
|
|
||||||
|
;; This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
;; FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
||||||
|
|
||||||
|
;; You may distribute and modify this code under the terms of the MIT License,
|
||||||
|
;; which you should have received a copy of along with this program. If not,
|
||||||
|
;; please go to <https://opensource.org/license/MIT>.
|
||||||
|
|
||||||
|
;;; Commentary:
|
||||||
|
|
||||||
|
;;
|
||||||
|
|
||||||
|
;;; Code:
|
||||||
|
|
||||||
|
(defvar arl-mode-comments '(?\; ";;" ("#|" . "|#")))
|
||||||
|
(defvar arl-mode-keywords '("if" "then" "else"))
|
||||||
|
(defvar arl-mode-expressions '(("\".*\"" . font-lock-string-face)))
|
||||||
|
(defvar arl-mode-automode-list '("\\.arl"))
|
||||||
|
|
||||||
|
(define-derived-mode arl-mode
|
||||||
|
arl-mode-comments
|
||||||
|
arl-mode-keywords
|
||||||
|
arl-mode-expressions
|
||||||
|
arl-mode-automode-list
|
||||||
|
nil)
|
||||||
|
|
||||||
|
(provide 'arl-mode)
|
||||||
|
;;; arl-mode.el ends here
|
||||||
@@ -25,7 +25,7 @@ typedef enum
|
|||||||
/// Known symbols which later stages would benefit from.
|
/// Known symbols which later stages would benefit from.
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
TOKEN_KNOWN_PUTSTR,
|
TOKEN_KNOWN_PUTS,
|
||||||
NUM_TOKEN_KNOWNS,
|
NUM_TOKEN_KNOWNS,
|
||||||
} token_known_t;
|
} token_known_t;
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,8 @@ const char *token_known_to_cstr(token_known_t known)
|
|||||||
{
|
{
|
||||||
switch (known)
|
switch (known)
|
||||||
{
|
{
|
||||||
case TOKEN_KNOWN_PUTSTR:
|
case TOKEN_KNOWN_PUTS:
|
||||||
return "putstr";
|
return "puts";
|
||||||
default:
|
default:
|
||||||
FAIL("Unexpected TOKEN_KNOWN value: %d\n", known);
|
FAIL("Unexpected TOKEN_KNOWN value: %d\n", known);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user