commit df558a99b817374975be54aca7b0bf9860c7ca66
parent 18faf765338823b90c38805fe9fdc08415d928c2
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Thu, 2 Jun 2022 06:30:33 -0400
cc: add AST structures
Example usage:
Dusk OS ok
f<< ccast.fs
ok
: mystr S" main" ;
ok
Unit mystr Function Return 42 Constant SeqClose SeqClose
ok
curunit printast
unit(function[main](return(constant[0000002a])) ok
Diffstat:
3 files changed, 66 insertions(+), 0 deletions(-)
diff --git a/boot.fs b/boot.fs
@@ -31,6 +31,7 @@ current to (psufl)
: <> ( n n -- l h ) 2dup > if swap then ;
: min <> drop ; : max <> nip ;
: fill ( a u b -- *A* ) rot> >r >A begin dup Ac!+ next drop ;
+: allot0 ( n -- ) here over 0 fill allot ;
: .xh $f and tbl-0-f + c@ emit ;
: .x1 dup 4 rshift .xh .xh ;
: .x2 dup 8 rshift .x1 .x1 ;
diff --git a/fs/cc1.fs b/fs/cc1.fs
@@ -1,4 +1,5 @@
\ C compiler stage 1
+\ Requires ccast.fs
alias in< cc<
0 value putback
diff --git a/fs/ccast.fs b/fs/ccast.fs
@@ -0,0 +1,64 @@
+\ C compiler Abstract Syntax Tree
+\ An abstract syntax tree, AST, is a hierarchical structure of elements
+\ representing the elements found in a C source file. The top of this structure
+\ is a Unit, which is what we get after we compiler a C source file.
+\ In memory, each element has this structure:
+
+\ 1b type id
+\ 1b flags b0=haschildren b2=int data b3=str data
+\ 4b addr of parent element (0 if root)
+\ 4b addr of next element (0 if none)
+\ ... maybe data
+
+\ Types
+\ ID Name Data
+\ 0 SeqClose
+\ 1 Unit
+\ 2 Function name
+\ 3 Return
+\ 4 Constant value
+
+\ Flags
+\ b0 haschildren this element can contain children
+\ b2 int data The 'data section contains a 4b integer
+\ b3 str data The 'data section contains a 1b str length followed by a
+\ string of that length.
+
+\ 8 chars per name
+create astidnames ,"
+) unit functionreturn constant"
+
+0 value curunit \ points to current Unit, the beginning of the AST
+0 value lastelem \ last element of the chain
+0 value activeelem \ elem we're currently adding to
+
+\ trim whitespaces from the right of string
+: rtrim ( sa sl -- sa sl ) 1+ begin 1- 2dup + 1- c@ ws? not until ;
+: idname ( id -- sa sl ) 8 * astidnames + 8 rtrim ;
+: flags ( elem -- flags ) 1+ c@ ;
+: haschildren ( elem -- f ) flags $01 and ;
+: parentelem ( elem -- parent ) 1+ 1+ @ ;
+: nextelem ( elem -- next ) 6 + @ ;
+: 'data ( elem -- 'data ) 10 + ;
+: newelem ( flags id -- )
+ here lastelem 6 + ! here to lastelem c, c, activeelem , 0 ,
+ lastelem haschildren if lastelem to activeelem then ;
+: SeqClose ( -- )
+ 0 0 newelem activeelem ?dup not if S" can't go beyond root!" stype abort then
+ parentelem to activeelem ;
+: Unit ( -- )
+ here to curunit here to lastelem here to activeelem 1 c, $01 c, 8 allot0 ;
+: Function ( 'name namelen -- ) $09 2 newelem dup c, move, ;
+: Return ( -- ) $01 3 newelem ;
+: Constant ( n -- ) $04 4 newelem , ;
+
+: printelem ( elem -- )
+ dup c@ idname stype dup flags ( elem flags )
+ dup $04 and if ( int data ) '[' emit over 'data @ .x ']' emit then
+ $08 and if ( str data ) '[' emit over 'data c@+ stype ']' emit then
+ drop ;
+: printast ( elem -- ) 1 swap begin ( lvl elem )
+ dup c@ not if ( seqclose ) swap 1- swap then
+ dup printelem
+ dup haschildren if '(' emit swap 1+ swap then ( lvl elem )
+ nextelem 2dup not swap not or until 2drop ;