commit 78e80d03d886cf85ba24331ebb7d56ed13f0ba76
parent ab7e21b55f0cccef79f9a2f41f0278c5db80ace6
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Thu, 2 Jun 2022 08:43:26 -0400
cc: Add parsing to ccast
Test with "./dusk < testcc.fs"
Diffstat:
7 files changed, 84 insertions(+), 28 deletions(-)
diff --git a/dusk.asm b/dusk.asm
@@ -157,7 +157,7 @@ defword 'exit', 4, word_exit, word_abort
pop eax
ret
-defword 'execute', 8, word_execute, word_exit
+defword 'execute', 7, word_execute, word_exit
pspop eax
jmp eax
diff --git a/fs/cc1.fs b/fs/cc1.fs
@@ -1,27 +1,6 @@
\ C compiler stage 1
-\ Requires ccast.fs
+\ Requires cctok.fs and ccast.fs
-alias in< cc<
-0 value putback
-: _cc< ( -- c ) putback ?dup if 0 to putback else cc< then ;
-create buf LNSZ allot
-: 0-9? ( c -- f ) '0' - 10 < ;
-: a-z? ( c -- f ) dup 'A' - 26 < swap 'a' - 26 < or ;
-: identifier? ( c -- f ) dup 0-9? swap a-z? or ;
-
-\ advance to the next non-whitespace and return the char encountered.
-\ if end of stream is reached, c is 0
-: tonws ( -- c ) 0 begin ( c )
- drop _cc< dup dup EOF <= swap ws? not or until ( c )
- dup EOF <= if drop 0 then ;
-
-\ Returns the next token. sl is 0 and there's no sa when there's no more token
-\ to consume.
-: nextt ( -- sa? sl-or-0 ) tonws dup if ( c )
- A>r buf >A dup identifier? if begin ( c )
- Ac!+ cc< dup identifier? not until to putback
- else Ac!+ then
- buf A> buf - ( sa sl ) r>A then ;
\ Compiles input coming from the cc< alias (defaulting to in<) and writes the
diff --git a/fs/ccast.fs b/fs/ccast.fs
@@ -1,4 +1,5 @@
\ C compiler Abstract Syntax Tree
+\ requires cctok.fs
\ An abstract syntax tree, AST, is a hierarchical structure of elements
\ representing the elements found in a C source file. The top of this structure
\ is a Unit, which is what we get after we compiler a C source file.
@@ -17,6 +18,8 @@
\ 2 Function name
\ 3 Return
\ 4 Constant value
+\ 5 Statements
+\ 6 Arguments
\ Flags
\ b0 haschildren this element can contain children
@@ -24,9 +27,10 @@
\ b3 str data The 'data section contains a 1b str length followed by a
\ string of that length.
+7 value ASTIDCNT
\ 8 chars per name
create astidnames ,"
-) unit functionreturn constant"
+) unit functionreturn constantstmts args "
0 value curunit \ points to current Unit, the beginning of the AST
0 value lastelem \ last element of the chain
@@ -35,6 +39,7 @@ create astidnames ,"
\ trim whitespaces from the right of string
: rtrim ( sa sl -- sa sl ) 1+ begin 1- 2dup + 1- c@ ws? not until ;
: idname ( id -- sa sl ) 8 * astidnames + 8 rtrim ;
+: astid ( elem -- id ) c@ ;
: flags ( elem -- flags ) 1+ c@ ;
: haschildren ( elem -- f ) flags $01 and ;
: parentelem ( elem -- parent ) 1+ 1+ @ ;
@@ -44,21 +49,51 @@ create astidnames ,"
here lastelem 6 + ! here to lastelem c, c, activeelem , 0 ,
lastelem haschildren if lastelem to activeelem then ;
: SeqClose ( -- )
- 0 0 newelem activeelem ?dup not if S" can't go beyond root!" stype abort then
+ 0 0 newelem activeelem ?dup not if abort" can't go beyond root!" then
parentelem to activeelem ;
: Unit ( -- )
here to curunit here to lastelem here to activeelem 1 c, $01 c, 8 allot0 ;
: Function ( 'name namelen -- ) $09 2 newelem dup c, move, ;
: Return ( -- ) $01 3 newelem ;
: Constant ( n -- ) $04 4 newelem , ;
+: Statements ( -- ) $01 5 newelem ;
+: Arguments ( -- ) $01 6 newelem ;
+
+\ Parsing words. Each "haschildren" elem has a parsing context, each of which
+\ having its corresponding parsing word down there. Every parsing word takes
+\ a token as a parameter.
+: _err ( ta tl -- ) stype abort" parsing error" abort ;
+: _nextt nextt ?dup not if abort" expecting token!" then ;
+
+\ Takes a token and returns the corresponding typedef (not AST type).
+\ For now, we always return 1 on "int".
+: isType? ( ta tl -- typeid? f ) S" int" S= dup if 1 swap then ;
+: expectConst ( ta tl -- n ) 2dup parse if rot> 2drop else _err then ;
+: expectIdent ( ta tl -- ta tl )
+ A>r 2dup >r >A begin Ac@+ identifier? not if _err then next r>A ;
+: expectChar ( ta tl c -- )
+ >r 2dup 1 = not if drop _err then c@ r> = not if _err then 2drop ;
+
+: parseConstant ( ta tl -- ) expectConst Constant ;
+: parseReturn ( -- ) _nextt parseConstant _nextt ';' expectChar SeqClose ;
+: parseStatements ( -- )
+ Statements _nextt '{' expectChar
+ _nextt S" return" S= if parseReturn else _err then
+ _nextt '}' expectChar SeqClose ;
+: parseArguments ( -- )
+ Arguments _nextt '(' expectChar _nextt ')' expectChar SeqClose ;
+: parseFunction ( typeid -- )
+ drop _nextt expectIdent Function parseArguments parseStatements SeqClose ;
+: parseUnit ( -- ) nextt isType? if parseFunction else _err then ;
+: parseast ( -- ) Unit parseUnit ;
: printelem ( elem -- )
- dup c@ idname stype dup flags ( elem flags )
+ dup astid idname stype dup flags ( elem flags )
dup $04 and if ( int data ) '[' emit over 'data @ .x ']' emit then
$08 and if ( str data ) '[' emit over 'data c@+ stype ']' emit then
drop ;
: printast ( elem -- ) 1 swap begin ( lvl elem )
- dup c@ not if ( seqclose ) swap 1- swap then
+ dup astid not if ( seqclose ) swap 1- swap then
dup printelem
- dup haschildren if '(' emit swap 1+ swap then ( lvl elem )
+ dup haschildren if '(' emit swap 1+ swap else ',' emit then ( lvl elem )
nextelem 2dup not swap not or until 2drop ;
diff --git a/fs/cctok.fs b/fs/cctok.fs
@@ -0,0 +1,23 @@
+\ C compiler tokenization
+
+alias in< cc<
+0 value putback
+: _cc< ( -- c ) putback ?dup if 0 to putback else cc< then ;
+create buf LNSZ allot
+: 0-9? ( c -- f ) '0' - 10 < ;
+: a-z? ( c -- f ) dup 'A' - 26 < swap 'a' - 26 < or ;
+: identifier? ( c -- f ) dup 0-9? swap a-z? or ;
+
+\ advance to the next non-whitespace and return the char encountered.
+\ if end of stream is reached, c is 0
+: tonws ( -- c ) 0 begin ( c )
+ drop _cc< dup dup EOF <= swap ws? not or until ( c )
+ dup EOF <= if drop 0 then ;
+
+\ Returns the next token. sl is 0 and there's no sa when there's no more token
+\ to consume.
+: nextt ( -- sa? sl-or-0 ) tonws dup if ( c )
+ A>r buf >A dup identifier? if begin ( c )
+ Ac!+ cc< dup identifier? not until to putback
+ else Ac!+ then
+ buf A> buf - ( sa sl ) r>A then ;
diff --git a/fs/test.c b/fs/test.c
@@ -0,0 +1,3 @@
+int main() {
+ return 42;
+}
diff --git a/fs/wordtbl.fs b/fs/wordtbl.fs
@@ -0,0 +1,6 @@
+\ Word tables
+: wordtbl ( n -- a ) create here swap 4 * allot0 1 here c! ;
+: w+ ( a -- a+4? ) 4 + dup @ if drop then ;
+: :w ( a -- a+4? ) here xtcomp over ! w+ ;
+: 'w ( a -- a+4? ) ' over ! w+ ;
+: wexec ( tbl idx -- ) 4 * + @ execute ;
diff --git a/testcc.fs b/testcc.fs
@@ -0,0 +1,10 @@
+\ ./dusk < testcc.fs
+f<< wordtbl.fs
+f<< cctok.fs
+f<< ccast.fs
+: opentestc S" test.c" fopen not if abort" can't open" then ;
+opentestc
+' fin< to cc<
+parseast
+curunit printast
+bye