commit 6b9b0ab2aa91e7d481c6881d8aff6e9d0e4cce59
parent de8b1611f450bdd710910ed225639d4863fd25eb
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Fri, 3 Jun 2022 06:42:09 -0400
cc: Simplify ccast.fs
Also, forgot to commit cc1.fs in my "liftoff!" commit earlier! That's isn't
much of a liftoff...
Diffstat:
4 files changed, 73 insertions(+), 49 deletions(-)
diff --git a/dusk.asm b/dusk.asm
@@ -275,7 +275,7 @@ _fread_hasfd:
mov ecx, ebp ; buffer
mov edx, 1 ; len
int 0x80
- test eax, eax
+ dec eax ; error or EOF
jns _fread_success
mov dword [ebp], 0 ; return 0 if negative
_fread_success:
diff --git a/fs/cc1.fs b/fs/cc1.fs
@@ -1,7 +1,5 @@
\ C compiler stage 1
-\ Requires cctok.fs, ccast.fs and wordtbl.fs
-
-: ln" [compile] ." compile nl> ; immediate
+\ Requires cctok.fs, ccast.fs and asm.fs
\ Generation words take an AST element as a parameter and write the
\ corresponding code to here.
@@ -10,21 +8,22 @@
over dup not if _err then astid = not if _err then ;
: expectclose 0 expectastid nextelem ;
\ All words below have the same sig: elem -- next-or-0
-: genConstant 4 expectastid ." mov eax,0x" dup intdata .x nl> nextelem ;
+: genConstant 4 expectastid eax dup intdata i32 mov, nextelem ;
+: genExpression 7 expectastid nextelem genConstant nextelem ;
: genReturn
- 3 expectastid nextelem genConstant
- ln" sub ebp,4"
- ln" [ebp],eax"
+ 3 expectastid nextelem genExpression
+ ebp 4 i32 sub,
+ [ebp] eax mov,
expectclose ;
: genArguments 6 expectastid nextelem expectclose ;
: genStatements
5 expectastid nextelem genReturn
- ln" ret"
+ ret,
expectclose ;
: genFunction
- 2 expectastid ." new entry: " dup strdata stype nl> nextelem
+ 2 expectastid dup strdata entry nextelem
genArguments genStatements ;
-: genUnit ( elem -- ) begin ( elem ) nextelem genFunction ?dup until ;
+: genUnit ( elem -- ) begin ( elem ) nextelem genFunction ?dup not until ;
\ Compiles input coming from the cc< alias (defaulting to in<) and writes the
\ result to here. Aborts on error.
diff --git a/fs/ccast.fs b/fs/ccast.fs
@@ -1,12 +1,13 @@
\ C compiler Abstract Syntax Tree
-\ requires cctok.fs
+\ requires cctok.fs and ccops.fs
\ An abstract syntax tree, AST, is a hierarchical structure of elements
\ representing the elements found in a C source file. The top of this structure
\ is a Unit, which is what we get after we compiler a C source file.
\ In memory, each element has this structure:
\ 1b type id
-\ 1b flags b0=haschildren b2=int data b3=str data
+\ 1b flags (see below)
+\ 1b parse stage
\ 4b addr of parent element (0 if root)
\ 4b addr of next element (0 if none)
\ ... maybe data
@@ -20,17 +21,23 @@
\ 4 Constant value
\ 5 Statements
\ 6 Arguments
+\ 7 Expression
\ Flags
-\ b0 haschildren this element can contain children
-\ b2 int data The 'data section contains a 4b integer
-\ b3 str data The 'data section contains a 1b str length followed by a
-\ string of that length.
+\ b0 haschildren this element can contain children
+\ b1 autoclose close automatically when a children closes
+\ b2 int data The 'data section contains a 4b integer
+\ b3 str data The 'data section contains a 1b str length followed by a
+\ string of that length.
+\ Parse stage
+\ Stores the stage at which the element is, parse-wise. For example, a Function
+\ starts at 0. When Arguments have been parsed, it becomes 1. When Statements
+\ have been parsed, it becomes 2.
-7 value ASTIDCNT
+8 value ASTIDCNT
\ 8 chars per name
create astidnames ,"
-) unit functionreturn constantstmts args "
+) unit functionreturn constantstmts args expr "
0 value curunit \ points to current Unit, the beginning of the AST
0 value lastelem \ last element of the chain
@@ -41,30 +48,36 @@ create astidnames ,"
: idname ( id -- sa sl ) 8 * astidnames + 8 rtrim ;
: astid ( elem -- id ) c@ ;
: flags ( elem -- flags ) 1+ c@ ;
-: haschildren ( elem -- f ) flags $01 and ;
-: parentelem ( elem -- parent ) 1+ 1+ @ ;
-: nextelem ( elem -- next ) 6 + @ ;
-: 'data ( elem -- 'data ) 10 + ;
+: pstage ( elem -- pstage ) 1+ 1+ c@ ;
+: pstage+ ( elem -- newpstage ) dup pstage 1+ over 1+ 1+ c! pstage ;
+: haschildren? ( elem -- f ) flags $01 and ;
+: autoclose? ( elem -- f ) flags $02 and ;
+: parentelem ( elem -- parent ) 3 + @ ;
+: nextelem ( elem -- next ) 7 + @ ;
+: 'data ( elem -- 'data ) 11 + ;
: intdata ( elem -- n ) 'data @ ;
: strdata ( elem -- sa sl ) 'data c@+ ;
: newelem ( flags id -- )
- here lastelem 6 + ! here to lastelem c, c, activeelem , 0 ,
- lastelem haschildren if lastelem to activeelem then ;
+ here lastelem 7 + ! here to lastelem c, c, 0 c, activeelem , 0 ,
+ lastelem haschildren? if lastelem to activeelem then ;
+
+\ AST elements
: SeqClose ( -- )
- 0 0 newelem activeelem ?dup not if abort" can't go beyond root!" then
- parentelem to activeelem ;
+ 0 0 newelem activeelem
+ ?dup not if abort" can't go beyond root!" then
+ parentelem to activeelem
+ activeelem autoclose? if SeqClose then ;
: Unit ( -- )
- here to curunit here to lastelem here to activeelem 1 c, $01 c, 8 allot0 ;
+ here to curunit here to lastelem here to activeelem 1 c, $01 c, 9 allot0 ;
: Function ( 'name namelen -- ) $09 2 newelem dup c, move, ;
-: Return ( -- ) $01 3 newelem ;
+: Return ( -- ) $03 3 newelem ;
: Constant ( n -- ) $04 4 newelem , ;
: Statements ( -- ) $01 5 newelem ;
: Arguments ( -- ) $01 6 newelem ;
+: Expression ( -- ) $01 7 newelem ;
-\ Parsing words. Each "haschildren" elem has a parsing context, each of which
-\ having its corresponding parsing word down there. Every parsing word takes
-\ a token as a parameter.
: _err ( ta tl -- ) stype abort" parsing error" abort ;
+: _assert ( ta tl f -- ) not if _err then ;
: _nextt nextt ?dup not if abort" expecting token!" then ;
\ Takes a token and returns the corresponding typedef (not AST type).
@@ -72,25 +85,35 @@ create astidnames ,"
: isType? ( ta tl -- typeid? f ) S" int" S= dup if 1 swap then ;
: expectConst ( ta tl -- n ) 2dup parse if rot> 2drop else _err then ;
: expectIdent ( ta tl -- ta tl )
- A>r 2dup >r >A begin Ac@+ identifier? not if _err then next r>A ;
+ A>r 2dup >r >A begin Ac@+ identifier? _assert next r>A ;
: expectChar ( ta tl c -- )
- >r 2dup 1 = not if drop _err then c@ r> = not if _err then 2drop ;
+ >r 2dup 1 = not if drop _err then c@ r> = _assert 2drop ;
+
+\ Parse words. Each of those words have the signature "ta tl -- ".
+\ To be clear on the semantincs, the word represents the *context*, not the
+\ element being parsed. For example, in "Function", we're not parsing the
+\ Function AST element, but we're parsing its *children*.
+
+ASTIDCNT wordtbl astparsetbl
+'w _err ( SeqClose )
+:w ( Unit ) isType? _assert drop _nextt expectIdent Function ;
+:w ( Function ) activeelem pstage+ dup 1 = if
+ drop '(' expectChar Arguments else
+ 2 = if '{' expectChar Statements else SeqClose then then ;
+'w _err ( Return )
+'w _err ( Constant )
+:w ( Statements )
+ 2dup S" }" S= if 2drop SeqClose else
+ 2dup S" return" S= _assert 2drop Return Expression then ;
+:w ( Arguments ) ')' expectChar SeqClose ;
+:w ( Expression ) expectConst Constant _nextt ';' expectChar SeqClose ;
-: parseConstant ( ta tl -- ) expectConst Constant ;
-: parseReturn ( -- )
- Return _nextt parseConstant _nextt ';' expectChar SeqClose ;
-: parseStatements ( -- )
- Statements _nextt '{' expectChar
- _nextt S" return" S= if parseReturn else _err then
- _nextt '}' expectChar SeqClose ;
-: parseArguments ( -- )
- Arguments _nextt '(' expectChar _nextt ')' expectChar SeqClose ;
-: parseFunction ( typeid -- )
- drop _nextt expectIdent Function parseArguments parseStatements SeqClose ;
-: parseUnit ( -- ) nextt isType? if parseFunction else _err then ;
-: parseast ( -- ) Unit parseUnit ;
+: parseast ( -- ) Unit begin
+ nextt ?dup not if exit then
+ astparsetbl activeelem astid wexec again ;
: printelem ( elem -- )
+ ?dup not if ." null" exit then
dup astid idname stype dup flags ( elem flags )
dup $04 and if ( int data ) '[' emit over intdata .x ']' emit then
$08 and if ( str data ) '[' emit over strdata stype ']' emit then
@@ -98,5 +121,5 @@ create astidnames ,"
: printast ( elem -- ) 1 swap begin ( lvl elem )
dup astid not if ( seqclose ) swap 1- swap then
dup printelem
- dup haschildren if '(' emit swap 1+ swap else ',' emit then ( lvl elem )
+ dup haschildren? if '(' emit swap 1+ swap else ',' emit then ( lvl elem )
nextelem 2dup not swap not or until 2drop ;
diff --git a/testcc.fs b/testcc.fs
@@ -1,10 +1,12 @@
\ ./dusk < testcc.fs
f<< wordtbl.fs
+f<< asm.fs
f<< cctok.fs
f<< ccast.fs
f<< cc1.fs
: opentestc S" test.c" fopen not if abort" can't open" then ;
opentestc
-' fin< to cc<
+' f< to cc<
cc1,
+main .x
bye