duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit 6b9b0ab2aa91e7d481c6881d8aff6e9d0e4cce59
parent de8b1611f450bdd710910ed225639d4863fd25eb
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Fri,  3 Jun 2022 06:42:09 -0400

cc: Simplify ccast.fs

Also, forgot to commit cc1.fs in my "liftoff!" commit earlier! That's isn't
much of a liftoff...

Diffstat:
Mdusk.asm | 2+-
Mfs/cc1.fs | 19+++++++++----------
Mfs/ccast.fs | 97+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
Mtestcc.fs | 4+++-
4 files changed, 73 insertions(+), 49 deletions(-)

diff --git a/dusk.asm b/dusk.asm @@ -275,7 +275,7 @@ _fread_hasfd: mov ecx, ebp ; buffer mov edx, 1 ; len int 0x80 - test eax, eax + dec eax ; error or EOF jns _fread_success mov dword [ebp], 0 ; return 0 if negative _fread_success: diff --git a/fs/cc1.fs b/fs/cc1.fs @@ -1,7 +1,5 @@ \ C compiler stage 1 -\ Requires cctok.fs, ccast.fs and wordtbl.fs - -: ln" [compile] ." compile nl> ; immediate +\ Requires cctok.fs, ccast.fs and asm.fs \ Generation words take an AST element as a parameter and write the \ corresponding code to here. @@ -10,21 +8,22 @@ over dup not if _err then astid = not if _err then ; : expectclose 0 expectastid nextelem ; \ All words below have the same sig: elem -- next-or-0 -: genConstant 4 expectastid ." mov eax,0x" dup intdata .x nl> nextelem ; +: genConstant 4 expectastid eax dup intdata i32 mov, nextelem ; +: genExpression 7 expectastid nextelem genConstant nextelem ; : genReturn - 3 expectastid nextelem genConstant - ln" sub ebp,4" - ln" [ebp],eax" + 3 expectastid nextelem genExpression + ebp 4 i32 sub, + [ebp] eax mov, expectclose ; : genArguments 6 expectastid nextelem expectclose ; : genStatements 5 expectastid nextelem genReturn - ln" ret" + ret, expectclose ; : genFunction - 2 expectastid ." new entry: " dup strdata stype nl> nextelem + 2 expectastid dup strdata entry nextelem genArguments genStatements ; -: genUnit ( elem -- ) begin ( elem ) nextelem genFunction ?dup until ; +: genUnit ( elem -- ) begin ( elem ) nextelem genFunction ?dup not until ; \ Compiles input coming from the cc< alias (defaulting to in<) and writes the \ result to here. Aborts on error. diff --git a/fs/ccast.fs b/fs/ccast.fs @@ -1,12 +1,13 @@ \ C compiler Abstract Syntax Tree -\ requires cctok.fs +\ requires cctok.fs and ccops.fs \ An abstract syntax tree, AST, is a hierarchical structure of elements \ representing the elements found in a C source file. The top of this structure \ is a Unit, which is what we get after we compiler a C source file. \ In memory, each element has this structure: \ 1b type id -\ 1b flags b0=haschildren b2=int data b3=str data +\ 1b flags (see below) +\ 1b parse stage \ 4b addr of parent element (0 if root) \ 4b addr of next element (0 if none) \ ... maybe data @@ -20,17 +21,23 @@ \ 4 Constant value \ 5 Statements \ 6 Arguments +\ 7 Expression \ Flags -\ b0 haschildren this element can contain children -\ b2 int data The 'data section contains a 4b integer -\ b3 str data The 'data section contains a 1b str length followed by a -\ string of that length. +\ b0 haschildren this element can contain children +\ b1 autoclose close automatically when a children closes +\ b2 int data The 'data section contains a 4b integer +\ b3 str data The 'data section contains a 1b str length followed by a +\ string of that length. +\ Parse stage +\ Stores the stage at which the element is, parse-wise. For example, a Function +\ starts at 0. When Arguments have been parsed, it becomes 1. When Statements +\ have been parsed, it becomes 2. -7 value ASTIDCNT +8 value ASTIDCNT \ 8 chars per name create astidnames ," -) unit functionreturn constantstmts args " +) unit functionreturn constantstmts args expr " 0 value curunit \ points to current Unit, the beginning of the AST 0 value lastelem \ last element of the chain @@ -41,30 +48,36 @@ create astidnames ," : idname ( id -- sa sl ) 8 * astidnames + 8 rtrim ; : astid ( elem -- id ) c@ ; : flags ( elem -- flags ) 1+ c@ ; -: haschildren ( elem -- f ) flags $01 and ; -: parentelem ( elem -- parent ) 1+ 1+ @ ; -: nextelem ( elem -- next ) 6 + @ ; -: 'data ( elem -- 'data ) 10 + ; +: pstage ( elem -- pstage ) 1+ 1+ c@ ; +: pstage+ ( elem -- newpstage ) dup pstage 1+ over 1+ 1+ c! pstage ; +: haschildren? ( elem -- f ) flags $01 and ; +: autoclose? ( elem -- f ) flags $02 and ; +: parentelem ( elem -- parent ) 3 + @ ; +: nextelem ( elem -- next ) 7 + @ ; +: 'data ( elem -- 'data ) 11 + ; : intdata ( elem -- n ) 'data @ ; : strdata ( elem -- sa sl ) 'data c@+ ; : newelem ( flags id -- ) - here lastelem 6 + ! here to lastelem c, c, activeelem , 0 , - lastelem haschildren if lastelem to activeelem then ; + here lastelem 7 + ! here to lastelem c, c, 0 c, activeelem , 0 , + lastelem haschildren? if lastelem to activeelem then ; + +\ AST elements : SeqClose ( -- ) - 0 0 newelem activeelem ?dup not if abort" can't go beyond root!" then - parentelem to activeelem ; + 0 0 newelem activeelem + ?dup not if abort" can't go beyond root!" then + parentelem to activeelem + activeelem autoclose? if SeqClose then ; : Unit ( -- ) - here to curunit here to lastelem here to activeelem 1 c, $01 c, 8 allot0 ; + here to curunit here to lastelem here to activeelem 1 c, $01 c, 9 allot0 ; : Function ( 'name namelen -- ) $09 2 newelem dup c, move, ; -: Return ( -- ) $01 3 newelem ; +: Return ( -- ) $03 3 newelem ; : Constant ( n -- ) $04 4 newelem , ; : Statements ( -- ) $01 5 newelem ; : Arguments ( -- ) $01 6 newelem ; +: Expression ( -- ) $01 7 newelem ; -\ Parsing words. Each "haschildren" elem has a parsing context, each of which -\ having its corresponding parsing word down there. Every parsing word takes -\ a token as a parameter. : _err ( ta tl -- ) stype abort" parsing error" abort ; +: _assert ( ta tl f -- ) not if _err then ; : _nextt nextt ?dup not if abort" expecting token!" then ; \ Takes a token and returns the corresponding typedef (not AST type). @@ -72,25 +85,35 @@ create astidnames ," : isType? ( ta tl -- typeid? f ) S" int" S= dup if 1 swap then ; : expectConst ( ta tl -- n ) 2dup parse if rot> 2drop else _err then ; : expectIdent ( ta tl -- ta tl ) - A>r 2dup >r >A begin Ac@+ identifier? not if _err then next r>A ; + A>r 2dup >r >A begin Ac@+ identifier? _assert next r>A ; : expectChar ( ta tl c -- ) - >r 2dup 1 = not if drop _err then c@ r> = not if _err then 2drop ; + >r 2dup 1 = not if drop _err then c@ r> = _assert 2drop ; + +\ Parse words. Each of those words have the signature "ta tl -- ". +\ To be clear on the semantincs, the word represents the *context*, not the +\ element being parsed. For example, in "Function", we're not parsing the +\ Function AST element, but we're parsing its *children*. + +ASTIDCNT wordtbl astparsetbl +'w _err ( SeqClose ) +:w ( Unit ) isType? _assert drop _nextt expectIdent Function ; +:w ( Function ) activeelem pstage+ dup 1 = if + drop '(' expectChar Arguments else + 2 = if '{' expectChar Statements else SeqClose then then ; +'w _err ( Return ) +'w _err ( Constant ) +:w ( Statements ) + 2dup S" }" S= if 2drop SeqClose else + 2dup S" return" S= _assert 2drop Return Expression then ; +:w ( Arguments ) ')' expectChar SeqClose ; +:w ( Expression ) expectConst Constant _nextt ';' expectChar SeqClose ; -: parseConstant ( ta tl -- ) expectConst Constant ; -: parseReturn ( -- ) - Return _nextt parseConstant _nextt ';' expectChar SeqClose ; -: parseStatements ( -- ) - Statements _nextt '{' expectChar - _nextt S" return" S= if parseReturn else _err then - _nextt '}' expectChar SeqClose ; -: parseArguments ( -- ) - Arguments _nextt '(' expectChar _nextt ')' expectChar SeqClose ; -: parseFunction ( typeid -- ) - drop _nextt expectIdent Function parseArguments parseStatements SeqClose ; -: parseUnit ( -- ) nextt isType? if parseFunction else _err then ; -: parseast ( -- ) Unit parseUnit ; +: parseast ( -- ) Unit begin + nextt ?dup not if exit then + astparsetbl activeelem astid wexec again ; : printelem ( elem -- ) + ?dup not if ." null" exit then dup astid idname stype dup flags ( elem flags ) dup $04 and if ( int data ) '[' emit over intdata .x ']' emit then $08 and if ( str data ) '[' emit over strdata stype ']' emit then @@ -98,5 +121,5 @@ create astidnames ," : printast ( elem -- ) 1 swap begin ( lvl elem ) dup astid not if ( seqclose ) swap 1- swap then dup printelem - dup haschildren if '(' emit swap 1+ swap else ',' emit then ( lvl elem ) + dup haschildren? if '(' emit swap 1+ swap else ',' emit then ( lvl elem ) nextelem 2dup not swap not or until 2drop ; diff --git a/testcc.fs b/testcc.fs @@ -1,10 +1,12 @@ \ ./dusk < testcc.fs f<< wordtbl.fs +f<< asm.fs f<< cctok.fs f<< ccast.fs f<< cc1.fs : opentestc S" test.c" fopen not if abort" can't open" then ; opentestc -' fin< to cc< +' f< to cc< cc1, +main .x bye