duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit 78e80d03d886cf85ba24331ebb7d56ed13f0ba76
parent ab7e21b55f0cccef79f9a2f41f0278c5db80ace6
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Thu,  2 Jun 2022 08:43:26 -0400

cc: Add parsing to ccast

Test with "./dusk < testcc.fs"

Diffstat:
Mdusk.asm | 2+-
Mfs/cc1.fs | 23+----------------------
Mfs/ccast.fs | 45++++++++++++++++++++++++++++++++++++++++-----
Afs/cctok.fs | 23+++++++++++++++++++++++
Afs/test.c | 3+++
Afs/wordtbl.fs | 6++++++
Atestcc.fs | 10++++++++++
7 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/dusk.asm b/dusk.asm @@ -157,7 +157,7 @@ defword 'exit', 4, word_exit, word_abort pop eax ret -defword 'execute', 8, word_execute, word_exit +defword 'execute', 7, word_execute, word_exit pspop eax jmp eax diff --git a/fs/cc1.fs b/fs/cc1.fs @@ -1,27 +1,6 @@ \ C compiler stage 1 -\ Requires ccast.fs +\ Requires cctok.fs and ccast.fs -alias in< cc< -0 value putback -: _cc< ( -- c ) putback ?dup if 0 to putback else cc< then ; -create buf LNSZ allot -: 0-9? ( c -- f ) '0' - 10 < ; -: a-z? ( c -- f ) dup 'A' - 26 < swap 'a' - 26 < or ; -: identifier? ( c -- f ) dup 0-9? swap a-z? or ; - -\ advance to the next non-whitespace and return the char encountered. -\ if end of stream is reached, c is 0 -: tonws ( -- c ) 0 begin ( c ) - drop _cc< dup dup EOF <= swap ws? not or until ( c ) - dup EOF <= if drop 0 then ; - -\ Returns the next token. sl is 0 and there's no sa when there's no more token -\ to consume. -: nextt ( -- sa? sl-or-0 ) tonws dup if ( c ) - A>r buf >A dup identifier? if begin ( c ) - Ac!+ cc< dup identifier? not until to putback - else Ac!+ then - buf A> buf - ( sa sl ) r>A then ; \ Compiles input coming from the cc< alias (defaulting to in<) and writes the diff --git a/fs/ccast.fs b/fs/ccast.fs @@ -1,4 +1,5 @@ \ C compiler Abstract Syntax Tree +\ requires cctok.fs \ An abstract syntax tree, AST, is a hierarchical structure of elements \ representing the elements found in a C source file. The top of this structure \ is a Unit, which is what we get after we compiler a C source file. @@ -17,6 +18,8 @@ \ 2 Function name \ 3 Return \ 4 Constant value +\ 5 Statements +\ 6 Arguments \ Flags \ b0 haschildren this element can contain children @@ -24,9 +27,10 @@ \ b3 str data The 'data section contains a 1b str length followed by a \ string of that length. +7 value ASTIDCNT \ 8 chars per name create astidnames ," -) unit functionreturn constant" +) unit functionreturn constantstmts args " 0 value curunit \ points to current Unit, the beginning of the AST 0 value lastelem \ last element of the chain @@ -35,6 +39,7 @@ create astidnames ," \ trim whitespaces from the right of string : rtrim ( sa sl -- sa sl ) 1+ begin 1- 2dup + 1- c@ ws? not until ; : idname ( id -- sa sl ) 8 * astidnames + 8 rtrim ; +: astid ( elem -- id ) c@ ; : flags ( elem -- flags ) 1+ c@ ; : haschildren ( elem -- f ) flags $01 and ; : parentelem ( elem -- parent ) 1+ 1+ @ ; @@ -44,21 +49,51 @@ create astidnames ," here lastelem 6 + ! here to lastelem c, c, activeelem , 0 , lastelem haschildren if lastelem to activeelem then ; : SeqClose ( -- ) - 0 0 newelem activeelem ?dup not if S" can't go beyond root!" stype abort then + 0 0 newelem activeelem ?dup not if abort" can't go beyond root!" then parentelem to activeelem ; : Unit ( -- ) here to curunit here to lastelem here to activeelem 1 c, $01 c, 8 allot0 ; : Function ( 'name namelen -- ) $09 2 newelem dup c, move, ; : Return ( -- ) $01 3 newelem ; : Constant ( n -- ) $04 4 newelem , ; +: Statements ( -- ) $01 5 newelem ; +: Arguments ( -- ) $01 6 newelem ; + +\ Parsing words. Each "haschildren" elem has a parsing context, each of which +\ having its corresponding parsing word down there. Every parsing word takes +\ a token as a parameter. +: _err ( ta tl -- ) stype abort" parsing error" abort ; +: _nextt nextt ?dup not if abort" expecting token!" then ; + +\ Takes a token and returns the corresponding typedef (not AST type). +\ For now, we always return 1 on "int". +: isType? ( ta tl -- typeid? f ) S" int" S= dup if 1 swap then ; +: expectConst ( ta tl -- n ) 2dup parse if rot> 2drop else _err then ; +: expectIdent ( ta tl -- ta tl ) + A>r 2dup >r >A begin Ac@+ identifier? not if _err then next r>A ; +: expectChar ( ta tl c -- ) + >r 2dup 1 = not if drop _err then c@ r> = not if _err then 2drop ; + +: parseConstant ( ta tl -- ) expectConst Constant ; +: parseReturn ( -- ) _nextt parseConstant _nextt ';' expectChar SeqClose ; +: parseStatements ( -- ) + Statements _nextt '{' expectChar + _nextt S" return" S= if parseReturn else _err then + _nextt '}' expectChar SeqClose ; +: parseArguments ( -- ) + Arguments _nextt '(' expectChar _nextt ')' expectChar SeqClose ; +: parseFunction ( typeid -- ) + drop _nextt expectIdent Function parseArguments parseStatements SeqClose ; +: parseUnit ( -- ) nextt isType? if parseFunction else _err then ; +: parseast ( -- ) Unit parseUnit ; : printelem ( elem -- ) - dup c@ idname stype dup flags ( elem flags ) + dup astid idname stype dup flags ( elem flags ) dup $04 and if ( int data ) '[' emit over 'data @ .x ']' emit then $08 and if ( str data ) '[' emit over 'data c@+ stype ']' emit then drop ; : printast ( elem -- ) 1 swap begin ( lvl elem ) - dup c@ not if ( seqclose ) swap 1- swap then + dup astid not if ( seqclose ) swap 1- swap then dup printelem - dup haschildren if '(' emit swap 1+ swap then ( lvl elem ) + dup haschildren if '(' emit swap 1+ swap else ',' emit then ( lvl elem ) nextelem 2dup not swap not or until 2drop ; diff --git a/fs/cctok.fs b/fs/cctok.fs @@ -0,0 +1,23 @@ +\ C compiler tokenization + +alias in< cc< +0 value putback +: _cc< ( -- c ) putback ?dup if 0 to putback else cc< then ; +create buf LNSZ allot +: 0-9? ( c -- f ) '0' - 10 < ; +: a-z? ( c -- f ) dup 'A' - 26 < swap 'a' - 26 < or ; +: identifier? ( c -- f ) dup 0-9? swap a-z? or ; + +\ advance to the next non-whitespace and return the char encountered. +\ if end of stream is reached, c is 0 +: tonws ( -- c ) 0 begin ( c ) + drop _cc< dup dup EOF <= swap ws? not or until ( c ) + dup EOF <= if drop 0 then ; + +\ Returns the next token. sl is 0 and there's no sa when there's no more token +\ to consume. +: nextt ( -- sa? sl-or-0 ) tonws dup if ( c ) + A>r buf >A dup identifier? if begin ( c ) + Ac!+ cc< dup identifier? not until to putback + else Ac!+ then + buf A> buf - ( sa sl ) r>A then ; diff --git a/fs/test.c b/fs/test.c @@ -0,0 +1,3 @@ +int main() { + return 42; +} diff --git a/fs/wordtbl.fs b/fs/wordtbl.fs @@ -0,0 +1,6 @@ +\ Word tables +: wordtbl ( n -- a ) create here swap 4 * allot0 1 here c! ; +: w+ ( a -- a+4? ) 4 + dup @ if drop then ; +: :w ( a -- a+4? ) here xtcomp over ! w+ ; +: 'w ( a -- a+4? ) ' over ! w+ ; +: wexec ( tbl idx -- ) 4 * + @ execute ; diff --git a/testcc.fs b/testcc.fs @@ -0,0 +1,10 @@ +\ ./dusk < testcc.fs +f<< wordtbl.fs +f<< cctok.fs +f<< ccast.fs +: opentestc S" test.c" fopen not if abort" can't open" then ; +opentestc +' fin< to cc< +parseast +curunit printast +bye