commit 2f99bf57ebeff87c0bc239eccd7c4eeaaf27b19f
parent 0182df9cb962d4c10634e188b51a541521ea598c
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Sat, 26 Nov 2022 12:13:56 -0500
comp/c: The Grand Simplification
DuskCC is my first compiler implementation. When I started out, from the
tutorials I was reading, I was under the impression that modeling an AST was a
necessity. As my understanding of the problem space grew, I came to realize that
it isn't true.
To do fancy optimizations, sure, you need an AST. But simply to compile correct
code that is moderately optimized? no. C is designed in a way that very seldom
requires lookahead beyond "look at next token". There are two places I know
that require lookahead:
1. Function variable initialization (we don't know the size of the stack frame
yet).
2. for loop "adjustement" (the 3rd one) statement. Its ideal place is after the
loop body, but we can't do that without lookahead.
So, I went crazy and though: how simpler would that CC be if we scrapped the
AST modeling? It turns out that it's a lot simpler: we go from 1600 LOC to 1200
LOC! The code is now, IMHO, a lot easier to reason about too.
For the two instances mentioned above where lookahead are necessary, I paid the
price of simplicity with runtime jumps, and thus (very) slightly slower code. I
think it's worth it. Compile time is much faster though.
I had to scrap the "AST-manipulating macros" idea though and I went with
something much closer to regular C: text-base macro expansions. I'm quite happy
with how simple I managed to make it.
All in all, I am exceedingly happy about how that refactoring turned out. I like
that code a lot.
Diffstat:
25 files changed, 624 insertions(+), 1039 deletions(-)
diff --git a/Makefile b/Makefile
@@ -53,6 +53,10 @@ test: dusk
testlib: dusk
echo "' byefail to abort f<< tests/lib/all.fs bye" | ./dusk || (echo; exit 1)
+.PHONY: testcc
+testcc: dusk
+ echo "' byefail to abort f<< tests/comp/c/all.fs bye" | ./dusk || (echo; exit 1)
+
.PHONY: clean
clean:
rm -f $(TARGETS) dusk.o fs/init.fs posix/boot.fs memdump *.bin *.img
diff --git a/fs/comp/c/ast.fs b/fs/comp/c/ast.fs
@@ -1,449 +0,0 @@
-\ C compiler Abstract Syntax Tree
-\ An abstract syntax tree, AST, is a hierarchical structure of nodes
-\ representing the nodes found in a C source file. See tree.fs for structure.
-?f<< lib/str.fs
-?f<< lib/arena.fs
-?f<< lib/wordtbl.fs
-?f<< comp/c/tok.fs
-?f<< comp/c/tree.fs
-?f<< comp/c/macrolo.fs
-?f<< comp/c/type.fs
-
-\ This arena contains AST structures for the unit being currently parsed.
-\ We reset it once those structures have been generated.
-Arena :new structbind Arena _arena
-
-0 value _ccdebug
-: _err ( -- ) tokdbg abort" ast error" ;
-: _assert ( f -- ) not if _err then ;
-
-: ccast$ _arena :reset ;
-
-\ Unary operators
-7 const UOPSCNT
-UOPSCNT stringlist UOPTlist "-" "~" "!" "&" "*" "++" "--"
-3 const UOP&
-4 const UOP*
-
-: uopid ( tok -- opid? f )
- UOPTlist sfind dup 0< if drop 0 else 1 then ;
-: uoptoken ( opid -- tok ) UOPTlist slistiter ;
-
-\ Postfix operators
-2 const POPSCNT
-POPSCNT stringlist POPTlist "++" "--"
-
-: popid ( tok -- opid? f )
- POPTlist sfind dup 0< if drop 0 else 1 then ;
-: poptoken ( opid -- tok ) POPTlist slistiter ;
-
-\ Binary operators
-29 const BOPSCNT
-BOPSCNT stringlist BOPTlist
- "+" "-" "*" "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "&" "^" "|"
- "&&" "||" "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>=" "&=" "^=" "|="
-
-\ binary ops precedence. lower means more precedence
-create bopsprectbl BOPSCNT nc,
- 1 1 0 0 0 2 2 3 3 3 3 4 4 5 5 5
- 6 6 7 7 7 7 7 7 7 7 7 7 7
-
-: bopid ( tok -- opid? f )
- BOPTlist sfind dup 0< if drop 0 else 1 then ;
-: bopprec ( opid -- precedence ) BOPSCNT min bopsprectbl + c@ ;
-: boptoken ( opid -- tok ) BOPTlist slistiter ;
-: ptrbop? ( opid -- f ) 2 < ; \ can op be applied to pointers?
-
-
-\ AST node types
-22 const ASTIDCNT
-0 const AST_DECLARE
-\ 1 is unused
-2 const AST_FUNCTION
-3 const AST_RETURN
-4 const AST_CONSTANT
-5 const AST_STATEMENTS
-\ 6 is unused
-7 const AST_IDENT
-8 const AST_UNARYOP
-9 const AST_POSTFIXOP
-10 const AST_BINARYOP
-11 const AST_LIST \ list of lvalues or constants: {1, 2, 3}
-12 const AST_IF \ if (child0) { child1 } else { child2 }
-13 const AST_STRLIT
-14 const AST_FUNCALL \ first elem is addr to call, the rest are args
-15 const AST_FOR \ for (child0; child1; child2) { child3 }
-16 const AST_PSPUSH
-17 const AST_PSPOP
-18 const AST_BREAK
-19 const AST_WHILE
-20 const AST_DO
-21 const AST_ARROW
-
-0 value curstatic \ is current definition "static"?
-extends Node struct[ ASTNode
- : :new
- \ To ensure allocator atomicity, we over-reserve in the :[ call to ensure
- \ that any AST node as enough space ahead to have a contiguous memory space.
- \ No AST node is suppose to exceed $200 bytes
- $200 _arena :[ Node :new _arena :] drop ;
-]struct
-
-extends ASTNode struct[ Declare \ Declaration of global variable
- sfield ctype
-
- : :new ( ctype -- node )
- curstatic if dup CType :static! then
- AST_DECLARE ASTNode :new ( ctype node ) swap _arena :, ;
-]struct
-
-extends ASTNode struct[ Function
- sfield ctype \ the signature of the function.
- sfield locvars \ CType LL of local variables
-
- 0 value current \ function being currently parsed
- : :new ( ctype -- node )
- curstatic if dup CType :static! then
- AST_FUNCTION ASTNode :new swap ( node ctype ) _arena :, 0 _arena :,
- dup to current ;
-
- : name ctype CType name ;
- : :finddecl ( name self -- ctype-or-0 )
- 2dup ctype CType :find ?dup if nip nip else to' locvars CType :find then ;
- : :argssize ( self -- size-in-bytes ) ctype CType :argssize ;
- : :locsize ( self -- size-in-bytes ) locvars dup if CType :size then ;
-]struct
-
-extends ASTNode struct[ Constant
- sfield value
- : :new ( n -- node ) AST_CONSTANT ASTNode :new swap _arena :, ;
-]struct
-
-extends ASTNode struct[ Ident
- SZ &+ name
-
- : :new ( name -- node ) AST_IDENT ASTNode :new swap _arena :s, ;
-
- : :finddecl ( self -- ctype-or-0 )
- dup name dup rot AST_FUNCTION swap Node :findparent ( name name fnode )
- dup if Function :finddecl else nip then ( name ctype-or-0 )
- ?dup if nip else ( name ) findSymbol then ( ctype ) ;
-]struct
-
-extends ASTNode struct[ Op
- sfield opid
- : :new ( opid id -- node ) ASTNode :new swap _arena :, ;
-]struct
-
-extends ASTNode struct[ Arrow
- SZ &+ name
-
- : :new AST_ARROW ASTNode :new swap _arena :s, ;
-]struct
-
-struct+[ ASTNode
- : :type dup id case ( self )
- AST_IDENT of =
- Ident :finddecl ?dup _assert CType :type endof
- AST_UNARYOP of =
- dup firstchild :type swap Op opid case ( type )
- UOP& of = type*lvl+ endof
- UOP* of = dup type*lvl if type*lvl- then endof
- endcase endof
- AST_ARROW of =
- dup Arrow name swap firstchild :type ( name type )
- dup type*lvl 1 = _assert ( name type )
- ctype' dup CType :struct? _assert ( name ctype )
- CType :find# ( field-ctype ) CType :type endof
- drop TYPE_INT endcase ;
-]struct
-
-extends Op struct[ UnaryOp
- : :new ( opid -- node ) AST_UNARYOP Op :new ;
- : :new& 3 ( & opid ) :new ;
- : :new* 4 ( * opid ) :new ;
-]struct
-
-extends Op struct[ PostfixOp
- : :new ( opid -- node ) AST_POSTFIXOP Op :new ;
-]struct
-
-extends Op struct[ BinaryOp
- : :new ( opid -- node ) AST_BINARYOP Op :new ;
- : :new+ 0 ( + opid ) :new ;
- : :new= 18 ( = opid ) :new ;
-]struct
-
-extends ASTNode struct[ StrLit
- SZ &+ value
-
- : :new AST_STRLIT ASTNode :new ;
-]struct
-
-ASTIDCNT stringlist astidnames
-"declare" "unit" "function" "return" "constant" "stmts" "unused" "ident"
-"unaryop" "postop" "binop" "list" "if" "str" "call" "for" "push" "pop" "break"
-"while" "do" "arrow"
-
-: idname ( id -- str ) astidnames slistiter ;
-
-: _[ '[' emit ;
-: _] ']' emit ;
-
-ASTIDCNT wordtbl astdatatbl ( node -- node )
-:w ( Declare ) _[ dup Declare ctype CType :. _] ;
-'w noop ( Unit )
-:w ( Function ) _[ dup Function ctype printtype _] ;
-'w noop ( Return )
-:w ( Constant ) _[ dup Constant value .x _] ;
-'w noop ( Statements )
-'w noop ( unused )
-:w ( Ident ) _[ dup Ident name stype _] ;
-:w ( UnaryOp ) _[ dup Op opid uoptoken stype _] ;
-:w ( PostfixOp ) _[ dup Op opid poptoken stype _] ;
-:w ( BinaryOp ) _[ dup Op opid boptoken stype _] ;
-'w noop ( List )
-'w noop ( If )
-:w ( StrLit ) _[ dup StrLit value stype _] ;
-'w noop ( FunCall )
-'w noop ( For )
-'w noop ( PSPush )
-'w noop ( PSPop )
-'w noop ( Break )
-'w noop ( While )
-'w noop ( Do )
-:w ( Arrow ) _[ dup Arrow name stype _] ;
-
-: printast ( node -- )
- ?dup not if ." null" exit then
- dup Node id dup AST_FUNCTION = if nl> then idname stype
- astdatatbl over Node id wexec
- Node firstchild ?dup if
- '(' emit begin
- dup printast Node nextsibling dup if ',' emit then ?dup not until
- ')' emit then ;
-
-
-: newnode ( parent nodeid -- newnode )
- ASTNode :new ( parent node ) dup rot Node :add ( node ) ;
-
-\ Parse words
-
-alias noop parseExpression ( tok -- node ) \ forward declaration
-alias noop parseFactor ( tok -- node ) \ forward declaration
-
-\ The first '{' has already been read
-: parseList ( -- node )
- AST_LIST ASTNode :new nextt dup '}' isChar? if drop exit then
- begin ( lnode tok )
- parseFactor over Node :add
- nextt case
- '}' of isChar?^ rdrop exit endof
- ',' of isChar?^ endof
- _err
- endcase
- nextt again ;
-
-\ parses, if possible, a postfix operator. If none, this is a noop.
-\ We parse postfix args as long as there are any.
-: parsePostfixOp ( node -- node )
- nextt case ( inode )
- '[' of isChar?^ ( inode ) \ x[y] is the equivalent of *(x+y)
- BinaryOp :new+ ( inode bnode )
- tuck Node :add ( bnode )
- nextt parseExpression nextt ']' expectChar ( bnode node )
- over Node :add ( bnode )
- UnaryOp :new* ( bnode unode )
- tuck Node :add ( unode ) parsePostfixOp
- endof
- '(' of isChar?^ ( inode )
- AST_FUNCALL ASTNode :new ( inode fcnode ) tuck Node :add begin ( fcnode )
- nextt dup ')' isChar? not while \ an argument
- parseExpression over Node :add
- nextt dup ',' isChar? if drop else to nexttputback then
- repeat ( tok ) drop parsePostfixOp
- endof
- S" ->" of s= ( inode ) nextt Arrow :new tuck Node :add parsePostfixOp endof
- '.' of isChar?^ ( inode )
- UnaryOp :new& tuck Node :add
- nextt Arrow :new tuck Node :add parsePostfixOp
- endof
- r@ popid if ( inode opid )
- PostfixOp :new ( inode opnode )
- tuck Node :add ( opnode ) parsePostfixOp
- else r@ to nexttputback then
- endcase ;
-
-\ A factor can be:
-\ 1. A constant
-\ 2. A Lvalue (AST_IDENT)
-\ 3. A unaryop/postfixop containing a factor
-\ 4. A function call
-\ 5. An expression inside () parens.
-\ 6. A string literal
-\ 7. pspop()
-\ 8. a typecast followed by an expression
-\ 9. NULL
-: _ ( tok -- node )
- case
- '(' of isChar?^ ( )
- \ can be an expression or a typecast
- nextt dup parseType if ( tok type )
- \ TODO: actually process the typecast
- nip parseDeclarator drop nextt ')' expectChar nextt parseExpression
- else ( tok ) parseExpression nextt ')' expectChar then
- endof
- '"' of isChar?^ ( )
- StrLit :new $100 _arena :[ here 0 c, ['] ," with-stdin<
- here over - 1- swap c! _arena :] drop ( node )
- endof
- S" pspop" of s= ( )
- nextt '(' expectChar nextt ')' expectChar
- AST_PSPOP ASTNode :new parsePostfixOp
- endof
- S" NULL" of s= 0 Constant :new endof
- of uopid ( opid )
- UnaryOp :new ( opnode )
- nextt parseFactor over Node :add ( opnode ) endof
- of isIdent? ( ) \ lvalue, FunCall or macro
- r@ findMacro ?dup if Macro ast else r@ Ident :new then ( node )
- parsePostfixOp
- endof
- ( case else ) \ Constant
- r@ parse if Constant :new else _err then
- endcase ;
-current to parseFactor
-
-\ An expression can be 2 things:
-\ 1. a factor
-\ 3. A binaryop containing two expressions.
-: _ ( tok -- exprnode ) \ parseExpression
- \ tok is expected to be a factor
- parseFactor nextt ( factor nexttok )
- dup bopid if ( factor tok binop )
- nip ( factor binop ) BinaryOp :new ( factor node )
- tuck Node :add nextt ( binnode tok )
- \ now, let's consume tokens as long as we have binops coming.
- begin ( bn tok )
- parseFactor nextt ( bn factor tok )
- dup bopid while ( bn fn tok bopid )
- nip BinaryOp :new ( bn1 fn bn2 )
- \ another binop! who will get fn? bn1 or bn2? the one that has the
- \ best precedence!
- rot ( fn bn2 bn1 ) over Op opid bopprec
- over Op opid bopprec < if ( fn bn2 bn1 )
- \ bn2 wins. add fn to bn2, add bn2 to bn1, bn2 becomes bn
- rot> tuck Node :add ( bn1 bn2 ) dup rot Node :add ( bn2->bn )
- else \ bn1 wins. add fn to bn1, bn1 to bn2, bn2 becomes bn
- dup Node parent if \ bn1 has a parent! bn2 needs to replace it.
- 2dup Node :replace then
- rot over Node :add ( bn2 bn1 ) over Node :add ( bn2->bn )
- then ( bn )
- nextt repeat ( bn fn tok ) \ not a binop
- \ tok becomes nexttok and we add fn to bn to complete the chain
- rot> over Node :add swap ( bn tok )
- \ bn is not necessarily our result, the root node is.
- swap Node :root swap
- then \ if not binop we have nothing to do, we're done
- ( node tok ) to nexttputback ;
-current to parseExpression
-
-: parseDeclareInit ( -- expr-or-list-or-0 )
- nextt dup '=' isChar? not if to nexttputback 0 exit then
- drop nextt dup '{' isChar? if drop parseList else parseExpression then ;
-
-: parseDeclareStatement ( type parentnode -- )
- over parseDeclarator ( type pnode ctype )
- Function current Function locvars ?dup if ( type pnode ctype ll )
- 2dup CType :size swap to CType offset over swap llappend
- else dup Function current to Function locvars then ( type pnode ctype )
- parseDeclareInit ( type parentnode ctype initnode )
- ?dup if
- swap CType name Ident :new ( type pnode initnode identnode )
- BinaryOp :new= tuck Node :add ( type pnode initnode =node )
- tuck Node :add over Node :add ( type pnode )
- else drop then ( type pnode )
- nextt dup ',' isChar? if \ another declaration
- drop parseDeclareStatement
- else ';' expectChar 2drop then ;
-
-alias noop parseStatement ( funcnode -- ) \ forward declaration
-
-8 stringlist statementnames
- "{" "return" "if" "for" "pspush" "break" "while" "do"
-8 wordtbl statementhandler ( parentnode -- )
-:w ( { )
- AST_STATEMENTS newnode begin ( snode )
- nextt dup S" }" s= not while
- to nexttputback dup parseStatement repeat ( snode tok ) 2drop ;
-:w ( return )
- AST_RETURN newnode ( rnode )
- nextt dup S" ;" s= if \ empty returns are allowed
- 2drop else
- parseExpression read; ( rnode expr ) swap Node :add
- then ;
-:w ( if ) AST_IF newnode ( ifnode )
- nextt '(' expectChar
- nextt parseExpression ( ifn expr ) over Node :add
- nextt ')' expectChar
- dup parseStatement ( ifnode )
- nextt dup S" else" s= if ( ifn tok )
- drop parseStatement else
- to nexttputback drop then ;
-:w ( for ) AST_FOR newnode ( fornode )
- nextt '(' expectChar
- nextt parseExpression ( sn forn expr ) over Node :add
- read;
- nextt parseExpression ( sn forn expr ) over Node :add
- read;
- nextt parseExpression ( sn forn expr ) over Node :add
- nextt ')' expectChar
- parseStatement ;
-:w ( pspush ) AST_PSPUSH newnode ( pushnode )
- nextt '(' expectChar
- nextt parseExpression swap Node :add
- nextt ')' expectChar read; ;
-:w ( break ) AST_BREAK newnode drop read; ;
-:w ( while ) AST_WHILE newnode ( whilenode )
- nextt '(' expectChar
- nextt parseExpression ( whilen expr ) over Node :add
- nextt ')' expectChar
- parseStatement ;
-:w ( do ) AST_DO newnode ( donode )
- dup parseStatement ( donode )
- nextt S" while" s= _assert
- nextt '(' expectChar
- nextt parseExpression ( don expr ) swap Node :add
- nextt ')' expectChar read; ;
-
-: _ ( parentnode -- ) \ parseStatement
- nextt dup statementnames sfind dup 0< if ( pnode tok -1 )
- drop dup parseType if ( pnode tok type )
- nip swap parseDeclareStatement else ( pnode tok )
- parseExpression swap Node :add read; then
- else ( pnode tok idx )
- nip statementhandler swap wexec then ;
-current to parseStatement
-
-: parseFuncDef ( ctype -- fnode )
- dup addSymbol Function :new ( fnode ) dup parseStatement ;
-
-: parseGlobalDecl ( ctype -- dnode )
- Declare :new ( dnode )
- dup Declare ctype addSymbol
- STORAGE_MEM over Declare ctype to CType storage ( dnode )
- parseDeclareInit ( dnode initnode )
- ?dup if over Node :add then read; ;
-
-\\ Parse the next element in a Unit node
-: parseUnit ( tok -- node-or-0 )
- dup '#' isChar? if
- drop nextt ['] MacroOps structdict' find ?dup _assert
- execute 0 exit then
- 0 to curstatic
- dup S" static" s= if drop nextt 1 to curstatic then
- parseType _assert ( type )
- nextt dup ';' isChar? if \ Only a type on a line is fine, carry on
- 2drop 0 exit then
- to nexttputback parseDeclarator ( ctype )
- dup CType :funcsig? if parseFuncDef else parseGlobalDecl then ;
diff --git a/fs/comp/c/cc.fs b/fs/comp/c/cc.fs
@@ -1,7 +1,7 @@
\ C compiler
?f<< /comp/c/vm/vm.fs
-?f<< /comp/c/gen.fs
-?f<< /comp/c/macro.fs
+?f<< /comp/c/pgen.fs
+?f<< /comp/c/pp.fs
: _err ( -- ) abort" CC error" ;
: _assert ( f -- ) not if _err then ;
@@ -9,13 +9,8 @@
\ Compiles input coming from the stdin alias and writes the
\ result to here. Aborts on error.
: cc1, ( -- )
- cctypes$ ccast$ cmacro$ cctok$ begin ( )
- nextt? ?dup while parseUnit ( node-or-0 ) ?dup if
- _ccdebug if dup printast nl> then ( node )
- gennode then repeat ;
+ cctypes$ ccpp$ cctok$ begin ( ) nextt? ?dup while cparse .free repeat ;
-: :c
- cctok$ nextt parseUnit ?dup if
- _ccdebug if dup printast nl> then gennode then ;
+: :c cctok$ nextt cparse ;
: cc<< ( -- ) ['] cc1, word with-stdin-file ;
diff --git a/fs/comp/c/feed.fs b/fs/comp/c/feed.fs
@@ -0,0 +1,16 @@
+\ C compiler feeding logic
+\ We generally feed from "stdin", except when a macro is being processed. When
+\ we're in that mode, we're feeding from the macro's range, until a NULL is
+\ encountered. This process is recursive.
+?f<< /lib/stack.fs
+
+$10 Stack :new structbind Stack _feed
+
+: ccin ( -- c )
+ _feed :count if
+ _feed :peek' 8b @@+ ?dup not if _feed :pop drop ccin then
+ else stdin then ;
+: ccin# ccin dup 0< if abort" unexpected EOF" then ;
+: ccputback ( c -- )
+ _feed :count if _feed :peek' -1 over +! @ c! else StdIn IO :putback then ;
+: ccpushmacro ( macro -- ) _feed :push ;
diff --git a/fs/comp/c/gen.fs b/fs/comp/c/gen.fs
@@ -1,280 +0,0 @@
-\ C compiler code generation
-
-\ Code generation
-
-\ This unit takes an AST and generates native code using comp/c/vm operations.
-\ We do so by starting from the root Unit node, iterate children (Functions) and
-\ recursively "resolve" them.
-
-\ As a general rule, we stay with Op1 (in comp/c/vm) active. Op2 usage is an
-\ exception. However, it's very important that "regular" node handler *don't*
-\ explicitely select Op1, because when we select Op2 for these exceptional
-\ reasons, we generally want to to this recursively, all the way down. This is
-\ why we don't see "selop" calls except in those exceptional places. We always
-\ target the "active" op.
-
-\ Binary Op resolution strategy
-\ The Binary Op node generation logic a few lines below needs a bit of an
-\ explanation. A binary op needs the two VM operands at once, apply an operation
-\ on them and return the result in Op1. Yes, Op1, not "active" op.
-
-\ A binary op can have 3 basic configuration for its two children:
-\ 1. Both children are "single ops" (or lvalues, or constants... nothing that
-\ requires 2 ops.
-\ 2. One of the child is or contains a node that needs 2 ops and the other is
-\ simple.
-\ 3. Both children are or contain nodes that need 2 ops.
-
-\ The first configuration is easy to solve, no need for anything. You resolve
-\ the first in op1, the second in op2, then apply the operation.
-
-\ The second configuration needs careful threading. Because the binop requires
-\ both VM operands, it should be executed first. If the binop is "left" (the
-\ first child), then nothing special needs to be done, Op1 has the proper value.
-\ If the binop is "right", then we need to swap Op1 and Op2 after having
-\ resolved it so that its result sits in the proper Op.
-
-\ The third configuration is the tricky one. If nothing is done, the result of
-\ the first binop will be overwritten by the calculation made by the second
-\ binop. We need to:
-\ 1. resolve the first node
-\ 2. save the result
-\ 3. resolve the second node
-\ 4. swap the result to Op2
-\ 5. restore the result to Op1.
-
-\ comp/c/vm abstracts away the save/restore mechanism through :push/:pop.
-?f<< /lib/wordtbl.fs
-?f<< /lib/meta.fs
-?f<< /lib/stack.fs
-?f<< /comp/c/ast.fs
-\ This unit also requires vm/(ARCH).fs, but it's loaded in comp/c/cc.fs
-
-: _err ( -- ) abort" gen error" ;
-: _assert ( f -- ) not if _err then ;
-
-\ breaks are a list of forward jumps addr that need to be resolved at the end
-\ of the "breakeable" structure.
-10 Stack :new structbind Stack breaks
-: addbreak vmjmp[, breaks :push ;
-: resolvebreaks ( tgtlvl -- )
- begin ( tgt ) breaks :count over > while breaks :pop ]vmjmp repeat drop ;
-: nobreaks# breaks :count not _assert ;
-
-alias noop gennode ( node -- ) \ forward declaration
-
-: gennode$ gennode ops$ ;
-: genchildren ( node -- )
- Node firstchild ?dup if begin
- dup gennode Node nextsibling ?dup not until then ;
-
-: spit ( a u -- ) swap >r >r begin 8b to@+ V1 .x1 next rdrop ;
-: wordfunctype ( w -- type ) wordsig nip 1 = if TYPE_UINT else TYPE_VOID then ;
-
-\ Does node need 2 VM operands?
-: needs2ops? ( node -- f )
- dup Node id dup AST_BINARYOP = swap AST_FUNCALL = or if drop 1 exit then
- Node firstchild begin
- ?dup while dup needs2ops? not while Node nextsibling repeat
- ( needs2ops? == true ) drop 1 else ( end of children ) 0 then ;
-
-UOPSCNT wordtbl uopgentbl ( -- )
-:w ( - ) vmneg, ;
-:w ( ~ ) vmnot, ;
-:w ( ! ) vmboolnot, ;
-:w ( & ) vmop :&op ;
-:w ( * ) vmop :*op ;
-:w ( ++ ) vm++op, ;
-:w ( -- ) vm--op, ;
-
-POPSCNT wordtbl popgentbl ( -- )
-:w ( ++ ) vmop++, ;
-:w ( -- ) vmop--, ;
-
-BOPSCNT wordtbl bopgentblpost ( -- )
-'w vm+,
-'w vm-,
-'w vm*,
-'w vm/,
-'w vm%,
-'w vm<<,
-'w vm>>,
-'w vm<,
-'w vm>,
-'w vm<=,
-'w vm>=,
-'w vm==,
-'w vm!=,
-'w vm&,
-'w vm^,
-'w vm|,
-'w vm&&,
-'w vm||,
-'w vm=,
-'w vm+=,
-'w vm-=,
-'w vm*=,
-'w vm/=,
-'w vm%=,
-'w vm<<=,
-'w vm>>=,
-'w vm&=,
-'w vm^=,
-'w vm|=,
-
-struct+[ ASTNode
- \ Try to transform this node into a constant number.
- \ f=1 and n=number if it's possible, otherwise f=0.
- : :?asnum ( self -- n? f ) dup id case ( self )
- AST_CONSTANT of = Constant value 1 endof
- AST_UNARYOP of =
- dup firstchild :?asnum if ( self n )
- vmop :>const Op opid uopgentbl swap wexec 1
- else drop 0 then endof
- AST_BINARYOP of =
- dup firstchild dup :?asnum if ( self child n1 )
- swap nextsibling :?asnum if ( self n1 n2 )
- vmop^ :>const vmop :>const
- Op opid bopgentblpost swap wexec 1
- else 2drop 0 then
- else 2drop 0 then endof
- drop 0
- endcase ;
-]struct
-
-ASTIDCNT wordtbl gentbl ( node -- )
-:w ( Declare ) dup Declare ctype CType :isglobal? _assert
- dup Declare ctype dup CType :static? not if \ not static
- dup CType name NEXTWORD ! create then ( dnode ctype )
- here swap to CType offset ( dnode )
- dup Node firstchild ?dup if ( dnode inode )
- nip gennode \ initialization value in vmop
- \ TODO: support Ident during initialization
- vmop loc case
- VM_CONSTANT of = vmop arg , endof
- VM_CONSTARRAY of = vmop arg @+ CELLSZ * move, endof
- _err endcase ops$
- else ( dnode ) Declare ctype CType :size allot then ( ) ;
-'w genchildren ( Unit )
-:w ( Function )
- ops$
- dup Function ctype dup CType :static? not if \ not static
- sysdict over CType name entry then ( fnode ctype )
- here swap to CType offset ( fnode )
- dup Function :argssize over
- Function :locsize ( argsz locsz ) vmprelude, dup genchildren
- nobreaks# \ no dangling "break" markers
- \ emit implicit vmret, if needed
- dup Function firstchild ( stmtsnode ) Node :lastchild
- dup if Node id AST_RETURN = then not if vmret, then ( fnode )
- _ccdebug if
- Function ctype CType offset here over - spit nl>
- else drop then ;
-:w ( Return ) genchildren vmret, ;
-:w ( Constant ) Constant value const>op ;
-:w ( Statements )
- \ we run ops$ between each statement to discard any unused Result
- Node firstchild begin
- ?dup while dup gennode$ Node nextsibling repeat ( snode ) ;
-'w _err ( unused )
-:w ( Ident )
- dup Ident :finddecl ?dup if ( inode ctype )
- ctype>op drop
- else ( inode )
- Ident name sysdict @ find ?dup _assert TYPE_VOID to vmop type const>op then
- _ccdebug if .ops then ;
-:w ( UnaryOp )
- dup genchildren
- Op opid uopgentbl swap wexec
- _ccdebug if .ops then ;
-:w ( PostfixOp )
- dup genchildren
- Op opid popgentbl swap wexec ;
-\ See "Binary op resolution strategy" in opening comment
-:w ( BinaryOp )
- dup >r \ V1=node
- Node firstchild dup Node nextsibling swap ( n2 n1 )
- over needs2ops? if \ n2 == 2ops
- \ Resolve n2 before n1
- swap gennode \ result in op1
- dup needs2ops? if \ both need 2ops
- vmop :push swap gennode vmop^ :pop else
- selop^ gennode then
- else \ nothing special needed, regular resolution
- gennode selop^ gennode selop^ then
- bopgentblpost r> Op opid wexec ;
-:w ( List )
- vmop :noop#
- dup Node :childcount dup 1+ CELLSZ * syspad :allot dup >r ( node len a )
- over >r !+ swap Node firstchild begin ( a node )
- dup gennode \ value to vmop
- vmop :loclo VM_CONSTANT = _assert ( a node )
- vmop arg vmop :init rot !+ ( node a )
- swap Node nextsibling next ( a node ) 2drop
- r> constarray>op ;
-:w ( If )
- Node firstchild ?dup not if _err then dup gennode ( exprnode )
- vmjz[, swap ( jump_addr exprnode ) ops$
- Node nextsibling ?dup not if _err then dup gennode$ ( jump_addr condnode )
- Node nextsibling ?dup if ( jump_addr elsenode )
- vmjmp[, ( ja1 enode ja2 ) rot ]vmjmp ( enode ja2 )
- swap gennode$ ( ja2 ) then ( jump_addr ) ]vmjmp ;
-:w ( StrLit )
- vmjmp[, here ( snode jaddr saddr )
- rot StrLit value s, ( jaddr saddr ) const>op ]vmjmp ;
-:w ( FunCall )
- \ Resolve address node
- dup Node firstchild gennode \ op has call address
- vmop type ctype? if
- \ We either have a direct function signature or a pointer to it.
- \ TODO: :funcptr? doesn't work correctly here. fix this
- vmop type ctype'
- dup CType :funcsig? not if CType type ctype' then
- dup CType :funcsig? _assert CType type
- else
- vmop loc VM_CONSTANT = if vmop arg wordfunctype else TYPE_VOID then then
- ( node type ) >r \ V1=type
- dup Node :childcount 1- dup >r ( node nargs ) \ V2=nargs
- vmop :push >r ( node nargs ) \ V3='callop
- ?dup if >r ( node ) Node :lastchild begin ( argnode )
- dup gennode vmop :push swap Node prevsibling next then ( node )
- ( argN .. arg0 node ) drop r> ( 'callop ) vmop :pop
- r> ( nargs ) vmcall, r> ( type ) if vmpspop, then ;
-:w ( For )
- breaks :count >r
- Node firstchild dup _assert dup gennode$ ( exprnode ) \ initialization
- here swap ( loop_addr node )
- Node nextsibling dup _assert dup gennode ( loop' exprnode ) \ control
- vmjz[, swap ( loop' cond' node ) ops$
- Node nextsibling dup _assert dup ( adjustment node ) >r
- Node nextsibling dup _assert gennode$ ( loop' cond' ) \ body
- r> gennode$ \ adjustment node
- swap vmjmp, ( cond' ) ]vmjmp r> resolvebreaks ;
-:w ( PSPush ) Node firstchild dup _assert gennode vmpspush, ;
-:w ( PSPop ) drop vmpspop, ;
-:w ( Break ) drop addbreak ;
-:w ( While )
- breaks :count >r here swap ( loop_addr node )
- Node firstchild dup _assert dup gennode \ control
- vmjz[, swap ( loop' cond' node ) ops$
- Node nextsibling ( loop' cond' stmtnode ) dup _assert gennode$ \ body
- ( loop' cond' ) swap vmjmp, ]vmjmp r> resolvebreaks ;
-:w ( Do )
- breaks :count >r here swap ( loop_addr node )
- Node firstchild dup _assert dup gennode$ \ body
- Node nextsibling ( loop' node ) dup _assert gennode \ control
- vmjnz, ops$ r> resolvebreaks ;
-:w ( Arrow )
- dup Node firstchild dup _assert gennode Arrow name ( fieldname )
- vmop type dup ctype? _assert dup type*lvl 1 = _assert ( name type )
- ctype' dup CType :struct? _assert ( name ctype )
- CType :find# ( field-ctype )
- dup CType offset vmop :+n vmop :*op ( field-ctype )
- dup CType type to vmop type
- CType nbelem if vmop :&op then ;
-
-: _ ( node -- )
- _ccdebug if ." generating: " dup printast nl> .ops then
- gentbl over Node id wexec
- _ccdebug if ." done\n" then ;
-current to gennode
diff --git a/fs/comp/c/macro.fs b/fs/comp/c/macro.fs
@@ -1,17 +0,0 @@
-\ CC macros (high part)
-?f<< /comp/c/macrolo.fs
-?f<< /comp/c/ast.fs
-?f<< /comp/c/gen.fs
-
-: _err ( -- ) abort" macro error" ;
-: _assert ( f -- ) not if _err then ;
-
-struct+[ Macro
- : _asnum ( self -- n ) ast ASTNode :?asnum _assert ;
- current to :asnum
-
-struct+[ MacroOps
- : define ( -- )
- nextt nextt parseExpression ( name exprnode )
- Macro :new swap addMacro ;
-]struct
diff --git a/fs/comp/c/macrolo.fs b/fs/comp/c/macrolo.fs
@@ -1,37 +0,0 @@
-\ CC macros (low part)
-?f<< /lib/arena.fs
-?f<< /comp/c/tree.fs
-
-\ Holds defined macros
-Arena :new structbind Arena _arena
-
-struct[ Macro
- \ For now, a macro is only a link to an AST, but when parameters are
- \ introduced, it will be more.
- sfield ast
- : :new ( ast -- macro ) CELLSZ _arena :[ , _arena :] ;
- alias _err :asnum ( self -- n ) \ forward declaration
-]struct
-
-create macros 0 , 0 c, \ this is a dict link
-: addMacro ( macro name -- )
- dup c@ macros rot> ( macro 'dict name len )
- ENTRYSZ + 8 + _arena :[ entry , _arena :] drop ;
-: findMacro ( name -- macro-or-0 ) macros find dup if @ then ;
-
-: cmacro$ _arena :reset 0 macros ! ;
-
-struct[ MacroOps
- -1 value pslvl \ PS level at last runmacro
- : _ begin word runword pslvl 0< until ;
- : runmacro scnt 1+ to pslvl ['] _ with-stdin< ;
-
- : forthdef
- nextt ( name ) runmacro ( name node )
- Macro :new swap addMacro ;
-]struct
-
-: ]#
- scnt MacroOps pslvl - ?dup if abort" PS imbalance during macros" then
- -1 to MacroOps pslvl ;
-
diff --git a/fs/comp/c/pgen.fs b/fs/comp/c/pgen.fs
@@ -0,0 +1,356 @@
+\ C compiler parse+generate
+\ This CC used to build an AST in memory and then, in a subsequent phase,
+\ generate the corresponding code. It turned out to be needlessly complex. In
+\ this unit, we are fed with tokens, we parse it, then we spit code directly.
+require /sys/scratch.fs
+?f<< /lib/str.fs
+?f<< /lib/wordtbl.fs
+?f<< /lib/stack.fs
+?f<< /comp/c/tok.fs
+?f<< /comp/c/type.fs
+\ This unit also requires vm/(ARCH).fs, but it's loaded in comp/c/cc.fs
+
+\ Maximum number that a function call can have
+$10 const MAXARGCNT
+
+1 value _ccdebug
+: _err ( -- ) tokdbg abort" pgen error" ;
+: _assert ( f -- ) not if _err then ;
+: spit ( a u -- ) swap >r >r begin 8b to@+ V1 .x1 next rdrop ;
+: wordfunctype ( w -- type ) wordsig nip 1 = if TYPE_UINT else TYPE_VOID then ;
+
+\ Unary operators
+7 const UOPSCNT
+UOPSCNT stringlist UOPTlist "-" "~" "!" "&" "*" "++" "--"
+3 const UOP&
+4 const UOP*
+
+: uopid ( tok -- opid? f )
+ UOPTlist sfind dup 0< if drop 0 else 1 then ;
+: uoptoken ( opid -- tok ) UOPTlist slistiter ;
+
+UOPSCNT wordtbl uopgentbl ( -- )
+:w ( - ) vmneg, ;
+:w ( ~ ) vmnot, ;
+:w ( ! ) vmboolnot, ;
+:w ( & ) vmop :&op ;
+:w ( * ) vmop :*op ;
+:w ( ++ ) vm++op, ;
+:w ( -- ) vm--op, ;
+
+\ Postfix operators
+2 const POPSCNT
+POPSCNT stringlist POPTlist "++" "--"
+
+POPSCNT wordtbl popgentbl ( -- )
+:w ( ++ ) vmop++, ;
+:w ( -- ) vmop--, ;
+
+: popid ( tok -- opid? f )
+ POPTlist sfind dup 0< if drop 0 else 1 then ;
+: poptoken ( opid -- tok ) POPTlist slistiter ;
+
+\ Binary operators
+29 const BOPSCNT
+BOPSCNT stringlist BOPTlist
+ "+" "-" "*" "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "&" "^" "|"
+ "&&" "||" "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>=" "&=" "^=" "|="
+
+\ binary ops precedence. lower means more precedence
+create bopsprectbl BOPSCNT nc,
+ 1 1 0 0 0 2 2 3 3 3 3 4 4 5 5 5
+ 6 6 7 7 7 7 7 7 7 7 7 7 7
+
+BOPSCNT wordtbl bopgentbl ( -- )
+'w vm+, 'w vm-, 'w vm*, 'w vm/,
+'w vm%, 'w vm<<, 'w vm>>, 'w vm<,
+'w vm>, 'w vm<=, 'w vm>=, 'w vm==,
+'w vm!=, 'w vm&, 'w vm^, 'w vm|,
+'w vm&&, 'w vm||, 'w vm=, 'w vm+=,
+'w vm-=, 'w vm*=, 'w vm/=, 'w vm%=,
+'w vm<<=, 'w vm>>=, 'w vm&=, 'w vm^=,
+'w vm|=,
+
+: bopid ( tok -- opid? f )
+ BOPTlist sfind dup 0< if drop 0 else 1 then ;
+: bopprec ( opid -- precedence ) BOPSCNT min bopsprectbl + c@ ;
+: boptoken ( opid -- tok ) BOPTlist slistiter ;
+: ptrbop? ( opid -- f ) 2 < ; \ can op be applied to pointers?
+
+0 value _curfunc \ ctype of the current function (includes arguments)
+0 value _locvars \ the root ctype of local variables for current function
+0 value curstatic \ is current definition "static"?
+
+: findIdent ( name -- ctype-or-0 )
+ _curfunc if dup _curfunc CType :find ?dup if nip exit then then ( name )
+ dup _locvars if to' _locvars CType :find else drop 0 then ( name ctype-or-0 )
+ ?dup if nip else findSymbol then ;
+
+alias noop parseExpression ( tok -- ) \ forward declaration
+alias noop parseFactor ( tok -- ) \ forward declaration
+
+\ we have a func call and its target in in vmop
+: _funcall ( -- )
+ vmop type ctype? if
+ \ We either have a direct function signature or a pointer to it.
+ \ TODO: :funcptr? doesn't work correctly here. fix this
+ vmop type ctype'
+ dup CType :funcsig? not if CType type ctype' then
+ dup CType :funcsig? _assert CType type
+ else
+ vmop loc VM_CONSTANT =
+ if vmop arg wordfunctype else TYPE_VOID then then ( type )
+ nextt dup ')' isChar? not if ( type tok )
+ to nexttputback vmop :push >r \ V1=callop
+ MAXARGCNT CELLSZ * Stack SZ +
+ \ TODO: I had a strange failure if, instead of using the result of :new
+ \ below, I used the result of :]. They're supposed to be equivalent, but :]
+ \ was wrong. investigate.
+ syspad :[ MAXARGCNT Stack :new syspad :] drop >r ( type ) \ V2=args
+ begin ( type )
+ nextt parseExpression vmop :push V2 Stack :push ( type )
+ nextt dup ',' isChar? while drop repeat ( type tok )
+ ')' expectChar
+ \ now, we want to push the args to PS in the reverse order, with first
+ \ arg on top of PS.
+ V2 Stack :count dup >r >r begin V2 Stack :pop next ( type argN .. arg0 )
+ r> rdrop r> vmop :pop ( type argN .. arg0 nargs )
+ else drop 0 then ( type ... narg )
+ vmcall, ( type ) if vmpspop, then ;
+
+: _arrow ( -- ) \ struct in vmop
+ nextt vmop type dup ctype? _assert dup type*lvl 1 = _assert ( name type )
+ ctype' dup CType :struct? _assert ( name ctype )
+ CType :find# ( field-ctype )
+ dup CType offset vmop :+n vmop :*op ( field-ctype )
+ dup CType type to vmop type
+ CType nbelem if vmop :&op then ;
+
+\ parses, if possible, a postfix operator. If none, this is a noop.
+\ We parse postfix args as long as there are any.
+: parsePostfixOp ( -- )
+ nextt case ( )
+ '[' of isChar?^ \ x[y] is the equivalent of *(x+y)
+ vmop^ :push vmop :push
+ nextt parseExpression selop^ vmop :pop vm+, vmop :*op vmop^ :pop
+ nextt ']' expectChar parsePostfixOp endof
+ '(' of isChar?^ _funcall parsePostfixOp endof
+ S" ->" of s= _arrow parsePostfixOp endof
+ '.' of isChar?^ vmop :&op _arrow parsePostfixOp endof
+ of popid ( opid )
+ popgentbl swap wexec parsePostfixOp endof
+ r@ to nexttputback
+ endcase ;
+
+\ A factor can be:
+\ 1. A constant
+\ 2. A Lvalue (AST_IDENT)
+\ 3. A unaryop/postfixop containing a factor
+\ 4. A function call
+\ 5. An expression inside () parens.
+\ 6. A string literal
+\ 7. pspop()
+\ 8. a typecast followed by an expression
+\ 9. NULL
+: _ ( tok -- ) vmop :init case
+ '(' of isChar?^ ( )
+ \ can be an expression or a typecast
+ nextt dup parseType if ( tok type )
+ \ TODO: actually process the typecast
+ nip parseDeclarator drop nextt ')' expectChar nextt parseExpression
+ else ( tok ) parseExpression nextt ')' expectChar then
+ endof
+ '"' of isChar?^ ( )
+ vmjmp[, here ( jaddr saddr )
+ here 0 c, ['] ," with-stdin< here over - 1- swap c! ( jaddr saddr )
+ const>op ]vmjmp
+ endof
+ S" pspop" of s= ( )
+ nextt '(' expectChar nextt ')' expectChar
+ vmpspop, parsePostfixOp
+ endof
+ S" NULL" of s= 0 const>op endof
+ of uopid ( opid )
+ nextt parseFactor ( opid ) \ vmop is set
+ uopgentbl swap wexec endof
+ of isIdent? ( ) \ lvalue, FunCall or macro
+ r@ findIdent ?dup if ctype>op else
+ r@ sysdict @ find ?dup not if r@ stype abort" not found" then
+ TYPE_VOID to vmop type const>op then
+ parsePostfixOp
+ endof
+ r@ parse if const>op else _err then
+ endcase ;
+current to parseFactor
+
+\ Parse the "right" part of an expression with the leftmost factor and leftmost
+\ binary operator already parsed. We expect vmop to already contain the left
+\ factor.
+: parseRExpr ( binop -- )
+ nextt selop^ parseFactor nextt ( binop tok )
+ \ left factor in vmop^ right factor in vmop
+ dup bopid if ( opleft tok opright )
+ \ another binop! let's apply precedence rules.
+ nip over bopprec over bopprec ( l r lprec rprec ) > if ( l r )
+ \ the right part has more precedence.
+ vmop^ :push ( l r op ) swap parseRExpr vmop^ :pop ( binop )
+ \ vmop has rexpr result, vmop^ has left operator
+ else ( l r ) \ the left part has more precedence
+ selop^ swap bopgentbl swap wexec ( r )
+ \ vmop has result, vmop^ is empty
+ parseRExpr ( ) exit then ( binop )
+ else ( opleft tok ) to nexttputback then ( binop )
+ \ left factor in vmop^ right factor in vmop
+ selop^ bopgentbl swap wexec ;
+
+\ An expression can be 2 things:
+\ 1. a factor
+\ 3. A binaryop containing two expressions.
+: _ ( tok -- ) \ parseExpression
+ \ first tok is always a factor
+ parseFactor nextt ( tok ) \ factor in vmop
+ dup bopid if ( tok binop )
+ nip vmop^ :push swap parseRExpr vmop^ :pop
+ else to nexttputback then ;
+
+current to parseExpression
+
+\ breaks are a list of forward jumps addr that need to be resolved at the end
+\ of the "breakeable" structure.
+10 Stack :new structbind Stack breaks
+: addbreak vmjmp[, breaks :push ;
+: resolvebreaks ( tgtlvl -- )
+ begin ( tgt ) breaks :count over > while breaks :pop ]vmjmp repeat drop ;
+: nobreaks# breaks :count not _assert ;
+
+alias noop parseStatement ( tok -- ) \ forward declaration
+
+: parseStatements ( -- )
+ begin nextt dup '}' isChar? not while parseStatement repeat ( tok ) drop ;
+
+8 stringlist statementnames
+ "{" "return" "if" "for" "pspush" "break" "while" "do"
+8 wordtbl statementhandler ( -- )
+'w parseStatements ( { )
+:w ( return )
+ nextt dup ';' isChar? if \ empty returns are allowed
+ drop else parseExpression read; then vmret, ops$ ;
+:w ( if )
+ nextt '(' expectChar nextt parseExpression nextt ')' expectChar
+ vmjz[, ops$
+ nextt parseStatement ops$
+ nextt dup S" else" s= if ( jump_addr tok )
+ drop vmjmp[, swap ]vmjmp nextt parseStatement ops$
+ else to nexttputback then ( jump_addr )
+ ]vmjmp ;
+:w ( for )
+ breaks :count >r
+ nextt '(' expectChar nextt parseExpression ops$ read; \ initialization
+ here nextt parseExpression read; vmjz[, vmjmp[, ( caddr cjmpz cjmp ) \ control
+ rot here nextt parseExpression nextt ( cjmpz cjmp caddr aaddr )
+ ')' expectChar ops$ swap vmjmp, ( cjmpz cjmp aaddr ) \ adjustment
+ swap ]vmjmp nextt parseStatement ( cjmpz aaddr )
+ vmjmp, ]vmjmp r> resolvebreaks ;
+:w ( pspush )
+ nextt '(' expectChar
+ nextt parseExpression vmpspush,
+ nextt ')' expectChar read; ;
+:w ( break ) addbreak read; ;
+:w ( while )
+ breaks :count >r
+ here nextt '(' expectChar nextt parseExpression nextt ')' expectChar
+ vmjz[, ops$ nextt parseStatement ( wjmp waddr )
+ swap vmjmp, ]vmjmp r> resolvebreaks ;
+:w ( do )
+ breaks :count >r
+ here nextt parseStatement ( daddr )
+ nextt S" while" s= _assert nextt '(' expectChar
+ nextt parseExpression nextt ')' expectChar ( daddr )
+ vmjnz, read; r> resolvebreaks ;
+
+0 value _laststmtid
+: _ ( tok -- ) \ parseStatement
+ dup statementnames sfind dup >r dup 0< if ( tok -1 ) \ V1=stmtidx
+ drop parseExpression read; else nip statementhandler swap wexec then
+ ops$ r> to _laststmtid ;
+current to parseStatement
+
+$100 const MAXCONSTARRAYLEN
+create _constarray MAXCONSTARRAYLEN 1+ CELLSZ * allot
+: parseList ( -- )
+ _constarray CELLSZ + begin ( a )
+ nextt parseFactor vmop :isconst# vmop arg vmop :init swap !+ ( a )
+ nextt dup ',' isChar? while drop repeat ( a tok )
+ '}' expectChar ( a )
+ _constarray - CELLSZ / 1- ( len ) _constarray !
+ _constarray constarray>op ;
+
+\ When there's variable initialization code, it has to come before the prelude
+\ and we jump to it after we've created the stack frame.
+0 value _initcode
+
+: parseDeclInit ( -- ) \ result in vmop
+ nextt dup '{' isChar? if drop parseList else parseExpression then ;
+
+: parseDeclLine ( type -- )
+ parseDeclarator
+ _locvars ?dup if CType :append else to _locvars then begin ( )
+ nextt dup '=' isChar? if ( tok )
+ _initcode not if
+ \ when there is init code, it's possible, because we declare new types,
+ \ that the type arena allocate a new buffer right in the middle of our
+ \ init code. that's bad. To avoid this, we "reserve" an arena buf now.
+ cctypearena Arena :reserve
+ here to _initcode then
+ drop parseDeclInit
+ selop^ _locvars llend ctype>op vm=, ops$ nextt then ( tok )
+ dup ';' isChar? not while ( tok )
+ ',' expectChar _locvars llend CType type parseDeclarator ( ctype )
+ _locvars CType :append repeat ( tok ) drop ;
+
+: parseFunction ( ctype -- )
+ dup addSymbol 0 to _locvars 0 to _initcode to _curfunc ( )
+ \ Let's parse function body
+ nextt '{' expectChar begin ( )
+ nextt dup parseType while ( tok type ) nip parseDeclLine repeat ( tok )
+ to nexttputback
+ _initcode if vmjmp[, >r then
+ _curfunc CType :static? not if sysdict _curfunc CType name entry then ( )
+ here _curfunc to CType offset ( )
+ _curfunc CType :argssize _locvars CType :size vmprelude, ( )
+ _initcode ?dup if vmjmp, r> ]vmjmp then
+ 0 to _laststmtid parseStatements
+ _laststmtid 1 <> if vmret, then \ emit implicit return if needed
+ 0 to _curfunc ;
+
+: parseGlobalDecl ( ctype -- )
+ dup addSymbol STORAGE_MEM over to CType storage
+ dup CType :static? not if \ not static
+ dup CType name NEXTWORD ! create then ( ctype )
+ here over to CType offset ( ctype )
+ nextt dup '=' isChar? if ( ctype tok )
+ drop parseDeclInit vmop loc case
+ VM_CONSTANT of = vmop arg , endof
+ VM_CONSTARRAY of = vmop arg @+ CELLSZ * move, endof
+ _err endcase ops$
+ else to nexttputback dup CType :size allot then ( ctype )
+ nextt dup ',' isChar? if
+ drop CType type parseDeclarator parseGlobalDecl
+ else ';' expectChar drop then ;
+
+\ Begin parsing incoming tokens for a new "element" (a function or a
+\ declaration) and consume tokens until that element is finished parsing. That
+\ element is written to memory at "here".
+: cparse ( tok -- )
+ 0 to curstatic
+ dup S" static" s= if drop nextt 1 to curstatic then
+ parseType _assert ( type )
+ nextt dup ';' isChar? if \ Only a type on a line is fine, carry on
+ 2drop exit then to nexttputback
+ parseDeclarator ( ctype )
+ curstatic if dup CType :static! then
+ _ccdebug if ." parsing: " dup printtype nl> then
+ dup CType :funcsig?
+ if dup parseFunction _ccdebug if
+ ." complete: " dup printtype nl> CType offset here over - spit nl> then
+ else parseGlobalDecl then ( ) ;
diff --git a/fs/comp/c/pp.fs b/fs/comp/c/pp.fs
@@ -0,0 +1,38 @@
+\ C compiler pre-processing
+\ Implements a text-based macro system that expands text on-the-fly in the
+\ tokenizer.
+\ #define creates a new macro, which simply copies the rest of the line in a
+\ buffer. Then, when the tokenizer hits an identifier, before sending it to the
+\ parser, it checks if a macro of the same name exists. If yes, it overrides its
+\ core input (normally "stdin") with the contents of that saved stringm which is
+\ then processed regularly until the end of string, at which point we go back to
+\ our previous feed. This system works recursively.
+\ Macro are stored as null-terminated ranges, *not* regular strings.
+?f<< /lib/arena.fs
+?f<< /comp/c/feed.fs
+
+$400 const MAXMACROSZ
+
+Arena :new structbind Arena _arena
+: _err abort" pp error" ;
+: _assert ( f -- ) not if _err then ;
+
+create macros 0 , 0 c,
+
+: findMacro ( name -- macro-or-0 ) macros find ;
+: ccpp$ _arena :reset 0 macros ! ;
+
+: _expect ( str -- ) c@+ >r begin c@+ ccin = _assert next drop ;
+: _tonws ( -- ) begin ccin# dup ws? while drop repeat ccputback ;
+
+create _buf $100 allot
+: _define ( -- )
+ _tonws _buf 1+ begin ( a ) ccin# dup ws? not while swap c!+ repeat ( a c )
+ drop _buf - 1- ( len ) _buf c! ( )
+ MAXMACROSZ _arena :[
+ macros _buf entry
+ begin ccin# dup LF <> while c, repeat ccputback 0 c,
+ _arena :] drop ;
+
+\ A # has just been read, we're handled control from here.
+: handleDirective ( -- ) S" define " _expect _define ;
diff --git a/fs/comp/c/tok.fs b/fs/comp/c/tok.fs
@@ -1,4 +1,7 @@
\ C compiler tokenization
+?f<< /lib/scratch.fs
+?f<< /comp/c/feed.fs
+?f<< /comp/c/pp.fs
\ Many tokens can be used at once, but they are not supposed to be kept in the
\ long term. When the name needs to be kept around, it must be copied elsewhere.
@@ -6,18 +9,16 @@
$400 Scratchpad :new structbind Scratchpad _pad
0 value curline
+1 value _firstchar
: tokdbg ." Current line: " curline . nl> ;
: _err tokdbg abort" tokenization error" ;
: _assert ( f -- ) not if _err then ;
+
: cctok$ 1 to curline ;
-: ?line+ ( c -- c ) dup LF = if 1 to+ curline then ;
+: ?line+ ( c -- c ) dup LF = if 1 to+ curline 1 to _firstchar then ;
+\ Tokenization
64 const MAXTOKSZ
-0 const TOK_KEYWORD
-1 const TOK_IDENTIFIER
-2 const TOK_CONSTANT
-3 const TOK_STRLIT
-4 const TOK_SYMBOL
31 stringlist keywords "break" "case" "char" "const" "continue" "default" "do"
"double" "else" "enum" "extern" "float" "for" "goto" "if"
@@ -31,9 +32,9 @@ $400 Scratchpad :new structbind Scratchpad _pad
\ with a symbol that is also a 1 char symbol and all 3 chars symbols begin with
\ 2 chars that are also a 2 chars symbol.
\ list of 1 char symbols
-create symbols1 ," +-*/~&<>=[](){}.%^?:;,|^#\"!"
+create symbols1 ," +-*/~&<>=[](){}.%^?:;,|^\"!"
-: isSym1? ( c -- f ) symbols1 27 [c]? 0>= ;
+: isSym1? ( c -- f ) symbols1 26 [c]? 0>= ;
\ list of 2 chars symbols
create symbols2 ," <=>===!=&&||++---><<>>+=-=*=/=%=&=^=|=/**///"
@@ -60,7 +61,7 @@ create _ 10 c, ," 09AZaz__$$"
\ advance to the next non-whitespace and return the char encountered.
\ if end of stream is reached, c is 0
: tonws ( -- c ) begin ( )
- stdin dup 0>= while ?line+ dup ws? while drop repeat
+ ccin dup 0>= while ?line+ dup ws? while drop repeat
( c ) else ( EOF ) drop 0 then ;
: _writesym ( c3? c2? c1 len -- str )
@@ -73,34 +74,37 @@ create _ 10 c, ," 09AZaz__$$"
\ Returns the next token as a string or 0 when there's no more token to consume.
: nextt? ( -- tok-or-0 )
nexttputback ?dup if 0 to nexttputback exit then
- tonws dup not if ( EOF ) exit then ( c ) case
+ tonws dup not if ( EOF ) exit then ( c )
+ 0 to@! _firstchar over '#' = and if drop handleDirective nextt? exit then
+ case ( )
of isSym1? ( )
- r@ stdin 2dup isSym2? if ( c1 c2 )
+ r@ ccin 2dup isSym2? if ( c1 c2 )
2dup is<<>>? if ( c1 c2 )
- stdin dup '=' = if ( c1 c2 '=' )
+ ccin dup '=' = if ( c1 c2 '=' )
rot> swap 3 ( '=' c2 c1 len ) _writesym
else ( c1 c2 c3 )
- StdIn IO :putback swap 2 ( c2 c1 len ) _writesym then
+ ccputback swap 2 ( c2 c1 len ) _writesym then
else swap 2 ( c2 c1 len ) _writesym then
- else ( c1 c2 ) StdIn IO :putback 1 ( c1 len ) _writesym then ( tok )
+ else ( c1 c2 ) ccputback 1 ( c1 len ) _writesym then ( tok )
dup case
S" /*" of s= drop begin ( )
- stdin ?line+ '*' = dup if drop stdin '/' = then until nextt? endof
+ ccin ?line+ '*' = dup if drop ccin '/' = then until nextt? endof
S" //" of s= drop begin ( )
- stdin dup not if ( EOF! ) rdrop exit then LF = until
+ ccin dup not if ( EOF! ) rdrop exit then LF = until
1 to+ curline
nextt? endof
endcase
endof
''' of = \ the char literal is a special case: anything can go in between ''
- stdin stdin ''' = not if _err then ( c ) ''' tuck ( ' c ' ) 3 _writesym
+ ccin ccin ''' = not if _err then ( c ) ''' tuck ( ' c ' ) 3 _writesym
endof
of ident-or-lit? \ identifier or number literal
[ -4 [rcnt] ! ] \ V1=c
MAXTOKSZ _pad :allot dup >r >r \ V2=tok V3=a
0 8b to!+ V3 ( len placeholder ) V1 begin ( c )
- 8b to!+ V3 stdin dup ident-or-lit? not until StdIn IO :putback
- r> ( a ) r> ( a tok ) tuck 1+ - ( tok len ) over c!
+ 8b to!+ V3 ccin dup ident-or-lit? not until ccputback
+ r> ( a ) r> ( a tok ) tuck 1+ - ( tok len ) over c! ( tok )
+ dup findMacro ?dup if nip ccpushmacro nextt? then
endof
_err
endcase ;
diff --git a/fs/comp/c/tree.fs b/fs/comp/c/tree.fs
@@ -1,78 +0,0 @@
-\ Tree structure for the C compiler
-\ The different parts of the C compiler that need a tree structure all use the
-\ same memory layout, which is a series of nodes linked to each other.
-
-struct[ Node
- sfield id
- sfield parent \ 0 if root
- sfield firstchild \ 0 if none
- sfield nextsibling \ 0 if last
- sfield prevsibling \ 0 if first
-
- : :new ( id -- node ) here >r , 16 allot0 r> ;
-
- : :root ( self -- node ) dup parent if parent :root then ;
-
- \ iterate to the next node, descending into children before continuing to
- \ siblings. we stop when we reach the last child of "ref"
- : :next ( ref self -- ref next )
- dup firstchild ?dup if nip else begin ( ref node )
- 2dup = if drop 0 exit then \ if ref==node in the beginning
- dup nextsibling ?dup if nip exit then
- parent 2dup = until drop 0
- then ;
-
- \ Return the next node with the specified id
- : :findnext ( id self -- node )
- swap >r dup begin ( ref n R:id )
- :next dup while dup id r@ <> while repeat then rdrop nip ;
-
- \ Return the parent node with the specified id
- : :findparent ( id self -- node )
- swap >r begin parent dup while dup id r@ <> while repeat then rdrop ;
-
- : :lastchild ( self -- child )
- firstchild 0 swap begin ( cur next )
- ?dup while ( cur next ) nip dup nextsibling repeat ( cur ) ;
-
- : :depth ( self -- n ) firstchild ?dup if :depth 1+ else 0 then ;
-
- : :childcount ( self -- n )
- 0 swap firstchild begin ( res n )
- ?dup while swap 1+ swap nextsibling repeat ;
-
- : :childindex ( child self -- idx )
- swap >r 0 swap firstchild begin ( R:child idx node )
- ?dup while
- r@ over = not while
- swap 1+ swap nextsibling repeat ( R:child idx node )
- drop rdrop else abort" child not found" then ;
-
- \ Add node as a child to self
- : :add ( node self -- )
- 2dup swap to parent ( node parent )
- dup :lastchild ?dup if ( n p lc ) \ add next to last child
- nip ( n lc ) 2dup to nextsibling swap to prevsibling
- else \ add node as first child
- ( n p ) to firstchild then ;
-
- \ Remove self from its parent
- : :remove ( self -- )
- dup parent firstchild over = if
- dup nextsibling over parent to firstchild
- else
- dup nextsibling over prevsibling to nextsibling then
- dup nextsibling if
- dup prevsibling swap nextsibling to prevsibling
- else drop then ;
-
- \ Replace self, in its hierarchy, by n, making self orphan.
- : :replace ( n self -- ) >r >r \ V1=self V2=n
- V1 parent firstchild V1 = if V2 V1 parent to firstchild then
- V1 prevsibling if V2 V1 prevsibling to nextsibling then
- V1 nextsibling if V2 V1 nextsibling to prevsibling then
- 0 V1 to@! parent V2 to parent
- 0 V1 to@! prevsibling V2 to prevsibling
- 0 V1 to@! nextsibling V2 to nextsibling
- rdrop rdrop ;
-]struct
diff --git a/fs/comp/c/type.fs b/fs/comp/c/type.fs
@@ -3,10 +3,11 @@
?f<< /lib/arena.fs
?f<< /lib/meta.fs
?f<< /comp/c/tok.fs
-?f<< /comp/c/macrolo.fs
\ This arena is for local typedefs for a single unit.
Arena :new structbind Arena _arena
+\ needed in pgen.fs
+: cctypearena _arena :self ;
0 value _globalmode \ are we adding a global type?
@@ -131,6 +132,7 @@ struct[ CType
2 of = sfieldw endof
sfield endcase then ( ctype )
llnext repeat ]struct ;
+ : :append ( other self -- ) 2dup :size swap to offset llappend ;
]struct
\ Typedefs are dictionary entries in the "typedefs" dicts, which contain a 4b
@@ -146,8 +148,8 @@ create globaldefs 0 , 0 c,
: findTypedef ( name -- type-or-0 )
dup localdefs find ?dup if nip @ else globaldefs find dup if @ then then ;
-\ Symbols are anything that an Ident node can refer to: function or variable.
-\ This dictionary below contain global symbols of the current unit
+\ The Symbols dict contains functions and global variables (static or not) for
+\ the current unit.
create symbols 0 , 0 c, \ this is a dict link
: addSymbol ( ctype -- )
@@ -214,9 +216,7 @@ alias _err parseDeclarator ( type -- ctype ) \ forward declaration
: _post ( ctype -- ctype )
begin ( ctype ) nextt case
'[' of isChar?^
- nextt dup isIdent? if
- findMacro ?dup _assert Macro :asnum
- else parse _assert then ( ctype nbelem )
+ nextt parse _assert ( ctype nbelem )
nextt ']' expectChar ( ctype nbelem )
over to CType nbelem endof
'(' of isChar?^
diff --git a/fs/doc/cc/usage.txt b/fs/doc/cc/usage.txt
@@ -23,8 +23,7 @@ emptying of local buffers occurs at the beginning of "cc<<".
Writing for DuskCC is the same as writing for another ANSI C compiler, but there
are a few differences:
-* no C preprocessor. We have "#define" (and more), but it's a macro system that
- works with the AST directly.
+* the C preprocessor is similar, but not 100% compliant
* no 64bit types
* no long, redundant with int
* no double, float is always 32b
@@ -69,39 +68,28 @@ int mymax(int a, int b) {
return pspop();
}
-## Macros
+## Pre-processor
-Macros are predefined AST fragments that are inserted in the tree when we refer
-to them (parametrizable fragments is a planned feature, but not implemented
-yet). You can define macros with a "#" family of words which are only valid at
-the top level of the unit source code. Example usage:
+The pre-processor allows you to define text expansion macros which can then be
+used directly in C code. You define a macro with #define:
- #define MYCONST 42
- int foo() { return MYCONST; }
+ #define FOO bar
-When "#define" is called, the C expression following it is parsed and the
-resulting AST is saved and linked to the MYCONST name. Then, during subsequent
-expression parsing, whenever MYCONST is encountered, that AST in inserted.
+The #define directive reads the next identifier, considers it the macro name,
+and then reads the rest of the lines and associates it verbatim with that name.
-Macros have precedence over other identifiers. A variable named MYCONST would be
-shadowed by this macro.
+Then, the tokenizer, whenever it encounters an identifier, checks if it
+corresponds to a macro name before sending it to the parser. If it does, that
+name is not sent to the parser. Instead, the content of the associated macro is
+fed back into the tokenizer as if that content had textually replaced the macro
+name. Parametrized macros are coming, but not yet implemented. Therefore:
-Macro references can be placed anywhere an expression can be, as well as inside
-"[]" brackets in typedefs. In the latter case, however, you can only use
-expressions that resolve to a constant number.
+ #define FOO bar()
+ int FOO { return 42; }
-Another way to define macros is "#forthdef". This word takes the C compiler in
-Forth interpret mode, where you can write arbitrary Forth code. You end this
-mode with "]#", which takes you back in CC mode. During that time, you are
-expected to put an AST node on PS, which is the node to be inserted when
-invoking it. One use of this is to refer to Forth words with names that are not
-valid C identifiers:
+is the exact same equivalent to:
- #forthdef INRANGE S" =><=" Ident :new ]#
- int isinrange(int n, int l, int h) {
- INRANGE(n, l, h);
- return pspop();
- }
+ int bar() { return 42; }
## Linkage and persistence
diff --git a/fs/doc/design/simple.txt b/fs/doc/design/simple.txt
@@ -37,24 +37,9 @@ see some of the complexity associated with computing as unavoidable. It's not.
That is why Forth's approach to simplicity is revolutionary, because it removes
a blindfold.
-A second simplicity factor is pre-processing. The C pre-processor is a very
-important part of the compiler. Without it, C is much less powerful. DuskCC
-doesn't have a pre-processor, but it has a macro system that provide #define and
-friends. This macro system isn't complete yet, but at this moment, it's about 60
-lines of code. When it manages to be powerful enough to be considered an
-adequate replacement to the C pre-processor, I don't think it will be over 300
-lines of code. Tcc's pre-processor is 3900 lines.
-
-Why this difference? While the two approaches are not functionally equivalent
-(one is a textual macro processor and the other manipulates the C AST directly
-and works within the regular C tokenizer and parser). However, the C
-pre-processor forces us to reimplement a big part of tokenizing and parsing
-rules, but in a slightly different manner. When you look at tccpp.c, you can see
-the code is mostly about tokenizing and parsing. It's also striking to see the
-amount of boilerplate that you need when you're processing and spitting text.
-
-DuskCC sidesteps that complexity by piggy-backing on its existing tokenizer and
-parser and manipulate the AST directly.
+(TODO: there used to be a comparison between DuskCC's macro system and tcc's
+pre-processor, but the macro system since changed significantly and that
+comparison didn't hold. Re-compare when the new macro system is completed.)
A third simplicity factor is parsing boilerplate. Tcc's assembler's input is
text formatted in GNU assembler format. This parsing boilerplate is a
diff --git a/fs/lib/arena.fs b/fs/lib/arena.fs
@@ -37,4 +37,6 @@ extends Allocator struct[ Arena
over r@ current <> while
ARENASZ + swap ArenaBuf nextbuf swap repeat ( current res )
swap ArenaBuf buf r> ptr -^ + ;
+ \ don't allocate anything, but ensure that a "next" buf exists.
+ : :reserve current ArenaBuf :next drop ;
]struct
diff --git a/fs/lib/stack.fs b/fs/lib/stack.fs
@@ -13,7 +13,8 @@ struct[ Stack
: :push ( n self -- )
dup :)buf over ptr = if abort" Stack overflow " then
( n self ) to!+ ptr ;
- : :pop ( self -- n )
- dup :buf( over ptr = if abort" Stack underflow " then
- CELLSZ neg over to+ ptr ptr @ ;
+ : :peek' ( self -- 'n )
+ dup :buf( over ptr = if abort" Stack underflow " then ptr CELLSZ - ;
+ : :peek ( self -- n ) :peek' @ ;
+ : :pop ( self -- n ) dup :peek CELLSZ neg rot to+ ptr ;
]struct
diff --git a/fs/lib/tree.fs b/fs/lib/tree.fs
@@ -0,0 +1,79 @@
+\ Tree structure
+\ This was built for the C compiler, but isn't needed anymore. This isn't used
+\ anywhere else, but since it's a general purpose tree with tests and all, let's
+\ keep it around for a while in case we need it at some point.
+
+struct[ Node
+ sfield id
+ sfield parent \ 0 if root
+ sfield firstchild \ 0 if none
+ sfield nextsibling \ 0 if last
+ sfield prevsibling \ 0 if first
+
+ : :new ( id -- node ) here >r , 16 allot0 r> ;
+
+ : :root ( self -- node ) dup parent if parent :root then ;
+
+ \ iterate to the next node, descending into children before continuing to
+ \ siblings. we stop when we reach the last child of "ref"
+ : :next ( ref self -- ref next )
+ dup firstchild ?dup if nip else begin ( ref node )
+ 2dup = if drop 0 exit then \ if ref==node in the beginning
+ dup nextsibling ?dup if nip exit then
+ parent 2dup = until drop 0
+ then ;
+
+ \ Return the next node with the specified id
+ : :findnext ( id self -- node )
+ swap >r dup begin ( ref n R:id )
+ :next dup while dup id r@ <> while repeat then rdrop nip ;
+
+ \ Return the parent node with the specified id
+ : :findparent ( id self -- node )
+ swap >r begin parent dup while dup id r@ <> while repeat then rdrop ;
+
+ : :lastchild ( self -- child )
+ firstchild 0 swap begin ( cur next )
+ ?dup while ( cur next ) nip dup nextsibling repeat ( cur ) ;
+
+ : :depth ( self -- n ) firstchild ?dup if :depth 1+ else 0 then ;
+
+ : :childcount ( self -- n )
+ 0 swap firstchild begin ( res n )
+ ?dup while swap 1+ swap nextsibling repeat ;
+
+ : :childindex ( child self -- idx )
+ swap >r 0 swap firstchild begin ( R:child idx node )
+ ?dup while
+ r@ over = not while
+ swap 1+ swap nextsibling repeat ( R:child idx node )
+ drop rdrop else abort" child not found" then ;
+
+ \ Add node as a child to self
+ : :add ( node self -- )
+ 2dup swap to parent ( node parent )
+ dup :lastchild ?dup if ( n p lc ) \ add next to last child
+ nip ( n lc ) 2dup to nextsibling swap to prevsibling
+ else \ add node as first child
+ ( n p ) to firstchild then ;
+
+ \ Remove self from its parent
+ : :remove ( self -- )
+ dup parent firstchild over = if
+ dup nextsibling over parent to firstchild
+ else
+ dup nextsibling over prevsibling to nextsibling then
+ dup nextsibling if
+ dup prevsibling swap nextsibling to prevsibling
+ else drop then ;
+
+ \ Replace self, in its hierarchy, by n, making self orphan.
+ : :replace ( n self -- ) >r >r \ V1=self V2=n
+ V1 parent firstchild V1 = if V2 V1 parent to firstchild then
+ V1 prevsibling if V2 V1 prevsibling to nextsibling then
+ V1 nextsibling if V2 V1 nextsibling to prevsibling then
+ 0 V1 to@! parent V2 to parent
+ 0 V1 to@! prevsibling V2 to prevsibling
+ 0 V1 to@! nextsibling V2 to nextsibling
+ rdrop rdrop ;
+]struct
diff --git a/fs/tests/comp/c/all.fs b/fs/tests/comp/c/all.fs
@@ -1,7 +1,5 @@
\ Run all CC test suites
-f<< tests/comp/c/tree.fs
f<< tests/comp/c/type.fs
-f<< tests/comp/c/ast.fs
f<< tests/comp/c/vm.fs
f<< tests/comp/c/cc.fs
f<< tests/comp/c/lib.fs
diff --git a/fs/tests/comp/c/ast.fs b/fs/tests/comp/c/ast.fs
@@ -1,16 +0,0 @@
-?f<< tests/harness.fs
-?f<< comp/c/ast.fs
-testbegin
-\ Tests for the C compiler AST
-: _parse cctypes$ ccast$ nextt parseUnit ;
-current with-stdin< short retconst() {
- return 42 ;
-}
-
-dup Node id AST_FUNCTION #eq ( fnode )
-dup Function name S" retconst" #s=
-Node firstchild dup Node id AST_STATEMENTS #eq ( snode )
-Node firstchild dup Node id AST_RETURN #eq ( rnode )
-Node firstchild dup Node id AST_CONSTANT #eq ( cnode )
-Constant value 42 #eq
-testend
diff --git a/fs/tests/comp/c/cc.fs b/fs/tests/comp/c/cc.fs
@@ -39,8 +39,6 @@ globalinc 1238 #eq
42 142 sysword 142 #eq
42 142 funcsig 142 #eq
capture helloworld S" Hello World!" #s=
-42 40 50 isinrange 1 #eq
-42 30 40 isinrange 0 #eq
5 forsum 10 #eq
1 multret 1 #eq
42 multret 32 #eq
@@ -83,6 +81,7 @@ structop5 42 #eq
structop6 54 #eq
structop7 42 #eq
12 42 structop8 54 #eq
+cond2 scnt not # \ don't crash or leak
opwidth1 42 #eq
opwidth2 42 #eq
opwidth3 $129 #eq
diff --git a/fs/tests/comp/c/test.c b/fs/tests/comp/c/test.c
@@ -133,9 +133,8 @@ int array() {
return *a + a[1] - *(a+2);
}
-static int global1 = 1234;
#define GLOB2SZ 3
-static int global2[GLOB2SZ] = {4, 5, 6};
+static int global1 = 1234, global2[GLOB2SZ] = {4, 5, 6};
int global() {
return global1;
@@ -159,15 +158,6 @@ int funcsig(int a, int b) {
void helloworld() {
stype("Hello World!");
}
-// Now let's put all this together an start calling fancy forth words!
-// Here, we see the power of macros in action. Let's say we want to call the
-// system word "=><=". It's not a valid C identifier, right? ok, but what about
-// using macros to trick the parser into accepting it?
-#forthdef INRANGE S" =><=" Ident :new ]#
-int isinrange(int n, int l, int h) {
- INRANGE(n, l, h);
- return pspop();
-}
int forsum(int n) {
int i;
int r = 0;
@@ -375,6 +365,10 @@ void cond1() {
int x = 42;
if (x==0) x++; else x--;
}
+
+// Having a return statement in a conditional, if nothing came after it, would
+// prevent the parent from having an implicit return.
+void cond2() { if (0) return; }
// The forth VM used to assign to the SF in the wrong width
short opwidth1() {
short x = 42;
diff --git a/fs/tests/comp/c/tree.fs b/fs/tests/comp/c/tree.fs
@@ -1,64 +0,0 @@
-?f<< tests/harness.fs
-?f<< comp/c/tree.fs
-testbegin
-\ C compiler tree unit
-
-1 Node :new value n1
-n1 Node id 1 #eq
-
-2 Node :new value n2
-n2 n1 Node :add
-n2 Node id 2 #eq
-n2 Node parent n1 #eq
-n2 Node nextsibling 0 #eq
-n2 Node prevsibling 0 #eq
-n2 Node firstchild 0 #eq
-n1 Node firstchild n2 #eq
-
-3 Node :new value n3
-n3 n1 Node :add
-n3 Node id 3 #eq
-n3 Node parent n1 #eq
-n3 Node nextsibling 0 #eq
-n3 Node prevsibling n2 #eq
-n3 Node firstchild 0 #eq
-n1 Node firstchild n2 #eq
-n1 Node :lastchild n3 #eq
-
-4 Node :new value n4
-n4 n2 Node :add
-n4 Node id 4 #eq
-n4 Node parent n2 #eq
-n4 Node nextsibling 0 #eq
-n4 Node prevsibling 0 #eq
-n2 Node firstchild n4 #eq
-
-n1 Node :depth 2 #eq
-n2 Node :depth 1 #eq
-n3 Node :depth 0 #eq
-n4 Node :depth 0 #eq
-
-n1 Node :childcount 2 #eq
-n2 Node :childcount 1 #eq
-n3 Node :childcount 0 #eq
-n4 Node :childcount 0 #eq
-
-: traverse ( node -- )
- dup begin dup Node id dup .x1 c, Node :next ?dup not until drop ;
-create expected 1 c, 2 c, 4 c, 3 c,
-create res n1 traverse
-expected res 4 []= #
-
-4 n1 Node :findnext n4 #eq
-4 n4 Node :findnext 0 #eq
-n3 n1 Node :childindex 1 #eq
-
-create expected 2 c, 4 c,
-create res n2 traverse
-expected res 2 []= #
-
-n2 Node :remove
-create expected 1 c, 3 c,
-create res n1 traverse
-expected res 2 []= #
-testend
diff --git a/fs/tests/lib/all.fs b/fs/tests/lib/all.fs
@@ -11,3 +11,4 @@ f<< /tests/lib/meta.fs
f<< /tests/lib/arena.fs
f<< /tests/lib/math.fs
f<< /tests/lib/stack.fs
+f<< /tests/lib/tree.fs
diff --git a/fs/tests/lib/stack.fs b/fs/tests/lib/stack.fs
@@ -13,6 +13,8 @@ s :empty? #
4 expectabort _
s :count 3 #eq
s :pop 3 #eq
+s :peek 2 #eq
+s :peek' @ 2 #eq
s :pop 2 #eq
s :pop 1 #eq
: _ s :pop ;
diff --git a/fs/tests/lib/tree.fs b/fs/tests/lib/tree.fs
@@ -0,0 +1,64 @@
+?f<< /tests/harness.fs
+?f<< /lib/tree.fs
+testbegin
+\ Tree tests
+
+1 Node :new value n1
+n1 Node id 1 #eq
+
+2 Node :new value n2
+n2 n1 Node :add
+n2 Node id 2 #eq
+n2 Node parent n1 #eq
+n2 Node nextsibling 0 #eq
+n2 Node prevsibling 0 #eq
+n2 Node firstchild 0 #eq
+n1 Node firstchild n2 #eq
+
+3 Node :new value n3
+n3 n1 Node :add
+n3 Node id 3 #eq
+n3 Node parent n1 #eq
+n3 Node nextsibling 0 #eq
+n3 Node prevsibling n2 #eq
+n3 Node firstchild 0 #eq
+n1 Node firstchild n2 #eq
+n1 Node :lastchild n3 #eq
+
+4 Node :new value n4
+n4 n2 Node :add
+n4 Node id 4 #eq
+n4 Node parent n2 #eq
+n4 Node nextsibling 0 #eq
+n4 Node prevsibling 0 #eq
+n2 Node firstchild n4 #eq
+
+n1 Node :depth 2 #eq
+n2 Node :depth 1 #eq
+n3 Node :depth 0 #eq
+n4 Node :depth 0 #eq
+
+n1 Node :childcount 2 #eq
+n2 Node :childcount 1 #eq
+n3 Node :childcount 0 #eq
+n4 Node :childcount 0 #eq
+
+: traverse ( node -- )
+ dup begin dup Node id dup .x1 c, Node :next ?dup not until drop ;
+create expected 1 c, 2 c, 4 c, 3 c,
+create res n1 traverse
+expected res 4 []= #
+
+4 n1 Node :findnext n4 #eq
+4 n4 Node :findnext 0 #eq
+n3 n1 Node :childindex 1 #eq
+
+create expected 2 c, 4 c,
+create res n2 traverse
+expected res 2 []= #
+
+n2 Node :remove
+create expected 1 c, 3 c,
+create res n1 traverse
+expected res 2 []= #
+testend