duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit 97adf21009ce2441ee87d0d322535a50bf376b08
parent cf4288984b845e4aea1d728c9131494ad2f8bc8d
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Mon, 18 Jul 2022 14:12:46 -0400

cc: add macros

Diffstat:
M fs/cc/ast.fs | 30 ++++++++++++++++++++++++++----
M fs/cc/tok.fs | 8 ++++----
M fs/doc/cc.txt | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M fs/lib/str.fs | 8 +++++++-
M fs/tests/cc/test.c | 11 ++++++++---
M fs/xcomp/bootlo.fs | 9 +--------
6 files changed, 105 insertions(+), 20 deletions(-)

diff --git a/fs/cc/ast.fs b/fs/cc/ast.fs @@ -1,6 +1,7 @@ \ C compiler Abstract Syntax Tree \ An abstract syntax tree, AST, is a hierarchical structure of nodes \ representing the nodes found in a C source file. See tree.fs for structure. +?f<< lib/with.fs ?f<< lib/wordtbl.fs ?f<< cc/tok.fs ?f<< cc/tree.fs @@ -57,6 +58,9 @@ create bopsprectbl 1 c, 1 c, 0 c, 0 c, 2 c, 2 c, 3 c, 3 c, 3 c, 3 c, 16 const AST_PSPUSH 17 const AST_PSPOP +: Constant ( n -- node ) AST_CONSTANT createnode swap , ; +: Ident ( name -- node ) AST_IDENT createnode swap , ; + \ It's important that decl.name and func.name have the same offset. Poor man's \ polymorphism... NODESZ ufield ast.decl.name @@ -188,6 +192,23 @@ ASTIDCNT wordtbl astdatatbl ( node -- node ) : expectChar ( tok c -- ) isChar? _assert ; : read; ( -- ) nextt ';' expectChar ; +\ Macros. See doc/cc +-1 value _pslvl \ PS level at last #[ + +: runmacro + ['] cc< to' in< with[ begin word runword _pslvl 0< until ]with ; + +: #[0 scnt to _pslvl runmacro ; +: #[1 scnt 1+ to _pslvl runmacro ; + +: ]# + scnt _pslvl - ?dup if abort" PS imbalance during macros" then + -1 to _pslvl ; + +: c]# Constant ]# ; +: i]# Ident ]# ; +: +]# over addnode ]# ; + \ Parse words alias noop parseExpression ( tok -- node ) \ forward declaration @@ -199,7 +220,7 @@ alias noop parseExpression ( tok -- node ) \ forward declaration of isIdent? 
( lnode ) AST_IDENT createnode swap , ( lnode inode ) over addnode endof of parse ( lnode n ) - AST_CONSTANT createnode swap , ( lnode cnode ) over addnode endof + Constant ( lnode cnode ) over addnode endof _err endcase nextt case @@ -256,6 +277,7 @@ alias noop parseExpression ( tok -- node ) \ forward declaration nextt '(' expectChar nextt ')' expectChar AST_PSPOP createnode parsePostfixOp endof + S" #[" of s= #[1 ( node ) parsePostfixOp endof of uopid ( opid ) AST_UNARYOP createnode swap , ( opnode ) nextt parseFactor over addnode ( opnode ) endof @@ -263,7 +285,7 @@ alias noop parseExpression ( tok -- node ) \ forward declaration AST_IDENT createnode r@ , ( inode ) parsePostfixOp endof ( case else ) \ Constant - r@ parse if AST_CONSTANT createnode swap , else _err then + r@ parse if Constant else _err then endcase ; \ An expression can be 2 things: @@ -377,6 +399,7 @@ current to parseStatements \\ Parse the next element in a Unit node : parseUnit ( unitnode tok -- ) + dup S" #[" s= if drop #[0 drop exit then parseType _assert ( unode type ) parseType* ( unode type tok ) expectIdent ( unode type name ) rot nextt case ( type name unode ) S" (" of s= @@ -391,5 +414,4 @@ current to parseStatements : parseast ( -- ) AST_UNIT createnode dup to curunit - nextt? ?dup not if exit then begin ( unitnode tok ) - over swap parseUnit ( unitnode ) nextt? ?dup not until ( unitnode ) drop ; + begin ( unode ) nextt? ?dup while over swap parseUnit repeat drop ; diff --git a/fs/cc/tok.fs b/fs/cc/tok.fs @@ -24,15 +24,15 @@ \ with a symbol that is also a 1 char symbol and all 3 chars symbols begin with \ 2 chars that are also a 2 chars symbol. \ list of 1 char symbols -create symbols1 ," +-*/~&<>=[](){}.%^?:;,|^" '"' c, +create symbols1 ," +-*/~&<>=[](){}.%^?:;,|^#" '"' c, -: isSym1? ( c -- f ) symbols1 25 [c]? 0>= ; +: isSym1? ( c -- f ) symbols1 26 [c]? 
0>= ; \ list of 2 chars symbols -create symbols2 ," <=>===!=&&||++---><<>>+=-=*=/=%=&=^=|=/**///" +create symbols2 ," <=>===!=&&||++---><<>>+=-=*=/=%=&=^=|=/**///#[" : isSym2? ( c1 c2 -- f ) - A>r 22 >r symbols2 >A begin ( c1 c2 ) + A>r 23 >r symbols2 >A begin ( c1 c2 ) over Ac@+ = over Ac@+ = and if 2drop r~ r>A 1 exit then next 2drop 0 r>A ; diff --git a/fs/doc/cc.txt b/fs/doc/cc.txt @@ -184,3 +184,62 @@ int mymax(int a, int b) { // don't use a or b before having called pspop(), they're broken. return pspop(); } + +## Macros + +Macros in Dusk's CC are simply markers inside which arbitrary Forth code is +interpreted. Those markers are #[ and ]#. Those markers are executed during the +AST generation phase, which means that you can arbitrarily modify the AST at any +point during parsing. + +A common case with C macros is the definition and reuse of constants. This is how +it looks: + +#[ 42 const FOOBAR ]# +int foo() { + return #[ FOOBAR Constant ]# ; +} + +AST nodes are created with "createnode" accompanied by one of the AST_* +constants, or with the use of a "node helper" word, such as "Constant". + +Because macros can modify the AST, they can only be inserted at certain +designated places, known as "hash (#) bars". These are: + +* In a Unit context (in between functions) +* In a Statements context (inside {}, in between statements) +* Replacing a "factor" AST element, that is: + 1. A constant + 2. An Lvalue (AST_IDENT) + 3. A unaryop/postfixop containing a factor + 4. A function call + 5. An expression inside () parens. + 6. A string literal + 7. pspop() + +In any other place, "#[" will be a parse error. + +In the first two cases, the signature of the macro is ( node -- node ). By using +PS TOS, you can add a node to either the active Unit or Statements. + +The third case has a signature ( -- node ), that is, you are expected to put a +node that is a valid factor. It will then be added wherever the factor was +expected.
It will even have postfix AST rules applied to it, which opens nice +doors. For example, if your macro returns a simple AST_IDENT, then right after +the macro you can add parens to make it into a function call. + +In some cases, you might have to wrap your macro in () to "force" an expression. +For example, "return 2 + #[ FOOBAR Constant ]# ;" will generate a parse error +because what is expected there is a factor, not an expression. You can work +around this with "return 2 + (#[ FOOBAR Constant ]# );". + +When a macro begins, PS level is recorded. If it doesn't end with the correct +PS size, an error is raised. + +Macro opening symbol, "#[", obeys C tokenization rules, but the closing one, +"]#", obeys Forth tokenization rules, so it has to be followed by a space. + +There are "shortcut words" for closing a macro: + +c]# --> Constant ]# ++]# --> over addnode ]# diff --git a/fs/lib/str.fs b/fs/lib/str.fs @@ -1,4 +1,4 @@ -\ String utilities +\ String/range utilities \\ maximum size of strings (including size byte) $100 value STR_MAXSZ @@ -8,6 +8,12 @@ $100 value STR_MAXSZ \ "skip" str, that is, return the address following its last char : s) ( str -- a ) c@+ + ; +\\ index of "c" inside range "a u". -1 if not found +: [c]? ( c a u -- i ) + ?dup not if 2drop -1 exit then A>r over >r >r >A ( c ) + begin dup Ac@+ = if leave then next ( c ) + A- Ac@ = if A> r> - ( i ) else r~ -1 then r>A ; + \\ append character to end of string : sappend ( c str -- ) tuck s) c! dup c@ 1+ swap c! ; diff --git a/fs/tests/cc/test.c b/fs/tests/cc/test.c @@ -1,7 +1,10 @@ /* test a few simple C constructs */ + +#[ 42 const MYCONST ]# + // just return a constant int retconst() { - return 42; + return #[ MYCONST c]# ; } // test unary op and that we don't require whitespace around symbols int neg() {return -$2a;} @@ -120,9 +123,11 @@ void helloworld() { stype("Hello World!"); } // Now let's put all this together and start calling fancy forth words! +// Here, we see the power of macros in action.
Let's say we want to call the +// system word "=><=". It's not a valid C identifier, right? ok, but what about +// using macros to trick the parser into accepting it? int isinrange(int n, int l, int h) { - find("=><="); - pspop()(n, l, h); + #[ S" =><=" i]# (n, l, h); return pspop(); } int forloop(int a, int b) { diff --git a/fs/xcomp/bootlo.fs b/fs/xcomp/bootlo.fs @@ -14,6 +14,7 @@ : again compile (br) , ; immediate : until compile (?br) , ; immediate : next compile (next) , ; immediate +: leave r> r~ 1 >r >r ; : code word entry ; : create code compile (cell) ; : value code compile (val) , ; @@ -72,9 +73,6 @@ $20 const SPC $0d const CR $0a const LF $08 const BS : ." [compile] S" compile stype ; immediate : abort" [compile] ." compile abort ; immediate -\ Flow control -: leave r> r~ 1 >r >r ; - \ while..repeat : while [compile] if swap ; immediate : repeat [compile] again [compile] then ; immediate @@ -95,11 +93,6 @@ alias else endof immediate : endcase ( then-stopgap jump1? jump2? ... jumpn? -- ) ?dup if begin [compile] then ?dup not until then compile r~ ; immediate -\ Sequences -: [c]? ( c a u -- i ) - ?dup not if 2drop -1 exit then A>r over >r >r >A ( c ) - begin dup Ac@+ = if leave then next ( c ) - A- Ac@ = if A> r> - ( i ) else r~ -1 then r>A ; \ Return whether strings s1 and s2 are equal : s= ( s1 s2 -- f ) over c@ 1+ []= ;