commit 97adf21009ce2441ee87d0d322535a50bf376b08
parent cf4288984b845e4aea1d728c9131494ad2f8bc8d
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Mon, 18 Jul 2022 14:12:46 -0400
cc: add macros
Diffstat:
6 files changed, 105 insertions(+), 20 deletions(-)
diff --git a/fs/cc/ast.fs b/fs/cc/ast.fs
@@ -1,6 +1,7 @@
\ C compiler Abstract Syntax Tree
\ An abstract syntax tree, AST, is a hierarchical structure of nodes
\ representing the constructs found in a C source file. See tree.fs for structure.
+?f<< lib/with.fs
?f<< lib/wordtbl.fs
?f<< cc/tok.fs
?f<< cc/tree.fs
@@ -57,6 +58,9 @@ create bopsprectbl 1 c, 1 c, 0 c, 0 c, 2 c, 2 c, 3 c, 3 c, 3 c, 3 c,
16 const AST_PSPUSH
17 const AST_PSPOP
+\ Node helpers: create a node of the given AST type, then append its single
+\ data cell (the value or the name) with ",".
+: Constant ( n -- node ) AST_CONSTANT createnode swap , ;
+: Ident ( name -- node ) AST_IDENT createnode swap , ;
+
\ It's important that decl.name and func.name have the same offset. Poor man's
\ polymorphism...
NODESZ ufield ast.decl.name
@@ -188,6 +192,23 @@ ASTIDCNT wordtbl astdatatbl ( node -- node )
: expectChar ( tok c -- ) isChar? _assert ;
: read; ( -- ) nextt ';' expectChar ;
+\ Macros. See doc/cc
+\ _pslvl is negative while no macro is running. While one runs, it holds the PS
+\ level that "]#" must find when the macro closes.
+-1 value _pslvl \ PS level at last #[
+
+\ Read and run words one at a time, with in< pointed at cc<, until "]#" has set
+\ _pslvl back to a negative value.
+\ NOTE(review): with[ ... ]with presumably restores in< afterwards; confirm in
+\ lib/with.fs.
+: runmacro
+ ['] cc< to' in< with[ begin word runword _pslvl 0< until ]with ;
+
+\ Open a macro: record the PS level expected at closing time, then run it.
+\ #[0 expects the PS to end at its current level; #[1 expects exactly one extra
+\ item on it (the node produced by the macro).
+: #[0 scnt to _pslvl runmacro ;
+: #[1 scnt 1+ to _pslvl runmacro ;
+
+\ Close a macro: abort if the PS level differs from the one recorded by "#[".
+\ Otherwise set _pslvl back to -1, which is what ends the loop in runmacro.
+: ]#
+ scnt _pslvl - ?dup if abort" PS imbalance during macros" then
+ -1 to _pslvl ;
+
+\ Shortcuts: build (or attach) a node, then close the macro with "]#".
+: c]# ( n -- node ) Constant ]# ;
+: i]# ( name -- node ) Ident ]# ;
+: +]# ( parent node -- parent ) over addnode ]# ;
+
\ Parse words
alias noop parseExpression ( tok -- node ) \ forward declaration
@@ -199,7 +220,7 @@ alias noop parseExpression ( tok -- node ) \ forward declaration
of isIdent? ( lnode )
AST_IDENT createnode swap , ( lnode inode ) over addnode endof
of parse ( lnode n )
- AST_CONSTANT createnode swap , ( lnode cnode ) over addnode endof
+ Constant ( lnode cnode ) over addnode endof
_err
endcase
nextt case
@@ -256,6 +277,7 @@ alias noop parseExpression ( tok -- node ) \ forward declaration
nextt '(' expectChar nextt ')' expectChar
AST_PSPOP createnode parsePostfixOp
endof
+ S" #[" of s= #[1 ( node ) parsePostfixOp endof
of uopid ( opid )
AST_UNARYOP createnode swap , ( opnode )
nextt parseFactor over addnode ( opnode ) endof
@@ -263,7 +285,7 @@ alias noop parseExpression ( tok -- node ) \ forward declaration
AST_IDENT createnode r@ , ( inode ) parsePostfixOp
endof
( case else ) \ Constant
- r@ parse if AST_CONSTANT createnode swap , else _err then
+ r@ parse if Constant else _err then
endcase ;
\ An expression can be 2 things:
@@ -377,6 +399,7 @@ current to parseStatements
\\ Parse the next element in a Unit node
: parseUnit ( unitnode tok -- )
+ dup S" #[" s= if drop #[0 drop exit then
parseType _assert ( unode type ) parseType* ( unode type tok )
expectIdent ( unode type name ) rot nextt case ( type name unode )
S" (" of s=
@@ -391,5 +414,4 @@ current to parseStatements
: parseast ( -- )
AST_UNIT createnode dup to curunit
- nextt? ?dup not if exit then begin ( unitnode tok )
- over swap parseUnit ( unitnode ) nextt? ?dup not until ( unitnode ) drop ;
+ \ Hand each token to parseUnit until nextt? yields 0, then drop the unit node
+ \ from PS (it was stored in curunit above).
+ begin ( unode ) nextt? ?dup while over swap parseUnit repeat drop ;
diff --git a/fs/cc/tok.fs b/fs/cc/tok.fs
@@ -24,15 +24,15 @@
\ with a symbol that is also a 1 char symbol and all 3 chars symbols begin with
\ 2 chars that are also a 2 chars symbol.
\ list of 1 char symbols
-create symbols1 ," +-*/~&<>=[](){}.%^?:;,|^" '"' c,
+create symbols1 ," +-*/~&<>=[](){}.%^?:;,|^#" '"' c,
-: isSym1? ( c -- f ) symbols1 25 [c]? 0>= ;
+\ 26 = the 25 chars of the string above plus the double quote added with c,
+\ Keep it in sync with symbols1.
+\ NOTE(review): '^' is listed twice above; harmless for the lookup, but it is
+\ part of the count.
+: isSym1? ( c -- f ) symbols1 26 [c]? 0>= ;
\ list of 2 chars symbols
-create symbols2 ," <=>===!=&&||++---><<>>+=-=*=/=%=&=^=|=/**///"
+create symbols2 ," <=>===!=&&||++---><<>>+=-=*=/=%=&=^=|=/**///#["
+\ The loop count below (23) is the number of 2-char pairs in symbols2. Keep both
+\ in sync when adding a symbol.
: isSym2? ( c1 c2 -- f )
- A>r 22 >r symbols2 >A begin ( c1 c2 )
+ A>r 23 >r symbols2 >A begin ( c1 c2 )
over Ac@+ = over Ac@+ = and if 2drop r~ r>A 1 exit then
next 2drop 0 r>A ;
diff --git a/fs/doc/cc.txt b/fs/doc/cc.txt
@@ -184,3 +184,62 @@ int mymax(int a, int b) {
// don't use a or b before having called pspop(), they're broken.
return pspop();
}
+
+## Macros
+
+Macros in Dusk's CC are simply markers inside which arbitrary Forth code is
+interpreted. Those markers are #[ and ]#. Those markers are executed during the
+AST generation phase, which means that you can arbitrarily modify the AST at any
+point during parsing.
+
+A common case with C macros is the definition and reuse of constants. That's how
+it looks:
+
+#[ 42 const FOOBAR ]#
+int foo() {
+ return #[ FOOBAR Constant ]# ;
+}
+
+AST nodes are created with "createnode" accompanied by one of the AST_*
+constants, or with the use of a "node helper" word, such as "Constant".
+
+Because macros can modify the AST, they can only be inserted at certain
+designated places, known as "hash (#) bars". These are:
+
+* In a Unit context (in between functions)
+* In a Statements context (inside {}, in between statements)
+* Replacing a "factor" AST element, that is:
+ 1. A constant
+ 2. A Lvalue (AST_IDENT)
+ 3. A unaryop/postfixop containing a factor
+ 4. A function call
+ 5. An expression inside () parens.
+ 6. A string literal
+ 7. pspop()
+
+In any other place, "#[" will be a parse error.
+
+In the first two cases, the signature of the macro is ( node -- node ). By using
+PS TOS, you can add a node to either the active Unit or Statements.
+
+The third case has a signature ( -- node ), that is, you are expected to put a
+node that is a valid factor. It will then be added wherever the factor was
+expected. It will even have postfix AST rules applied to it, which opens nice
+doors. For example, if your macro returns a simple AST_IDENT, then right after
+the macro you can add parens to make it into a function call.
+
+In some cases, you might have to wrap your macro in () to "force" an expression.
+For example, "return 2 + #[ FOOBAR Constant ]# ;" will generate a parse error
+because what is expected there is a factor, not an expression. You can work
+around this with "return 2 + (#[ FOOBAR Constant ]# );".
+
+When a macro begins, PS level is recorded. If it doesn't end with the correct
+PS size, an error is raised.
+
+Macro opening symbol, "#[", obeys C tokenization rules, but the closing one,
+"]#", obeys Forth tokenization rules, so it has to be followed by a space.
+
+There are "shortcut words" for closing a macro:
+
+c]# --> Constant ]#
+i]# --> Ident ]#
++]# --> over addnode ]#
diff --git a/fs/lib/str.fs b/fs/lib/str.fs
@@ -1,4 +1,4 @@
-\ String utilities
+\ String/range utilities
\\ maximum size of strings (including size byte)
$100 value STR_MAXSZ
@@ -8,6 +8,12 @@ $100 value STR_MAXSZ
\ "skip" str, that is, return the address following its last char
: s) ( str -- a ) c@+ + ;
+\\ index of "c" inside range "a u". -1 if not found
+: [c]? ( c a u -- i )
+ \ Empty range: nothing to search. Otherwise save A, keep "a" on RS for the
+ \ final index computation, put "u" on RS as the loop counter and load "a"
+ \ into A.
+ ?dup not if 2drop -1 exit then A>r over >r >r >A ( c )
+ \ Scan the range; "leave" cuts the loop short on the first match.
+ begin dup Ac@+ = if leave then next ( c )
+ \ A is now one past the last char read. Step back and compare that char again
+ \ to tell a match (index = A - a) from an exhausted range (-1). Either way,
+ \ the saved "a" is removed from RS and A is restored.
+ A- Ac@ = if A> r> - ( i ) else r~ -1 then r>A ;
+
\\ append character to end of string
: sappend ( c str -- ) tuck s) c! dup c@ 1+ swap c! ;
diff --git a/fs/tests/cc/test.c b/fs/tests/cc/test.c
@@ -1,7 +1,10 @@
/* test a few simple C constructs */
+
+#[ 42 const MYCONST ]#
+
// just return a constant
int retconst() {
- return 42;
+ return #[ MYCONST c]# ;
}
// test unary op and that we don't require whitespace around symbols
int neg() {return -$2a;}
@@ -120,9 +123,11 @@ void helloworld() {
stype("Hello World!");
}
// Now let's put all this together and start calling fancy forth words!
+// Here, we see the power of macros in action. Let's say we want to call the
+// system word "=><=". It's not a valid C identifier, right? ok, but what about
+// using macros to trick the parser into accepting it?
int isinrange(int n, int l, int h) {
- find("=><=");
- pspop()(n, l, h);
+ #[ S" =><=" i]# (n, l, h);
return pspop();
}
int forloop(int a, int b) {
diff --git a/fs/xcomp/bootlo.fs b/fs/xcomp/bootlo.fs
@@ -14,6 +14,7 @@
: again compile (br) , ; immediate
: until compile (?br) , ; immediate
: next compile (next) , ; immediate
+\ Replace the RS item sitting under leave's own return address with 1.
+\ Presumably that item is the caller's loop counter, so the enclosing "next"
+\ stops looping the next time it is reached.
+: leave r> r~ 1 >r >r ;
: code word entry ;
: create code compile (cell) ;
: value code compile (val) , ;
@@ -72,9 +73,6 @@ $20 const SPC $0d const CR $0a const LF $08 const BS
: ." [compile] S" compile stype ; immediate
: abort" [compile] ." compile abort ; immediate
-\ Flow control
-: leave r> r~ 1 >r >r ;
-
\ while..repeat
: while [compile] if swap ; immediate
: repeat [compile] again [compile] then ; immediate
@@ -95,11 +93,6 @@ alias else endof immediate
: endcase ( then-stopgap jump1? jump2? ... jumpn? -- )
?dup if begin [compile] then ?dup not until then compile r~ ; immediate
-\ Sequences
-: [c]? ( c a u -- i )
- ?dup not if 2drop -1 exit then A>r over >r >r >A ( c )
- begin dup Ac@+ = if leave then next ( c )
- A- Ac@ = if A> r> - ( i ) else r~ -1 then r>A ;
\ Return whether strings s1 and s2 are equal
: s= ( s1 s2 -- f ) over c@ 1+ []= ;