duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit 180bb94f0f1e24cffbb5772abd8b2261a874e804
parent 8875a29ca72770d70422b8723737aad5428afdb1
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Sat,  5 Nov 2022 11:42:59 -0400

cc: change the way macro works

see doc/cc/usage.

While the "use the power of Forth directly" idea sounds great, when wanting to
add tooling for complex features such as parametrized macro expansion, I
couldn't come up with ways that are more straightforward than a CPP-like syntax.
All ideas I had seemed awkward to me, so I figured it was a winning move to stay
as close as possible to CPP, interface-wise.

The fundamental approach to macros is still the same, but the interface to it
changes slightly and is simplified. The idea is that we have a "#define" that
works mostly like the CPP version (with big caveats, because it is AST-based
rather than text-based), but we also have #forthdef for direct Forth interpreter
access.

Diffstat:
MROADMAP.md | 2++
Mfs/app/cos/cvm.c | 62++++++++++++++++++++++++++++++++------------------------------
Mfs/cc/ast.fs | 16+++++++---------
Mfs/cc/cc.fs | 7++++---
Mfs/cc/macro.fs | 29+++++++++++++++--------------
Afs/cc/macrolo.fs | 37+++++++++++++++++++++++++++++++++++++
Mfs/cc/tok.fs | 17+++++++----------
Mfs/cc/type.fs | 8+++++---
Mfs/doc/cc/usage.txt | 83++++++++++++++++++++++++++++++++-----------------------------------------------
Mfs/doc/design/simple.txt | 25++++++++++++++++++-------
Mfs/tests/cc/test.c | 11++++++-----
11 files changed, 166 insertions(+), 131 deletions(-)

diff --git a/ROADMAP.md b/ROADMAP.md @@ -14,6 +14,8 @@ consider complete: * enum * goto * float +* macro expansion in #define +* #if/#else/#endif * a few little ops here and there * the check phase (anything that is currently understood by the parser is compiled no matter how nonsensical). diff --git a/fs/app/cos/cvm.c b/fs/app/cos/cvm.c @@ -1,18 +1,20 @@ // This unit compiles without error, but it doesn't actually work yet. -#[ - $10000 const MEMSIZE - $fffa const SP_ADDR - $ff00 const RS_ADDR - $fe00 const SYSVARS - \ Port for block reads. Each read or write has to be done in 5 IO writes: - \ 1 - r/w. 1 for read, 2 for write. - \ 2 - blkid MSB - \ 3 - blkid LSB - \ 4 - dest addr MSB - \ 5 - dest addr LSB - $03 const BLK_PORT - 4 const BLKOP_CMD_SZ -]# + +#define MEMSIZE $10000 +#define SP_ADDR $fffa +#define RS_ADDR $ff00 +#define SYSVARS $fe00 +// TODO: allow constant expressions in macros to allow stuff like SYSVARS + $18 +#define SYSVARS_TOPTR $fe18 +/* Port for block reads. Each read or write has to be done in 5 IO writes: + * 1 - r/w. 1 for read, 2 for write. + * 2 - blkid MSB + * 3 - blkid LSB + * 4 - dest addr MSB + * 5 - dest addr LSB + */ +#define BLK_PORT $03 +#define BLKOP_CMD_SZ 4 typedef unsigned char byte; typedef unsigned short word; @@ -21,7 +23,7 @@ typedef void (*IOWR) (byte); typedef void (*VMOP) (); struct COSVM { - byte mem[ #[ MEMSIZE ]# ]; + byte mem[MEMSIZE]; word SP; /* parameter Stack Pointer */ word RS; /* Return Stack pointer */ word IP; /* Interpreter Pointer */ @@ -41,7 +43,7 @@ File *blkfp = NULL; /* Stores blkop command. Bytes flow from left (byte 0) to right (byte 3) * We know we have a full command when last byte is nonzero. After * processing the cmd, we reset blkop to 0. */ -static byte blkop[ #[ BLKOP_CMD_SZ c]# ]; +static byte blkop[BLKOP_CMD_SZ]; /* Read single byte from I/O handler, if set. addr is a word only because of Forth's cell size, but can't actually address more than a byte-full of ports. 
@@ -79,7 +81,7 @@ static void iowr_blk(byte val) if (rw) { blkid = (word)blkop[2] << 8 | (word)blkop[1]; dest = (word)blkop[0] << 8 | (word)val; - memset(blkop, 0, #[ BLKOP_CMD_SZ c]# ); + memset(blkop, 0, BLKOP_CMD_SZ); fseek(blkid*1024, blkfp); if (rw==2) { /* write */ fwrite(&vm.mem[dest], 1024, blkfp); @@ -135,8 +137,8 @@ static void lblxt() { pushRS(vm.IP); vm.IP = pop(); lblnext(); } static void lbldoes() { vm.PC = pop(); push(vm.PC+2); vm.PC = gw(vm.PC); } static void lblval() { word a; - if (vm.mem[ #[ SYSVARS $18 + c]# ]) { // TO? - vm.mem[ #[ SYSVARS $18 + c]# ] = 0; + if (vm.mem[SYSVARS_TOPTR]) { // TO? + vm.mem[SYSVARS_TOPTR] = 0; a = pop(); sw(a, pop()); } else { @@ -197,10 +199,10 @@ static void DIVMOD() { word b = pop(); word a = pop(); push(a % b); push(a / b); } -static void QUIT() { vm.RS = #[ RS_ADDR c]# ; } -static void ABORT() { vm.SP = #[ SP_ADDR c]# ; } -static void RCNT() { push((vm.RS - #[ RS_ADDR c]# ) / 2); } -static void SCNT() { push((#[ SP_ADDR c]# - vm.SP) / 2); } +static void QUIT() { vm.RS = RS_ADDR; } +static void ABORT() { vm.SP = SP_ADDR; } +static void RCNT() { push((vm.RS - RS_ADDR) / 2); } +static void SCNT() { push((SP_ADDR - vm.SP) / 2); } static void BYE() { vm.running = 0; } static void EXIT() { vm.IP = popRS(); } static void CDUP() { word a = peek(); if (a) push(a); } @@ -209,8 +211,8 @@ static void LIT16() { push(gw(vm.IP)); vm.IP+=2; } static void LT() { word b = pop(); word a = pop(); push(a<b); } -#[ 67 const OPCNT ]# -static VMOP ops[ #[ OPCNT ]# ] = { +#define OPCNT 67 +static VMOP ops[OPCNT] = { DUP, DROP, PUSHi, PUSHii, SWAP, OVER, ROT, lblnext, CBR, NEXT, CALLi, JMPi, lblxt, EXIT, CDUP, LIT8, LIT16, JMPii, lbldoes, lblval, NULL, EXECUTE, NULL, NULL, @@ -223,7 +225,7 @@ static VMOP ops[ #[ OPCNT ]# ] = { }; static void opexec(byte op) { - if (op < #[ OPCNT c]# ) { + if (op < OPCNT) { ops[op](); } else { fprintf(op, vm.PC, "Out of bounds op %w. 
PC: %w\n", ConsoleOut()); @@ -233,14 +235,14 @@ static void opexec(byte op) { void COS_init() { - memset(blkop, 0, #[ BLKOP_CMD_SZ c]# ); - vm.SP = #[ SP_ADDR c]# ; - vm.RS = #[ RS_ADDR c]# ; + memset(blkop, 0, BLKOP_CMD_SZ); + vm.SP = SP_ADDR; + vm.RS = RS_ADDR; vm.minSP = vm.SP ; vm.maxRS = vm.RS ; memset(vm.iord, 0, $400); memset(vm.iowr, 0, $400); - vm.iowr[ #[ BLK_PORT c]# ] = iowr_blk; + vm.iowr[BLK_PORT] = iowr_blk; vm.PC = 0; vm.running = 1; } diff --git a/fs/cc/ast.fs b/fs/cc/ast.fs @@ -6,7 +6,7 @@ ?f<< lib/wordtbl.fs ?f<< cc/tok.fs ?f<< cc/tree.fs -?f<< cc/macro.fs +?f<< cc/macrolo.fs ?f<< cc/type.fs \ This arena contains AST structures for the unit being currently parsed. @@ -202,10 +202,6 @@ extends ASTNode struct[ StrLit : :new AST_STRLIT ASTNode :new ; ]struct -\ Macro shortcuts -: c]# Constant :new ]# ; -: i]# Ident :new ]# ; - ASTIDCNT stringlist astidnames "declare" "unit" "function" "return" "constant" "stmts" "unused" "ident" "unaryop" "postop" "binop" "list" "if" "str" "call" "for" "push" "pop" "break" @@ -327,13 +323,13 @@ alias noop parseFactor ( tok -- node ) \ forward declaration nextt '(' expectChar nextt ')' expectChar AST_PSPOP ASTNode :new parsePostfixOp endof - S" #[" of s= #[1 ( node ) parsePostfixOp endof S" NULL" of s= 0 Constant :new endof of uopid ( opid ) UnaryOp :new ( opnode ) nextt parseFactor over Node :add ( opnode ) endof - of isIdent? ( ) \ lvalue or FunCall - r@ Ident :new ( inode ) parsePostfixOp + of isIdent? ( ) \ lvalue, FunCall or macro + r@ findMacro ?dup if Macro ast else r@ Ident :new then ( node ) + parsePostfixOp endof ( case else ) \ Constant r@ parse if Constant :new else _err then @@ -463,7 +459,9 @@ current to parseStatement \\ Parse the next element in a Unit node : parseUnit ( tok -- node-or-0 ) - dup S" #[" s= if drop #[0 0 exit then + dup '#' isChar? 
if + drop nextt ['] MacroOps structdict' find ?dup _assert + execute 0 exit then 0 to curstatic dup S" static" s= if drop nextt 1 to curstatic then parseType _assert ( type ) diff --git a/fs/cc/cc.fs b/fs/cc/cc.fs @@ -2,6 +2,7 @@ ?f<< /cc/vm/vm.fs ?f<< /cc/ttr.fs ?f<< /cc/gen.fs +?f<< /cc/macro.fs : _err ( -- ) abort" CC error" ; : _assert ( f -- ) not if _err then ; @@ -9,15 +10,15 @@ \ Compiles input coming from the stdin alias and writes the \ result to here. Aborts on error. : cc1, ( -- ) - cctypes$ begin ( ) + cctypes$ ccast$ cmacro$ begin ( ) nextt? ?dup while parseUnit ( node-or-0 ) ?dup if _ccdebug if dup printast nl> then ( node ) dup trnode _ccdebug if dup printast nl> then ( node ) - gennode ccast$ then repeat ; + gennode then repeat ; : :c nextt parseUnit ?dup if _ccdebug if dup printast nl> then - dup trnode _ccdebug if dup printast nl> then gennode ccast$ then ; + dup trnode _ccdebug if dup printast nl> then gennode then ; : cc<< ( -- ) ['] cc1, word with-stdin-file ; diff --git a/fs/cc/macro.fs b/fs/cc/macro.fs @@ -1,17 +1,18 @@ -\ CC macros -?f<< /cc/tree.fs +\ CC macros (high part) +?f<< /cc/macrolo.fs +?f<< /cc/ast.fs -\ Macros. 
See doc/cc --1 value _pslvl \ PS level at last #[ +: _err ( -- ) abort" macro error" ; +: _assert ( f -- ) not if _err then ; -: _ begin word runword _pslvl 0< until ; -: runmacro ['] _ with-stdin< ; +struct+[ Macro + : _asnum ( self -- n ) + ast dup Node id AST_CONSTANT = _assert + Constant value ; + current to :asnum -: #[0 scnt to _pslvl runmacro ; -: #[1 scnt 1+ to _pslvl runmacro ; - -: ]# - scnt _pslvl - ?dup if abort" PS imbalance during macros" then - -1 to _pslvl ; - -: +]# over Node :add ]# ; +struct+[ MacroOps + : define ( -- ) + nextt nextt parseExpression ( name exprnode ) + Macro :new swap addMacro ; +]struct diff --git a/fs/cc/macrolo.fs b/fs/cc/macrolo.fs @@ -0,0 +1,37 @@ +\ CC macros (low part) +?f<< /lib/arena.fs +?f<< /cc/tree.fs + +\ Holds defined macros +Arena :new structbind Arena _arena + +struct[ Macro + \ For now, a macro is only a link to an AST, but when parameters are + \ introduced, it will be more. + sfield ast + : :new ( ast -- macro ) CELLSZ _arena :[ , _arena :] ; + alias _err :asnum ( self -- n ) \ forward declaration +]struct + +create macros 0 , 0 c, \ this is a dict link +: addMacro ( macro name -- ) + dup c@ macros rot> ( macro 'dict name len ) + ENTRYSZ + 8 + _arena :[ entry , _arena :] drop ; +: findMacro ( name -- macro-or-0 ) macros find dup if @ then ; + +: cmacro$ _arena :reset 0 macros ! ; + +struct[ MacroOps + -1 value pslvl \ PS level at last runmacro + : _ begin word runword pslvl 0< until ; + : runmacro scnt 1+ to pslvl ['] _ with-stdin< ; + + : forthdef + nextt ( name ) runmacro ( name node ) + Macro :new swap addMacro ; +]struct + +: ]# + scnt MacroOps pslvl - ?dup if abort" PS imbalance during macros" then + -1 to MacroOps pslvl ; + diff --git a/fs/cc/tok.fs b/fs/cc/tok.fs @@ -29,10 +29,10 @@ create symbols1 ," +-*/~&<>=[](){}.%^?:;,|^#\"!" : isSym1? ( c -- f ) symbols1 27 [c]? 
0>= ; \ list of 2 chars symbols -create symbols2 ," <=>===!=&&||++---><<>>+=-=*=/=%=&=^=|=/**///#[" +create symbols2 ," <=>===!=&&||++---><<>>+=-=*=/=%=&=^=|=/**///" : isSym2? ( c1 c2 -- f ) - 8 lshift or symbols2 >r 23 >r begin ( c1+c2 ) + 8 lshift or symbols2 >r 22 >r begin ( c1+c2 ) dup 16b to@+ V1 = if drop rdrop rdrop 1 exit then next drop rdrop 0 ; @@ -63,8 +63,12 @@ create _ 10 c, ," 09AZaz__$$" 4 _pad :allot dup >r ( c3? c2? c1 len a ) over >r c!+ ( c a ) begin c!+ next drop r> ( str ) ; +\ if not 0, next nextt call will fetch token from here +0 value nexttputback + \ Returns the next token as a string or 0 when there's no more token to consume. : nextt? ( -- tok-or-0 ) + nexttputback ?dup if 0 to nexttputback exit then tonws dup not if ( EOF ) exit then ( c ) case of isSym1? ( ) r@ stdin 2dup isSym2? if ( c1 c2 ) @@ -96,14 +100,7 @@ create _ 10 c, ," 09AZaz__$$" _err endcase ; -\ if not 0, next nextt call will fetch token from here -0 value nexttputback - -\ Fetch the next token, aborting if there's none. Also, apply the "putback" -\ logic. -: nextt ( -- tok ) - nexttputback ?dup if 0 to nexttputback exit then - nextt? ?dup not if abort" expecting token!" then ; +: nextt ( -- tok ) nextt? dup _assert ; : expectConst ( tok -- n ) dup parse if nip else _err then ; : isIdent? ( tok -- f ) diff --git a/fs/cc/type.fs b/fs/cc/type.fs @@ -1,8 +1,8 @@ \ C compiler types ?f<< /lib/str.fs -?f<< lib/arena.fs +?f<< /lib/arena.fs ?f<< /cc/tok.fs -?f<< /cc/macro.fs +?f<< /cc/macrolo.fs \ This arena is for local typedefs for a single unit. Arena :new structbind Arena _arena @@ -213,7 +213,9 @@ alias _err parseDeclarator ( type -- ctype ) \ forward declaration : _post ( ctype -- ctype ) begin ( ctype ) nextt case '[' of isChar?^ - nextt dup S" #[" s= if drop #[1 else parse _assert then + nextt dup isIdent? 
if + findMacro ?dup _assert Macro :asnum + else parse _assert then ( ctype nbelem ) nextt ']' expectChar ( ctype nbelem ) over to CType nbelem endof '(' of isChar?^ diff --git a/fs/doc/cc/usage.txt b/fs/doc/cc/usage.txt @@ -23,7 +23,8 @@ emptying of local buffers occurs at the beginning of "cc<<". Writing for DuskCC is the same as writing for another ANSI C compiler, but there are a few differences: -* no C preprocessor, the preprocessor is Forth itself, through macros. +* no C preprocessor. We have "#define" (and more), but it's a macro system that + works with the AST directly. * no 64bit types * no long, redundant with int * no double, float is always 32b @@ -70,55 +71,37 @@ int mymax(int a, int b) { ## Macros -Macros in Dusk's CC are simply markers inside which arbitrary Forth code is -interpreted. Those markers are #[ and ]#. Those markers are executed during the -AST generation phase, which means that you can arbitrarily modify the AST at any -point during parsing. - -A common case with C macros is the definition and reuse of constants. Here's how -it looks: - -#[ 42 const FOOBAR ]# -int foo() { - return #[ FOOBAR c]# ; // c]# means "Constant :new ]#" -} - -Because macros can modify the AST, they can only be inserted at certain -designated places, known as "hash (#) bars". These are: - -* In a Unit context (in between functions) -* Replacing a "factor" AST element, which are quite numerous. Some of them: - * A constant - * A Lvalue (AST_IDENT) - * A function call - * An expression -* Inside an array length [] definition. - -In any other place, "#[" will be a parse error. - -In the first case, the signature of the macro is ( node -- node ). By using PS -TOS, you can add a node to the active Unit. - -The second case has a signature ( -- node ), that is, you are expected to put a -node that is in the context you're putting it. It will then be added wherever -the factor was expected. It will even have postfix AST rules applied to it, -which opens nice doors. 
For example, if your macro returns a simple AST_IDENT, -then right after the macro you can add parens to make it into a function call. - -The third case has a signature ( -- number ), with "number" being the number of -elements that the array being defined will have. - -When a macro begins, PS level is recorded. If it doesn't end with the correct -PS size, an error is raised. - -Macro opening symbol, "#[", obeys C tokenization rules, but the closing one, -"]#", obeys Forth tokenization rules, so it has to be followed by a space. - -There are "shortcut words" for closing a macro: - -c]# --> Constant :new ]# -i]# --> Ident :new ]# -+]# --> over Node :add ]# +Macros are predefined AST fragments that are inserted in the tree when we refer +to them (parametrizable fragments is a planned feature, but not implemented +yet). You can define macros with a "#" family of words which are only valid at +the top level of the unit source code. Example usage: + + #define MYCONST 42 + int foo() { return MYCONST; } + +When "#define" is called, the C expression following it is parsed and the +resulting AST is saved and linked to the MYCONST name. Then, during subsequent +expression parsing, whenever MYCONST is encountered, that AST in inserted. + +Macros have precedence over other identifiers. A variable named MYCONST would be +shadowed by this macro. + +Macro references can be placed anywhere an expression can be, as well as inside +"[]" brackets in typedefs. In the latter case, however, you can only use +expressions that resolve to a constant number. + +Another way to define macros is "#forthdef". This word takes the C compiler in +Forth interpret mode, where you can write arbitrary Forth code. You end this +mode with "]#", which takes you back in CC mode. During that time, you are +expected to put an AST node on PS, which is the node to be inserted when +invoking it. 
One use of this is to refer to Forth words with names that are not +valid C identifiers: + + #forthdef INRANGE S" =><=" Ident :new ]# + int isinrange(int n, int l, int h) { + INRANGE(n, l, h); + return pspop(); + } ## Linkage and persistence diff --git a/fs/doc/design/simple.txt b/fs/doc/design/simple.txt @@ -38,13 +38,24 @@ That is why Forth's approach to simplicity is revolutionary, because it removes a blindfold. A second simplicity factor is pre-processing. The C pre-processor is a very -important part of the compiler. Without it, C is much less powerful. Tcc -dedicates 3900 lines of code to the pre-processor. In Dusk CC, it's 16 lines. -Why? because the pre-processor is Forth itself. Sure, it's not quite the CPP -we're used to, but it's in fact much more powerful and extensible. You have the -whole system available to you in those macros. In this case, Forth is simpler by -cleverly piggy-backing on existing logic in ways we're not used to with our UNIX -blindfolds. +important part of the compiler. Without it, C is much less powerful. DuskCC +doesn't have a pre-processor, but it has a macro system that provide #define and +friends. This macro system isn't complete yet, but at this moment, it's about 60 +lines of code. When it manages to be powerful enough to be considered an +adequate replacement to the C pre-processor, I don't think it will be over 300 +lines of code. + +Why this difference? While the two approaches are not functionally equivalent +(one is a textual macro processor and the other manipulates the C AST directly +and works within the regular C tokenizer and parser). However, the C +pre-processor forces us to reimplement a big part of tokenizing and parsing +rules, but in a slightly different manner. When you look at tccpp.c, you can see +the code is mostly about tokenizing and parsing. It's also striking to see the +amount of boilerplate that you need when you're processing text and spitting +text. 
+ +DuskCC sidesteps that complexity by piggy-backing on its existing tokenizer and +parser and manipulate the AST directly. A third simplicity factor is parsing boilerplate. Tcc's assembler's input is text formatted in GNU assembler format. This parsing boilerplate is a diff --git a/fs/tests/cc/test.c b/fs/tests/cc/test.c @@ -1,10 +1,10 @@ /* test a few simple C constructs */ -#[ 42 const MYCONST ]# +#define MYCONST 42 // just return a constant short retconst() { - return #[ MYCONST c]# ; + return MYCONST; } short variables() { short foo = 40, _bar = 2; @@ -127,8 +127,8 @@ int array() { } static int global1 = 1234; -#[ 3 const GLOB2SZ ]# -static int global2[ #[ GLOB2SZ ]# ] = {4, 5, 6}; +#define GLOB2SZ 3 +static int global2[GLOB2SZ] = {4, 5, 6}; int global() { return global1; @@ -156,8 +156,9 @@ void helloworld() { // Here, we see the power of macros in action. Let's say we want to call the // system word "=><=". It's not a valid C identifier, right? ok, but what about // using macros to trick the parser into accepting it? +#forthdef INRANGE S" =><=" Ident :new ]# int isinrange(int n, int l, int h) { - #[ S" =><=" i]# (n, l, h); + INRANGE(n, l, h); return pspop(); } int forsum(int n) {