commit 0708fcc1b5925de17523e7c691689a22f6f79a43
parent 017e3ebac111572b1db0591897a0d1a3a1be7439
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Fri, 3 Jun 2022 20:58:23 -0400
cc: add binary ops
Diffstat:
7 files changed, 145 insertions(+), 33 deletions(-)
diff --git a/fs/asm.fs b/fs/asm.fs
@@ -35,7 +35,7 @@
: srcid tgt _id ;
: mod ( tgt-or-src -- mod ) >> >> $c0 and ;
: is16? ( tgt-or-src -- f ) 10 rshift 1 and ;
-: isimm? ( tgt-or-src -- f ) _id IMM = ;
+: isimm? ( -- f ) src _id IMM = ;
: tgt-or-src! tgt 0< if to tgt else to src then ;
: r! ( reg -- ) $700 or ( mod 3 + 32 ) tgt-or-src! ;
@@ -48,18 +48,19 @@
\ Writing the thing
: prefix, ( -- ) exit
- tgt is16? if $66 c, then src dup isimm? not swap is16? and if $67 c, then ;
+ tgt is16? if $66 c, then src isimm? not swap is16? and if $67 c, then ;
: inh, ( op -- ) c, asm$ ;
: modrm1, ( reg op -- ) \ modrm op with 1 argument
prefix, c, ( reg ) 3 lshift tgtid tgt mod or or ( modrm ) c,
disp? if disp c, then asm$ ;
: modrm2, ( imm? op -- ) \ modrm op with 2 arguments
- prefix, c, tgtid tgt mod or ( modrm ) src isimm?
- if $28 or ( 5 in reg ) c, , else c, then disp? if disp c, then asm$ ;
+ prefix, c, tgtid tgt mod or ( modrm )
+ isimm? if $28 or ( 5 in reg ) c, , else src 3 lshift or c, then
+ disp? if disp c, then asm$ ;
\ operations
-: mov, src isimm? if prefix, $b8 tgtid or c, , asm$ else $89 modrm2, then ;
+: mov, isimm? if prefix, $b8 tgtid or c, , asm$ else $89 modrm2, then ;
: neg, 3 $f7 modrm1, ;
: not, 2 $f7 modrm1, ;
-: sub, $81 modrm2, ;
+: sub, isimm? if $81 else $29 then modrm2, ;
: ret, $c3 inh, ;
diff --git a/fs/cc/ast.fs b/fs/cc/ast.fs
@@ -10,6 +10,7 @@
\ 1b child slots
\ 4b addr of parent element (0 if root)
\ 4b addr of next element (0 if none)
+\ 4b addr of prev element (0 if none)
\ ... maybe data
\ Types
@@ -24,6 +25,7 @@
\ 7 Expression
\ 8 UnaryOp opid
\ 9 Factor A constant or a variable
+\ 10 BinaryOp opid
\ Flags
\ b0 haschildren this element can contain children
@@ -35,11 +37,11 @@
\ means unlimited, other numbers indicate the number of slots. Each time a
\ children is added, the slot is decreased. When 0 is reached, we close it.
-10 value ASTIDCNT
+11 value ASTIDCNT
\ 8 chars per name
create astidnames
," ) unit functionreturn constantstmts args expr "
-," unaryop factor "
+," unaryop factor binop "
0 value curunit \ points to current Unit, the beginning of the AST
0 value lastelem \ last element of the chain
@@ -56,14 +58,36 @@ create astidnames
: cslots- ( elem -- newslots ) dup cslots 1- tuck swap cslots! ;
: haschildren? ( elem -- f ) flags $01 and ;
: parentelem ( elem -- parent ) 3 + @ ;
+: parentelem! ( parent elem -- ) 3 + ! ;
: nextelem ( elem -- next ) 7 + @ ;
-: 'data ( elem -- 'data ) 11 + ;
+: nextelem! ( next elem -- ) 7 + ! ;
+: prevelem ( elem -- prev ) 11 + @ ;
+: prevelem! ( prev elem -- ) 11 + ! ;
+: 'data ( elem -- 'data ) 15 + ;
: intdata ( elem -- n ) 'data @ ;
: strdata ( elem -- sa sl ) 'data c@+ ;
+: nextsibling ( elem -- next )
+ dup parentelem >r begin ( elem )
+ nextelem dup not if abort" no next!" then
+ dup parentelem r@ = until r~ ;
+: prevsibling ( elem -- prev )
+ dup parentelem >r begin ( elem )
+ prevelem dup r@ = if abort" no prev!" then
+ dup parentelem r@ = until r~ ;
: skipnext 1 to _skip ;
-: newelem ( slots flags id -- )
- here lastelem 7 + ! here to lastelem c, c, c, activeelem , 0 ,
- lastelem haschildren? if lastelem to activeelem then ;
+\ is currently active element empty?
+: activeempty? ( -- f ) activeelem lastelem = ;
+\ Return the last child of the active element. To find it, we start from
+\ lastelem and as long its parent is not activeelem, we go to prevelem.
+: lastactivechild ( -- elem )
+ lastelem begin dup parentelem activeelem = if exit then prevelem again ;
+
+: addelem ( elem -- )
+ activeelem over parentelem! lastelem over prevelem!
+ dup lastelem nextelem! dup to lastelem
+ dup haschildren? if to activeelem else drop then ;
+: createelem ( slots flags id -- elem ) here >r c, c, c, 12 allot0 r> ;
+: newelem createelem addelem ;
\ AST elements
: SeqClose ( -- )
@@ -72,8 +96,7 @@ create astidnames
parentelem to activeelem
activeelem cslots- not if SeqClose then ;
: Unit ( -- )
- here to curunit here to lastelem here to activeelem
- 1 c, $01 c, -1 c, 9 allot0 ;
+ -1 $01 1 createelem dup to curunit dup to lastelem to activeelem ;
: Function ( 'name namelen -- ) 2 $09 2 newelem dup c, move, ;
: Return ( -- ) 1 $03 3 newelem ;
: Constant ( n -- ) 0 $04 4 newelem , ;
@@ -82,6 +105,20 @@ create astidnames
: Expression ( -- ) -1 $01 7 newelem ;
: UnaryOp ( opid -- ) 1 $07 8 newelem , ;
: Factor ( -- ) 1 $01 9 newelem ;
+\ does *not* automatically add the elem
+: BinaryOp ( opid -- elem ) 2 $07 10 createelem swap , ;
+
+: printelem ( elem -- )
+ ?dup not if ." null" exit then
+ dup astid idname stype dup flags ( elem flags )
+ dup $04 and if ( int data ) '[' emit over intdata .x ']' emit then
+ $08 and if ( str data ) '[' emit over strdata stype ']' emit then
+ drop ;
+: printast ( elem -- ) 1 swap begin ( lvl elem )
+ dup astid not if ( seqclose ) swap 1- swap then
+ dup printelem
+ dup haschildren? if '(' emit swap 1+ swap else ',' emit then ( lvl elem )
+ nextelem 2dup not swap not or until 2drop ;
: _err ( ta tl -- )
stype spc>
@@ -103,6 +140,28 @@ create astidnames
: tokenfromlist ( ta tl list optbl -- )
>r rot> >s ( list R:optbl ) sfind dup 0< if s> _err then r> swap wexec ;
+\ The binopswap operation is funky. It happens when we want to add a binop that
+\ "eats up" the preceding element. There are 3 scenarios.
+\ 1. the preceding sibling (let's call it the target element) is not a binop.
+\ We want our binop to take its place and have the number be the first
+\ operand of our new binop.
+\ 2. The preceding element is a binop with a lower precedence than our new
+\ binop. In this case, we want to "steal" the second operator from the first
+\ binop and place the new binop "under" the old binop as the second operator.
+\ the "target" is the second operand of the old binop.
+\ 3. The preceding element is a binop with a higher precedence than our new
+\ binop. Like in scenario 1, we want to take its place. the target is the
+\ old binop.
+: spit ( a u -- ) A>r >r >A begin Ac@+ .x1 spc> next r>A ;
+: binopswap ( target bopid -- )
+ BinaryOp ( tgt bop )
+ over parentelem over parentelem! \ bop.parent = bop.parent
+ over prevelem over prevelem! \ bop.prev = tgt.prev
+ dup dup prevelem nextelem! \ bop.prev.next = bop
+ 2dup nextelem! \ bop.next = tgt
+ 2dup swap prevelem! \ tgt.prev = bop
+ dup cslots- drop to activeelem drop ;
+
\ Parse words. Each of those words have the signature "ta tl -- ".
\ Some words call "skipnext" to skip the next "nextt" call. In that case, the
\ signature is "ta tl -- ta tl".
@@ -127,22 +186,20 @@ ASTIDCNT wordtbl astparsetbl
:w ( Arguments ) ')' expectChar SeqClose ;
:w ( Expression )
2dup S" ;" S= if 2drop SeqClose exit then
- 2dup uopid if UnaryOp 2drop Factor else skipnext Factor then ;
-'w _err ( UnaryOp )
+ activeempty? if 2dup uopid if UnaryOp 2drop exit then then
+ 2dup bopid if ( ta tl binopid )
+ rot> 2drop
+ activeempty? if _err then \ can't start an expression with a binop
+ lastactivechild astid 10 ( BinaryOp ) = if
+ abort" TODO" else
+ lastactivechild swap binopswap exit then
+ then
+ skipnext Factor ;
+:w ( UnaryOp ) skipnext Factor ;
:w ( Factor ) expectConst Constant SeqClose ;
+:w ( BinaryOp ) skipnext Factor ;
: parseast ( -- ) Unit begin
_skip if 0 to _skip else nextt ?dup not if exit then then
astparsetbl activeelem astid wexec again ;
-: printelem ( elem -- )
- ?dup not if ." null" exit then
- dup astid idname stype dup flags ( elem flags )
- dup $04 and if ( int data ) '[' emit over intdata .x ']' emit then
- $08 and if ( str data ) '[' emit over strdata stype ']' emit then
- drop ;
-: printast ( elem -- ) 1 swap begin ( lvl elem )
- dup astid not if ( seqclose ) swap 1- swap then
- dup printelem
- dup haschildren? if '(' emit swap 1+ swap else ',' emit then ( lvl elem )
- nextelem 2dup not swap not or until 2drop ;
diff --git a/fs/cc/cc1.fs b/fs/cc/cc1.fs
@@ -1,5 +1,5 @@
\ C compiler stage 1
-\ Requires cc/tok.fs, cc/ast.fs, asm.fs and wordtbl.fs
+\ Requires cc/gen.fs, cc/ast.fs, asm.fs and wordtbl.fs
: _err ( elem -- ) printelem abort" unexpected element" ;
@@ -15,25 +15,27 @@ ASTIDCNT wordtbl posttbl
'w noop ( Function )
:w ( Return )
ebp 4 i32 sub,
- [ebp] eax mov, ;
+ [ebp] reg> mov, ;
'w _err ( Constant )
:w ( Statements ) ret, ;
'w noop ( Arguments )
'w noop ( Expression )
:w ( UnaryOp ) dup intdata genuop ;
'w noop ( Factor )
+:w ( BinaryOp ) dup intdata genbop ;
ASTIDCNT wordtbl pretbl
:w ( SeqClose ) dup parentelem posttbl over astid wexec drop ;
'w noop ( Unit )
:w ( Function ) dup strdata entry ;
'w noop ( Return )
-:w ( Constant ) eax dup intdata i32 mov, ;
+:w ( Constant ) >reg dup intdata i32 mov, ;
'w noop ( Statements )
'w noop ( Arguments )
'w noop ( Expression )
'w noop ( UnaryOp )
'w noop ( Factor )
+'w noop ( BinaryOp )
\ Compiles input coming from the cc< alias (defaulting to in<) and writes the
\ result to here. Aborts on error.
diff --git a/fs/cc/gen.fs b/fs/cc/gen.fs
@@ -0,0 +1,22 @@
+\ C compile code generation utilities
+\ Requires asm.fs
+
+\ Target register stack: AST element setting a value to a register such as
+\ Constant don't always push to the same register. During a binary op, we need
+\ the same Constant op will push once to eax and once to ebx. For this reason,
+\ we have the register selection words below.
+\ The principle is this we "push" values with ">reg". Constant does this. We
+\ "pull" values with "reg>". BinaryOp and Return does this. Then, at the end
+\ of each statement, we verify that our levels of push and pull match.
+\ Some operations affect the currently active reg, like unary ops. You'll then
+\ want to use "curreg".
+
+-1 value reglvl \ 0=AX 1=CX 2=DX 3=BX
+: _regerr abort" register stack overflow/underflow/imbalance" ;
+: curreg reglvl 4 < not if _regerr then reglvl r! ;
+: >reg reglvl 1+ to reglvl curreg ;
+: reg> curreg reglvl 1- to reglvl ;
+\ use this when you need 2 register at once in the *opposite* order of what
+\ you'd get with "reg> curreg". only drops reglvl by 1.
+: reg>> reglvl dup 1 < if _regerr then 1- dup r! dup 1+ r! to reglvl ;
+
diff --git a/fs/cc/ops.fs b/fs/cc/ops.fs
@@ -1,5 +1,6 @@
\ C compiler operators
-\ Requires wordtbl.fs and asm.fs
+\ Requires cc/gen.fs wordtbl.fs and asm.fs
+
\ Unary operators
\ ID Sym Name
\ 0 - Negate
@@ -15,8 +16,32 @@ create uopssyms ," -~!?"
: uopchar ( opid -- c ) UOPSCNT max uopssyms + c@ ;
UOPSCNT wordtbl opgentbl ( -- )
-:w ( - ) eax neg, ;
-:w ( ~ ) eax not, ;
+:w ( - ) curreg neg, ;
+:w ( ~ ) curreg not, ;
:w ( ! ) abort" TODO" ;
: genuop ( opid -- ) opgentbl swap wexec ;
+
+\ Binary operators
+\ ID Sym Name
+\ 0 + Addition
+\ 1 - Subtraction
+\ 2 * Multiplication
+\ 3 / Division
+
+4 value BOPSCNT
+create bopssyms ," +-*/?"
+
+: bopid ( ta tl -- opid? f )
+ 1 = if c@ bopssyms BOPSCNT [c]? dup 0< if drop 0 else 1 then
+ else drop 0 then ;
+: bopchar ( opid -- c ) BOPSCNT max bopssyms + c@ ;
+
+BOPSCNT wordtbl opgentbl ( -- )
+:w ( + ) abort" TODO" ;
+:w ( - ) reg>> sub, ;
+:w ( * ) abort" TODO" ;
+:w ( / ) abort" TODO" ;
+
+: genbop ( opid -- ) opgentbl swap wexec ;
+
diff --git a/fs/test.c b/fs/test.c
@@ -5,3 +5,6 @@ int neg() {return -42;}
int bwnot() {
return ~42;
}
+int subconsts () {
+ return 42 - 4;
+}
diff --git a/tests/testcc.fs b/tests/testcc.fs
@@ -3,6 +3,7 @@ f<< str.fs
f<< wordtbl.fs
f<< asm.fs
f<< cc/tok.fs
+f<< cc/gen.fs
f<< cc/ops.fs
f<< cc/ast.fs
f<< cc/cc1.fs
@@ -13,3 +14,4 @@ cc1,
retconst 42 #eq
neg -42 #eq
bwnot $ffffffd5 #eq
+subconsts 38 #eq