commit 51b259439cdf0eb201532949cd381cbd35743594
parent aa6ec7470ebfa0320f94e3be107509317d85fef7
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Sat, 19 Nov 2022 15:04:58 -0500
cc/vm/forth: big location reform
I hit a wall during the app/uxn implementation: function calls from registers
(that is, function calls through a variable, in this instance deo() and dei())
which also had arguments (which is why I hadn't hit this wall in Collapse OS,
whose ops functions take no arguments) ended up losing the call address in PS,
and CC wasn't able to get it back.
Fixing this with the old approach to VMOp in the Forth VM would have been too
messy, so I changed it all. See doc/cc/forth.
The end result is simpler than what was there before, but it consumes more PS
space during expression solving.
Although the i386 backend doesn't have this problem, one of the tests I wrote
to reproduce the app/uxn bug accidentally uncovered an i386 bug, which is also
fixed here.
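Reduced to a sketch, the failing shape is a call through a function pointer
stored in memory, with arguments (mirrored by the structop8 test added below).
The names here are illustrative only, not the actual app/uxn code:

    /* illustrative C sketch of the bug trigger, not the real app/uxn code */
    struct Ops { int (*deo)(int, int); };   /* call target lives in memory */

    int call_through(struct Ops *ops, int a, int b) {
        /* the call address becomes a register-style VM operand; while the
           arguments were being resolved, that address used to get lost on
           PS before the call could be compiled */
        return ops->deo(a, b);
    }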
Diffstat:
10 files changed, 193 insertions(+), 108 deletions(-)
diff --git a/fs/cc/vm/commonlo.fs b/fs/cc/vm/commonlo.fs
@@ -37,13 +37,11 @@ $02 const VM_STACKFRAME \ on RS at RSP+arg
$03 const VM_ARGSFRAME \ on PS at PSP+arg
$04 const VM_REGISTER \ in an implementation-specific register of id "arg"
$05 const VM_CONSTARRAY \ pointer to an array with the 1st elem being length
-$06 const VM_TOS \ top of PS
\ Below, references to a location (points to X)
$11 const VM_*CONSTANT
$12 const VM_*STACKFRAME
$13 const VM_*ARGSFRAME
$14 const VM_*REGISTER
-$16 const VM_*TOS
struct[ VMOp
sfield loc \ one of the VM_ constants
@@ -73,14 +71,14 @@ struct[ VMOp
: :pop ( 'copy self -- ) dup :noop# 12 move ;
: :&loc dup :locptr? _assert dup :loclo swap to loc ;
: :&op dup :&loc dup type type*lvl+ swap to type ;
+ : :type- dup type type*lvl- swap to type ;
: :*op dup loc case
VM_CONSTANT of = endof
VM_STACKFRAME of = endof
VM_ARGSFRAME of = endof
VM_REGISTER of = endof
- VM_TOS of = endof
_err endcase
- dup loc $10 or over to loc dup type type*lvl- swap to type ;
+ dup loc $10 or over to loc :type- ;
: :*arisz ( self -- n ) type *ariunitsz ;
]struct
diff --git a/fs/cc/vm/forth.fs b/fs/cc/vm/forth.fs
@@ -2,20 +2,10 @@
\ This VM produces slower and bigger code than their native counterpart but has
\ the advantage of working under any architecture.
+\ See doc/cc/forth for implementation details.
?f<< /cc/vm/commonlo.fs
?f<< /lib/wordtbl.fs
-\ How the VM works
-\ Generally, CPUs have registers that we want to use for performance reasons.
-\ With a forth backend, the fastest way to proceed will generally to keep things
-\ on PS. However, tracking PS levels for op1 and op2 is complex and because the
-\ uses of this backend are limited, we opt for the simplest approach: what we
-\ call a "register" in this backend are two 32-bit memory area reserved for op1
-\ and op2. Those memory areas correspond to "oparg", which is the area itself
-\ directly.
-
-\ When accumulating call arguments, we need to keep track of how many we have
-\ and apply the corresponding offset to VM_ARGSFRAME.
0 value psoff
: PS+ CELLSZ to+ psoff ;
: PS- CELLSZ neg to+ psoff ;
@@ -23,66 +13,39 @@
1 of = [compile] 8b endof 2 of = [compile] 16b endof endcase ;
struct+[ VMOp
- : :TOS? :loclo VM_TOS = ;
- \ Re-initialize op, "forgetting" the TOS, abandoning it to PS.
- : :forgetTOS dup :TOS? _assert VMOp :init ;
- : :init dup :TOS? if compile drop PS- then VMOp :init ;
- : :>TOS VM_TOS swap to loc ;
- : :>*TOS VM_*TOS swap to loc ;
- : _ ( loc self -- )
- swap case
- VM_CONSTANT of = arg litn PS+ endof
- VM_STACKFRAME of = arg r', PS+ endof
- VM_ARGSFRAME of = arg psoff + p', PS+ endof
- VM_REGISTER of = 'arg litn compile @ PS+ endof
- VM_TOS of = drop endof \ nothing to do
+ : _compile ( arg loc -- ) \ compile "straight" operands, errors on * ops.
+ case ( arg )
+ VM_CONSTANT of = litn PS+ endof
+ VM_STACKFRAME of = r', PS+ endof
+ VM_ARGSFRAME of = psoff + p', PS+ endof
+ VM_REGISTER of = psoff + p', compile @ PS+ endof
_err endcase ;
- \ Resolve current operand and compile a push to PS as either VM_TOS or
- \ VM_*TOS.
- : :compile& >r
- r@ :locptr? if r@ :loclo r@ _ r> :>*TOS else r@ loc r@ _ r> :>TOS then ;
+ : :compile& dup :locptr? _assert dup arg swap :loclo _compile ;
: :typesz! type typesize sz! ;
- \ Resolve current operand and forces dereferencing. Always yields VM_TOS.
- : :compile ( -- ) >r
- r@ :compile& r@ loc VM_*TOS = if r@ :typesz! compile @ r@ :>TOS then rdrop ;
- : :>reg dup :loclo VM_REGISTER = if drop exit then >r
- r@ :hasop# r@ :locptr? ( f ) r@ :compile& r@ 'arg litn compile ! PS- ( f )
- 4 lshift VM_REGISTER or to r> loc ;
+ \ Dereference PS TOS using this operand's type size
+ : :TOS@ ( self -- ) :typesz! compile @ ;
+ \ Resolve current operand and dereferences it if needed
+ : :compile ( self -- )
+ dup arg over :locptr?
+ if over :loclo _compile :TOS@ else swap loc _compile then ;
+ : :compile$
+ \ special case that happens often: our op is the current top of stack. When
+ \ we don't want to keep the op (hence the "$"), no compiling is necessary;
+ \ just do nothing. It saves a lot of bytecode.
+ dup loc VM_REGISTER = over arg neg psoff = and
+ not if dup :compile then :init ;
+ \ "save" the value currently on PS TOS as a "register" (see doc/cc/forth)
+ : :>reg VM_REGISTER over to loc psoff neg swap to arg ;
\ dereference current operand
: :*op dup :locptr? if dup :compile dup :>reg then VMOp :*op ;
: :*n ( n self -- ) tuck :compile litn compile * :>reg ;
: :/n ( n self -- ) tuck :compile litn compile / :>reg ;
: :+n ( n self -- ) tuck :compile litn compile + :>reg ;
- : :>res
- dup :loclo dup VM_STACKFRAME = swap VM_ARGSFRAME = or
- if :>reg else drop then ;
- : :?>reg dup :TOS? if :>reg else drop then ;
+ : :>res dup :compile :>reg ;
]struct
-\ Verify that we're in "neutral" position with regards to PS
-: neutral# psoff if abort" unbalanced PS" then ;
-
-\ If any of the op is VM_TOS, push it to a register.
-: ?tos>reg vmop :?>reg vmop^ :?>reg ;
-
-\ We override the common :push/:pop mechanism to adjust to the weirness of
-\ the forth backend. We use :push/:pop when 2 ops aren't enough to store all
-\ variables needed to resolve the expression. This happens in nested binops.
-\ The Forth PS is ideally suited for this kind of storage, but it has to be
-\ used carefully, following these rules:
-\ 1. When pushing a TOS, nothing needs to be done.
-\ 2. When pushing a REGISTER, it needs to be pushed to TOS first, and then
-\ yield a TOS oploc. In other words. :push never yields oploc=REGISTER.
-\ 3. When popping, if oploc=TOS, we need to ensure that the "other op" is not
-\ a TOS. If it is, change it into a REGISTER.
-struct+[ VMOp
- : :push ( self -- 'copy )
- dup :loclo VM_REGISTER = if vmop :compile& then VMOp :push ;
- \ In vm=, and binop= we need to pop without the ?tos>reg check
- : :popNoChk ( arg loc&type self -- ) VMOp :pop ;
- : :pop ( 'copy self -- )
- over :loclo VM_TOS = if ?tos>reg then VMOp :pop ;
-]struct
+\ Free elements leaked to PS during the execution of the function
+: neutral# 0 to@! psoff ?dup if p+, then ;
\ generate function prelude code by allocating "locsz" bytes on RS.
: vmprelude, ( argsz locsz -- )
@@ -95,34 +58,42 @@ struct+[ VMOp
argsz >r \ V1=argsz
vmop^ :noop# \ returning with a second operand? something's wrong
vmop loc if
- vmop :compile PS- vmop :forgetTOS argsz ?dup if
+ vmop :compile$ PS- psoff argsz + ?dup if
p', compile ! -4 to+ V1 then then
- r> ( argsz ) ?dup if p+, then
- neutral#
+ r> ( argsz ) 0 to@! psoff + ?dup if p+, then
locsz ?dup if r+, then
exit, ;
-0 value callsz \ length in bytes of the current call args
+\ a stack of :push'ed VMOps that are to be sent as args
+$10 const MAXCALLARGS
+create callargs MAXCALLARGS CELLSZ * allot
+here const _limit
+callargs value _cur
\ Write op to args
-: vmcallarg, ( -- ) vmop :compile vmop :forgetTOS CELLSZ to+ callsz ;
+: vmcallarg, ( -- ) vmop :push to!+ _cur _cur _limit < _assert ;
\ Call the address in current op. If the function has a result, you need to
\ pop it with vmpspop,
: vmcall, ( -- )
- VM_CONSTANT vmop loc = if
+ \ Let's push the args to PS first
+ vmop :push \ we keep call addr for later
+ callargs begin ( 'pushed a )
+ dup _cur < while
+ @+ vmop :pop vmop :compile$ repeat drop
+ vmop :pop VM_CONSTANT vmop loc = if
vmop arg execute, vmop :init
- else vmop :compile vmop :forgetTOS compile execute PS- then
- 0 to@! callsz neg to+ psoff ;
+ else vmop :compile$ compile execute PS- then
+ callargs to@! _cur callargs - ( callsz ) neg to+ psoff ;
\ Allocate a new register for active op and pop 4b from PS into it.
-: vmpspop, vmop :noop# vmop :>TOS PS+ vmop :>reg ;
+: vmpspop, vmop :noop# PS+ vmop :>reg ;
\ Push active op to PS.
-: vmpspush, vmop :compile vmop :forgetTOS PS- ;
+: vmpspush, vmop :compile$ PS- ;
UNOPCNT wordtbl unop
'w neg 'w ^ 'w bool 'w not
-: unop, ( opid -- ) vmop :compile unop swap wexec, ;
+: unop, ( opid -- ) vmop :compile$ unop swap wexec, vmop :>reg ;
\ Signature: a incsz -- n
UNOPMUTCNT >> wordtbl _tbl32
@@ -141,8 +112,8 @@ UNOPMUTCNT >> wordtbl _tbl8
vmop type typesize
case 1 of = _tbl8 endof 2 of = _tbl16 endof _tbl32 endcase ( opid tbl )
over >> wtbl@ ( opid w )
- vmop :&loc vmop :compile
- vmop :*arisz rot 1 and if ( -- ) neg then litn ( w ) execute, ;
+ vmop :compile&
+ vmop :*arisz rot 1 and if ( -- ) neg then litn ( w ) execute, vmop :>reg ;
ARIOPCNT 1+ ( for = ) wordtbl _tbl
'w + 'w - 'w * 'w /
@@ -150,10 +121,8 @@ ARIOPCNT 1+ ( for = ) wordtbl _tbl
'w lshift 'w rshift 'w nip
: _binop, ( opid tbl -- )
- vmop :compile \ op1 is TOS
- vmop^ :hasop# vmop^ :TOS? if compile swap then
- vmop^ :compile vmop^ :forgetTOS
- swap wexec, PS- ; \ result in op1 as VM_TOS
+ vmop :compile vmop^ :compile$
+ swap wexec, PS- vmop :>reg ;
: ariop, ( opid -- ) _tbl _binop, ;
@@ -163,7 +132,7 @@ ARIOPCNT 1+ ( for = ) wordtbl _tbl
: _movarray, \ special case, we have a {1, 2, 3} assign
vmop loc VM_STACKFRAME = _assert
vmop type type*lvl- typesize vmop :compile ( sz ) \ dst on PS
- vmop :forgetTOS vmop^ arg @+ ( sz a len ) >r begin ( sz a )
+ vmop^ arg @+ ( sz a len ) >r begin ( sz a )
@+ litn compile swap over sz! compile !+ next compile drop PS- ( sz a )
2drop vmop^ :init ;
@@ -171,12 +140,11 @@ ARIOPCNT 1+ ( for = ) wordtbl _tbl
\ with the participation of op2.
: assignop, ( opid -- )
vmop^ loc VM_CONSTARRAY = if drop _movarray, exit then
- vmop^ :?>reg vmop type typesize ( opid sz )
- vmop :TOS? if compile dup PS+ then
- vmop :keep vmop :&loc vmop :compile vmop :forgetTOS vmop :popNoChk
- compile dup PS+ dup sz! compile @ vmop^ :compile vmop^ :forgetTOS ( opid sz )
+ vmop type typesize ( opid sz ) vmop :compile&
+ compile dup PS+ dup sz! compile @ vmop^ :compile$ ( opid sz )
swap _tbl swap wexec, PS- ( sz ) \ result on TOS
- compile swap sz! compile ! PS- PS- ;
+ compile dup compile rot sz! compile ! PS- \ result still on TOS
+ vmop :>reg ;
: _s $80000000 + swap $80000000 + swap ;
: _&& bool swap bool and ;
@@ -196,8 +164,12 @@ LOGOPCNT wordtbl _tblunsigned
: ]vmjmp ( 'jump_addr -- ) here swap ! ;
: vmjmp, ( a -- ) [compile] again ;
: vmjmp[, ( -- a ) compile (br) here 4 allot ;
-\ In conditional jumps below, the source of the test in in current op
-: _ vmop :compile vmop :forgetTOS PS- ;
+\ In conditional jumps below, the source of the test is in current op
+\ However, because we don't track "psoff" across branches, we *have* to have a
+\ neutral level before the jump, which means that this flag that we're pushing
+\ on PS *has* to be right after the last argument of the args frame.
+: _ vmop^ :noop# vmop :compile$ PS- 0 to@! psoff ?dup if
+ dup p', compile ! CELLSZ - ?dup if p+, then then ;
: vmjz, ( a -- ) _ [compile] until ;
: vmjz[, ( -- a ) _ [compile] if ;
: vmjnz, ( a -- ) _ compile not [compile] until ;
diff --git a/fs/cc/vm/i386.fs b/fs/cc/vm/i386.fs
@@ -53,7 +53,7 @@ struct+[ VMOp
_err endcase ;
: :compilesz dup :typesz! :compile ;
: :compiletest
- dup :compilesz dup :loclo VM_REGISTER = if
+ dup :compilesz dup loc VM_REGISTER = if
dup :compile else -1 i) then drop test, ;
\ Force current operand to be copied to a register
diff --git a/fs/doc/cc/forth.txt b/fs/doc/cc/forth.txt
@@ -0,0 +1,47 @@
+# Forth VM
+
+The Forth VM doesn't have registers, making the implementation of this C backend
+a bit more... interesting. It has, however, a PS space where each element can be
+addressed by its byte offset relative to current PSP. These addresses are what
+we use to fill the role generally reserved for CPU registers.
+
+That "register" space directly follows the argument frame and is managed in the
+exact same way as VM_ARGSFRAME operands, except that we still use the
+VM_REGISTER location constant for it (if we used VM_ARGSFRAME for registers, we
+would end up "missing" an indirection level for reasons that are hard to explain
+with words, but if you think really hard about it, you'll get it).
+
+However, because we keep adding and removing elements from PS, and because our
+addressing scheme is relative to PSP, we need to keep track of our relative PS
+level at all times so that our call to "p'," can have the correct argument.
+
+We do this through a variable we call "psoff" which starts at 0, increases by 4
+bytes every time we compile a word that adds to the stack and decreases by 4
+when we compile a word that removes an element from the stack.
+
+Important note: PSP points to the *current* element. When a function starts,
+PSP+0 points to the last argument in the ARGSFRAME.
+
+< mem down mem up >
++-----------------------------------------------+
+| ... | REG1 | REG0 | ARG0 | ARG1 | ... |
++-----------------------------------------------+
+psoff=8 ^ psoff=0 ^
+
+For example, let's say that we begin the function with psoff=0 and that we
+"allocate a register". That will yield a VMOp with loc=VM_REGISTER and arg=-4
+(arg=0 being the last argument for the function). PSP is increased by 4 bytes
+(it's always 4 bytes).
+
+Now, what happens if we want to compile an access to that register? We do
+"psoff+arg", which yields 0, which triggers the special case "compile dup".
+We then increase psoff by 4 (we're now at 8). If we're in the middle of an
+operation that will consume this value right away, we let that operation manage
+the value it puts on PS. If that value needs to be kept in a "register", then
+we allocate a new space for it, with "arg=-8".
+
+What do we do with the old value at arg=-4? We leak it to PS. That's a side
+effect of this scheme, a price we pay for its general simplicity. We clean up
+PS only when we return with vmret, or when "ops$" is called (which is done
+between statements).
+
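+As a rough C sketch of the bookkeeping above (hypothetical names, purely
+illustrative of the psoff/arg arithmetic; the real words are :>reg and _compile
+in fs/cc/vm/forth.fs):
+
+    /* models the example above: allocate a register, then access it */
+    void register_example(void) {
+        int psoff = 0;                    /* relative PS level, in bytes */
+
+        /* a value is pushed on PS, then "saved" as a register (:>reg) */
+        psoff += 4;                       /* PS grew by one cell */
+        int reg_arg = -psoff;             /* arg = -4, relative to PSP */
+
+        /* compiling an access to that register */
+        int psp_offset = psoff + reg_arg; /* 0: the register is at PS TOS */
+        psoff += 4;                       /* the access pushes a copy: psoff=8 */
+        (void)psp_offset; (void)psoff;    /* silence unused warnings */
+    }
+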
diff --git a/fs/tests/cc/cc.fs b/fs/tests/cc/cc.fs
@@ -81,6 +81,7 @@ structop4 globdata 12 + #eq
structop5 42 #eq
structop6 54 #eq
structop7 42 #eq
+12 42 structop8 54 #eq
opwidth1 42 #eq
opwidth2 42 #eq
opwidth3 $129 #eq
diff --git a/fs/tests/cc/lib.fs b/fs/tests/cc/lib.fs
@@ -53,7 +53,6 @@ S" ( ----- %d )" S" ( ----- 000 )" sscanf # 0 #eq
foo #
create myarray 3 , 7 , 8 , 5 , 2 , 1 , 9 , 5 , 4 ,
-myarray dump
myarray 9 qsort
create expected 1 , 2 , 3 , 4 , 5 , 5 , 7 , 8 , 9 ,
myarray expected 9 CELLSZ * []= #
diff --git a/fs/tests/cc/test.c b/fs/tests/cc/test.c
@@ -77,8 +77,9 @@ int subber(int a, int b) {
return a - b;
}
// are arguments, both constants and lvalues, properly passed?
+// do we support expressions as arguments?
int plusone(int x) {
- return adder(1, x);
+ return adder(1, x+x-x);
}
int ptrget() {
int a = 42;
@@ -353,6 +354,17 @@ short structop7() {
return globdataptr->bar;
}
+// in the forth VM, the address of the call would get lost on PS in certain
+// situations, such as this one: a function living in a struct and accessed
+// through a pointer.
+struct StructWithFunc { int (*func)(int, int); };
+int structop8(int a, int b) {
+ StructWithFunc s;
+ StructWithFunc *ptr = &s;
+ s.func = adder;
+ return ptr->func(a, b);
+}
+
// we used to leak VM ops in condition blocks without {}
void cond1() {
int x = 42;
diff --git a/fs/tests/cc/vm.fs b/fs/tests/cc/vm.fs
@@ -73,15 +73,18 @@ ops$
code test5
0 8 vmprelude,
\ foo = 42
- selop2 42 const>op
- selop1 4 sf+>op
- vm=,
+ 42 const>op
+ selop^ 4 sf+>op
+ vm=, ops$
\ bar = &foo
+ 4 sf+>op
vmop :&op
- selop2 0 sf+>op
+ selop^ 0 sf+>op
vmop^ type to vmop type
- vm=,
+ vm=, ops$
\ return *bar
+ 0 sf+>op
+ TYPE_INT* to vmop type
vmop :*op
vmret,
test5 42 #eq
@@ -93,8 +96,9 @@ code test6
\ foo = 42
selop2 42 const>op
selop1 4 sf+>op
- vm=,
+ vm=, ops$
\ bar = &foo
+ 4 sf+>op
vmop :&op
selop2 0 sf+>op
vmop^ type to vmop type
@@ -126,6 +130,7 @@ code test8
selop2 42 const>op
selop1 0 sf+>op
vm=,
+ ops$ 0 sf+>op
vm++op, \ the inc must happen directly in SF+0
ops$
selop1 0 sf+>op
@@ -137,11 +142,12 @@ ops$
code test9
0 4 vmprelude,
\ foo = 42
- selop2 42 const>op
- selop1 0 sf+>op
- vm=,
- vmop++, \ inc SF+0 and yield the old value to op1
- selop2 0 sf+>op
+ 42 const>op
+ selop^ 0 sf+>op
+ vm=, ops$
+ 0 sf+>op
+ vmop++, \ inc SF+0 and yield the old value to vmop
+ selop^ 0 sf+>op
vm+,
vmret,
test9 85 #eq
@@ -177,7 +183,7 @@ code test11 ( n -- n-42 )
vmpspop,
vmret,
54 test11 12 #eq
-
+
\ variable op width
ops$
here ," hello" ( a )
@@ -211,4 +217,46 @@ code test13 ( -- n )
vmret,
test13 85 #eq
+\ a rewrite of ptrset() from test.c for more precise VM testing
+code test14 ( -- n )
+ 0 8 vmprelude, ops$
+ 42 const>op
+ selop^ 0 sf+>op
+ vm=, ops$ \ a = 42
+ 0 sf+>op vmop :&op \ &a
+ selop^ 4 sf+>op
+ TYPE_INT* to vmop type
+ vm=, ops$ \ int *b = &a;
+ 54 const>op
+ selop^ 4 sf+>op
+ TYPE_INT* to vmop type
+ vmop :*op
+ vm=, ops$ \ *b = 54
+ 0 sf+>op
+ vmret, \ return a
+test14 54 #eq
+
+\ Branching with intermediate results caused PS leaks in the forth VM
+create myarray 1 , 2 , 3 , 0 ,
+\ Equivalent: int i = 0; int *b = myarray; do ++i; while (*(b++)); return i;
+code test15 ( -- n )
+ 0 8 vmprelude, ops$
+ 0 const>op
+ selop^ 0 sf+>op
+ vm=, ops$ \ i = 0
+ myarray const>op
+ selop^ 4 sf+>op
+ vm=, ops$ \ b = &myarray
+ here
+ 0 sf+>op
+ vm++op, ops$
+ 4 sf+>op
+ TYPE_INT* to vmop type
+ vmop++,
+ vmop :*op
+ vmjnz, ops$ \ PS must be empty
+ 0 sf+>op
+ vmret,
+test15 4 #eq
+scnt 0 #eq
testend
diff --git a/fs/tests/lib/all.fs b/fs/tests/lib/all.fs
@@ -2,6 +2,10 @@
f<< /tests/lib/core.fs
f<< /tests/lib/bit.fs
f<< /tests/lib/str.fs
+\ TODO: something is fishy. Whenever I comment out the line below to test CC vm
+\ code more precisely, the test suite fails under i386 (only) during vm.fs at
+\ test1 with a stack underflow. I don't want to track that down now, but I
+\ should at some point.
f<< /tests/lib/crc.fs
f<< /tests/lib/meta.fs
f<< /tests/lib/arena.fs
diff --git a/posix/dis.c b/posix/dis.c
@@ -143,7 +143,7 @@ struct call {
int arg;
};
-#define CALLCNT 24
+#define CALLCNT 28
struct call calls[CALLCNT] = {
{0x5f, "execute", ARGNONE},
{0x9e, "(br)", ARGINT},
@@ -151,6 +151,8 @@ struct call calls[CALLCNT] = {
{0x11a, "dup", ARGNONE},
{0x138, "swap", ARGNONE},
{0x147, "over", ARGNONE},
+ {0x155, "rot", ARGNONE},
+ {0x172, "nip", ARGNONE},
{0x1c7, "@", ARGNONE},
{0x1cf, "16b @", ARGNONE},
{0x1d7, "8b @", ARGNONE},
@@ -163,12 +165,14 @@ struct call calls[CALLCNT] = {
{0x355, "1+", ARGNONE},
{0x36e, "+", ARGNONE},
{0x37a, "-", ARGNONE},
+ {0x386, "*", ARGNONE},
{0x3a3, "and", ARGNONE},
{0x3ec, "not", ARGNONE},
{0x3f8, "<", ARGNONE},
{0x436, "rshift", ARGNONE},
{0xc18, "drop", ARGNONE},
- {0xd38, ">", ARGNONE}
+ {0xd38, ">", ARGNONE},
+ {0xde0, "/", ARGNONE}
};
static int offset = 0;