commit ab86276371d59e7dcba011cb62c31b822b07c454
parent 2ef45a52938441b29214fcc8803f46b2ae39007c
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Mon, 29 May 2023 15:29:01 -0400
rpi: change call conventions
See "pushret, and popret," section in doc/hal.
Diffstat:
4 files changed, 110 insertions(+), 72 deletions(-)
diff --git a/fs/asm/arm.fs b/fs/asm/arm.fs
@@ -82,13 +82,10 @@ $90 al) const mul)
\ Macros
-: lret, ( -- ) mov) rPC rd) rLR rm) ,) ;
: push, ( r -- ) str) swap rd) rSP rn) CELLSZ -i) pre) !) ,) ;
: pop, ( r -- ) ldr) swap rd) rSP rn) CELLSZ +i) post) ,) ;
: ppush, ( r -- ) str) swap rd) rPSP rn) CELLSZ -i) pre) !) ,) ;
: ppop, ( r -- ) ldr) swap rd) rPSP rn) CELLSZ +i) post) ,) ;
-: call, ( rel -- ) rLR push, CELLSZ - bl) ,) rLR pop, ;
-: abscall, ( tgt -- ) abs>rel call, ;
\ HAL
diff --git a/fs/doc/hal.txt b/fs/doc/hal.txt
@@ -176,6 +176,41 @@ All addresses passed to branching words are absolute addresses. If the native
instructions use relative branching addressing, the HAL takes care of the
translation.
+## pushret, and popret,
+
+In Dusk, "Call" means "Push the address of the instruction following the current
+one to RSP, and then jump to the address being called". "Return" means "Pop RSP
+and jump to that address".
+
+On "traditional" CPU architectures, this maps exactly to the behavior of the
+native "call" and "return" instructions, so we can live a happy life of
+blissful ignorance when using these CPUs.
+
+On some CPUs such as ARM, the native "call" model is to save the address we'll
+want to return to to a register and leave the task of push/popping to RSP to the
+programmer.
+
+Of course, one thing we could do is to simply wrap all calls and returns in Dusk
+into RSP push/pop operation, but that would squander a wonderful speedup
+opportunity: With such an approach to calling, we can avoid one push and one pop
+on each "leaf" routine call, that is, on each call to a routine that doesn't
+call any other routine. That adds up to quite a lot of pushes and pops.
+
+To grab this opportunity, the HAL has two words: pushret, and popret,
+
+On "traditional" CPUs, these are noops. On ARM, these words push and pop the
+return address register to and from RSP.
+
+Words defined through "high level" mechanism such as ":" call those words
+automatically, no need to worry. However, words created with "code" don't.
+
+This means that if you create such a word and that this word calls another word,
+it needs to call "pushret," as a prelude and to call "popret," before it
+returns. Leaf words don't need to do that, which makes them faster.
+
+NOTE: the ARM port is currently being written, so pushret, and popret, hasn't
+been added everywhere it needs to yet.
+
## Low HAL
Operand words:
@@ -214,6 +249,10 @@ branch! tgtaddr braddr --
branching.
branchA, --
Branch to the address held in the A register.
+pushret, --
+ Push the current return address to RSP (on relevant CPUs)
+popret, --
+ Pop RSP in return address register (on relevant CPUs)
Instructions:
diff --git a/fs/xcomp/arm/rpi/kernel.fs b/fs/xcomp/arm/rpi/kernel.fs
@@ -21,8 +21,12 @@ $44 const UART0_ICR
: xdrop, rTOP ppop, ;
: xgrow, sub) rPSP rdn) CELLSZ i) ,) ;
: xdup, rTOP ppush, ;
-: wcall, xwordlbl abscall, ;
-: wjmp, xwordlbl abs>rel b) ,) ;
+: pushret, rLR push, ;
+: popret, rLR pop, ;
+: exit, rLR bx) ,) ;
+: execute, abs>rel bl) ,) ;
+: wcall, xwordlbl execute, ;
+: wjmp, xwordlbl abs>rel b) ,) ; \ only for leaf words!
: delay, ( ncycles -- )
mov) r0 rd) swap ( ncycles ) i) ,)
@@ -41,13 +45,13 @@ $44 const UART0_ICR
rot mov) over rd) rot i) ,) ( n1 r )
add) swap rdn) swap i) ,) ;
-: xaddr ( lbl -- ) xcode xdup, rTOP pc>reg, lret, ;
-: xconst ( n -- ) pc swap le, xcode xdup, rTOP pc@>reg, lret, ;
+: xaddr ( lbl -- ) xcode xdup, rTOP pc>reg, exit, ;
+: xconst ( n -- ) pc swap le, xcode xdup, rTOP pc@>reg, exit, ;
: values ( n -- ) for 0 value next ;
-13 values lblcurword lblsysdict lblhere lblnextmeta lblcompiling lblbootptr
+12 values lblcurword lblsysdict lblhere lblnextmeta lblcompiling lblbootptr
lblhbank
- lblcwrite lbldwrite lblwriterange lblrel lblerrmsg lblmain
+ lblcwrite lbldwrite lblwriterange lblerrmsg lblmain
$8000 to binstart
binstart const RSTOP
RSTOP $1000 - const PSTOP
@@ -61,14 +65,14 @@ HERESTART xconst herestart
pc to lblcompiling 0 le,
xcode compiling
xdup, lblcompiling rTOP pc@>reg,
- lret,
+ exit,
xcode [ ximm
mov) r0 rd) 0 i) ,)
pc
lblcompiling rTOP pc>reg,
str) r0 rd) rTOP rn) ,)
- lret,
+ exit,
xcode ]
mov) r0 rd) 1 i) ,)
@@ -88,7 +92,7 @@ xcode bye 0 b) ,)
pc to L1 \ fail
mov) rTOP rd) 0 i) ,)
- lret,
+ exit,
\ parse char
pc to L2 \ rTOP=a-with-'-skipped r0=u
@@ -101,7 +105,7 @@ pc to L2 \ rTOP=a-with-'-skipped r0=u
and) r0 rdn) $ff i) ,)
r0 ppush,
mov) rTOP rd) 1 i) ,)
- lret,
+ exit,
\ parse hexadecimal
pc to L3 \ rTOP=a-with-$-skipped r0=u
@@ -127,7 +131,7 @@ L4 forward! \ parse ok
( loop ) abs>rel b) nz) ,)
r2 ppush,
mov) rTOP rd) 1 i) ,)
- lret,
+ exit,
\ parse unsigned decimal
pc to L4 \ rTOP=a+1 r1=first-char r0=u
@@ -151,7 +155,7 @@ pc \ loop
rsb) z) r2 rdn) 0 i) ,) \ negate
r2 ppush,
mov) rTOP rd) 1 i) ,)
- lret,
+ exit,
xcode parse ( str -- n? f )
ldr) r0 rd) rTOP rn) 8b) 1 +i) post) ,) \ rTOP=a r0=u
@@ -170,7 +174,7 @@ xcode emit ( c -- )
( pc ) abs>rel b) ne) ,)
str) rTOP rd) r7 rn) UART0_DR +i) ,)
xdrop,
- lret,
+ exit,
xcode key ( -- c )
pc
@@ -179,9 +183,9 @@ xcode key ( -- c )
( pc ) abs>rel b) ne) ,)
xdup,
ldr) rTOP rd) r7 rn) UART0_DR +i) 8b) ,)
- lret,
+ exit,
-xcode rtype ( a u -- )
+xcode rtype pushret, ( a u -- )
r1 ppop, mov) r2 rd) rTOP rm) ,) \ r1=a r2=u
pc
ldr) rTOP rd) r1 rn) 8b) 1 +i) post) ,)
@@ -190,7 +194,7 @@ xcode rtype ( a u -- )
sub) r2 rdn) 1 i) f) ,)
( pc ) abs>rel b) ne) ,)
xdrop,
- lret,
+ popret, exit,
xcode stype ( str -- )
ldr) r0 rd) rTOP rn) 8b) 1 +i) post) ,)
@@ -211,7 +215,7 @@ xcode boot<
xwordlbl key abs>rel b) eq) ,) \ until we have a proper "realias"
xdup, ldr) rTOP rd) r1 rn) 8b) 1 +i) post) ,)
str) r1 rd) r0 rn) ,)
- lret,
+ exit,
xcode in< wjmp, boot<
@@ -219,12 +223,12 @@ xcode interactive! \ until we have a proper "realias"
lblbootptr r0 pc>reg,
mov) r1 rd) 0 i) ,)
str) r1 rd) r0 rn) ,)
- lret,
+ exit,
pc to lblcurword $20 allot0
lblcurword xaddr curword
-xcode word ( -- str )
+xcode word pushret, ( -- str )
xdup,
pc
wcall, in<
@@ -242,7 +246,7 @@ xcode word ( -- str )
( pc ) abs>rel b) hi) ,)
lblcurword rTOP pc>reg,
str) r6 rd) rTOP rn) 8b) ,)
- lret,
+ popret, exit,
pc ," word not found" 0 align4
xcode (wnf)
@@ -300,13 +304,13 @@ pc \ loop2
( loop2 ) abs>rel b) ne) ,)
\ same contents
add) rTOP rdn) 4 i) ,) \ e>w
- lret,
+ exit,
L2 forward! L1 forward! \ not matching, try next
ldr) rTOP rdn) 0 +i) ,)
cmp) rTOP rn) 0 i) ,)
( loop1 ) abs>rel b) ne) ,)
\ not found
- lret,
+ exit,
pc to lblhere HERESTART le,
lblhere xaddr here
@@ -316,14 +320,14 @@ pc to lblcwrite \ r0=char
ldr) r1 rd) r2 rn) ,)
str) r0 rd) r1 rn) 8b) 1 +i) post) ,)
str) r1 rd) r2 rn) ,)
- lret,
+ exit,
pc to lbldwrite \ r0=n
lblhere r2 pc>reg,
ldr) r1 rd) r2 rn) ,)
str) r0 rd) r1 rn) 4 +i) post) ,)
str) r1 rd) r2 rn) ,)
- lret,
+ exit,
pc to lblwriterange \ r0=addr r1=len
lblhere r2 pc>reg,
@@ -334,15 +338,7 @@ pc to lblwriterange \ r0=addr r1=len
sub) r1 rdn) 1 i) f) ,)
( pc ) abs>rel b) ne) ,)
str) r3 rd) r2 rn) ,)
- lret,
-
-pc to lblrel \ r0=abs addr -- r0=rel offset | preserves r6
- lblhere r1 pc@>reg,
- sub) r0 rdn) r1 rm) ,)
- mov) r0 rd) r0 rm) 2 lsr) ,)
- sub) r0 rdn) 2 i) ,)
- bic) r0 rn) $ff000000 i) ,)
- lret,
+ exit,
xcode align4 ( n -- )
lblhere r0 pc>reg,
@@ -352,34 +348,35 @@ xcode align4 ( n -- )
sub) ne) r1 rdn) r2 rm) ,)
add) ne) r1 rdn) 4 i) ,)
str) ne) r1 rd) r0 rn) ,)
- xdrop, lret,
+ xdrop, exit,
pc to lblnextmeta 0 le,
lblnextmeta xaddr nextmeta
-xcode entry ( 'dict s -- )
+xcode entry pushret, ( 'dict s -- )
mov) r6 rd) rTOP rm) ,)
ldr) r5 rd) r6 rn) 8b) 1 +i) post) ,) \ r7=a r6=len
add) rTOP rd) r5 rn) 1 i) ,) \ rTOP=len+1
wcall, align4 \ rTOP='dict
mov) r0 rd) r6 rm) ,)
mov) r1 rd) r5 rm) ,)
- lblwriterange abscall,
+ lblwriterange execute,
mov) r0 rd) r5 rm) ,)
- lblcwrite abscall,
+ lblcwrite execute,
lblnextmeta r0 pc>reg,
ldr) r0 rdn) ,)
- lbldwrite abscall,
+ lbldwrite execute,
ldr) r0 rd) rTOP rn) ,) \ r0=dict
lblhere r1 pc>reg,
ldr) r1 rdn) ,)
str) r1 rd) rTOP rn) ,) \ "here" is new sysdict
+ popret,
lbldwrite abs>rel b) ,)
-xcode code
+xcode code pushret,
wcall, sysdict
wcall, word
- wjmp, entry
+ popret, wjmp, entry
\ HAL operands
HALBASE rTOP + xconst W)
@@ -395,26 +392,26 @@ xcode m) ( a -- operand )
lblhbank r0 pc>reg,
str) rTOP rd) r0 rn) ,)
( pc ) rTOP pc@>reg,
- lret,
+ exit,
xcode +) ( operand n -- operand )
lblhbank r0 pc>reg,
str) rTOP rd) r0 rn) ,)
xdrop,
- lret,
+ exit,
xcode 8b) ( operand -- operand )
orr) rTOP rdn) $00400000 i) ,)
- lret,
+ exit,
xcode 16b) ( operand -- operand )
bic) rTOP rdn) $04000000 i) ,)
- lret,
+ exit,
xcode 32b) ( operand -- operand )
bic) rTOP rdn) $00400000 i) ,)
orr) rTOP rdn) $04000000 i) ,)
- lret,
+ exit,
\ HAL operations
\ r0 is used as the immediate accumulator
@@ -438,22 +435,26 @@ xcode ps+, ( n -- )
( pc ) r0 pc@>reg,
L1 abs>rel b) ,)
-pc lret,
+pc pushret,
+xcode pushret,
+ ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,)
+
+pc popret,
+xcode popret,
+ ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,)
+
+pc exit,
xcode exit,
- ( pc ) r0 pc@>reg,
- lbldwrite abs>rel b) ,)
+ ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,)
-pc rLR push, rLR pop,
xcode execute, ( w -- )
- ( pc ) r6 pc>reg,
- ldr) r0 rd) r6 rn) 4 +i) post) ,)
- lbldwrite abscall,
- mov) r0 rd) rTOP rm) ,)
- xdrop,
- lblrel abscall, \ r0=offset
+ lblhere r1 pc@>reg,
+ sub) r0 rd) rTOP rn) r1 rm) ,)
+ mov) r0 rd) r0 rm) 2 lsr) ,)
+ sub) r0 rdn) 2 i) ,)
+ bic) r0 rn) $ff000000 i) ,)
orr) r0 rdn) $eb000000 i) ,)
- lbldwrite abscall,
- ldr) r0 rd) r6 rn) ,)
+ xdrop,
lbldwrite abs>rel b) ,)
pc xdup,
@@ -461,36 +462,37 @@ xcode dup,
( pc ) r0 pc@>reg,
lbldwrite abs>rel b) ,)
-xcode ; ximm
+xcode ; ximm pushret,
+ wcall, popret,
wcall, exit,
- wjmp, [
+ popret, wjmp, [
-pc to L2 ( str -- w ) \ find in sys dict
+pc to L2 pushret, ( str -- w ) \ find in sys dict
wcall, curword
wcall, sysdict
wcall, find
teq) rTOP rn) 0 i) ,)
xwordlbl (wnf) abs>rel b) eq) ,)
- lret,
+ popret, exit,
xcode compword ( str -- )
- L2 abscall,
+ pushret, L2 execute, popret,
ldr) r0 rd) rTOP rn) 8b) 9 -i) ,)
tst) r0 rn) $80 i) ,)
xwordlbl execute abs>rel b) ne) ,) \ immediate? execute
\ compile word
wjmp, execute,
+pc lblcompiling le,
xcode runword ( str -- )
- wcall, compiling
- cmp) rTOP rn) 0 i) ,)
- xdrop,
+ ( pc ) r0 pc@>reg, ldr) r0 rdn) ,)
+ cmp) r0 rn) 0 i) ,)
xwordlbl compword abs>rel b) ne) ,)
- wcall, parse
+ pushret, wcall, parse popret,
cmp) rTOP rn) 0 i) ,)
xdrop,
mov) ne) rPC rd) rLR rm) ,) \ literal: nothing to do
- L2 abscall,
+ pushret, L2 execute, popret,
wjmp, execute
xcode uartinit
@@ -527,7 +529,7 @@ xcode uartinit
\ Enable UART0, receive & transfer part of UART.
mov) r1 rd) $300 i) ,) add) r1 rd) r1 rn) $01 i) ,)
str) r1 rd) r7 rn) UART0_CR +i) ,)
- lret,
+ exit,
xcode main pc w>e lblsysdict pc>addr le!
lblmain forward!
diff --git a/fs/xcomp/rpiboot.fs b/fs/xcomp/rpiboot.fs
@@ -1,4 +1,4 @@
-code : ] code ] ;
+code : pushret, ] code ] ;
code hey 42 ps+, exit,
code ho -12 ps+, exit,
uartinit prompt interactive!