duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit ab86276371d59e7dcba011cb62c31b822b07c454
parent 2ef45a52938441b29214fcc8803f46b2ae39007c
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Mon, 29 May 2023 15:29:01 -0400

rpi: change call conventions

See "pushret, and popret," section in doc/hal.

Diffstat:
Mfs/asm/arm.fs | 3---
Mfs/doc/hal.txt | 39+++++++++++++++++++++++++++++++++++++++
Mfs/xcomp/arm/rpi/kernel.fs | 138++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mfs/xcomp/rpiboot.fs | 2+-
4 files changed, 110 insertions(+), 72 deletions(-)

diff --git a/fs/asm/arm.fs b/fs/asm/arm.fs @@ -82,13 +82,10 @@ $90 al) const mul) \ Macros -: lret, ( -- ) mov) rPC rd) rLR rm) ,) ; : push, ( r -- ) str) swap rd) rSP rn) CELLSZ -i) pre) !) ,) ; : pop, ( r -- ) ldr) swap rd) rSP rn) CELLSZ +i) post) ,) ; : ppush, ( r -- ) str) swap rd) rPSP rn) CELLSZ -i) pre) !) ,) ; : ppop, ( r -- ) ldr) swap rd) rPSP rn) CELLSZ +i) post) ,) ; -: call, ( rel -- ) rLR push, CELLSZ - bl) ,) rLR pop, ; -: abscall, ( tgt -- ) abs>rel call, ; \ HAL diff --git a/fs/doc/hal.txt b/fs/doc/hal.txt @@ -176,6 +176,41 @@ All addresses passed to branching words are absolute addresses. If the native instructions use relative branching addressing, the HAL takes care of the translation. +## pushret, and popret, + +In Dusk, "Call" means "Push the address of the instruction following the current +one to RSP, and then jump to the address being called". "Return" means "Pop RSP +and jump to that address". + +On "traditional" CPU architectures, this maps exactly to the behavior of the +native "call" and "return" instructions, so we can live a happy life of +blissful ignorance when using these CPUs. + +On some CPUs such as ARM, the native "call" model is to save the address we'll +want to return to to a register and leave the task of push/popping to RSP to the +programmer. + +Of course, one thing we could do is to simply wrap all calls and returns in Dusk +into RSP push/pop operation, but that would squander a wonderful speedup +opportunity: With such an approach to calling, we can avoid one push and one pop +on each "leaf" routine call, that is, on each call to a routine that doesn't +call any other routine. That adds up to quite a lot of pushes and pops. + +To grab this opportunity, the HAL has two words: pushret, and popret, + +On "traditional" CPUs, these are noops. On ARM, these words push and pop the +return address register to and from RSP. + +Words defined through "high level" mechanism such as ":" call those words +automatically, no need to worry. However, words created with "code" don't. + +This means that if you create such a word and that this word calls another word, +it needs to call "pushret," as a prelude and to call "popret," before it +returns. Leaf words don't need to do that, which makes them faster. + +NOTE: the ARM port is currently being written, so pushret, and popret, hasn't +been added everywhere it needs to yet. + ## Low HAL Operand words: @@ -214,6 +249,10 @@ branch! tgtaddr braddr -- branching. branchA, -- Branch to the address held in the A register. +pushret, -- + Push the current return address to RSP (on relevant CPUs) +popret, -- + Pop RSP in return address register (on relevant CPUs) Instructions: diff --git a/fs/xcomp/arm/rpi/kernel.fs b/fs/xcomp/arm/rpi/kernel.fs @@ -21,8 +21,12 @@ $44 const UART0_ICR : xdrop, rTOP ppop, ; : xgrow, sub) rPSP rdn) CELLSZ i) ,) ; : xdup, rTOP ppush, ; -: wcall, xwordlbl abscall, ; -: wjmp, xwordlbl abs>rel b) ,) ; +: pushret, rLR push, ; +: popret, rLR pop, ; +: exit, rLR bx) ,) ; +: execute, abs>rel bl) ,) ; +: wcall, xwordlbl execute, ; +: wjmp, xwordlbl abs>rel b) ,) ; \ only for leaf words! : delay, ( ncycles -- ) mov) r0 rd) swap ( ncycles ) i) ,) @@ -41,13 +45,13 @@ $44 const UART0_ICR rot mov) over rd) rot i) ,) ( n1 r ) add) swap rdn) swap i) ,) ; -: xaddr ( lbl -- ) xcode xdup, rTOP pc>reg, lret, ; -: xconst ( n -- ) pc swap le, xcode xdup, rTOP pc@>reg, lret, ; +: xaddr ( lbl -- ) xcode xdup, rTOP pc>reg, exit, ; +: xconst ( n -- ) pc swap le, xcode xdup, rTOP pc@>reg, exit, ; : values ( n -- ) for 0 value next ; -13 values lblcurword lblsysdict lblhere lblnextmeta lblcompiling lblbootptr +12 values lblcurword lblsysdict lblhere lblnextmeta lblcompiling lblbootptr lblhbank - lblcwrite lbldwrite lblwriterange lblrel lblerrmsg lblmain + lblcwrite lbldwrite lblwriterange lblerrmsg lblmain $8000 to binstart binstart const RSTOP RSTOP $1000 - const PSTOP @@ -61,14 +65,14 @@ HERESTART xconst herestart pc to lblcompiling 0 le, xcode compiling xdup, lblcompiling rTOP pc@>reg, - lret, + exit, xcode [ ximm mov) r0 rd) 0 i) ,) pc lblcompiling rTOP pc>reg, str) r0 rd) rTOP rn) ,) - lret, + exit, xcode ] mov) r0 rd) 1 i) ,) @@ -88,7 +92,7 @@ xcode bye 0 b) ,) pc to L1 \ fail mov) rTOP rd) 0 i) ,) - lret, + exit, \ parse char pc to L2 \ rTOP=a-with-'-skipped r0=u @@ -101,7 +105,7 @@ pc to L2 \ rTOP=a-with-'-skipped r0=u and) r0 rdn) $ff i) ,) r0 ppush, mov) rTOP rd) 1 i) ,) - lret, + exit, \ parse hexadecimal pc to L3 \ rTOP=a-with-$-skipped r0=u @@ -127,7 +131,7 @@ L4 forward! \ parse ok ( loop ) abs>rel b) nz) ,) r2 ppush, mov) rTOP rd) 1 i) ,) - lret, + exit, \ parse unsigned decimal pc to L4 \ rTOP=a+1 r1=first-char r0=u @@ -151,7 +155,7 @@ pc \ loop rsb) z) r2 rdn) 0 i) ,) \ negate r2 ppush, mov) rTOP rd) 1 i) ,) - lret, + exit, xcode parse ( str -- n? f ) ldr) r0 rd) rTOP rn) 8b) 1 +i) post) ,) \ rTOP=a r0=u @@ -170,7 +174,7 @@ xcode emit ( c -- ) ( pc ) abs>rel b) ne) ,) str) rTOP rd) r7 rn) UART0_DR +i) ,) xdrop, - lret, + exit, xcode key ( -- c ) pc @@ -179,9 +183,9 @@ xcode key ( -- c ) ( pc ) abs>rel b) ne) ,) xdup, ldr) rTOP rd) r7 rn) UART0_DR +i) 8b) ,) - lret, + exit, -xcode rtype ( a u -- ) +xcode rtype pushret, ( a u -- ) r1 ppop, mov) r2 rd) rTOP rm) ,) \ r1=a r2=u pc ldr) rTOP rd) r1 rn) 8b) 1 +i) post) ,) @@ -190,7 +194,7 @@ xcode rtype ( a u -- ) sub) r2 rdn) 1 i) f) ,) ( pc ) abs>rel b) ne) ,) xdrop, - lret, + popret, exit, xcode stype ( str -- ) ldr) r0 rd) rTOP rn) 8b) 1 +i) post) ,) @@ -211,7 +215,7 @@ xcode boot< xwordlbl key abs>rel b) eq) ,) \ until we have a proper "realias" xdup, ldr) rTOP rd) r1 rn) 8b) 1 +i) post) ,) str) r1 rd) r0 rn) ,) - lret, + exit, xcode in< wjmp, boot< @@ -219,12 +223,12 @@ xcode interactive! \ until we have a proper "realias" lblbootptr r0 pc>reg, mov) r1 rd) 0 i) ,) str) r1 rd) r0 rn) ,) - lret, + exit, pc to lblcurword $20 allot0 lblcurword xaddr curword -xcode word ( -- str ) +xcode word pushret, ( -- str ) xdup, pc wcall, in< @@ -242,7 +246,7 @@ xcode word ( -- str ) ( pc ) abs>rel b) hi) ,) lblcurword rTOP pc>reg, str) r6 rd) rTOP rn) 8b) ,) - lret, + popret, exit, pc ," word not found" 0 align4 xcode (wnf) @@ -300,13 +304,13 @@ pc \ loop2 ( loop2 ) abs>rel b) ne) ,) \ same contents add) rTOP rdn) 4 i) ,) \ e>w - lret, + exit, L2 forward! L1 forward! \ not matching, try next ldr) rTOP rdn) 0 +i) ,) cmp) rTOP rn) 0 i) ,) ( loop1 ) abs>rel b) ne) ,) \ not found - lret, + exit, pc to lblhere HERESTART le, lblhere xaddr here @@ -316,14 +320,14 @@ pc to lblcwrite \ r0=char ldr) r1 rd) r2 rn) ,) str) r0 rd) r1 rn) 8b) 1 +i) post) ,) str) r1 rd) r2 rn) ,) - lret, + exit, pc to lbldwrite \ r0=n lblhere r2 pc>reg, ldr) r1 rd) r2 rn) ,) str) r0 rd) r1 rn) 4 +i) post) ,) str) r1 rd) r2 rn) ,) - lret, + exit, pc to lblwriterange \ r0=addr r1=len lblhere r2 pc>reg, @@ -334,15 +338,7 @@ pc to lblwriterange \ r0=addr r1=len sub) r1 rdn) 1 i) f) ,) ( pc ) abs>rel b) ne) ,) str) r3 rd) r2 rn) ,) - lret, - -pc to lblrel \ r0=abs addr -- r0=rel offset | preserves r6 - lblhere r1 pc@>reg, - sub) r0 rdn) r1 rm) ,) - mov) r0 rd) r0 rm) 2 lsr) ,) - sub) r0 rdn) 2 i) ,) - bic) r0 rn) $ff000000 i) ,) - lret, + exit, xcode align4 ( n -- ) lblhere r0 pc>reg, @@ -352,34 +348,35 @@ xcode align4 ( n -- ) sub) ne) r1 rdn) r2 rm) ,) add) ne) r1 rdn) 4 i) ,) str) ne) r1 rd) r0 rn) ,) - xdrop, lret, + xdrop, exit, pc to lblnextmeta 0 le, lblnextmeta xaddr nextmeta -xcode entry ( 'dict s -- ) +xcode entry pushret, ( 'dict s -- ) mov) r6 rd) rTOP rm) ,) ldr) r5 rd) r6 rn) 8b) 1 +i) post) ,) \ r7=a r6=len add) rTOP rd) r5 rn) 1 i) ,) \ rTOP=len+1 wcall, align4 \ rTOP='dict mov) r0 rd) r6 rm) ,) mov) r1 rd) r5 rm) ,) - lblwriterange abscall, + lblwriterange execute, mov) r0 rd) r5 rm) ,) - lblcwrite abscall, + lblcwrite execute, lblnextmeta r0 pc>reg, ldr) r0 rdn) ,) - lbldwrite abscall, + lbldwrite execute, ldr) r0 rd) rTOP rn) ,) \ r0=dict lblhere r1 pc>reg, ldr) r1 rdn) ,) str) r1 rd) rTOP rn) ,) \ "here" is new sysdict + popret, lbldwrite abs>rel b) ,) -xcode code +xcode code pushret, wcall, sysdict wcall, word - wjmp, entry + popret, wjmp, entry \ HAL operands HALBASE rTOP + xconst W) @@ -395,26 +392,26 @@ xcode m) ( a -- operand ) lblhbank r0 pc>reg, str) rTOP rd) r0 rn) ,) ( pc ) rTOP pc@>reg, - lret, + exit, xcode +) ( operand n -- operand ) lblhbank r0 pc>reg, str) rTOP rd) r0 rn) ,) xdrop, - lret, + exit, xcode 8b) ( operand -- operand ) orr) rTOP rdn) $00400000 i) ,) - lret, + exit, xcode 16b) ( operand -- operand ) bic) rTOP rdn) $04000000 i) ,) - lret, + exit, xcode 32b) ( operand -- operand ) bic) rTOP rdn) $00400000 i) ,) orr) rTOP rdn) $04000000 i) ,) - lret, + exit, \ HAL operations \ r0 is used as the immediate accumulator @@ -438,22 +435,26 @@ xcode ps+, ( n -- ) ( pc ) r0 pc@>reg, L1 abs>rel b) ,) -pc lret, +pc pushret, +xcode pushret, + ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,) + +pc popret, +xcode popret, + ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,) + +pc exit, xcode exit, - ( pc ) r0 pc@>reg, - lbldwrite abs>rel b) ,) + ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,) -pc rLR push, rLR pop, xcode execute, ( w -- ) - ( pc ) r6 pc>reg, - ldr) r0 rd) r6 rn) 4 +i) post) ,) - lbldwrite abscall, - mov) r0 rd) rTOP rm) ,) - xdrop, - lblrel abscall, \ r0=offset + lblhere r1 pc@>reg, + sub) r0 rd) rTOP rn) r1 rm) ,) + mov) r0 rd) r0 rm) 2 lsr) ,) + sub) r0 rdn) 2 i) ,) + bic) r0 rn) $ff000000 i) ,) orr) r0 rdn) $eb000000 i) ,) - lbldwrite abscall, - ldr) r0 rd) r6 rn) ,) + xdrop, lbldwrite abs>rel b) ,) pc xdup, @@ -461,36 +462,37 @@ xcode dup, ( pc ) r0 pc@>reg, lbldwrite abs>rel b) ,) -xcode ; ximm +xcode ; ximm pushret, + wcall, popret, wcall, exit, - wjmp, [ + popret, wjmp, [ -pc to L2 ( str -- w ) \ find in sys dict +pc to L2 pushret, ( str -- w ) \ find in sys dict wcall, curword wcall, sysdict wcall, find teq) rTOP rn) 0 i) ,) xwordlbl (wnf) abs>rel b) eq) ,) - lret, + popret, exit, xcode compword ( str -- ) - L2 abscall, + pushret, L2 execute, popret, ldr) r0 rd) rTOP rn) 8b) 9 -i) ,) tst) r0 rn) $80 i) ,) xwordlbl execute abs>rel b) ne) ,) \ immediate? execute \ compile word wjmp, execute, +pc lblcompiling le, xcode runword ( str -- ) - wcall, compiling - cmp) rTOP rn) 0 i) ,) - xdrop, + ( pc ) r0 pc@>reg, ldr) r0 rdn) ,) + cmp) r0 rn) 0 i) ,) xwordlbl compword abs>rel b) ne) ,) - wcall, parse + pushret, wcall, parse popret, cmp) rTOP rn) 0 i) ,) xdrop, mov) ne) rPC rd) rLR rm) ,) \ literal: nothing to do - L2 abscall, + pushret, L2 execute, popret, wjmp, execute xcode uartinit @@ -527,7 +529,7 @@ xcode uartinit \ Enable UART0, receive & transfer part of UART. mov) r1 rd) $300 i) ,) add) r1 rd) r1 rn) $01 i) ,) str) r1 rd) r7 rn) UART0_CR +i) ,) - lret, + exit, xcode main pc w>e lblsysdict pc>addr le! lblmain forward! diff --git a/fs/xcomp/rpiboot.fs b/fs/xcomp/rpiboot.fs @@ -1,4 +1,4 @@ -code : ] code ] ; +code : pushret, ] code ] ; code hey 42 ps+, exit, code ho -12 ps+, exit, uartinit prompt interactive!