duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit 0220d2fe683b8dd8fa264c2c1bff92080498f158
parent f3d3cf2505e0eee11f05b6eb6eaa1b0a54b3f26b
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Thu, 23 Mar 2023 20:22:40 -0400

halcc: array()

whew! that was a tough one.

Diffstat:
Mfs/comp/c/egen.fs | 140+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Mfs/comp/c/expr.fs | 69++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Mfs/comp/c/fgen.fs | 12+++++++++++-
Mfs/comp/c/glob.fs | 2++
Mfs/comp/c/ptype.fs | 6+++++-
Mfs/comp/c/type.fs | 8++++++--
Mfs/tests/comp/c/cc.fs | 2+-
Mfs/tests/comp/c/test2.c | 4++++
8 files changed, 152 insertions(+), 91 deletions(-)

diff --git a/fs/comp/c/egen.fs b/fs/comp/c/egen.fs @@ -2,8 +2,10 @@ require /sys/scratch.fs ?f<< /lib/math.fs ?f<< /comp/c/tok.fs +?f<< /comp/c/glob.fs ?f<< /comp/c/expr.fs ?f<< /comp/c/func.fs +?f<< /comp/c/ptype.fs : _err ( -- ) tokdbg abort" egen error" ; : _assert ( f -- ) not if _err then ; @@ -19,7 +21,7 @@ Arena :new structbind Arena _litarena \ Maximum size in bytes that a single literal can have $400 const MAXLITSZ -alias noop parseExpression ( tok -- res ) \ forward declaration +\ parseExpression forward declaration is in glob.fs, it's needed in ptype.fs alias noop parseFactor ( tok -- res ) \ forward declaration : unaryop doer ' , ' , does> ( res 'op -- res ) @@ -38,9 +40,66 @@ unaryop _not, _ ^ over Result :isW? if W+n, else over Result :hal# [+n], then ; : _++, 1 _ ; : _--, -1 _ ; -UOPSCNT wordtbl _tbl ( res -- res ) +UOPSCNT wordtbl uoptbl ( res -- res ) 'w _neg, 'w _not, 'w _err 'w _&, 'w _*, 'w _++, 'w _--, +\ ops that can freely swap their operands +: _prep ( left right -- left halop ) + dup Result :isW? if swap then over Result :?>W Result :hal# ; +: _*, _prep *, ; : _&, _prep and, ; : _^, _prep xor, ; : _|, _prep or, ; +: _&&, _prep and, ; +: _||, _prep or, W=0>Z, NZ) C>W, ; +: _+, ( left right -- res ) + over Result :*arisz over Result :*arisz <> if + over Result :*arisz 1 = if swap then \ left has mutiplier + over Result :*arisz log2 over Result :<<n then + _prep +, ; + +\ ops that can't freely swap their operands +: _prep ( left right -- left halop ) Result :?>A over Result :?>W ; +: _/, _prep /, ; : _%, _prep %, ; +: _<<, _prep <<, ; : _>>, _prep >>, ; + +: _-, ( left right -- res ) + over Result :*arisz over Result :*arisz over = _assert ( left right arisz ) + >r _prep -, r> log2 ?dup if over Result :>>n then ; + +: assign doer ' , does> @ ( left right w ) >r + over Result :hal# rot> r> execute ( lefthal res ) swap !, ; +assign _+=, _+, +assign _*=, _*, assign _/=, _/, assign _%=, _%, +assign _&=, _&, assign _^=, _^, assign _|=, _|, +assign _-=, _-, assign _/=, _/, assign _%=, _%, +assign _<<=, _<<, assign _>>=, _>>, + +: _=, tuck Result :?>W Result :hal# !, ; + +\ To avoid W juggling, we check if our right operand is W. If it is, no need +\ for juggling, all we need is to invert the condition we use. +: cmpop doer swap , , does> ( left right 'conds ) + over Result :isW? if CELLSZ + @ >r swap else @ >r then ( left right ) + Result :hal# over Result :?>W cmp, r> C>W, ; +Z) Z) cmpop _==, NZ) NZ) cmpop _!=, +<) >=) cmpop _<, <=) >) cmpop _<=, >) <=) cmpop _>, >=) <) cmpop _>=, + +\ Our implementation of "x ? y : z" suffers a significant limitation because +\ we're single pass: by the time _? is called, it's possible that code +\ generating the right part of it has already been generated, so the "true" part +\ will always be ran, regardless of the choice. So, we go like this: we generate +\ the "true" hand, push it to PS, then generate the "cond", keep it in W. When +\ we encounter the "false" hand, *then* we generate conditional code which +\ cleans up PS. +: _?, ( left right -- res ) Result :?>W Result :?freeCurrentW ; +: _:, ( left right -- res ) + swap Result :>W$ PS- W=0>Z, 0 Z) branchC, + drop, [compile] else nip, over Result :>W [compile] then ; + +BOPSCNT wordtbl boptbl ( left right -- res ) +'w _+, 'w _-, 'w _*, 'w _/, 'w _%, 'w _<<, 'w _>>, 'w _<, +'w _>, 'w _<=, 'w _>=, 'w _==, 'w _!=, 'w _&, 'w _^, 'w _|, +'w _&&, 'w _||, 'w _=, 'w _+=, 'w _-=, 'w _*=, 'w _/=, 'w _%=, +'w _<<=, 'w _>>=, 'w _&=, 'w _^=, 'w _|=, 'w _?, 'w _:, + \ we gave to call "res" after having parsed its arguments. "(" is parsed. \ Arguments construction: basically, we can place arguments on PS in the order \ in which we parse them. Convenient. However, parseExpression can "leak" to @@ -58,6 +117,7 @@ UOPSCNT wordtbl _tbl ( res -- res ) else abort" TODO: dynamic calls" then ( cdecl ) rdrop r> ( psinitlvl ) to psoff Result currentW ?dup if PS- Result :release then + \ TODO: arilvl of fun rettype isn't properly preserved here CDecl type if PS+ Result :W else Result :none then ; : _incdec, ( res incsz -- res ) @@ -69,7 +129,8 @@ UOPSCNT wordtbl _tbl ( res -- res ) : parsePostfixOp ( res -- res ) nextt case ( ) '[' of isChar?^ \ x[y] is the equivalent of *(x+y) - abort" TODO []" endof + nextt parseExpression _+, dup Result :* + nextt ']' expectChar parsePostfixOp endof '(' of isChar?^ _funcall parsePostfixOp endof S" ->" of s= abort" TODO ->" endof '.' of isChar?^ abort" TODO ." endof @@ -78,6 +139,16 @@ UOPSCNT wordtbl _tbl ( res -- res ) r@ to nexttputback endcase ; +\ We need to parse the entire list before we begin writing to _litarena if we +\ want to support the possibility that some of these elements use _litarena +\ themselves (for example, string literals). *then*, we write. +MAXLITSZ Stack :new structbind Stack _list +: parseList ( -- res ) + _list :empty begin ( ) + nextt parseFactor Result :const# _list :push + ',' readChar? not until ( tok ) + '}' expectChar _list :self Result ARRAY Result :new ; + \ A factor can be: \ 1. A constant \ 2. A lvalue @@ -103,7 +174,7 @@ UOPSCNT wordtbl _tbl ( res -- res ) drop 1+ 0 c, \ null terminated else ccputback here over - 1- over c! then ( saddr ) _litarena :] drop ( "a ) Result :const endof - '{' of isChar?^ abort" TODO" endof + '{' of isChar?^ parseList endof S" pspop" of s= read( read) Result :?freeCurrentW 0 PSP+) @, PS- Result :W parsePostfixOp endof @@ -111,70 +182,13 @@ UOPSCNT wordtbl _tbl ( res -- res ) S" sizeof" of s= read( nextt parseType _assert typesize Result :const read) endof of uopid ( opid ) - nextt parseFactor ( opid res ) _tbl rot wexec endof + nextt parseFactor ( opid res ) uoptbl rot wexec endof of isIdent? \ lvalue, FunCall or macro r@ findIdent ?dup _assert Result :cdecl parsePostfixOp endof r@ parse if Result :const else _err then endcase ; current ' parseFactor realias -\ ops that can freely swap their operands -: _prep ( left right -- left halop ) - dup Result :isW? if swap then over Result :?>W Result :hal# ; -: _*, _prep *, ; : _&, _prep and, ; : _^, _prep xor, ; : _|, _prep or, ; -: _&&, _prep and, ; -: _||, _prep or, W=0>Z, NZ) C>W, ; -: _+, ( left right -- res ) - over Result :*arisz over Result :*arisz <> if - over Result :*arisz 1 = if swap then \ left has mutiplier - over Result :*arisz log2 over Result :<<n then - _prep +, ; - -\ ops that can't freely swap their operands -: _prep ( left right -- left halop ) Result :?>A over Result :?>W ; -: _/, _prep /, ; : _%, _prep %, ; -: _<<, _prep <<, ; : _>>, _prep >>, ; - -: _-, ( left right -- res ) - over Result :*arisz over Result :*arisz over = _assert ( left right arisz ) - >r _prep -, r> log2 ?dup if over Result :>>n then ; - -: assign doer ' , does> @ ( left right w ) >r - over Result :hal# rot> r> execute ( lefthal res ) swap !, ; -assign _+=, _+, -assign _*=, _*, assign _/=, _/, assign _%=, _%, -assign _&=, _&, assign _^=, _^, assign _|=, _|, -assign _-=, _-, assign _/=, _/, assign _%=, _%, -assign _<<=, _<<, assign _>>=, _>>, - -: _=, tuck Result :?>W Result :hal# !, ; - -\ To avoid W juggling, we check if our right operand is W. If it is, no need -\ for juggling, all we need is to invert the condition we use. -: cmpop doer swap , , does> ( left right 'conds ) - over Result :isW? if CELLSZ + @ >r swap else @ >r then ( left right ) - Result :hal# over Result :?>W cmp, r> C>W, ; -Z) Z) cmpop _==, NZ) NZ) cmpop _!=, -<) >=) cmpop _<, <=) >) cmpop _<=, >) <=) cmpop _>, >=) <) cmpop _>=, - -\ Our implementation of "x ? y : z" suffers a significant limitation because -\ we're single pass: by the time _? is called, it's possible that code -\ generating the right part of it has already been generated, so the "true" part -\ will always be ran, regardless of the choice. So, we go like this: we generate -\ the "true" hand, push it to PS, then generate the "cond", keep it in W. When -\ we encounter the "false" hand, *then* we generate conditional code which -\ cleans up PS. -: _?, ( left right -- res ) Result :?>W Result :?freeCurrentW ; -: _:, ( left right -- res ) - swap Result :>W$ PS- W=0>Z, 0 Z) branchC, - drop, [compile] else nip, over Result :>W [compile] then ; - -BOPSCNT wordtbl _tbl ( left right -- res ) -'w _+, 'w _-, 'w _*, 'w _/, 'w _%, 'w _<<, 'w _>>, 'w _<, -'w _>, 'w _<=, 'w _>=, 'w _==, 'w _!=, 'w _&, 'w _^, 'w _|, -'w _&&, 'w _||, 'w _=, 'w _+=, 'w _-=, 'w _*=, 'w _/=, 'w _%=, -'w _<<=, 'w _>>=, 'w _&=, 'w _^=, 'w _|=, 'w _?, 'w _:, - : bothconst? ( left right -- f ) Result :isconst? swap Result :isconst? and ; : ?constApply ( left right opid -- left right opid 0 | res 1 ) >r \ V1=opid @@ -189,7 +203,7 @@ BOPSCNT wordtbl _tbl ( left right -- res ) dup if rdrop else r> swap then ; : applyBinop ( left right opid -- res ) - ?constApply not if _tbl swap wexec then ; + ?constApply not if boptbl swap wexec then ; \ Parse the "right" part of an expression with the leftmost factor and leftmost \ binary operator already parsed. diff --git a/fs/comp/c/expr.fs b/fs/comp/c/expr.fs @@ -14,31 +14,62 @@ of 0>= r@ for A>) @, A) next drop A) endof _err endcase ; +\ Operation levels vs Arithmetic levels +\ We track two types of indirection levels here. First, there's the "operations" +\ level, that is, where to get the damn value at the end. This is independent of +\ indirection levels in the declarations. It is only affected by & and * +\ operators applied within expressions. Therefore, Results begin their lives +\ at "oplvl" 0. There's one exception: Arrays living in the stack frame start +\ at lvl -1 because the pointer is RSP+offset without indirection. +\ When comes the time to resolve the oplvl, we first check if it's -1, in which +\ case we resolve it with a "lea,". This only works on CDECL results. Less than +\ -1 is an error. For positive oplvl, we move to W and repeatedly call @,. For +\ more efficiency, we bundle oplvl in 2s and use [@], when possible. +\ Arithmetic levels serve a different purpose: to know when we need to apply +\ pointer arithmetics. For this, we take CDecl's base lvl (if it's an array, +\ this adds 1 to this level) subtract Result's lvl from it. +\ This is the "arilvl". If that level is 0, then its "pointer arithmetics +\ multiplier" is 1. If the level is 1, then the multiplier is the size of the +\ base type. Otherwise, the multiplier is 4 (size of a pointer). Negative is an +\ error. The arilvl has no effect on how the fetching of the actual value +\ occurs. struct[ Result 0 const NONE \ Nothing (probably a released W) 1 const CONST \ Is a constant (value in arg) 2 const W \ Value in W register 3 const CDECL \ CDecl pointer is in arg. 4 const PS \ Result pushed to PS, offset in arg + 5 const ARRAY \ Result is a constant array in a Stack. arg is a pointer to it. sfield type sfield arg - sfield lvl \ indirection levels (*) that have been applied within the - \ expression (not at declaration). This is only used with CDECL - \ type. On the W type, * indirections are applied directly. + sfield lvl \ lvl changed applied within the expression + sfield arilvl \ offset to apply to lvl to get the arithmetic level \ There can only be one result using W at once. Whenever a W result is \ created, it takes the lock. If it's already taken, there's an error. 0 value currentW \ link to Result : :Wfree# currentW if abort" W is already taken!" then ; - : :new ( arg type -- res ) SZ syspad :allot dup >r !+ !+ 0 swap ! r> ; + : :new ( arg type -- res ) SZ syspad :[ , , 0 , 0 , syspad :] ; : :none ( -- res ) 0 NONE :new ; : :const ( n -- res ) CONST :new ; : :W ( -- res ) :Wfree# 0 W :new dup to currentW ; - : :cdecl ( cdecl -- res ) CDECL :new ; - : :isW? ( self -- f ) type W = ; : :release ( self -- ) dup :isW? if 0 to currentW then NONE swap to type ; + : :>PS + dup :isW? _assert dup :release + dup, PS+ PS over to type psoff neg swap to arg ; + : :?freeCurrentW ( -- ) currentW ?dup if :>PS then ; + : :iscdecl? ( self -- f ) type CDECL = ; + : :W! ( self -- ) dup to currentW W swap to type ; + : :& ( self -- ) -1 swap to+ lvl ; + : :cdecl ( cdecl -- res ) + dup CDECL :new ( cdecl res ) + over bi CDecl lvl | CDecl nbelem bool + over to arilvl ( cdecl res ) + swap bi CDecl nbelem | CDecl :isvar? and? if + dup :& -1 over to+ arilvl then ; + + : :cdecl# dup :iscdecl? _assert arg ; : :hal# ( self -- halop ) dup type case ( self ) CONST of = arg i) endof CDECL of = bi arg CDecl :halop | lvl applylvl endof @@ -53,35 +84,27 @@ struct[ Result : :iszero? bi arg 0 = | :isconst? and ; : :isone? bi arg 1 = | :isconst? and ; : :const# dup :isconst? _assert arg ; - : :iscdecl? ( self -- f ) type CDECL = ; - : :cdecl# dup :iscdecl? _assert arg ; - : :>PS - dup :isW? _assert dup :release - dup, PS+ PS over to type psoff neg swap to arg ; - : :?freeCurrentW ( -- ) currentW ?dup if :>PS then ; : :?>W dup :isW? if drop else :?freeCurrentW :>W then ; : :?>W$ dup :?>W :release ; \ Free up W by sending it to A if needed. : :?>A ( self -- halop ) - dup :isW? if dup :release W>A, A*) else :hal# then ; + dup :isW? if :release W>A, A*) else :hal# then ; : :* ( self -- ) - dup :isW? if W) @, else 1 swap to+ lvl then ; - : :& ( self -- ) -1 swap to+ lvl ; + dup :isW? if W) @, then 1 swap to+ lvl ; : :<<n ( n self -- ) dup :isconst? if dup arg rot lshift swap to arg else :?>W i) <<, then ; : :>>n ( n self -- ) dup :isconst? if dup arg rot rshift swap to arg else :?>W i) >>, then ; - : :arilvl bi :cdecl# CDecl lvl | lvl - ; - \ Return the "pointer arithmetics" multiplier to apply to the "other" operand. - \ If we're a lvl 1 pointer, return the size of the underlying type, otherwise - \ return 1. - : :*arisz ( self -- n ) - dup :iscdecl? if dup :arilvl case + : :arilvl bi arilvl | lvl - ; + : :*arisz ( self -- n ) \ pointer arithmetics multiplier + dup :arilvl case 0 of = drop 1 endof - 1 of = :cdecl# CDecl type typesize endof - drop 4 endcase else drop 1 then ; + \ TODO: make cdecl typesize follow when a :& converts the result. + \ Hardcoding to 4 is bad... + 1 of = dup :iscdecl? if :cdecl# CDecl type typesize else drop 4 then endof + drop 4 endcase ; ]struct BOPSCNT wordtbl _tbl ( a b -- n ) diff --git a/fs/comp/c/fgen.fs b/fs/comp/c/fgen.fs @@ -51,13 +51,23 @@ current ' parseStatement realias \ and we jump to it after we've created the stack frame. 0 value _initcode +: _, ( sz -- ) case 1 of = c, endof 2 of = 16b , endof , endcase ; +\ array is a Stack +: _copyArray ( array cdecl -- ) + dup CDecl :elemsize >r >r >r \ V1=sz V2=cdecl V3=array + V3 Stack :count dup V1 * tuck _litarena :[ ( arraysz cnt ) + r> Stack :buf( swap for ( a ) @+ V1 _, next drop _litarena :] ( arraysz a ) + litn r> dup, CDecl :halop lea, litn compile move rdrop ; + : parseDeclLine ( type -- ) parseDeclarator ( cdecl ) dup _locvars ?dup if CDecl :append else to _locvars then begin ( cdecl ) '=' readChar? if ( cdecl ) _initcode not if here to _initcode then nextt parseExpression ( cdecl res ) - Result :>W$ dup CDecl :halop !, psneutral nextt then ( cdecl tok ) + dup Result type Result ARRAY = if Result arg over _copyArray else + Result :>W$ dup CDecl :halop !, then + psneutral nextt then ( cdecl tok ) dup ';' isChar? not while ( cdecl tok ) ',' expectChar CDecl type parseDeclarator ( cdecl ) dup _locvars CDecl :append repeat ( cdecl tok ) 2drop ; diff --git a/fs/comp/c/glob.fs b/fs/comp/c/glob.fs @@ -10,3 +10,5 @@ : PS- CELLSZ neg to+ psoff ; : PSP+) PSP) swap psoff + +) ; : psneutral 0 to@! psoff ?dup if ps+, then ; + +alias noop parseExpression ( tok -- res ) \ forward declaration diff --git a/fs/comp/c/ptype.fs b/fs/comp/c/ptype.fs @@ -1,5 +1,7 @@ \ Type parsing ?f<< /comp/c/tok.fs +?f<< /comp/c/glob.fs +?f<< /comp/c/expr.fs ?f<< /comp/c/type.fs : _err ( -- ) tokdbg abort" ptype error" ; @@ -28,7 +30,9 @@ alias _err parseDeclarator ( type -- cdecl ) \ forward declaration \ parsing after the identifier : _post ( cdecl -- cdecl ) begin ( cdecl ) nextt case - '[' of isChar?^ abort" TODO" endof + '[' of isChar?^ + nextt parseExpression Result :const# + nextt ']' expectChar ( cdecl nbelem ) over to CDecl nbelem endof '(' of isChar?^ dup CDecl :funcsig! STORAGE_PS to@! curstorage >r ')' readChar? not if ( cdecl tok ) diff --git a/fs/comp/c/type.fs b/fs/comp/c/type.fs @@ -80,8 +80,9 @@ struct[ CDecl 2 _f? :funcsig? 2 _f! :funcsig! 4 _f? :static? 4 _f! :static! 8 _f? :incomplete? 8 _f! :incomplete! - : :isarg? ( dnode -- f ) storage STORAGE_PS = ; - : :isglobal? ( dnode -- f ) storage STORAGE_MEM = ; + : :isarg? ( self -- f ) storage STORAGE_PS = ; + : :isvar? ( self -- f ) storage STORAGE_SF = ; + : :isglobal? ( self -- f ) storage STORAGE_MEM = ; : :halop ( self -- operand ) dup bi offset | storage case ( self offset ) STORAGE_SF of = RSP) swap +) endof @@ -96,6 +97,9 @@ struct[ CDecl tuck dup type _typesize swap nbelem 1 max * + swap llnext repeat ( res ) ; + \ When the CDecl is an array, return the size of a single element. + : :elemsize ( self -- size ) dup nbelem _assert type _typesize ; + : :argssize ( self -- size ) dup :funcsig? _assert llcnt 1- CELLSZ * ; : :offset! ( off self -- off+size ) 2dup to offset :size + ; diff --git a/fs/tests/comp/c/cc.fs b/fs/tests/comp/c/cc.fs @@ -34,8 +34,8 @@ exprparens 9 #eq cnoop ( no result! ) scnt 0 #eq 42 ptrari 50 #eq 42 50 ptrari2 2 #eq -testend \s array 52 #eq +testend \s global 1234 #eq globalinc 1236 #eq globalinc 1238 #eq diff --git a/fs/tests/comp/c/test2.c b/fs/tests/comp/c/test2.c @@ -135,3 +135,7 @@ int* ptrari(int *x) { int ptrari2(int *lo, int *hi) { return hi-lo; } +int array() { + int a[3] = {42, 12, 2}; + return *a + a[1] - *(a+2); +}