commit 0220d2fe683b8dd8fa264c2c1bff92080498f158
parent f3d3cf2505e0eee11f05b6eb6eaa1b0a54b3f26b
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Thu, 23 Mar 2023 20:22:40 -0400
halcc: array()
whew! that was a tough one.
Diffstat:
8 files changed, 152 insertions(+), 91 deletions(-)
diff --git a/fs/comp/c/egen.fs b/fs/comp/c/egen.fs
@@ -2,8 +2,10 @@
require /sys/scratch.fs
?f<< /lib/math.fs
?f<< /comp/c/tok.fs
+?f<< /comp/c/glob.fs
?f<< /comp/c/expr.fs
?f<< /comp/c/func.fs
+?f<< /comp/c/ptype.fs
: _err ( -- ) tokdbg abort" egen error" ;
: _assert ( f -- ) not if _err then ;
@@ -19,7 +21,7 @@ Arena :new structbind Arena _litarena
\ Maximum size in bytes that a single literal can have
$400 const MAXLITSZ
-alias noop parseExpression ( tok -- res ) \ forward declaration
+\ parseExpression forward declaration is in glob.fs, it's needed in ptype.fs
alias noop parseFactor ( tok -- res ) \ forward declaration
: unaryop doer ' , ' , does> ( res 'op -- res )
@@ -38,9 +40,66 @@ unaryop _not, _ ^
over Result :isW? if W+n, else over Result :hal# [+n], then ;
: _++, 1 _ ; : _--, -1 _ ;
-UOPSCNT wordtbl _tbl ( res -- res )
+UOPSCNT wordtbl uoptbl ( res -- res )
'w _neg, 'w _not, 'w _err 'w _&, 'w _*, 'w _++, 'w _--,
+\ ops that can freely swap their operands
+: _prep ( left right -- left halop )
+ dup Result :isW? if swap then over Result :?>W Result :hal# ;
+: _*, _prep *, ; : _&, _prep and, ; : _^, _prep xor, ; : _|, _prep or, ;
+: _&&, _prep and, ;
+: _||, _prep or, W=0>Z, NZ) C>W, ;
+: _+, ( left right -- res )
+ over Result :*arisz over Result :*arisz <> if
+ over Result :*arisz 1 = if swap then \ left has mutiplier
+ over Result :*arisz log2 over Result :<<n then
+ _prep +, ;
+
+\ ops that can't freely swap their operands
+: _prep ( left right -- left halop ) Result :?>A over Result :?>W ;
+: _/, _prep /, ; : _%, _prep %, ;
+: _<<, _prep <<, ; : _>>, _prep >>, ;
+
+: _-, ( left right -- res )
+ over Result :*arisz over Result :*arisz over = _assert ( left right arisz )
+ >r _prep -, r> log2 ?dup if over Result :>>n then ;
+
+: assign doer ' , does> @ ( left right w ) >r
+ over Result :hal# rot> r> execute ( lefthal res ) swap !, ;
+assign _+=, _+,
+assign _*=, _*, assign _/=, _/, assign _%=, _%,
+assign _&=, _&, assign _^=, _^, assign _|=, _|,
+assign _-=, _-, assign _/=, _/, assign _%=, _%,
+assign _<<=, _<<, assign _>>=, _>>,
+
+: _=, tuck Result :?>W Result :hal# !, ;
+
+\ To avoid W juggling, we check if our right operand is W. If it is, no need
+\ for juggling, all we need is to invert the condition we use.
+: cmpop doer swap , , does> ( left right 'conds )
+ over Result :isW? if CELLSZ + @ >r swap else @ >r then ( left right )
+ Result :hal# over Result :?>W cmp, r> C>W, ;
+Z) Z) cmpop _==, NZ) NZ) cmpop _!=,
+<) >=) cmpop _<, <=) >) cmpop _<=, >) <=) cmpop _>, >=) <) cmpop _>=,
+
+\ Our implementation of "x ? y : z" suffers a significant limitation because
+\ we're single pass: by the time _? is called, it's possible that code
+\ generating the right part of it has already been generated, so the "true" part
+\ will always be ran, regardless of the choice. So, we go like this: we generate
+\ the "true" hand, push it to PS, then generate the "cond", keep it in W. When
+\ we encounter the "false" hand, *then* we generate conditional code which
+\ cleans up PS.
+: _?, ( left right -- res ) Result :?>W Result :?freeCurrentW ;
+: _:, ( left right -- res )
+ swap Result :>W$ PS- W=0>Z, 0 Z) branchC,
+ drop, [compile] else nip, over Result :>W [compile] then ;
+
+BOPSCNT wordtbl boptbl ( left right -- res )
+'w _+, 'w _-, 'w _*, 'w _/, 'w _%, 'w _<<, 'w _>>, 'w _<,
+'w _>, 'w _<=, 'w _>=, 'w _==, 'w _!=, 'w _&, 'w _^, 'w _|,
+'w _&&, 'w _||, 'w _=, 'w _+=, 'w _-=, 'w _*=, 'w _/=, 'w _%=,
+'w _<<=, 'w _>>=, 'w _&=, 'w _^=, 'w _|=, 'w _?, 'w _:,
+
\ we gave to call "res" after having parsed its arguments. "(" is parsed.
\ Arguments construction: basically, we can place arguments on PS in the order
\ in which we parse them. Convenient. However, parseExpression can "leak" to
@@ -58,6 +117,7 @@ UOPSCNT wordtbl _tbl ( res -- res )
else abort" TODO: dynamic calls" then ( cdecl )
rdrop r> ( psinitlvl ) to psoff
Result currentW ?dup if PS- Result :release then
+ \ TODO: arilvl of fun rettype isn't properly preserved here
CDecl type if PS+ Result :W else Result :none then ;
: _incdec, ( res incsz -- res )
@@ -69,7 +129,8 @@ UOPSCNT wordtbl _tbl ( res -- res )
: parsePostfixOp ( res -- res )
nextt case ( )
'[' of isChar?^ \ x[y] is the equivalent of *(x+y)
- abort" TODO []" endof
+ nextt parseExpression _+, dup Result :*
+ nextt ']' expectChar parsePostfixOp endof
'(' of isChar?^ _funcall parsePostfixOp endof
S" ->" of s= abort" TODO ->" endof
'.' of isChar?^ abort" TODO ." endof
@@ -78,6 +139,16 @@ UOPSCNT wordtbl _tbl ( res -- res )
r@ to nexttputback
endcase ;
+\ We need to parse the entire list before we begin writing to _litarena if we
+\ want to support the possibility that some of these elements use _litarena
+\ themselves (for example, string literals). *then*, we write.
+MAXLITSZ Stack :new structbind Stack _list
+: parseList ( -- res )
+ _list :empty begin ( )
+ nextt parseFactor Result :const# _list :push
+ ',' readChar? not until ( tok )
+ '}' expectChar _list :self Result ARRAY Result :new ;
+
\ A factor can be:
\ 1. A constant
\ 2. A lvalue
@@ -103,7 +174,7 @@ UOPSCNT wordtbl _tbl ( res -- res )
drop 1+ 0 c, \ null terminated
else ccputback here over - 1- over c! then ( saddr )
_litarena :] drop ( "a ) Result :const endof
- '{' of isChar?^ abort" TODO" endof
+ '{' of isChar?^ parseList endof
S" pspop" of s=
read( read) Result :?freeCurrentW 0 PSP+) @, PS-
Result :W parsePostfixOp endof
@@ -111,70 +182,13 @@ UOPSCNT wordtbl _tbl ( res -- res )
S" sizeof" of s=
read( nextt parseType _assert typesize Result :const read) endof
of uopid ( opid )
- nextt parseFactor ( opid res ) _tbl rot wexec endof
+ nextt parseFactor ( opid res ) uoptbl rot wexec endof
of isIdent? \ lvalue, FunCall or macro
r@ findIdent ?dup _assert Result :cdecl parsePostfixOp endof
r@ parse if Result :const else _err then
endcase ;
current ' parseFactor realias
-\ ops that can freely swap their operands
-: _prep ( left right -- left halop )
- dup Result :isW? if swap then over Result :?>W Result :hal# ;
-: _*, _prep *, ; : _&, _prep and, ; : _^, _prep xor, ; : _|, _prep or, ;
-: _&&, _prep and, ;
-: _||, _prep or, W=0>Z, NZ) C>W, ;
-: _+, ( left right -- res )
- over Result :*arisz over Result :*arisz <> if
- over Result :*arisz 1 = if swap then \ left has mutiplier
- over Result :*arisz log2 over Result :<<n then
- _prep +, ;
-
-\ ops that can't freely swap their operands
-: _prep ( left right -- left halop ) Result :?>A over Result :?>W ;
-: _/, _prep /, ; : _%, _prep %, ;
-: _<<, _prep <<, ; : _>>, _prep >>, ;
-
-: _-, ( left right -- res )
- over Result :*arisz over Result :*arisz over = _assert ( left right arisz )
- >r _prep -, r> log2 ?dup if over Result :>>n then ;
-
-: assign doer ' , does> @ ( left right w ) >r
- over Result :hal# rot> r> execute ( lefthal res ) swap !, ;
-assign _+=, _+,
-assign _*=, _*, assign _/=, _/, assign _%=, _%,
-assign _&=, _&, assign _^=, _^, assign _|=, _|,
-assign _-=, _-, assign _/=, _/, assign _%=, _%,
-assign _<<=, _<<, assign _>>=, _>>,
-
-: _=, tuck Result :?>W Result :hal# !, ;
-
-\ To avoid W juggling, we check if our right operand is W. If it is, no need
-\ for juggling, all we need is to invert the condition we use.
-: cmpop doer swap , , does> ( left right 'conds )
- over Result :isW? if CELLSZ + @ >r swap else @ >r then ( left right )
- Result :hal# over Result :?>W cmp, r> C>W, ;
-Z) Z) cmpop _==, NZ) NZ) cmpop _!=,
-<) >=) cmpop _<, <=) >) cmpop _<=, >) <=) cmpop _>, >=) <) cmpop _>=,
-
-\ Our implementation of "x ? y : z" suffers a significant limitation because
-\ we're single pass: by the time _? is called, it's possible that code
-\ generating the right part of it has already been generated, so the "true" part
-\ will always be ran, regardless of the choice. So, we go like this: we generate
-\ the "true" hand, push it to PS, then generate the "cond", keep it in W. When
-\ we encounter the "false" hand, *then* we generate conditional code which
-\ cleans up PS.
-: _?, ( left right -- res ) Result :?>W Result :?freeCurrentW ;
-: _:, ( left right -- res )
- swap Result :>W$ PS- W=0>Z, 0 Z) branchC,
- drop, [compile] else nip, over Result :>W [compile] then ;
-
-BOPSCNT wordtbl _tbl ( left right -- res )
-'w _+, 'w _-, 'w _*, 'w _/, 'w _%, 'w _<<, 'w _>>, 'w _<,
-'w _>, 'w _<=, 'w _>=, 'w _==, 'w _!=, 'w _&, 'w _^, 'w _|,
-'w _&&, 'w _||, 'w _=, 'w _+=, 'w _-=, 'w _*=, 'w _/=, 'w _%=,
-'w _<<=, 'w _>>=, 'w _&=, 'w _^=, 'w _|=, 'w _?, 'w _:,
-
: bothconst? ( left right -- f ) Result :isconst? swap Result :isconst? and ;
: ?constApply ( left right opid -- left right opid 0 | res 1 ) >r \ V1=opid
@@ -189,7 +203,7 @@ BOPSCNT wordtbl _tbl ( left right -- res )
dup if rdrop else r> swap then ;
: applyBinop ( left right opid -- res )
- ?constApply not if _tbl swap wexec then ;
+ ?constApply not if boptbl swap wexec then ;
\ Parse the "right" part of an expression with the leftmost factor and leftmost
\ binary operator already parsed.
diff --git a/fs/comp/c/expr.fs b/fs/comp/c/expr.fs
@@ -14,31 +14,62 @@
of 0>= r@ for A>) @, A) next drop A) endof
_err endcase ;
+\ Operation levels vs Arithmetic levels
+\ We track two types of indirection levels here. First, there's the "operations"
+\ level, that is, where to get the damn value at the end. This is independent of
+\ indirection levels in the declarations. It is only affected by & and *
+\ operators applied within expressions. Therefore, Results begin their lives
+\ at "oplvl" 0. There's one exception: Arrays living in the stack frame start
+\ at lvl -1 because the pointer is RSP+offset without indirection.
+\ When comes the time to resolve the oplvl, we first check if it's -1, in which
+\ case we resolve it with a "lea,". This only works on CDECL results. Less than
+\ -1 is an error. For positive oplvl, we move to W and repeatedly call @,. For
+\ more efficiency, we bundle oplvl in 2s and use [@], when possible.
+\ Arithmetic levels serve a different purpose: to know when we need to apply
+\ pointer arithmetics. For this, we take CDecl's base lvl (if it's an array,
+\ this adds 1 to this level) subtract Result's lvl from it.
+\ This is the "arilvl". If that level is 0, then its "pointer arithmetics
+\ multiplier" is 1. If the level is 1, then the multiplier is the size of the
+\ base type. Otherwise, the multiplier is 4 (size of a pointer). Negative is an
+\ error. The arilvl has no effect on how the fetching of the actual value
+\ occurs.
struct[ Result
0 const NONE \ Nothing (probably a released W)
1 const CONST \ Is a constant (value in arg)
2 const W \ Value in W register
3 const CDECL \ CDecl pointer is in arg.
4 const PS \ Result pushed to PS, offset in arg
+ 5 const ARRAY \ Result is a constant array in a Stack. arg is a pointer to it.
sfield type
sfield arg
- sfield lvl \ indirection levels (*) that have been applied within the
- \ expression (not at declaration). This is only used with CDECL
- \ type. On the W type, * indirections are applied directly.
+ sfield lvl \ lvl changed applied within the expression
+ sfield arilvl \ offset to apply to lvl to get the arithmetic level
\ There can only be one result using W at once. Whenever a W result is
\ created, it takes the lock. If it's already taken, there's an error.
0 value currentW \ link to Result
: :Wfree# currentW if abort" W is already taken!" then ;
- : :new ( arg type -- res ) SZ syspad :allot dup >r !+ !+ 0 swap ! r> ;
+ : :new ( arg type -- res ) SZ syspad :[ , , 0 , 0 , syspad :] ;
: :none ( -- res ) 0 NONE :new ;
: :const ( n -- res ) CONST :new ;
: :W ( -- res ) :Wfree# 0 W :new dup to currentW ;
- : :cdecl ( cdecl -- res ) CDECL :new ;
-
: :isW? ( self -- f ) type W = ;
: :release ( self -- ) dup :isW? if 0 to currentW then NONE swap to type ;
+ : :>PS
+ dup :isW? _assert dup :release
+ dup, PS+ PS over to type psoff neg swap to arg ;
+ : :?freeCurrentW ( -- ) currentW ?dup if :>PS then ;
+ : :iscdecl? ( self -- f ) type CDECL = ;
+ : :W! ( self -- ) dup to currentW W swap to type ;
+ : :& ( self -- ) -1 swap to+ lvl ;
+ : :cdecl ( cdecl -- res )
+ dup CDECL :new ( cdecl res )
+ over bi CDecl lvl | CDecl nbelem bool + over to arilvl ( cdecl res )
+ swap bi CDecl nbelem | CDecl :isvar? and? if
+ dup :& -1 over to+ arilvl then ;
+
+ : :cdecl# dup :iscdecl? _assert arg ;
: :hal# ( self -- halop ) dup type case ( self )
CONST of = arg i) endof
CDECL of = bi arg CDecl :halop | lvl applylvl endof
@@ -53,35 +84,27 @@ struct[ Result
: :iszero? bi arg 0 = | :isconst? and ;
: :isone? bi arg 1 = | :isconst? and ;
: :const# dup :isconst? _assert arg ;
- : :iscdecl? ( self -- f ) type CDECL = ;
- : :cdecl# dup :iscdecl? _assert arg ;
- : :>PS
- dup :isW? _assert dup :release
- dup, PS+ PS over to type psoff neg swap to arg ;
- : :?freeCurrentW ( -- ) currentW ?dup if :>PS then ;
: :?>W dup :isW? if drop else :?freeCurrentW :>W then ;
: :?>W$ dup :?>W :release ;
\ Free up W by sending it to A if needed.
: :?>A ( self -- halop )
- dup :isW? if dup :release W>A, A*) else :hal# then ;
+ dup :isW? if :release W>A, A*) else :hal# then ;
: :* ( self -- )
- dup :isW? if W) @, else 1 swap to+ lvl then ;
- : :& ( self -- ) -1 swap to+ lvl ;
+ dup :isW? if W) @, then 1 swap to+ lvl ;
: :<<n ( n self -- )
dup :isconst? if
dup arg rot lshift swap to arg else :?>W i) <<, then ;
: :>>n ( n self -- )
dup :isconst? if
dup arg rot rshift swap to arg else :?>W i) >>, then ;
- : :arilvl bi :cdecl# CDecl lvl | lvl - ;
- \ Return the "pointer arithmetics" multiplier to apply to the "other" operand.
- \ If we're a lvl 1 pointer, return the size of the underlying type, otherwise
- \ return 1.
- : :*arisz ( self -- n )
- dup :iscdecl? if dup :arilvl case
+ : :arilvl bi arilvl | lvl - ;
+ : :*arisz ( self -- n ) \ pointer arithmetics multiplier
+ dup :arilvl case
0 of = drop 1 endof
- 1 of = :cdecl# CDecl type typesize endof
- drop 4 endcase else drop 1 then ;
+ \ TODO: make cdecl typesize follow when a :& converts the result.
+ \ Hardcoding to 4 is bad...
+ 1 of = dup :iscdecl? if :cdecl# CDecl type typesize else drop 4 then endof
+ drop 4 endcase ;
]struct
BOPSCNT wordtbl _tbl ( a b -- n )
diff --git a/fs/comp/c/fgen.fs b/fs/comp/c/fgen.fs
@@ -51,13 +51,23 @@ current ' parseStatement realias
\ and we jump to it after we've created the stack frame.
0 value _initcode
+: _, ( sz -- ) case 1 of = c, endof 2 of = 16b , endof , endcase ;
+\ array is a Stack
+: _copyArray ( array cdecl -- )
+ dup CDecl :elemsize >r >r >r \ V1=sz V2=cdecl V3=array
+ V3 Stack :count dup V1 * tuck _litarena :[ ( arraysz cnt )
+ r> Stack :buf( swap for ( a ) @+ V1 _, next drop _litarena :] ( arraysz a )
+ litn r> dup, CDecl :halop lea, litn compile move rdrop ;
+
: parseDeclLine ( type -- )
parseDeclarator ( cdecl )
dup _locvars ?dup if CDecl :append else to _locvars then begin ( cdecl )
'=' readChar? if ( cdecl )
_initcode not if here to _initcode then
nextt parseExpression ( cdecl res )
- Result :>W$ dup CDecl :halop !, psneutral nextt then ( cdecl tok )
+ dup Result type Result ARRAY = if Result arg over _copyArray else
+ Result :>W$ dup CDecl :halop !, then
+ psneutral nextt then ( cdecl tok )
dup ';' isChar? not while ( cdecl tok )
',' expectChar CDecl type parseDeclarator ( cdecl )
dup _locvars CDecl :append repeat ( cdecl tok ) 2drop ;
diff --git a/fs/comp/c/glob.fs b/fs/comp/c/glob.fs
@@ -10,3 +10,5 @@
: PS- CELLSZ neg to+ psoff ;
: PSP+) PSP) swap psoff + +) ;
: psneutral 0 to@! psoff ?dup if ps+, then ;
+
+alias noop parseExpression ( tok -- res ) \ forward declaration
diff --git a/fs/comp/c/ptype.fs b/fs/comp/c/ptype.fs
@@ -1,5 +1,7 @@
\ Type parsing
?f<< /comp/c/tok.fs
+?f<< /comp/c/glob.fs
+?f<< /comp/c/expr.fs
?f<< /comp/c/type.fs
: _err ( -- ) tokdbg abort" ptype error" ;
@@ -28,7 +30,9 @@ alias _err parseDeclarator ( type -- cdecl ) \ forward declaration
\ parsing after the identifier
: _post ( cdecl -- cdecl )
begin ( cdecl ) nextt case
- '[' of isChar?^ abort" TODO" endof
+ '[' of isChar?^
+ nextt parseExpression Result :const#
+ nextt ']' expectChar ( cdecl nbelem ) over to CDecl nbelem endof
'(' of isChar?^
dup CDecl :funcsig! STORAGE_PS to@! curstorage >r
')' readChar? not if ( cdecl tok )
diff --git a/fs/comp/c/type.fs b/fs/comp/c/type.fs
@@ -80,8 +80,9 @@ struct[ CDecl
2 _f? :funcsig? 2 _f! :funcsig!
4 _f? :static? 4 _f! :static!
8 _f? :incomplete? 8 _f! :incomplete!
- : :isarg? ( dnode -- f ) storage STORAGE_PS = ;
- : :isglobal? ( dnode -- f ) storage STORAGE_MEM = ;
+ : :isarg? ( self -- f ) storage STORAGE_PS = ;
+ : :isvar? ( self -- f ) storage STORAGE_SF = ;
+ : :isglobal? ( self -- f ) storage STORAGE_MEM = ;
: :halop ( self -- operand ) dup bi offset | storage case ( self offset )
STORAGE_SF of = RSP) swap +) endof
@@ -96,6 +97,9 @@ struct[ CDecl
tuck dup type _typesize swap nbelem 1 max * +
swap llnext repeat ( res ) ;
+ \ When the CDecl is an array, return the size of a single element.
+ : :elemsize ( self -- size ) dup nbelem _assert type _typesize ;
+
: :argssize ( self -- size ) dup :funcsig? _assert llcnt 1- CELLSZ * ;
: :offset! ( off self -- off+size ) 2dup to offset :size + ;
diff --git a/fs/tests/comp/c/cc.fs b/fs/tests/comp/c/cc.fs
@@ -34,8 +34,8 @@ exprparens 9 #eq
cnoop ( no result! ) scnt 0 #eq
42 ptrari 50 #eq
42 50 ptrari2 2 #eq
-testend \s
array 52 #eq
+testend \s
global 1234 #eq
globalinc 1236 #eq
globalinc 1238 #eq
diff --git a/fs/tests/comp/c/test2.c b/fs/tests/comp/c/test2.c
@@ -135,3 +135,7 @@ int* ptrari(int *x) {
int ptrari2(int *lo, int *hi) {
return hi-lo;
}
+int array() {
+ int a[3] = {42, 12, 2};
+ return *a + a[1] - *(a+2);
+}