commit 67bc87acde9da34b8bb4ba11baf0856c290a13db
parent 31290e5566ca67e9415a39c10e315047385b1ed3
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Thu, 3 Nov 2022 14:13:53 -0400
cc: improve type parsing
Type parsing is now entirely done in cc/types (previously, we would parse
function signatures for function implementations in cc/ast, and functions for
function signatures in typedefs in cc/types)
Moreover, this parsing can now parse more complex types, such as
"int (*foo)[42]" (pointer to an array of int) which is properly differenciated
from "int *foo[42]" (array of pointers to int).
Diffstat:
5 files changed, 133 insertions(+), 57 deletions(-)
diff --git a/fs/cc/ast.fs b/fs/cc/ast.fs
@@ -14,6 +14,7 @@
Arena :new structbind Arena _arena
+0 value _ccdebug
: _err ( -- ) abort" ast error" ;
: _assert ( f -- ) not if _err then ;
@@ -23,8 +24,8 @@ Arena :new structbind Arena _arena
\ This dictionary below contain global symbols of the current unit
create symbols 0 , 0 c, \ this is a dict link
-: addSymbol ( ctype name -- )
- symbols swap dup c@ ( ctype 'dict name len )
+: addSymbol ( ctype -- )
+ symbols over CType name dup c@ ( ctype 'dict name len )
ENTRYSZ + 8 + _arena :[ entry , _arena :] drop ;
: findSymbol ( name -- ctype-or-0 ) symbols find dup if @ then ;
: ccast$ _arena :reset 0 to curunit 0 symbols ! ;
@@ -144,10 +145,7 @@ extends ASTNode struct[ Function
sfield ctype \ the signature of the function.
sfield flags \ b0=static
- : :new ( type name -- node )
- swap CType :new ( ctype )
- STORAGE_MEM over to CType storage
- 2 ( funcsig ) over to CType flags
+ : :new ( ctype -- node )
AST_FUNCTION ASTNode :new swap ( node ctype )
( ctype ) _arena :, curstatic ( flags ) _arena :, ;
@@ -423,7 +421,7 @@ current to parseExpression
\ Parse a variable declaration from within a function
: parseDeclare ( type parentnode -- dnode )
- swap parseVariable ( pnode ctype )
+ swap parseDeclarator ( pnode ctype )
Declare :new ( pnode dnode ) dup rot Node :add ( dnode ) ;
: parseDeclareInit ( dnode tok -- )
@@ -433,18 +431,6 @@ current to parseExpression
parseExpression then
( dnode expr-or-list ) swap Node :add ;
-: _ ( parent-ctype tok -- offset )
- parseType _assert parseVariable ( ctype newtype )
- STORAGE_PS over to CType storage
- tuck swap to CType nexttype ( newtype )
- nextt dup S" )" s= if drop 0 swap CType :offset! else
- ',' expectChar dup nextt _ ( ctype offset )
- swap CType :offset! then ( offset ) ;
-
-: parseFuncArgs ( func-ctype -- )
- \ First '(' is already parsed
- nextt dup S" )" s= if 2drop exit then ( ctype tok ) _ drop ;
-
: parseDeclareStatement ( type parentnode -- )
2dup parseDeclare nextt parseDeclareInit ( type parentnode )
nextt dup ',' isChar? if \ another declaration
@@ -509,16 +495,13 @@ alias noop parseStatement ( funcnode -- ) \ forward declaration
nip statementhandler swap wexec then ;
current to parseStatement
-\ returntype, name and '(' have already been parsed, parse the rest
-: parseFuncDef ( unitnode type name -- fnode )
- Function :new ( unode fnode )
- dup Function ctype over Function name addSymbol ( unode fnode )
- dup Function ctype parseFuncArgs ( unode fnode )
+: parseFuncDef ( unitnode ctype -- fnode )
+ dup addSymbol Function :new ( unode fnode )
dup rot Node :add ( fnode ) dup parseStatement ;
: parseGlobalDecl ( unitnode ctype -- dnode )
Declare :new ( unode dnode ) dup rot Node :add ( dnode )
- dup Declare ctype over Declare :name addSymbol
+ dup Declare ctype addSymbol
STORAGE_MEM over Declare ctype to CType storage ( dnode )
dup nextt parseDeclareInit read; ;
@@ -530,9 +513,8 @@ current to parseStatement
parseType _assert ( unode type )
nextt dup ';' isChar? if \ Only a type on a line is fine, carry on
2drop drop 0 exit then
- to nexttputback parseVariable ( unode ctype ) nextt dup S" (" s= if
- drop dup CType type swap CType name parseFuncDef
- else to nexttputback parseGlobalDecl then ;
+ to nexttputback parseDeclarator ( unode ctype )
+ dup CType :funcsig? if parseFuncDef else parseGlobalDecl then ;
: newparseunit ( -- unit ) AST_UNIT ASTNode :new dup to curunit ;
: parseast ( -- )
diff --git a/fs/cc/cc.fs b/fs/cc/cc.fs
@@ -1,5 +1,4 @@
\ C compiler
-0 value _ccdebug
?f<< /cc/vm/vm.fs
?f<< /cc/ttr.fs
?f<< /cc/gen.fs
diff --git a/fs/cc/gen.fs b/fs/cc/gen.fs
@@ -223,8 +223,9 @@ ASTIDCNT wordtbl gentbl ( node -- )
dup Node firstchild gennode \ op has call address
lastidentfound ?dup if ( ctype )
\ We either have a direct function signature or a pointer to it.
- dup CType :funcsig? not if CType type dup CType :funcsig? _assert then
- CType type
+ \ TODO: :funcptr? doesn't work correctly here. fix this
+ dup CType :funcsig? not if CType type ctype' then
+ dup CType :funcsig? _assert CType type
else
vmop loc VM_CONSTANT = if vmop arg wordfunctype else TYPE_VOID then then
( node type ) vmop :push rot ( type 'copy node )
diff --git a/fs/cc/type.fs b/fs/cc/type.fs
@@ -51,6 +51,8 @@ $1d const TYPE_UINT*
: typesigned! ( type -- type ) $f and ;
: typeunsigned! ( type -- type ) $10 or ;
: type*lvl ( type -- lvl ) 3 and ;
+\ TODO: have the _assert in type*lvl! it's not possible now because it breaks
+\ the CC vms.
: type*lvl! ( lvl type -- type ) $fffffffc and or ;
: type*lvl+ ( type -- type ) dup type*lvl 1+ dup 4 < _assert swap type*lvl! ;
: type*lvl- ( type -- type ) dup type*lvl 1- dup 0>= _assert swap type*lvl! ;
@@ -76,7 +78,11 @@ struct[ CType
_globalmode if _ else $100 SZ + _arena :[ _ _arena :] drop then ;
: :struct? flags 1 and ;
- : :funcsig? flags 2 and ;
+ : :funcsig? flags 2 and bool ;
+ : :funcptr?
+ type dup ctype? if
+ dup type*lvl 1 = swap ctype' :funcsig? and
+ else 0 then ;
: :isarg? ( dnode -- f ) storage STORAGE_PS = ;
: :isglobal? ( dnode -- f ) storage STORAGE_MEM = ;
@@ -171,33 +177,74 @@ current to _typesize
: parseType* ( type -- type tok )
begin nextt dup '*' isChar? while drop type*lvl+ repeat ;
-: parseFuncSig ( type -- ctype )
- nextt '*' expectChar nextt expectIdent ( type name )
- swap CType :new ( ctype )
- 2 over to CType flags ( ctype )
- nextt ')' expectChar nextt '(' expectChar begin ( ctype )
- \ TODO: don't ignore funcsig arguments
- nextt dup ')' isChar? not while ( ctype tok ) drop
- repeat ( ctype tok ) drop ;
-
-\ Given a "type" part that is already parsed from parseType, parse the rest of
-\ a variable declaration, that is, the indirections (*), the name and nbelem
-\ ([]). This always returns a CType.
-: parseVariable ( type -- ctype )
- nextt dup '(' isChar? if drop parseFuncSig exit else to nexttputback then
- parseType* expectIdent swap CType :new ( ctype )
- nextt dup S" [" s= if ( ctype tok )
- drop nextt dup S" #[" s= if drop #[1 else parse _assert then
- nextt ']' expectChar ( ctype nbelem )
- over to CType nbelem
- else to nexttputback then ( ctype ) ;
+alias _err parseType ( tok -- type? f ) \ forward declaration
+alias _err parseDeclarator ( type -- ctype ) \ forward declaration
+
+\ Parsing strategy: we dig down recursively through nextt until we get to our
+\ identifier. Before that identifier, we can hit chars like ( and *.
+\ For *, it's easy, we inc our current indirection level. For (, we enter a
+\ recursion.
+\ After we hit the identifier, we continue parsing forward, where
+\ we can hit chars like [, an array specifier and (, a function specifier. We
+\ process them, amending our ctype structure as we go. If we hit a ), we go up
+\ one level in recursion and apply previously recorded indirection levels to the
+\ returned type.
+
+: _arg ( parent-ctype tok -- offset )
+ parseType _assert parseDeclarator ( ctype newtype )
+ STORAGE_PS over to CType storage
+ tuck swap to CType nexttype ( newtype )
+ nextt dup ')' isChar? if drop 0 swap CType :offset! else
+ ',' expectChar dup nextt _arg ( ctype offset )
+ swap CType :offset! then ( offset ) ;
+
+\ parsing after the identifier
+: _post ( ctype -- ctype )
+ begin ( ctype ) nextt case
+ '[' of isChar?^
+ nextt dup S" #[" s= if drop #[1 else parse _assert then
+ nextt ']' expectChar ( ctype nbelem )
+ over to CType nbelem endof
+ '(' of isChar?^
+ 2 over to CType flags \ func
+ STORAGE_MEM over to CType storage
+ nextt dup ')' isChar? if drop
+ else ( ctype tok ) over swap _arg ( ctype offset ) drop then endof
+ r> to nexttputback exit
+ endcase again ;
+
+: _addlvl ( lvl type -- type ) tuck type*lvl + dup 4 < _assert swap type*lvl! ;
+
+: _parseDeclarator ( type -- ctype )
+ 0 begin ( type lvl )
+ nextt dup '*' isChar? while ( type lvl tok )
+ drop 1+ repeat ( type lvl tok )
+ dup '(' isChar? if ( type lvl tok )
+ drop swap parseDeclarator nextt ')' expectChar
+ >r >r \ V1=outer-ctype v2=inner-lvl
+ \ type recursion in C is "inside out". The ctype we have now is the outer
+ \ type. We'll forward its current "type" field to the ctype we're about to
+ \ finish parsing, and that new ctype will be placed in our outer type's
+ \ "type" field. One thing we have to be careful about is to keep our *lvl
+ \ where it belongs.
+ V1 CType type dup type*lvl ( inner-type outer-lvl )
+ swap r> ( inner-lvl ) swap type*lvl! ( outer-lvl inner-type )
+ NULLSTR swap CType :new _post ( outer-lvl inner-type )
+ _addlvl ( inner-type ) r@ ( outer-type ) to CType type r> ( ctype )
+ else ( type lvl tok )
+ dup isIdent? not if to nexttputback NULLSTR then ( type lvl name )
+ rot CType :new ( lvl ctype )
+ _post tuck CType type ( ctype lvl type )
+ _addlvl ( ctype type )
+ over to CType type ( ctype ) then ;
+current to parseDeclarator
\ parse a type from stream, starting with "tok". This only parses the "type"
\ part without the "*" part or the name part. The result can be a "base" type
-\ (type < $100) or a CType if the type is a struct.
-: parseType ( tok -- type? f )
+\ (type < $100) or a CType if the type is a struct, union or enum.
+: _parseType ( tok -- type? f )
dup S" typedef" s= if
- drop nextt parseType _assert parseVariable ( ctype )
+ drop nextt parseType _assert parseDeclarator ( ctype )
dup addLocalTypedef 1 exit then
dup S" struct" s= if
drop nextt dup isIdent? if nextt 1 to _globalmode else NULLSTR swap then
@@ -206,7 +253,7 @@ current to _typesize
_globalmode if dup addGlobalTypedef then
0 >r dup begin ( res prev ) \ V1=offset
nextt dup '}' isChar? not while ( res prev tok )
- parseType _assert parseVariable ( res prev new )
+ parseType _assert parseDeclarator ( res prev new )
tuck swap to CType nexttype ( res new )
V1 over to CType offset
dup typesize to+ V1 read;
@@ -217,4 +264,4 @@ current to _typesize
nip << << or 1
else drop nip findTypedef ( type-or-0 ) ?dup bool then
then ;
-
+current to parseType
diff --git a/fs/tests/cc/type.fs b/fs/tests/cc/type.fs
@@ -36,4 +36,51 @@ S" struct Struct1 {unsigned int foo, +04 short* bar, +08 char baz[2]}" #s=
\ Anonymous structs work too
current with-stdin< struct { int foo; } STOP typesize 4 #eq
+
+\ And now, let's test parseDeclarator
+: _parse nextt parseType # parseDeclarator nextt S" STOP" #s= ;
+current with-stdin< int *foo STOP
+dup CType type TYPE_INT* #eq
+CType name S" foo" #s=
+
+current with-stdin< int *foo[42] STOP
+dup CType type TYPE_INT* #eq
+dup CType nbelem 42 #eq
+CType name S" foo" #s=
+
+current with-stdin< int (*foo)[42] STOP
+dup CType type ctype? #
+dup CType type type*lvl 1 #eq
+dup CType nbelem 0 #eq
+dup CType name S" foo" #s=
+CType type ctype'
+dup CType type TYPE_INT #eq
+dup CType nbelem 42 #eq
+CType name NULLSTR #s=
+
+current with-stdin< unsigned int (*foo)(char,short) STOP
+dup CType :funcptr? #
+dup CType name S" foo" #s=
+CType type ctype'
+dup CType type TYPE_UINT #eq
+CType nexttype
+dup CType type TYPE_CHAR #eq
+dup CType offset 4 #eq \ PS args are always 4b in size
+CType nexttype
+dup CType type TYPE_SHORT #eq
+CType offset 0 #eq
+
+\ We can also have a function signature with argument names.
+current with-stdin< unsigned int (*foo)(short bar,char baz) STOP
+dup CType :funcptr? #
+dup CType name S" foo" #s=
+CType type ctype'
+CType nexttype
+dup CType name S" bar" #s=
+dup CType type TYPE_SHORT #eq
+dup CType offset 4 #eq
+CType nexttype
+dup CType name S" baz" #s=
+dup CType type TYPE_CHAR #eq
+CType offset 0 #eq
testend