duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit 67bc87acde9da34b8bb4ba11baf0856c290a13db
parent 31290e5566ca67e9415a39c10e315047385b1ed3
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Thu,  3 Nov 2022 14:13:53 -0400

cc: improve type parsing

Type parsing is now entirely done in cc/types (previously, we would parse
function signatures for function implementations in cc/ast, and functions for
function signatures in typedefs in cc/types)

Moreover, this parsing can now parse more complex types, such as
"int (*foo)[42]" (pointer to an array of int) which is properly differenciated
from "int *foo[42]" (array of pointers to int).

Diffstat:
Mfs/cc/ast.fs | 38++++++++++----------------------------
Mfs/cc/cc.fs | 1-
Mfs/cc/gen.fs | 5+++--
Mfs/cc/type.fs | 99++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
Mfs/tests/cc/type.fs | 47+++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 133 insertions(+), 57 deletions(-)

diff --git a/fs/cc/ast.fs b/fs/cc/ast.fs @@ -14,6 +14,7 @@ Arena :new structbind Arena _arena +0 value _ccdebug : _err ( -- ) abort" ast error" ; : _assert ( f -- ) not if _err then ; @@ -23,8 +24,8 @@ Arena :new structbind Arena _arena \ This dictionary below contain global symbols of the current unit create symbols 0 , 0 c, \ this is a dict link -: addSymbol ( ctype name -- ) - symbols swap dup c@ ( ctype 'dict name len ) +: addSymbol ( ctype -- ) + symbols over CType name dup c@ ( ctype 'dict name len ) ENTRYSZ + 8 + _arena :[ entry , _arena :] drop ; : findSymbol ( name -- ctype-or-0 ) symbols find dup if @ then ; : ccast$ _arena :reset 0 to curunit 0 symbols ! ; @@ -144,10 +145,7 @@ extends ASTNode struct[ Function sfield ctype \ the signature of the function. sfield flags \ b0=static - : :new ( type name -- node ) - swap CType :new ( ctype ) - STORAGE_MEM over to CType storage - 2 ( funcsig ) over to CType flags + : :new ( ctype -- node ) AST_FUNCTION ASTNode :new swap ( node ctype ) ( ctype ) _arena :, curstatic ( flags ) _arena :, ; @@ -423,7 +421,7 @@ current to parseExpression \ Parse a variable declaration from within a function : parseDeclare ( type parentnode -- dnode ) - swap parseVariable ( pnode ctype ) + swap parseDeclarator ( pnode ctype ) Declare :new ( pnode dnode ) dup rot Node :add ( dnode ) ; : parseDeclareInit ( dnode tok -- ) @@ -433,18 +431,6 @@ current to parseExpression parseExpression then ( dnode expr-or-list ) swap Node :add ; -: _ ( parent-ctype tok -- offset ) - parseType _assert parseVariable ( ctype newtype ) - STORAGE_PS over to CType storage - tuck swap to CType nexttype ( newtype ) - nextt dup S" )" s= if drop 0 swap CType :offset! else - ',' expectChar dup nextt _ ( ctype offset ) - swap CType :offset! then ( offset ) ; - -: parseFuncArgs ( func-ctype -- ) - \ First '(' is already parsed - nextt dup S" )" s= if 2drop exit then ( ctype tok ) _ drop ; - : parseDeclareStatement ( type parentnode -- ) 2dup parseDeclare nextt parseDeclareInit ( type parentnode ) nextt dup ',' isChar? if \ another declaration @@ -509,16 +495,13 @@ alias noop parseStatement ( funcnode -- ) \ forward declaration nip statementhandler swap wexec then ; current to parseStatement -\ returntype, name and '(' have already been parsed, parse the rest -: parseFuncDef ( unitnode type name -- fnode ) - Function :new ( unode fnode ) - dup Function ctype over Function name addSymbol ( unode fnode ) - dup Function ctype parseFuncArgs ( unode fnode ) +: parseFuncDef ( unitnode ctype -- fnode ) + dup addSymbol Function :new ( unode fnode ) dup rot Node :add ( fnode ) dup parseStatement ; : parseGlobalDecl ( unitnode ctype -- dnode ) Declare :new ( unode dnode ) dup rot Node :add ( dnode ) - dup Declare ctype over Declare :name addSymbol + dup Declare ctype addSymbol STORAGE_MEM over Declare ctype to CType storage ( dnode ) dup nextt parseDeclareInit read; ; @@ -530,9 +513,8 @@ current to parseStatement parseType _assert ( unode type ) nextt dup ';' isChar? if \ Only a type on a line is fine, carry on 2drop drop 0 exit then - to nexttputback parseVariable ( unode ctype ) nextt dup S" (" s= if - drop dup CType type swap CType name parseFuncDef - else to nexttputback parseGlobalDecl then ; + to nexttputback parseDeclarator ( unode ctype ) + dup CType :funcsig? if parseFuncDef else parseGlobalDecl then ; : newparseunit ( -- unit ) AST_UNIT ASTNode :new dup to curunit ; : parseast ( -- ) diff --git a/fs/cc/cc.fs b/fs/cc/cc.fs @@ -1,5 +1,4 @@ \ C compiler -0 value _ccdebug ?f<< /cc/vm/vm.fs ?f<< /cc/ttr.fs ?f<< /cc/gen.fs diff --git a/fs/cc/gen.fs b/fs/cc/gen.fs @@ -223,8 +223,9 @@ ASTIDCNT wordtbl gentbl ( node -- ) dup Node firstchild gennode \ op has call address lastidentfound ?dup if ( ctype ) \ We either have a direct function signature or a pointer to it. - dup CType :funcsig? not if CType type dup CType :funcsig? _assert then - CType type + \ TODO: :funcptr? doesn't work correctly here. fix this + dup CType :funcsig? not if CType type ctype' then + dup CType :funcsig? _assert CType type else vmop loc VM_CONSTANT = if vmop arg wordfunctype else TYPE_VOID then then ( node type ) vmop :push rot ( type 'copy node ) diff --git a/fs/cc/type.fs b/fs/cc/type.fs @@ -51,6 +51,8 @@ $1d const TYPE_UINT* : typesigned! ( type -- type ) $f and ; : typeunsigned! ( type -- type ) $10 or ; : type*lvl ( type -- lvl ) 3 and ; +\ TODO: have the _assert in type*lvl! it's not possible now because it breaks +\ the CC vms. : type*lvl! ( lvl type -- type ) $fffffffc and or ; : type*lvl+ ( type -- type ) dup type*lvl 1+ dup 4 < _assert swap type*lvl! ; : type*lvl- ( type -- type ) dup type*lvl 1- dup 0>= _assert swap type*lvl! ; @@ -76,7 +78,11 @@ struct[ CType _globalmode if _ else $100 SZ + _arena :[ _ _arena :] drop then ; : :struct? flags 1 and ; - : :funcsig? flags 2 and ; + : :funcsig? flags 2 and bool ; + : :funcptr? + type dup ctype? if + dup type*lvl 1 = swap ctype' :funcsig? and + else 0 then ; : :isarg? ( dnode -- f ) storage STORAGE_PS = ; : :isglobal? ( dnode -- f ) storage STORAGE_MEM = ; @@ -171,33 +177,74 @@ current to _typesize : parseType* ( type -- type tok ) begin nextt dup '*' isChar? while drop type*lvl+ repeat ; -: parseFuncSig ( type -- ctype ) - nextt '*' expectChar nextt expectIdent ( type name ) - swap CType :new ( ctype ) - 2 over to CType flags ( ctype ) - nextt ')' expectChar nextt '(' expectChar begin ( ctype ) - \ TODO: don't ignore funcsig arguments - nextt dup ')' isChar? not while ( ctype tok ) drop - repeat ( ctype tok ) drop ; - -\ Given a "type" part that is already parsed from parseType, parse the rest of -\ a variable declaration, that is, the indirections (*), the name and nbelem -\ ([]). This always returns a CType. -: parseVariable ( type -- ctype ) - nextt dup '(' isChar? if drop parseFuncSig exit else to nexttputback then - parseType* expectIdent swap CType :new ( ctype ) - nextt dup S" [" s= if ( ctype tok ) - drop nextt dup S" #[" s= if drop #[1 else parse _assert then - nextt ']' expectChar ( ctype nbelem ) - over to CType nbelem - else to nexttputback then ( ctype ) ; +alias _err parseType ( tok -- type? f ) \ forward declaration +alias _err parseDeclarator ( type -- ctype ) \ forward declaration + +\ Parsing strategy: we dig down recursively through nextt until we get to our +\ identifier. Before that identifier, we can hit chars like ( and *. +\ For *, it's easy, we inc our current indirection level. For (, we enter a +\ recursion. +\ After we hit the identifier, we continue parsing forward, where +\ we can hit chars like [, an array specifier and (, a function specifier. We +\ process them, amending our ctype structure as we go. If we hit a ), we go up +\ one level in recursion and apply previously recorded indirection levels to the +\ returned type. + +: _arg ( parent-ctype tok -- offset ) + parseType _assert parseDeclarator ( ctype newtype ) + STORAGE_PS over to CType storage + tuck swap to CType nexttype ( newtype ) + nextt dup ')' isChar? if drop 0 swap CType :offset! else + ',' expectChar dup nextt _arg ( ctype offset ) + swap CType :offset! then ( offset ) ; + +\ parsing after the identifier +: _post ( ctype -- ctype ) + begin ( ctype ) nextt case + '[' of isChar?^ + nextt dup S" #[" s= if drop #[1 else parse _assert then + nextt ']' expectChar ( ctype nbelem ) + over to CType nbelem endof + '(' of isChar?^ + 2 over to CType flags \ func + STORAGE_MEM over to CType storage + nextt dup ')' isChar? if drop + else ( ctype tok ) over swap _arg ( ctype offset ) drop then endof + r> to nexttputback exit + endcase again ; + +: _addlvl ( lvl type -- type ) tuck type*lvl + dup 4 < _assert swap type*lvl! ; + +: _parseDeclarator ( type -- ctype ) + 0 begin ( type lvl ) + nextt dup '*' isChar? while ( type lvl tok ) + drop 1+ repeat ( type lvl tok ) + dup '(' isChar? if ( type lvl tok ) + drop swap parseDeclarator nextt ')' expectChar + >r >r \ V1=outer-ctype v2=inner-lvl + \ type recursion in C is "inside out". The ctype we have now is the outer + \ type. We'll forward its current "type" field to the ctype we're about to + \ finish parsing, and that new ctype will be placed in our outer type's + \ "type" field. One thing we have to be careful about is to keep our *lvl + \ where it belongs. + V1 CType type dup type*lvl ( inner-type outer-lvl ) + swap r> ( inner-lvl ) swap type*lvl! ( outer-lvl inner-type ) + NULLSTR swap CType :new _post ( outer-lvl inner-type ) + _addlvl ( inner-type ) r@ ( outer-type ) to CType type r> ( ctype ) + else ( type lvl tok ) + dup isIdent? not if to nexttputback NULLSTR then ( type lvl name ) + rot CType :new ( lvl ctype ) + _post tuck CType type ( ctype lvl type ) + _addlvl ( ctype type ) + over to CType type ( ctype ) then ; +current to parseDeclarator \ parse a type from stream, starting with "tok". This only parses the "type" \ part without the "*" part or the name part. The result can be a "base" type -\ (type < $100) or a CType if the type is a struct. -: parseType ( tok -- type? f ) +\ (type < $100) or a CType if the type is a struct, union or enum. +: _parseType ( tok -- type? f ) dup S" typedef" s= if - drop nextt parseType _assert parseVariable ( ctype ) + drop nextt parseType _assert parseDeclarator ( ctype ) dup addLocalTypedef 1 exit then dup S" struct" s= if drop nextt dup isIdent? if nextt 1 to _globalmode else NULLSTR swap then @@ -206,7 +253,7 @@ current to _typesize _globalmode if dup addGlobalTypedef then 0 >r dup begin ( res prev ) \ V1=offset nextt dup '}' isChar? not while ( res prev tok ) - parseType _assert parseVariable ( res prev new ) + parseType _assert parseDeclarator ( res prev new ) tuck swap to CType nexttype ( res new ) V1 over to CType offset dup typesize to+ V1 read; @@ -217,4 +264,4 @@ current to _typesize nip << << or 1 else drop nip findTypedef ( type-or-0 ) ?dup bool then then ; - +current to parseType diff --git a/fs/tests/cc/type.fs b/fs/tests/cc/type.fs @@ -36,4 +36,51 @@ S" struct Struct1 {unsigned int foo, +04 short* bar, +08 char baz[2]}" #s= \ Anonymous structs work too current with-stdin< struct { int foo; } STOP typesize 4 #eq + +\ And now, let's test parseDeclarator +: _parse nextt parseType # parseDeclarator nextt S" STOP" #s= ; +current with-stdin< int *foo STOP +dup CType type TYPE_INT* #eq +CType name S" foo" #s= + +current with-stdin< int *foo[42] STOP +dup CType type TYPE_INT* #eq +dup CType nbelem 42 #eq +CType name S" foo" #s= + +current with-stdin< int (*foo)[42] STOP +dup CType type ctype? # +dup CType type type*lvl 1 #eq +dup CType nbelem 0 #eq +dup CType name S" foo" #s= +CType type ctype' +dup CType type TYPE_INT #eq +dup CType nbelem 42 #eq +CType name NULLSTR #s= + +current with-stdin< unsigned int (*foo)(char,short) STOP +dup CType :funcptr? # +dup CType name S" foo" #s= +CType type ctype' +dup CType type TYPE_UINT #eq +CType nexttype +dup CType type TYPE_CHAR #eq +dup CType offset 4 #eq \ PS args are always 4b in size +CType nexttype +dup CType type TYPE_SHORT #eq +CType offset 0 #eq + +\ We can also have a function signature with argument names. +current with-stdin< unsigned int (*foo)(short bar,char baz) STOP +dup CType :funcptr? # +dup CType name S" foo" #s= +CType type ctype' +CType nexttype +dup CType name S" bar" #s= +dup CType type TYPE_SHORT #eq +dup CType offset 4 #eq +CType nexttype +dup CType name S" baz" #s= +dup CType type TYPE_CHAR #eq +CType offset 0 #eq testend