commit 14ca909f3d424d6a926f12414c4ca0e38f9c17f2
parent f8c05fa5d47e41f42151c86c2371387d4be45336
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Fri, 2 Dec 2022 19:59:17 -0500
comp/c: revisit symbols lifetimes
Rules for determining if a type or a symbol was permanent or temporary were a
bit fuzzy. The global/local typedef thing brought complexity for very small
gains in memory usage.
I revisited the whole thing and documented it better.
Diffstat:
3 files changed, 90 insertions(+), 54 deletions(-)
diff --git a/fs/comp/c/pgen.fs b/fs/comp/c/pgen.fs
@@ -97,7 +97,6 @@ BOPSCNT wordtbl bopgentbl ( -- )
0 value _curfunc \ ctype of the current function (includes arguments)
0 value _locvars \ the root ctype of local variables for current function
-0 value curstatic \ is current definition "static"?
: findIdent ( name -- ctype-or-0 )
_curfunc if dup _curfunc CType :find ?dup if nip exit then then ( name )
@@ -120,7 +119,6 @@ alias noop parseExpression ( tok -- ) \ forward declaration
: _arg ( parent-ctype tok -- offset )
parseType _assert parseDeclarator ( ctype newtype )
- STORAGE_PS over to CType storage
tuck swap to CType nexttype ( newtype )
')' readChar? if 0 swap CType :offset! else
',' expectChar dup nextt _arg ( ctype offset )
@@ -135,9 +133,10 @@ alias noop parseExpression ( tok -- ) \ forward declaration
nextt ']' expectChar ( ctype nbelem )
over to CType nbelem endof
'(' of isChar?^
- dup CType :funcsig! STORAGE_MEM over to CType storage
+ dup CType :funcsig! STORAGE_PS to@! curstorage >r
')' readChar? not if ( ctype tok )
- over swap _arg ( ctype offset ) drop then endof
+ over swap _arg ( ctype offset ) drop then
+ r> to curstorage endof
r> to nexttputback exit
endcase again ;
@@ -172,19 +171,18 @@ current to parseDeclarator
: _parseType ( tok -- type? f )
dup S" typedef" s= if
drop nextt parseType _assert parseDeclarator ( ctype )
- dup addLocalTypedef 1 exit then
+ dup addTypedef 1 exit then
dup S" struct" s= if
- drop nextt dup isIdent? if nextt 1 to _globalmode else NULLSTR swap then
+ drop nextt dup isIdent? if nextt else NULLSTR swap then
'{' expectChar ( name ) TYPE_VOID CType :new ( res )
- dup CType :struct!
- _globalmode if dup addGlobalTypedef then
+ dup CType :struct! dup addTypedef
0 >r dup begin ( res prev ) \ V1=offset
'}' readChar? not while ( res prev tok )
parseType _assert parseDeclarator ( res prev new )
tuck swap to CType nexttype ( res new )
V1 over to CType offset
dup typesize to+ V1 read;
- repeat ( res prev ) rdrop drop 1 0 to _globalmode
+ repeat ( res prev ) rdrop drop 1
else
dup S" unsigned" s= if drop $10 nextt else $00 swap then ( type tok )
dup typenames sfind dup 0>= if ( type tok idx )
@@ -399,12 +397,7 @@ current to parseStatement
parseDeclarator ( ctype )
dup _locvars ?dup if CType :append else to _locvars then begin ( ctype )
'=' readChar? if ( ctype )
- _initcode not if
- \ when there is init code, it's possible, because we declare new types,
- \ that the type arena allocate a new buffer right in the middle of our
- \ init code. that's bad. To avoid this, we "reserve" an arena buf now.
- cctypearena Arena :reserve
- here to _initcode then
+ _initcode not if here to _initcode then
dup ctype>op selop^ nextt parseExpression
selop^ vm=, ops$ nextt then ( ctype tok )
dup ';' isChar? not while ( ctype tok )
@@ -425,8 +418,9 @@ current to parseStatement
\ '{' is already parsed
: parseFunctionBody ( ctype -- )
0 to _locvars 0 to _initcode to _curfunc _litarena :reserve ( )
+ STORAGE_SF to@! curstorage >r
begin nextt dup parseType while ( tok type ) nip parseDeclLine repeat ( tok )
- to nexttputback
+ to nexttputback r> to curstorage
_initcode if vmjmp[, >r then
_curfunc CType :static? not if sysdict _curfunc CType name entry then ( )
here _curfunc to CType offset ( )
@@ -442,7 +436,7 @@ current to parseStatement
here swap to CType offset -1 vmjmp, ;
: parseGlobalDecl ( ctype -- )
- dup addSymbol STORAGE_MEM over to CType storage
+ dup addSymbol
dup CType :static? not if \ not static
dup CType name NEXTWORD ! create then ( ctype )
here over to CType offset ( ctype )
@@ -460,7 +454,7 @@ current to parseStatement
\ declaration) and consume tokens until that element is finished parsing. That
\ element is written to memory at "here".
: cparse ( tok -- )
- 0 to curstatic
+ cctypereserve 0 to curstatic
dup S" static" s= if drop nextt 1 to curstatic then
parseType _assert ( type )
';' readChar? if \ Only a type on a line is fine, carry on
diff --git a/fs/comp/c/type.fs b/fs/comp/c/type.fs
@@ -4,12 +4,16 @@
?f<< /lib/meta.fs
?f<< /comp/c/tok.fs
-\ This arena is for local typedefs for a single unit.
-Arena :new structbind Arena _arena
-\ needed in pgen.fs
-: cctypearena _arena :self ;
+\ When we parse a type, we can almost never write it directly to "here" because
+\ there's always the chance that we're in the middle of a code generation op.
+\ Therefore, we always write to an allocator; in this unit it's an Arena.
+\ We have 2 arenas: the Permanent arena, which we never reset, and the Temporary
+\ arena, which we clear at the beginning of cc<<.
+Arena :new const _parena \ Permanent
+Arena :new const _tarena \ Temporary
-0 value _globalmode \ are we adding a global type?
+\ Call this in between code gen so that we don't have untimely block allocs.
+: cctypereserve _parena Arena :reserve _tarena Arena :reserve ;
: _err ( -- ) abort" type error" ;
: _assert ( f -- ) not if _err then ;
@@ -44,9 +48,14 @@ $d const TYPE_INT*
$1c const TYPE_UINT
$1d const TYPE_UINT*
-0 const STORAGE_SF \ Stack frame (or struct offset)
+0 const STORAGE_SF \ Stack frame
1 const STORAGE_PS \ Parameter Stack
2 const STORAGE_MEM \ Fixed address in memory
+\ Set by pgen; determines the storage type of newly created CTypes
+STORAGE_MEM value curstorage
+0 value curstatic \ is current definition "static"?
+
+: _arena curstorage STORAGE_MEM <> curstatic or if _tarena else _parena then ;
4 stringlist typenames "void" "char" "short" "int"
: typeunsigned? ( type -- flags ) 4 rshift 1 and ;
@@ -79,9 +88,10 @@ struct[ CType
sfield storage \ one of the STORAGE_* consts
SZ &+ name \ name associated with this type within its list.
- : _ 0 align4 here rot> 0 , , 0 , 0 , 0 , STORAGE_SF , s, ;
: :new ( name type -- ctype )
- _globalmode if _ else $100 SZ + _arena :[ _ _arena :] drop then ;
+ $100 SZ + _arena Arena :[
+ 0 align4 here rot> 0 , , 0 , 0 , 0 , curstorage , s,
+ _arena Arena :] drop ;
: _f? doer , does> ( self 'w ) @ swap flags and bool ;
: _f! doer , does> ( f self 'w ) @ over flags or swap to flags ;
@@ -140,27 +150,22 @@ struct[ CType
\ Typedefs are dictionary entries in the "typedefs" dicts, which contain a 4b
\ value representing the type it aliases.
-create localdefs 0 , 0 c, \ this is a dict link
-create globaldefs 0 , 0 c,
+create typedefs 0 , 0 c, \ this is a dict link
-: addLocalTypedef ( ctype -- )
- localdefs over CType name dup c@ ( ctype 'dict name len )
- ENTRYSZ + 8 + _arena :[
- entry dup CType flags not if CType type then , _arena :] drop ;
-: addGlobalTypedef ( ctype -- ) globaldefs over CType name entry , ;
-: findTypedef ( name -- type-or-0 )
- dup localdefs find ?dup if nip @ else globaldefs find dup if @ then then ;
+: addTypedef ( ctype -- ) typedefs over CType name entry , ;
+: findTypedef ( name -- type-or-0 ) typedefs find dup if @ then ;
-\ The Symbols dict contains functions and global variables (static or not) for
-\ the current unit.
-create symbols 0 , 0 c, \ this is a dict link
+create _symbols 0 , 0 c, \ non-static
+create _ssymbols 0 , 0 c, \ static
: addSymbol ( ctype -- )
- symbols over CType name dup c@ ( ctype 'dict name len )
- ENTRYSZ + 8 + _arena :[ entry , _arena :] drop ;
-: findSymbol ( name -- ctype-or-0 ) symbols find dup if @ then ;
+ curstatic if _ssymbols else _symbols then
+ over CType name dup c@ ( ctype 'dict name len )
+ ENTRYSZ + 8 + _arena Arena :[ entry , _arena Arena :] drop ;
+: findSymbol ( name -- ctype-or-0 )
+ dup _ssymbols find ?dup if nip @ else _symbols find dup if @ then then ;
-: cctypes$ 0 localdefs ! 0 symbols ! _arena :reset ;
+: cctypes$ 0 _ssymbols ! _tarena Arena :reset ;
: printtype ( type -- )
dup ctype? if dup ctype' CType :. else
diff --git a/fs/doc/cc/usage.txt b/fs/doc/cc/usage.txt
@@ -56,6 +56,7 @@ are a few differences:
that chained assignments such as "a = b = c = 42" must be expressed as
"a = (b = (c = 42))" and "a ? b ? c : d : e ? f : g" (seriously, you wanted
to write that?!?) must be written as "a ? (b ? c : d) : (e ? f : g)"
+* The keyword "static" has a slightly different meaning. See below.
## Calling Forth words
@@ -99,20 +100,56 @@ is the exact same equivalent to:
int bar() { return 42; }
-## Linkage and persistence
+## Symbols, types and macro visibility and lifetime
-By default, functions and global variables have external linkage. You give them
-an internal linkage with "static".
+The C compiler creates two kinds of artifacts: types and symbols.
-static void foo() { }
-void bar() { foo(); }
+Types are what is created by "struct" and "typedef". Those artifacts bind a name
+to type information. Once they're created, following C code can use these names
+to refer to these types.
-This unit will compile fine. Because "foo()" is in the same unit as "bar()",
-"bar()" can call "foo()". However, that function can't be called from another
-unit or from Forth. "bar()" can.
+Types are never cleared. Once a type is created, it is always available. It can,
+however, be shadowed by a new type of the same name.
-Symbols created with "typedef" are only visible in the current unit. Structures
-created through the "struct <name> { ... }" form, however, are persistent and
-can be referenced across units. The "typedef struct { ... } <name>" form,
-however, is "private" like any typedef.
+Creating a type doesn't create a Forth word, so by default, types are invisible
+to Forth. It is possible, however, to export a type to a Forth structure. The
+code looks like this:
+ S" MyType" findTypedef CType :export
+
+After that, a "MyType" struct with the same fields as the C type is available to
+Forth.
+
+Symbols are declarations of functions and variables at different offsets. A
+variable declared outside a function in a C unit is a global variable and
+generates a Forth word that acts like a "create" word: it yields the variable's
+address.
+
+A function declaration also generates a Forth word that calls into the
+generated function.
+
+If you don't want to generate a Forth word for your declaration, begin the
+declaration with the "static" keyword. Then, the function or variable will only
+be available to C code.
+
+Non-static symbols are never cleared from memory. They always stay available to
+both Forth and C code. Symbols can be shadowed by a new definition of the same
+name.
+
+Static symbols are cleared at the beginning of the "cc<<" call. The ":c" word
+doesn't clear static symbols and can refer to static symbols created in the
+previously compiled C unit.
+
+During symbol lookups, static symbols are searched first, so a non-static symbol
+cannot shadow a static one. When static symbols are cleared, they unshadow any
+non-static symbol they would shadow.
+
+Symbols declared in function bodies are local variables and are cleared at the
+end of the function body. During lookup, they are searched before all global
+symbols.
+
+Function arguments are part of the function signature type, so they're permanent
+just like the signature.
+
+Macros are not exposed to Forth as words and are cleared at the beginning of the
+"cc<<" call.