duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit a11848f25cae8b5898f1d00e2d959ef136cfe99d
parent d5afd6bab549cc5a1a6fd18c5ead6c1693ab9116
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Tue,  1 Nov 2022 21:10:29 -0400

Add the Arena allocator

see doc/alloc

Diffstat:
Mfs/cc/ast.fs | 42++++++++++++++++++++++++------------------
Mfs/cc/type.fs | 22+++++++++++-----------
Afs/doc/alloc.txt | 115+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mfs/doc/cc/usage.txt | 4++++
Afs/lib/alloc.fs | 36++++++++++++++++++++++++++++++++++++
Afs/lib/arena.fs | 40++++++++++++++++++++++++++++++++++++++++
Mfs/lib/meta.fs | 5+++--
Mfs/lib/scratch.fs | 42+++++++++---------------------------------
Mfs/tests/lib/all.fs | 1+
Afs/tests/lib/arena.fs | 15+++++++++++++++
10 files changed, 258 insertions(+), 64 deletions(-)

diff --git a/fs/cc/ast.fs b/fs/cc/ast.fs @@ -2,17 +2,17 @@ \ An abstract syntax tree, AST, is a hierarchical structure of nodes \ representing the nodes found in a C source file. See tree.fs for structure. ?f<< lib/str.fs +?f<< lib/arena.fs ?f<< lib/wordtbl.fs ?f<< cc/tok.fs ?f<< cc/tree.fs ?f<< cc/macro.fs ?f<< cc/type.fs -\ When compiling a C unit, we need to keep its whole AST in memory at once and -\ this can be a little big. For reference, the AST for app/cos/cvm.c requires -\ 32 KB of memory. -32 $400 * Scratchpad :new structbind Scratchpad _pad -0 to _pad allowrollover +\ This arena contains AST structures for the unit being currently parsed. +\ We reset it between units. + +Arena :new structbind Arena _arena : _err ( -- ) abort" ast error" ; : _assert ( f -- ) not if _err then ; @@ -23,9 +23,11 @@ \ This dictionary below contain global symbols of the current unit create symbols 0 , 0 c, \ this is a dict link -: addSymbol ( node name -- ) symbols swap _pad :entry _pad :, ; +: addSymbol ( node name -- ) + symbols swap dup c@ ( node 'dict name len ) + ENTRYSZ + 8 + _arena :[ entry , _arena :] drop ; : findSymbol ( name -- node-or-0 ) symbols find dup if @ then ; -: ccast$ _pad :reset 0 to curunit 0 symbols ! ; +: ccast$ _arena :reset 0 to curunit 0 symbols ! ; \ Unary operators 7 const UOPSCNT @@ -90,7 +92,11 @@ create bopsprectbl BOPSCNT nc, 0 value curstatic \ is current definition "static"? extends Node struct[ ASTNode - : :new SZ _pad :[ Node :new _pad :] drop ; + : :new + \ To ensure allocator atomicity, we over-reserve in the :[ call to ensure + \ that any AST node as enough space ahead to have a contiguous memory space. + \ No AST node is suppose to exceed $200 bytes + $200 _arena :[ Node :new _arena :] drop ; ]struct extends ASTNode struct[ Declare @@ -99,7 +105,7 @@ extends ASTNode struct[ Declare : :new ( ctype -- node ) AST_DECLARE ASTNode :new ( ctype node ) - swap ( ctype ) _pad :, curstatic ( flags ) _pad :, ; + swap ( ctype ) _arena :, curstatic ( flags ) _arena :, ; : :name ctype CType name ; \ Number of bytes required to hold this variable declaration in memory. @@ -137,7 +143,7 @@ extends ASTNode struct[ Declarations extends ASTNode struct[ FuncSig sfield rettype - : :new ( type -- node ) AST_FUNCSIG ASTNode :new swap _pad :, ; + : :new ( type -- node ) AST_FUNCSIG ASTNode :new swap _arena :, ; ]struct extends ASTNode struct[ Function @@ -147,7 +153,7 @@ extends ASTNode struct[ Function : :new ( name -- node ) AST_FUNCTION ASTNode :new ( name node ) - 0 ( address ) _pad :, curstatic ( flags ) _pad :, swap _pad :s, ; + 0 ( address ) _arena :, curstatic ( flags ) _arena :, swap _arena :s, ; : :sig ( self -- anode ) firstchild dup id AST_FUNCSIG = _assert ; : :stmts ( self -- snode ) :sig nextsibling dup id AST_STATEMENTS = _assert ; @@ -160,7 +166,7 @@ extends ASTNode struct[ Function extends ASTNode struct[ Constant sfield value - : :new ( n -- node ) AST_CONSTANT ASTNode :new swap _pad :, ; + : :new ( n -- node ) AST_CONSTANT ASTNode :new swap _arena :, ; ]struct \ Result of the last "identfind" call @@ -168,7 +174,7 @@ extends ASTNode struct[ Constant extends ASTNode struct[ Ident SZ &+ name - : :new ( name -- node ) AST_IDENT ASTNode :new swap _pad :s, ; + : :new ( name -- node ) AST_IDENT ASTNode :new swap _arena :s, ; : :finddecl ( self -- dnode-or-fnode-or-0 ) dup name dup rot AST_FUNCTION swap Node :findparent ( name name fnode ) @@ -178,13 +184,13 @@ extends ASTNode struct[ Ident extends ASTNode struct[ Op sfield opid - : :new ( opid id -- node ) ASTNode :new swap _pad :, ; + : :new ( opid id -- node ) ASTNode :new swap _arena :, ; ]struct extends ASTNode struct[ Arrow SZ &+ name - : :new AST_ARROW ASTNode :new swap _pad :s, ; + : :new AST_ARROW ASTNode :new swap _arena :s, ; ]struct struct+[ ASTNode @@ -365,8 +371,8 @@ alias noop parseFactor ( tok -- node ) \ forward declaration else ( tok ) parseExpression nextt ')' expectChar then endof '"' of isChar?^ ( ) - StrLit :new $100 _pad :[ here 0 c, ['] ," with-stdin< - here over - 1- swap c! _pad :] drop ( node ) + StrLit :new $100 _arena :[ here 0 c, ['] ," with-stdin< + here over - 1- swap c! _arena :] drop ( node ) endof S" pspop" of s= ( ) nextt '(' expectChar nextt ')' expectChar @@ -534,4 +540,4 @@ current to parseStatement : parseast ( -- ) newparseunit begin ( unode ) nextt? ?dup while over swap parseUnit drop repeat drop - _ccdebug if nl> ." used pad space for AST: " _pad :usedsz .x nl> then ; + _ccdebug if nl> ." used space for AST: " _arena :usedsz .x nl> then ; diff --git a/fs/cc/type.fs b/fs/cc/type.fs @@ -1,13 +1,11 @@ \ C compiler types ?f<< /lib/str.fs +?f<< lib/arena.fs ?f<< /cc/tok.fs ?f<< /cc/macro.fs -\ This pad is for local typedefs for a single unit. At 24b + name (let's say 32b -\ on average), this gives us 512 local types per unit (every variable -\ declaration is a CType, remember). -16 $400 * Scratchpad :new structbind Scratchpad _pad -0 to _pad allowrollover +\ This arena is for local typedefs for a single unit. +Arena :new structbind Arena _arena 0 value _globalmode \ are we adding a global type? @@ -27,8 +25,8 @@ alias _err _printtype ( type -- ) \ When a type is higher than $ff, it means that it's a pointer to a CType, \ which contains extra information needed for arrays (nbelem) and structs/sig -\ (offset and nexttype). All CType are aligned to 4 bytes, which allows our pointers to use -\ the lower 2 bits to be used for the "*lvl" field. +\ (offset and nexttype). All CType are aligned to 4 bytes, which allows our +\ pointers to use the lower 2 bits to be used for the "*lvl" field. \ In the nomenclature below, "type" can be either a basic type or a CType, \ but "ctype" is always a CType (which is also a type). @@ -74,7 +72,7 @@ struct[ CType : _ 0 align4 here rot> 0 , , 0 , 0 , 0 , STORAGE_SF , s, ; : :new ( name type -- ctype ) - _globalmode if _ else $100 SZ + _pad :[ _ _pad :] drop then ; + _globalmode if _ else $100 SZ + _arena :[ _ _arena :] drop then ; : :struct? flags 1 and ; : :funcsig? flags 2 and ; @@ -125,12 +123,14 @@ struct[ CType create localdefs 0 , 0 c, \ this is a dict link create globaldefs 0 , 0 c, -: addLocalTypedef ( ctype -- ) localdefs over CType name _pad :entry - dup CType flags not if CType type then _pad :, ; +: addLocalTypedef ( ctype -- ) + localdefs over CType name dup c@ ( ctype 'dict name len ) + ENTRYSZ + 8 + _arena :[ + entry dup CType flags not if CType type then , _arena :] drop ; : addGlobalTypedef ( ctype -- ) globaldefs over CType name entry , ; : findTypedef ( name -- type-or-0 ) dup localdefs find ?dup if nip @ else globaldefs find dup if @ then then ; -: cctypes$ 0 localdefs ! _pad :reset ; +: cctypes$ 0 localdefs ! _arena :reset ; : printType* ( type -- ) type*lvl begin ?dup while '*' emit 1- repeat ; diff --git a/fs/doc/alloc.txt b/fs/doc/alloc.txt @@ -0,0 +1,115 @@ +# Memory allocation + +In Forth, memory allocated with "alloc" and the "," family of words can't be +reclaimed except through "forget", which has limited applications. This kind of +allocation is thus for data that has a permanent lifetime. + +To manage data with temporary and semi-tempory lifetimes, different mechanisms +need to be used. + +## Static allocation + +Static allocation is the simplest: + + create mybuf 42 allot + +"mybuf" can now be used to store your temporary data while you work on it. If +this kind of allocation fits your constraints, it's the best way to proceed. + +## Scratchpad + +Sometimes, words produce temporary data which is returned as a result with a +lifetime that is beyond that word's control. If we use a static buffer, we +corrupt the returned data as soon as the word is called again, and sometimes +it's a problem. + +One such example is DuskCC's tokenizer. To process the data it receives from the +stream and yield tokenized strings, it needs to put those strings in memory. The +tokenizer is very clear about those string being temporary and that if you want +to hold onto them, you need to copy it. + +However, a static buffer is inconvenient because the consumer of that tokenizer, +the AST parser, sometimes has to old onto a handful of those token (3 or 4) at +once and juggle with them while it processes them. + +For these scenarios, Dusk has scratchpads. Scratchpads are rolling buffer of a +pre-defined size. When ever you need a space, you call its ":allot" method and +it give you an address to play with. Whenever it reaches the end of its buffer, +it rolls back to the beginning, overwriting previous data. All you have to do is +to choose a size that is big enough to hold all the data you'll ever need at +one particular moment, and small enough not to be wasteful. + +Example usage: + + $100 ( size-in-bytes ) Scratchpad :new structbind Scratchpad pad + : processstr ( str -- str ) + c@+ pad :[]>str ( copied-str ) + \ do some processing + ; + S" hello" processstr value foo + S" goodbye" processstr value bar + \ foo and bar both point to valid processed copies + \ if I call "processstr" often enough, I'll end up overwriting "foo" and + \ "bar", but for now I'm ok. + +### Scratchpad's API + +:new ( sz -- pad ) create a new scratchpad of sz bytes in length + +:allot ( n pad -- a ) allocate n bytes in the pad and return the address of the +allocated memory space. + +:move ( src u pad -- a ) allocate u bytes on the pad and copy the range "src u" +into it. Then, return the allocated memory space. + +:[]>str ( a u pad -- str ) allocate u bytes on the pad and copy that range to it +as an encoded string, then return that string. + +:[ ( sz pad -- ) reserve sz bytes on the pad (we don't allocate it yet, but we +ensure that there's at least that many bytes left in the buffer) and make "here" +point to that reserved buffer. From that point on, all "write" words will write +to that buffer. When you're finished, call :] + +:] ( pad -- a ) finalize a :[ operation by allocating the actual number of bytes +that were written to "here". If that number was less than reserved, we only +allocate that lesser size. If it was higher, we error out (you never want to go +over that limit because that means corrupting your system memory). Restore +"here" to the value it had before the :[ call. Then, return the memory address +if the allocated memory. + +:, ( n pad ) shortcut for "4 :[ , :] drop" + +:s, ( str pad -- ) write string str to the pad. + +:freesz ( pad -- sz ) number of bytes left in the buffer before it rolls over. + +:usedsz ( pad -- sz ) number of bytes used in the buffer so far. Goes back to 0 +after a rollover. + +:reset ( pad -- ) empty the buffer and resets its pointers. + +## Arena allocator + +Scratchpads are fine for temporary data, but for semi-temporary data, not so +much. Sometimes, we need to hold onto temporary data for longer and we don't the +size of that data can vary wildly, making it difficult to find a reasonable size +for our buffer. + +If we have a clear point where we can say "from this moment, I don't need any of +this class of data", then the Arena allocator is ideally suited for the task. + +One such example are AST structures in DuskCC. As we parce the input file, we +need to create those AST nodes in memory, but unlike the tokenizer, we need to +hold onto them because we'll refer to them later. The size of the AST varies +wildly, but once we're finished compling a unit, we don't need this data anymore +so we can clear it. + +The Arena allocator is a linked list of fixed size buffers (4 kilobytes) that +grow whenever we need more memory, by adding a buffer to the linked list. When +we clear the arena, it rewinds itself to its first buffer and reuse those +previously allocated buffers for subsequent allocation, growing the list again +if it reaches its previous limit. + +It's API is the exact same as the Scratchpad except that its :new method doesn't +take a size argument (buffer size is hardcoded to 4K). You call :reset to clear +its data. diff --git a/fs/doc/cc/usage.txt b/fs/doc/cc/usage.txt @@ -44,6 +44,10 @@ are a few differences: * The "struct" keyword can't be used to reference structs, only to define them. * There's a maximum of 3 indirection levels for types. "int ***i;" is fine, "int ****i;" is not. +* No bit fields. I expect that they're not worth their complexity weight, not + only to parse them and generate them correctly, but also to do so in an + efficient manner. The plan is to port code using bit fields with manual + masking/unmasking code. ## Calling Forth words diff --git a/fs/lib/alloc.fs b/fs/lib/alloc.fs @@ -0,0 +1,36 @@ +\ Allocator struct + +struct[ Allocator + sfield ptr + \ Returns the end of the current buffer + smethod :)buf ( self -- a ) + \ Called when there's not enough space until :)buf to allocate. + \ The allocator must then find new space and return its beginning address. + smethod :findspace ( self -- a ) + + : :freesz dup :)buf swap ptr - ; + + : :allot ( n self -- a ) + dup >r :freesz over < if r@ :findspace else r@ ptr then ( n a ) + tuck + to r> ptr ( a ) ; + + : :move ( src u self -- a ) + over swap :allot ( src u dst ) tuck >r move r> ; + + \ push a range to the scratchpad as a string + : :[]>str ( a u self -- str ) + over 1+ swap :allot ( src u dst-1 ) + >r dup r@ c!+ swap ( src dst u ) move r> ; + + 0 value _here 0 value _ptr + \ Open a scratch area of size "size" for "here-style" writing. + : :[ ( size self -- ) dup ptr to _ptr :allot to@! here to _here ; + \ Stop writing to the scratch area and restore here + \ Returns the address of the beginning of the written area + : :] ( self -- a ) + here over ptr > if abort" allocator :[ overflow" then + _here to@! here swap to ptr _ptr ; + + : :, ( n self -- ) CELLSZ swap :allot ! ; + : :s, ( str self -- ) swap c@+ rot :[]>str drop ; +]struct diff --git a/fs/lib/arena.fs b/fs/lib/arena.fs @@ -0,0 +1,40 @@ +\ Arena allocator + +\ The arena allocator is like a scratchpad, but instead of rolling over when +\ it's out of space, it creates a new arena and continue from there. Unlike +\ scratchpad, we expect the user of the arena to "reset" it from time to time, +\ or else, writing to it is the exact same as writing to system memory (the +\ memory is never reclaimed) + +?f<< /lib/alloc.fs + +$1000 const ARENASZ + +struct[ ArenaBuf + sfield nextbuf + SZ &+ buf + : :)buf buf ARENASZ + ; + : :new ( -- buf ) here 0 , ARENASZ allot ; + : :next dup nextbuf ?dup not if :new tuck swap to nextbuf else nip then ; +]struct + +extends Allocator struct[ Arena + sfield root \ first buf of the chain + sfield current \ current ArenaBuf + + : _)buf current ArenaBuf :)buf ; + : _findspace ( self -- a ) + dup current ArenaBuf :next ( self arenabuf ) + tuck swap to current ( arenabuf ) ArenaBuf buf ; + : :new ( -- arena ) + ArenaBuf :new here swap dup ( arena arenabuf ) + ArenaBuf buf ( ptr ) , ['] _)buf , ['] _findspace , + dup ( root ) , ( current ) , ; + : :reset ( self -- ) + dup root over to current + dup current ArenaBuf buf swap to ptr ; + : :usedsz ( self -- sz ) dup >r root 0 begin ( arena res ) + over r@ current <> while + ARENASZ + swap ArenaBuf nextbuf swap repeat ( current res ) + swap ArenaBuf buf r> ptr -^ + ; +]struct diff --git a/fs/lib/meta.fs b/fs/lib/meta.fs @@ -1,8 +1,9 @@ \ Utilities around dictionary metadata \ Dictionary --9 &+@ emeta --9 &+ 'emeta +9 const ENTRYSZ +ENTRYSZ neg &+@ emeta +ENTRYSZ neg &+ 'emeta : wordlen ( w -- len ) 1- c@ $3f and ; : wordname[] ( w -- sa sl ) dup wordlen swap 9 - over - ( sl sa ) swap ; diff --git a/fs/lib/scratch.fs b/fs/lib/scratch.fs @@ -8,42 +8,18 @@ \ The system scratchpad lives at sys/scratch. +?f<< /lib/alloc.fs -struct[ Scratchpad +extends Allocator struct[ Scratchpad sfield size - sfield ptr - sfield allowrollover - SZ &+ :buf( + : :reset ( self -- ) dup :buf( swap to ptr ; - : :new ( size -- pad ) here over , 0 , 1 , swap allot dup :reset ; - : :)buf ( self -- a ) dup :buf( swap size + ; + : _)buf ( self -- a ) dup :buf( swap size + ; + : _findspace ( self -- a ) :buf( ; + : :new ( size -- pad ) + here 0 ( ptr ) , ['] _)buf , ['] _findspace , + over ( size ) , ( size pad ) + swap allot dup :reset ; : :usedsz dup ptr swap :buf( - ; - : :allot ( n self -- a ) - dup >r ptr over + r@ :)buf >= if - r@ allowrollover not if abort" pad limit exceeded!" then r@ :reset then - r@ ptr swap to+ r> ptr ( a ) ; - - : :move ( src u self -- a ) - over swap :allot ( src u dst ) tuck >r move r> ; - - \ push a range to the scratchpad as a string - : :[]>str ( a u self -- str ) - over 1+ swap :allot ( src u dst-1 ) - >r dup r@ c!+ swap ( src dst u ) move r> ; - - 0 value _here 0 value _ptr - \ Open a scratch area of size "size" for "here-style" writing. - : :[ ( size self -- ) dup ptr to _ptr :allot to@! here to _here ; - \ Stop writing to the scratch area and restore here - \ Returns the address of the beginning of the written area - : :] ( self -- a ) - here over ptr > if abort" scratchpad :[ overflow" then - _here to@! here swap to ptr _ptr ; - - : :, ( n self -- ) CELLSZ swap :allot ! ; - : :s, ( str self -- ) swap c@+ rot :[]>str drop ; - : :entry ( 'dict name self -- ) - \ meta + len + prev + align = 12 - over c@ 12 + over :[ rot> entry :] drop ; ]struct diff --git a/fs/tests/lib/all.fs b/fs/tests/lib/all.fs @@ -4,3 +4,4 @@ f<< /tests/lib/bit.fs f<< /tests/lib/str.fs f<< /tests/lib/crc.fs f<< /tests/lib/meta.fs +f<< /tests/lib/arena.fs diff --git a/fs/tests/lib/arena.fs b/fs/tests/lib/arena.fs @@ -0,0 +1,15 @@ +?f<< /tests/harness.fs +?f<< /lib/arena.fs +testbegin +\ Arena allocator tests +Arena :new structbind Arena arena + +4 arena :allot +4 arena :allot +4 - #eq +arena root arena current #eq +arena current ArenaBuf nextbuf 0 #eq +ARENASZ arena :allot \ create a new arena +arena root ArenaBuf nextbuf ArenaBuf buf #eq +arena :usedsz ARENASZ 2 * #eq +testend