duskos

dusk os fork
git clone git://git.alexwennerberg.com/duskos
Log | Files | Refs | README | LICENSE

commit d5afd6bab549cc5a1a6fd18c5ead6c1693ab9116
parent 3b1c36651aacda538be1cb95bc6e70003d84dcd9
Author: Virgil Dupras <hsoft@hardcoded.net>
Date:   Mon, 31 Oct 2022 10:56:54 -0400

lib/fmt: move printf code from cc/lib

Rename lib/nfmt to lib/fmt and translate C printf from cc/lib into a Forth
version, which cc/lib now calls into. This gives printf capabilities to Forth
IO structures.

Diffstat:
Mfs/asm/i386.fs | 2+-
Mfs/cc/lib.fs | 39++-------------------------------------
Mfs/doc/cc/index.txt | 310+------------------------------------------------------------------------------
Mfs/drv/pc/ata.fs | 2+-
Mfs/drv/pc/pci.fs | 2+-
Mfs/lib/diag.fs | 3+--
Afs/lib/fmt.fs | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Dfs/lib/nfmt.fs | 26--------------------------
Mfs/xcomp/i386/pc/inittest.fs | 2+-
Mfs/xcomp/init.fs | 2+-
10 files changed, 61 insertions(+), 377 deletions(-)

diff --git a/fs/asm/i386.fs b/fs/asm/i386.fs @@ -1,5 +1,5 @@ \ i386 assembler -?f<< /lib/nfmt.fs +?f<< /lib/fmt.fs ?f<< /asm/label.fs \ MOD/RM constants diff --git a/fs/cc/lib.fs b/fs/cc/lib.fs @@ -2,6 +2,7 @@ \ Isn't needed by the compiler, so it can have C code. ?f<< /cc/cc.fs ?f<< /lib/str.fs +?f<< /lib/fmt.fs 0 const NULL (S str -- len ) @@ -30,47 +31,11 @@ $100 MemFile :new const _sfile (S hdl -- ) : fclose IO :close ; -: _ doer ' , does> ( n hdl 'w ) @ >r to@! StdOut swap r> execute to StdOut ; -(S n hdl -- ) _ fprintd . -(S n hdl -- ) _ fprintb .x1 -(S n hdl -- ) _ fprintw .x2 -(S n hdl -- ) _ fprintx .x (S str hdl -- ) : fputs IO :puts ; : puts ( str ) StdOut fputs ; -: printd . ; - -\ TODO: switch statements -:c void fprintf() { - void *iohdl = pspop(); - char *fmt = pspop(); - int len = strlen(fmt++); - int i; - char c; - for (i=0; i<len; i++) { - c = fmt[i]; - if (c == '%') { - c = fmt[++i]; - if (c == 'd') { - fprintd(iohdl); // n already on PS - } else if (c == 'b') { - fprintb(iohdl); - } else if (c == 'w') { - fprintw(iohdl); - } else if (c == 'x') { - fprintx(iohdl); - } else if (c == 's') { - fputs(iohdl); - } else { - stype("unsupported fmt argument"); - abort(); - } - } else { - fputc(c, iohdl); - } - } -} +: fprintf IO :printf ; : printf ( .. n1? n0? fmt -- ) StdOut fprintf ; (S ... fmt -- str ) diff --git a/fs/doc/cc/index.txt b/fs/doc/cc/index.txt @@ -25,311 +25,7 @@ porting. For this reason, the core of the language is very close to ANSI C. -## Usage +## Topics -There are two ways to compile code with this compiler. The regular method is -through "cc<<", which compiles the specified file. For example, "cc<< foo.c" -Reads the file "foo.c" as a unit and compiles every element in it. Functions will -be added to the system dictionary unless they have the "static" storage type. - -Another method is to compile in an "inline" manner with ":c". 
This word reads a -single "unit element" (a function definition, a global variable or a typedef) -from the input stream and compiles it. It returns to normal Forth interpretation -after it parses the last token of the element. Example: - -:c int foo() { - return 42; -} foo . \ prints 42 - -Unit local symbols persist across :c definitions. You can also access local -symbols of the last unit loaded with "cc<<" through ":c" definitions. The -emptying of local buffers occurs at the beginning of "cc<<". - -## Differences in the core language - -* no C preprocessor, the preprocessor is Forth itself, through macros. -* no 64bit types - * no long, redundant with int - * no double, float is always 32b - * char is always 8b, short is always 16b, int is always 32b -* tightened parsing requirements for simplification purposes - * "unsigned" always goes first - * no "signed" (always default), no "auto" -* No logical shortcut guarantee. In (a && b), b will be executed even if a - yields 0. -* Number literals are the same as Dusk OS, so 12345, $1234 and 'X'. No 0x1234 - or 0o777. -* string literals are not null-terminated, but "counted strings". The exact same - format as system strings. -* Added pspop() and pspush() built-in functions. -* No "varargs". pspop() and pspush() replace that functionality. -* "struct MyStruct {...};" automatically creates a "typedef" to the struct. -* The "struct" keyword can't be used to reference structs, only to define them. -* There's a maximum of 3 indirection levels for types. "int ***i;" is fine, - "int ****i;" is not. - -## Caller save - -Native words don't save registers they use. For Forth words, it doesn't matter -much because all words are "atomic". Once they return, register values don't -matter anymore. Some native words call each other and in this case, careful -threading is necessary, but otherwise, it works well as is. 
- -When other languages are concerned, however, this attribute becomes important -because if a C expression calls a Forth word, then it loses register values. - -Therefore, it's important to remember that in Dusk OS, it's the caller's -responsibility to save/restore registers around a call. - -## Function call stack frame - -In C-compiled code, local variables and arguments being passed during function -calls are placed on something called a stack frame. - -In Dusk, we have two stack frames. The "arguments" frame lives in PS and the -"local" frame (for local variables) lives on RS. The caller of a C function -has to allocate enough space to place the arguments it's passing to the -function. When you think about it, it's the same thing as with Forth words. - -When the function begins, it allocates enough space for its local variables on -RS. Its arguments are already on PS, where they should be, so it does nothing. - -When the function returns, it frees the local frame from RS. It also adjusts -PSP according to the function's "argument balance". If it returns more arguments -than it received, PS will grow, if it returns less arguments than received, PS -shrinks. Then, we return. - -Let's use an example: - -int foobar(int a, int b) { - int x = 42; - return a+b+x; -} - -For Forth, this function can be called like this: - -1 2 foobar . \ prints "45" - -Here's what PS and RS look like at the moment foobar is called: - - |-----------| |-------------| - PSP+4 ->| $00000001 | RSP+0 -> | return addr | - PSP+0 ->| $00000002 | |-------------| - |-----------| - -During the function prelude, PSP doesn't change, but the compiler assigns each -argument to its proper place in PS. 
- -At the same time, the prelude also decreases RSP by 4 to make space for local -variables: - - |-----------| |---------------| - PSP+4 ->| int a = 1 | RSP+4 -> | return addr | - PSP+0 ->| int b = 2 | RSP+0 -> | int x = undef | - |-----------| |---------------| - -Then, we execute "int x = 42", which sets RSP+0: - - |-----------| |---------------| - PSP+4 ->| int a = 1 | RSP+4 -> | return addr | - PSP+0 ->| int b = 2 | RSP+0 -> | int x = 42 | - |-----------| |---------------| - -Then, "return a+b+x" does a "soft" push to PS, that is, it pushes to PS as if -the arguments had been popped during the prelude. This means that our return -value overwrites "a". PSP is increased by 4: - - |-----------| |---------------| - PSP+0 ->| $0000002d | RSP+4 -> | return addr | - PSP-4 ->| int b = 2 | RSP+0 -> | int x = 42 | - |-----------| |---------------| - -Then, before returning, we deallocate the local stack: - - |-----------| |-------------| - PSP+0 ->| $0000002d | RSP+0 -> | return addr | - |-----------| |-------------| - -## pspush() and pspop() - -Builtin functions pspush() and pspop() allow for direct control over PS. This -gives you the ability to pop or push a variable number of arguments from/to PS. - -These functions, however, are incompatible with arguments and return values. If -you use both at the same time, they'll mess your PS stack. You can only use them -in functions that have a "void (void)" signature (except what calling Forth -words, see below). Let's see an example: - -void foobar() { - int b = pspop(); - int a = pspop(); - int x = 42; - pspush(a+b+x); -} - -This function does the exact same thing as the previous "foobar()", but stacks -will look different. 
After function prelude: - - |-----------| |---------------| - PSP+4 ->| $00000001 | RSP+12-> | return addr | - PSP+0 ->| $00000002 | RSP+8 -> | int b = undef | - |-----------| RSP+4 -> | int a = undef | - RSP+0 -> | int x = undef | - |---------------| - -Then, after the first 3 lines: - - PSP+0 ->|-----------| |---------------| - RSP+12-> | return addr | - RSP+8 -> | int b = 2 | - RSP+4 -> | int a = 1 | - RSP+0 -> | int x = 42 | - |---------------| - -And right before returning: - - |-----------| |-------------| - PSP+0 ->| $0000002d | RSP+0 -> | return addr | - |-----------| |-------------| - -## Calling Forth words - -Words from the system dictionary can be called. They are considered to have a -void return type and an unspecified number of arguments. - -Arguments to Forth words can be passed normally, but return values have to be -handled with pspop() and pspush(). Whenever you call such a function, you should -return to "PS normality" before using one of your function arguments, because if -you don't, PS offsets for those arguments will be wrong. - -For example, let's say that you want to call "max", a forth word with a -signature "a b -- n". You would do so like this: - -int mymax(int a, int b) { - max(a, b); - // don't use a or b before having called pspop(), they're broken. - return pspop(); -} - -## Macros - -Macros in Dusk's CC are simply markers inside which arbitrary Forth code is -interpreted. Those markers are #[ and ]#. Those markers are executed during the -AST generation phase, which means that you can arbitrarily modify the AST at any -point during parsing. - -A common case with C macros is the definition and reuse of constants. Here's how -it looks: - -#[ 42 const FOOBAR ]# -int foo() { - return #[ FOOBAR c]# ; // c]# means "Constant :new ]#" -} - -Because macros can modify the AST, they can only be inserted at certain -designated places, known as "hash (#) bars". 
These are: - -* In a Unit context (in between functions) -* Replacing a "factor" AST element, which are quite numerous. Some of them: - * A constant - * A Lvalue (AST_IDENT) - * A function call - * An expression -* Inside an array length [] definition. - -In any other place, "#[" will be a parse error. - -In the first case, the signature of the macro is ( node -- node ). By using PS -TOS, you can add a node to the active Unit. - -The second case has a signature ( -- node ), that is, you are expected to put a -node that is in the context you're putting it. It will then be added wherever -the factor was expected. It will even have postfix AST rules applied to it, -which opens nice doors. For example, if your macro returns a simple AST_IDENT, -then right after the macro you can add parens to make it into a function call. - -The third case has a signature ( -- number ), with "number" being the number of -elements that the array being defined will have. - -When a macro begins, PS level is recorded. If it doesn't end with the correct -PS size, an error is raised. - -Macro opening symbol, "#[", obeys C tokenization rules, but the closing one, -"]#", obeys Forth tokenization rules, so it has to be followed by a space. - -There are "shortcut words" for closing a macro: - -c]# --> Constant :new ]# -i]# --> Ident :new ]# -+]# --> over Node :add ]# - -## Linkage and persistence - -By default, functions and global variables have external linkage. You give them -an internal linkage with "static". - -static void foo() { } -void bar() { foo(); } - -This unit will compile fine. Because "foo()" is in the same unit as "bar()", -"bar()" can call "foo()". However, that function can't be called from another -unit or from Forth. "bar()" can. - -Symbols created with "typedef" are only visible in the current unit. Structures -created through the "struct <name> { ... }" form, however, are persistent and -can be referenced across units. The "typedef struct { ... 
} <name>" form, -however, is "private" like any typedef. - -## The compiler VM - -The goal of this VM, which lives in /cc/vm, is to provide a unified API for code -generation of a C AST across CPU architecture. - -Computation done by this generated code is centered around two operands, Op1 and -Op2. Those operands can "live" in different places depending on the context: in -a register, in memory, or as a constant. - -Operands can be stored in these locations: - -None: operand not specified -Constant: a constant value. Cannot be used as a "destination" op. -Stack Frame: an address on the Stack Frame -Register: value currently being held a register - -Besides the location, each operand has an accompanying "argument", whose -meaning depends on the location: - -Constant: the value of the constant -Stack Frame: the offset relative to the SF pointer -Arguments Frame: the offset relative to the AF pointer -Register: the ID of the register - -Operations are compiled by calling the appropriate op word. For example, -"vmadd," is the "binary +" operations. These operations have different op -requirements and effects depending on their type. - -Unary op: Requires vmop. Keeps vmop allocated. -Binary op: Requires vmop and vmop^. Keeps vmop allocated and deallocates vmop^. -Assign op: Same as Binary op. -vmret,: Requires a deallocated vmop^. If vmop is allocated, compile a push to -PS and then de-allocate it. - -### Jumping in the VM - -There are 2 kinds of jumps: forward and backward. In forward jumps, we need to -emit a jump opcode followed by a placeholder and then push the address of that -placeholder to PS. When we reach the target, we write the target address to that -placeholder. - -In backward jumps, we push the target address upon meeting it, and then simply -write the jump opcode followed by that address. - -Forward jumps are written with the "[" words: - -vmjmp[, ... ]vmjmp -vmjz[, ... ]vmjmp - -Backward jumps are written with the non-"[" words: - -here ... 
vmjmp, -here ... vmjnz, +* Usage (doc/cc/usage.txt) +* Implementation details (doc/cc/impl.txt) diff --git a/fs/drv/pc/ata.fs b/fs/drv/pc/ata.fs @@ -1,5 +1,5 @@ \ ATA driver -?f<< /lib/nfmt.fs +?f<< /lib/fmt.fs ?f<< /asm/i386.fs extends Drive struct[ ATADrive diff --git a/fs/drv/pc/pci.fs b/fs/drv/pc/pci.fs @@ -1,6 +1,6 @@ \ PCI driver ?f<< /lib/str.fs -?f<< /lib/nfmt.fs +?f<< /lib/fmt.fs ?f<< /lib/bit.fs ?f<< /lib/wordtbl.fs ?f<< /lib/meta.fs diff --git a/fs/lib/diag.fs b/fs/lib/diag.fs @@ -3,8 +3,7 @@ scnt >r begin dup .x spc> >r scnt not until begin r> scnt V1 = until rdrop ; : .S ( -- ) - S" SP " stype scnt .x1 spc> S" RS " stype rcnt .x1 spc> - S" -- " stype stack? psdump ; + scnt rcnt swap S" SP %b RS %b -- " ConsoleOut IO :printf stack? psdump ; : .free here ['] 2drop ( first word in boot.fs ) - .sz ." used " HEREMAX @ here - .sz ." free" ; diff --git a/fs/lib/fmt.fs b/fs/lib/fmt.fs @@ -0,0 +1,50 @@ +\ String formatting + +struct+[ IO + create _buf 11 allot + create _ ," 0123456789abcdef" + : _xh ( n -- c ) $f and _ + c@ ; + : _xn ( n digits self -- ) + >r dup >r >r begin ( n ) \ V1=self V2=digits V3=loop + dup _xh _buf r@ 1- + c! 4 rshift next drop + _buf r> r> :write ; + : :.x1 2 swap _xn ; + : :.x2 4 swap _xn ; + : :.x 8 swap _xn ; + + : _ ( n self -- ) >r 10 /mod ( r q ) ?dup if r@ _ then '0' + r> :putc ; + : :. ( n self -- ) >r + ?dup not if + '0' r> :putc else + dup 0< if '-' r@ :putc 0 -^ r> _ else r> _ then + then ; + + : :printf ( nX ... n0 fmt self -- ) >r >r \ V1=self V2=fmt + 8b to@+ V2 ( len ) >r begin ( nX ... n0 ) \ V3=loop + 8b to@+ V2 dup '%' = if + drop -1 to+ V3 8b to@+ V2 case + 'b' of = V1 :.x1 endof + 'w' of = V1 :.x2 endof + 'x' of = V1 :.x endof + 'd' of = V1 :. endof + 's' of = V1 :puts endof + abort" unsupported fmt argument" + endcase + else V1 :putc then next rfree ; +]struct + +: .x1 StdOut IO :.x1 ; +: .x2 StdOut IO :.x2 ; +: .x StdOut IO :.x ; +\\ print in hexadecimal with a width that depends on the value +: .x? 
dup $ffff > if .x else dup $ff > if .x2 else .x1 then then ; +: . StdOut IO :. ; + +\ size +create _ ," KMG" +: .sz ( size-in-bytes -- ) + 0 begin ( sz lvl ) + swap 1024 /mod ( lvl r q ) ?dup while + nip swap 1+ repeat ( lvl sz ) + . ?dup if 1- _ + c@ stdout then 'B' stdout ; + diff --git a/fs/lib/nfmt.fs b/fs/lib/nfmt.fs @@ -1,26 +0,0 @@ -\ Number formatting -\ hexadecimal -create _ ," 0123456789abcdef" -: .xh $f and _ + c@ stdout ; -: .x1 dup 4 rshift .xh .xh ; -: .x2 dup 8 rshift .x1 .x1 ; -\\ print top of stack in hexadecimal -: .x ( n -- ) dup 16 rshift .x2 .x2 ; -\\ print in hexadecimal with a width that depends on the value -: .x? dup $ffff > if .x else dup $ff > if .x2 else .x1 then then ; - -\ decimal -: _ 10 /mod ( r q ) ?dup if _ then '0' + stdout ; -: . ( n -- ) - ?dup not if - '0' stdout else - dup 0< if '-' stdout 0 -^ _ else _ then - then ; -\ size -create _ ," KMG" -: .sz ( size-in-bytes -- ) - 0 begin ( sz lvl ) - swap 1024 /mod ( lvl r q ) ?dup while - nip swap 1+ repeat ( lvl sz ) - . ?dup if 1- _ + c@ stdout then 'B' stdout ; - diff --git a/fs/xcomp/i386/pc/inittest.fs b/fs/xcomp/i386/pc/inittest.fs @@ -1,7 +1,7 @@ f<< /drv/pc/com.fs com$ ' >com to emit f<< sys/scratch.fs -f<< lib/nfmt.fs +f<< lib/fmt.fs f<< lib/diag.fs ' bye to abort f<< tests/all.fs diff --git a/fs/xcomp/init.fs b/fs/xcomp/init.fs @@ -1,7 +1,7 @@ \ Common part of init.fs. Machine-independent \ As a user, you'll want to adapt this to your needs. f<< sys/scratch.fs -f<< lib/nfmt.fs +f<< lib/fmt.fs f<< lib/diag.fs f<< sys/rdln.fs : init S" Dusk OS\n" stype .free rdln$ stdio$ quit ;