commit d5afd6bab549cc5a1a6fd18c5ead6c1693ab9116
parent 3b1c36651aacda538be1cb95bc6e70003d84dcd9
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Mon, 31 Oct 2022 10:56:54 -0400
lib/fmt: move printf code from cc/lib
Rename lib/nfmt to lib/fmt and translate the C printf from cc/lib into a Forth
version, which cc/lib now calls into. This gives printf capabilities to Forth
IO structures.
Diffstat:
10 files changed, 61 insertions(+), 377 deletions(-)
diff --git a/fs/asm/i386.fs b/fs/asm/i386.fs
@@ -1,5 +1,5 @@
\ i386 assembler
-?f<< /lib/nfmt.fs
+?f<< /lib/fmt.fs
?f<< /asm/label.fs
\ MOD/RM constants
diff --git a/fs/cc/lib.fs b/fs/cc/lib.fs
@@ -2,6 +2,7 @@
\ Isn't needed by the compiler, so it can have C code.
?f<< /cc/cc.fs
?f<< /lib/str.fs
+?f<< /lib/fmt.fs
0 const NULL
(S str -- len )
@@ -30,47 +31,11 @@ $100 MemFile :new const _sfile
(S hdl -- )
: fclose IO :close ;
-: _ doer ' , does> ( n hdl 'w ) @ >r to@! StdOut swap r> execute to StdOut ;
-(S n hdl -- ) _ fprintd .
-(S n hdl -- ) _ fprintb .x1
-(S n hdl -- ) _ fprintw .x2
-(S n hdl -- ) _ fprintx .x
(S str hdl -- )
: fputs IO :puts ;
: puts ( str ) StdOut fputs ;
-: printd . ;
-
-\ TODO: switch statements
-:c void fprintf() {
- void *iohdl = pspop();
- char *fmt = pspop();
- int len = strlen(fmt++);
- int i;
- char c;
- for (i=0; i<len; i++) {
- c = fmt[i];
- if (c == '%') {
- c = fmt[++i];
- if (c == 'd') {
- fprintd(iohdl); // n already on PS
- } else if (c == 'b') {
- fprintb(iohdl);
- } else if (c == 'w') {
- fprintw(iohdl);
- } else if (c == 'x') {
- fprintx(iohdl);
- } else if (c == 's') {
- fputs(iohdl);
- } else {
- stype("unsupported fmt argument");
- abort();
- }
- } else {
- fputc(c, iohdl);
- }
- }
-}
+: fprintf IO :printf ;
: printf ( .. n1? n0? fmt -- ) StdOut fprintf ;
(S ... fmt -- str )
diff --git a/fs/doc/cc/index.txt b/fs/doc/cc/index.txt
@@ -25,311 +25,7 @@ porting.
For this reason, the core of the language is very close to ANSI C.
-## Usage
+## Topics
-There are two ways to compile code with this compiler. The regular method is
-through "cc<<", which compiles the specified file. For example, "cc<< foo.c"
-Reads the file "foo.c" as a unit and compiles every element in it. Functions will
-be added to the system dictionary unless they have the "static" storage type.
-
-Another method is to compile in an "inline" manner with ":c". This word reads a
-single "unit element" (a function definition, a global variable or a typedef)
-from the input stream and compiles it. It returns to normal Forth interpretation
-after it parses the last token of the element. Example:
-
-:c int foo() {
- return 42;
-} foo . \ prints 42
-
-Unit local symbols persist across :c definitions. You can also access local
-symbols of the last unit loaded with "cc<<" through ":c" definitions. The
-emptying of local buffers occurs at the beginning of "cc<<".
-
-## Differences in the core language
-
-* no C preprocessor, the preprocessor is Forth itself, through macros.
-* no 64bit types
- * no long, redundant with int
- * no double, float is always 32b
- * char is always 8b, short is always 16b, int is always 32b
-* tightened parsing requirements for simplification purposes
- * "unsigned" always goes first
- * no "signed" (always default), no "auto"
-* No logical shortcut guarantee. In (a && b), b will be executed even if a
- yields 0.
-* Number literals are the same as Dusk OS, so 12345, $1234 and 'X'. No 0x1234
- or 0o777.
-* string literals are not null-terminated, but "counted strings". The exact same
- format as system strings.
-* Added pspop() and pspush() built-in functions.
-* No "varargs". pspop() and pspush() replace that functionality.
-* "struct MyStruct {...};" automatically creates a "typedef" to the struct.
-* The "struct" keyword can't be used to reference structs, only to define them.
-* There's a maximum of 3 indirection levels for types. "int ***i;" is fine,
- "int ****i;" is not.
-
-## Caller save
-
-Native words don't save registers they use. For Forth words, it doesn't matter
-much because all words are "atomic". Once they return, register values don't
-matter anymore. Some native words call each other and in this case, careful
-threading is necessary, but otherwise, it works well as is.
-
-When other languages are concerned, however, this attribute becomes important
-because if a C expression calls a Forth word, then it loses register values.
-
-Therefore, it's important to remember that in Dusk OS, it's the caller's
-responsibility to save/restore registers around a call.
-
-## Function call stack frame
-
-In C-compiled code, local variables and arguments being passed during function
-calls are placed on something called a stack frame.
-
-In Dusk, we have two stack frames. The "arguments" frame lives in PS and the
-"local" frame (for local variables) lives on RS. The caller of a C function
-has to allocate enough space to place the arguments it's passing to the
-function. When you think about it, it's the same thing as with Forth words.
-
-When the function begins, it allocates enough space for its local variables on
-RS. Its arguments are already on PS, where they should be, so it does nothing.
-
-When the function returns, it frees the local frame from RS. It also adjusts
-PSP according to the function's "argument balance". If it returns more arguments
-than it received, PS will grow, if it returns less arguments than received, PS
-shrinks. Then, we return.
-
-Let's use an example:
-
-int foobar(int a, int b) {
- int x = 42;
- return a+b+x;
-}
-
-For Forth, this function can be called like this:
-
-1 2 foobar . \ prints "45"
-
-Here's what PS and RS look like at the moment foobar is called:
-
- |-----------| |-------------|
- PSP+4 ->| $00000001 | RSP+0 -> | return addr |
- PSP+0 ->| $00000002 | |-------------|
- |-----------|
-
-During the function prelude, PSP doesn't change, but the compiler assigns each
-argument to its proper place in PS.
-
-At the same time, the prelude also decreases RSP by 4 to make space for local
-variables:
-
- |-----------| |---------------|
- PSP+4 ->| int a = 1 | RSP+4 -> | return addr |
- PSP+0 ->| int b = 2 | RSP+0 -> | int x = undef |
- |-----------| |---------------|
-
-Then, we execute "int x = 42", which sets RSP+0:
-
- |-----------| |---------------|
- PSP+4 ->| int a = 1 | RSP+4 -> | return addr |
- PSP+0 ->| int b = 2 | RSP+0 -> | int x = 42 |
- |-----------| |---------------|
-
-Then, "return a+b+x" does a "soft" push to PS, that is, it pushes to PS as if
-the arguments had been popped during the prelude. This means that our return
-value overwrites "a". PSP is increased by 4:
-
- |-----------| |---------------|
- PSP+0 ->| $0000002d | RSP+4 -> | return addr |
- PSP-4 ->| int b = 2 | RSP+0 -> | int x = 42 |
- |-----------| |---------------|
-
-Then, before returning, we deallocate the local stack:
-
- |-----------| |-------------|
- PSP+0 ->| $0000002d | RSP+0 -> | return addr |
- |-----------| |-------------|
-
-## pspush() and pspop()
-
-Builtin functions pspush() and pspop() allow for direct control over PS. This
-gives you the ability to pop or push a variable number of arguments from/to PS.
-
-These functions, however, are incompatible with arguments and return values. If
-you use both at the same time, they'll mess your PS stack. You can only use them
-in functions that have a "void (void)" signature (except what calling Forth
-words, see below). Let's see an example:
-
-void foobar() {
- int b = pspop();
- int a = pspop();
- int x = 42;
- pspush(a+b+x);
-}
-
-This function does the exact same thing as the previous "foobar()", but stacks
-will look different. After function prelude:
-
- |-----------| |---------------|
- PSP+4 ->| $00000001 | RSP+12-> | return addr |
- PSP+0 ->| $00000002 | RSP+8 -> | int b = undef |
- |-----------| RSP+4 -> | int a = undef |
- RSP+0 -> | int x = undef |
- |---------------|
-
-Then, after the first 3 lines:
-
- PSP+0 ->|-----------| |---------------|
- RSP+12-> | return addr |
- RSP+8 -> | int b = 2 |
- RSP+4 -> | int a = 1 |
- RSP+0 -> | int x = 42 |
- |---------------|
-
-And right before returning:
-
- |-----------| |-------------|
- PSP+0 ->| $0000002d | RSP+0 -> | return addr |
- |-----------| |-------------|
-
-## Calling Forth words
-
-Words from the system dictionary can be called. They are considered to have a
-void return type and an unspecified number of arguments.
-
-Arguments to Forth words can be passed normally, but return values have to be
-handled with pspop() and pspush(). Whenever you call such a function, you should
-return to "PS normality" before using one of your function arguments, because if
-you don't, PS offsets for those arguments will be wrong.
-
-For example, let's say that you want to call "max", a forth word with a
-signature "a b -- n". You would do so like this:
-
-int mymax(int a, int b) {
- max(a, b);
- // don't use a or b before having called pspop(), they're broken.
- return pspop();
-}
-
-## Macros
-
-Macros in Dusk's CC are simply markers inside which arbitrary Forth code is
-interpreted. Those markers are #[ and ]#. Those markers are executed during the
-AST generation phase, which means that you can arbitrarily modify the AST at any
-point during parsing.
-
-A common case with C macros is the definition and reuse of constants. Here's how
-it looks:
-
-#[ 42 const FOOBAR ]#
-int foo() {
- return #[ FOOBAR c]# ; // c]# means "Constant :new ]#"
-}
-
-Because macros can modify the AST, they can only be inserted at certain
-designated places, known as "hash (#) bars". These are:
-
-* In a Unit context (in between functions)
-* Replacing a "factor" AST element, which are quite numerous. Some of them:
- * A constant
- * A Lvalue (AST_IDENT)
- * A function call
- * An expression
-* Inside an array length [] definition.
-
-In any other place, "#[" will be a parse error.
-
-In the first case, the signature of the macro is ( node -- node ). By using PS
-TOS, you can add a node to the active Unit.
-
-The second case has a signature ( -- node ), that is, you are expected to put a
-node that is in the context you're putting it. It will then be added wherever
-the factor was expected. It will even have postfix AST rules applied to it,
-which opens nice doors. For example, if your macro returns a simple AST_IDENT,
-then right after the macro you can add parens to make it into a function call.
-
-The third case has a signature ( -- number ), with "number" being the number of
-elements that the array being defined will have.
-
-When a macro begins, PS level is recorded. If it doesn't end with the correct
-PS size, an error is raised.
-
-Macro opening symbol, "#[", obeys C tokenization rules, but the closing one,
-"]#", obeys Forth tokenization rules, so it has to be followed by a space.
-
-There are "shortcut words" for closing a macro:
-
-c]# --> Constant :new ]#
-i]# --> Ident :new ]#
-+]# --> over Node :add ]#
-
-## Linkage and persistence
-
-By default, functions and global variables have external linkage. You give them
-an internal linkage with "static".
-
-static void foo() { }
-void bar() { foo(); }
-
-This unit will compile fine. Because "foo()" is in the same unit as "bar()",
-"bar()" can call "foo()". However, that function can't be called from another
-unit or from Forth. "bar()" can.
-
-Symbols created with "typedef" are only visible in the current unit. Structures
-created through the "struct <name> { ... }" form, however, are persistent and
-can be referenced across units. The "typedef struct { ... } <name>" form,
-however, is "private" like any typedef.
-
-## The compiler VM
-
-The goal of this VM, which lives in /cc/vm, is to provide a unified API for code
-generation of a C AST across CPU architecture.
-
-Computation done by this generated code is centered around two operands, Op1 and
-Op2. Those operands can "live" in different places depending on the context: in
-a register, in memory, or as a constant.
-
-Operands can be stored in these locations:
-
-None: operand not specified
-Constant: a constant value. Cannot be used as a "destination" op.
-Stack Frame: an address on the Stack Frame
-Register: value currently being held a register
-
-Besides the location, each operand has an accompanying "argument", whose
-meaning depends on the location:
-
-Constant: the value of the constant
-Stack Frame: the offset relative to the SF pointer
-Arguments Frame: the offset relative to the AF pointer
-Register: the ID of the register
-
-Operations are compiled by calling the appropriate op word. For example,
-"vmadd," is the "binary +" operations. These operations have different op
-requirements and effects depending on their type.
-
-Unary op: Requires vmop. Keeps vmop allocated.
-Binary op: Requires vmop and vmop^. Keeps vmop allocated and deallocates vmop^.
-Assign op: Same as Binary op.
-vmret,: Requires a deallocated vmop^. If vmop is allocated, compile a push to
-PS and then de-allocate it.
-
-### Jumping in the VM
-
-There are 2 kinds of jumps: forward and backward. In forward jumps, we need to
-emit a jump opcode followed by a placeholder and then push the address of that
-placeholder to PS. When we reach the target, we write the target address to that
-placeholder.
-
-In backward jumps, we push the target address upon meeting it, and then simply
-write the jump opcode followed by that address.
-
-Forward jumps are written with the "[" words:
-
-vmjmp[, ... ]vmjmp
-vmjz[, ... ]vmjmp
-
-Backward jumps are written with the non-"[" words:
-
-here ... vmjmp,
-here ... vmjnz,
+* Usage (doc/cc/usage.txt)
+* Implementation details (doc/cc/impl.txt)
diff --git a/fs/drv/pc/ata.fs b/fs/drv/pc/ata.fs
@@ -1,5 +1,5 @@
\ ATA driver
-?f<< /lib/nfmt.fs
+?f<< /lib/fmt.fs
?f<< /asm/i386.fs
extends Drive struct[ ATADrive
diff --git a/fs/drv/pc/pci.fs b/fs/drv/pc/pci.fs
@@ -1,6 +1,6 @@
\ PCI driver
?f<< /lib/str.fs
-?f<< /lib/nfmt.fs
+?f<< /lib/fmt.fs
?f<< /lib/bit.fs
?f<< /lib/wordtbl.fs
?f<< /lib/meta.fs
diff --git a/fs/lib/diag.fs b/fs/lib/diag.fs
@@ -3,8 +3,7 @@
scnt >r begin dup .x spc> >r scnt not until
begin r> scnt V1 = until rdrop ;
: .S ( -- )
- S" SP " stype scnt .x1 spc> S" RS " stype rcnt .x1 spc>
- S" -- " stype stack? psdump ;
+ scnt rcnt swap S" SP %b RS %b -- " ConsoleOut IO :printf stack? psdump ;
: .free
here ['] 2drop ( first word in boot.fs ) - .sz ." used "
HEREMAX @ here - .sz ." free" ;
diff --git a/fs/lib/fmt.fs b/fs/lib/fmt.fs
@@ -0,0 +1,50 @@
+\ String formatting
+
+struct+[ IO
+ create _buf 11 allot \ scratch bytes; _xn fills the first "digits" of them
+ create _ ," 0123456789abcdef" \ hex digit characters, looked up by _xh
+ : _xh ( n -- c ) $f and _ + c@ ; \ c = table char for the low 4 bits of n
+ : _xn ( n digits self -- ) \ write n to self as "digits" hex characters
+ >r dup >r >r begin ( n ) \ V1=self V2=digits V3=loop
+ dup _xh _buf r@ 1- + c! 4 rshift next drop \ low nibble -> _buf+V3-1
+ _buf r> r> :write ; \ ( _buf digits self ) :write
+ : :.x1 2 swap _xn ; \ ( n self -- ) 2 hex digits
+ : :.x2 4 swap _xn ; \ ( n self -- ) 4 hex digits
+ : :.x 8 swap _xn ; \ ( n self -- ) 8 hex digits
+ \ decimal: _ recurses on the quotient before emitting its own remainder digit
+ : _ ( n self -- ) >r 10 /mod ( r q ) ?dup if r@ _ then '0' + r> :putc ;
+ : :. ( n self -- ) >r \ print n in decimal, with a leading '-' when n < 0
+ ?dup not if
+ '0' r> :putc else \ n = 0 is emitted directly
+ dup 0< if '-' r@ :putc 0 -^ r> _ else r> _ then \ 0 -^ presumably negates
+ then ;
+ \ printf: walk counted string fmt; each %b %w %x %d %s consumes PS top (n0 first)
+ : :printf ( nX ... n0 fmt self -- ) >r >r \ V1=self V2=fmt
+ 8b to@+ V2 ( len ) >r begin ( nX ... n0 ) \ V3=loop
+ 8b to@+ V2 dup '%' = if \ next fmt char; '%' introduces a specifier
+ drop -1 to+ V3 8b to@+ V2 case \ the specifier char also uses up one count
+ 'b' of = V1 :.x1 endof \ %b: 2 hex digits
+ 'w' of = V1 :.x2 endof \ %w: 4 hex digits
+ 'x' of = V1 :.x endof \ %x: 8 hex digits
+ 'd' of = V1 :. endof \ %d: decimal
+ 's' of = V1 :puts endof \ %s: string passed to :puts
+ abort" unsupported fmt argument" \ any other specifier char aborts
+ endcase
+ else V1 :putc then next rfree ; \ non-'%' chars go to self unchanged
+]struct
+
+: .x1 StdOut IO :.x1 ; \ ( n -- ) 2 hex digits to StdOut
+: .x2 StdOut IO :.x2 ; \ ( n -- ) 4 hex digits to StdOut
+: .x StdOut IO :.x ; \ ( n -- ) 8 hex digits to StdOut
+\\ print in hexadecimal with a width that depends on the value
+: .x? dup $ffff > if .x else dup $ff > if .x2 else .x1 then then ;
+: . StdOut IO :. ; \ ( n -- ) decimal to StdOut
+
+\ size: print a byte count divided down by 1024, then a K/M/G suffix and 'B'
+create _ ," KMG" \ suffix characters for levels 1, 2 and 3
+: .sz ( size-in-bytes -- )
+ 0 begin ( sz lvl )
+ swap 1024 /mod ( lvl r q ) ?dup while \ stop once the quotient is 0
+ nip swap 1+ repeat ( lvl sz ) \ remainder r is dropped, level goes up by 1
+ . ?dup if 1- _ + c@ stdout then 'B' stdout ; \ no suffix char when lvl is 0
+
diff --git a/fs/lib/nfmt.fs b/fs/lib/nfmt.fs
@@ -1,26 +0,0 @@
-\ Number formatting
-\ hexadecimal
-create _ ," 0123456789abcdef"
-: .xh $f and _ + c@ stdout ;
-: .x1 dup 4 rshift .xh .xh ;
-: .x2 dup 8 rshift .x1 .x1 ;
-\\ print top of stack in hexadecimal
-: .x ( n -- ) dup 16 rshift .x2 .x2 ;
-\\ print in hexadecimal with a width that depends on the value
-: .x? dup $ffff > if .x else dup $ff > if .x2 else .x1 then then ;
-
-\ decimal
-: _ 10 /mod ( r q ) ?dup if _ then '0' + stdout ;
-: . ( n -- )
- ?dup not if
- '0' stdout else
- dup 0< if '-' stdout 0 -^ _ else _ then
- then ;
-\ size
-create _ ," KMG"
-: .sz ( size-in-bytes -- )
- 0 begin ( sz lvl )
- swap 1024 /mod ( lvl r q ) ?dup while
- nip swap 1+ repeat ( lvl sz )
- . ?dup if 1- _ + c@ stdout then 'B' stdout ;
-
diff --git a/fs/xcomp/i386/pc/inittest.fs b/fs/xcomp/i386/pc/inittest.fs
@@ -1,7 +1,7 @@
f<< /drv/pc/com.fs
com$ ' >com to emit
f<< sys/scratch.fs
-f<< lib/nfmt.fs
+f<< lib/fmt.fs
f<< lib/diag.fs
' bye to abort
f<< tests/all.fs
diff --git a/fs/xcomp/init.fs b/fs/xcomp/init.fs
@@ -1,7 +1,7 @@
\ Common part of init.fs. Machine-independent
\ As a user, you'll want to adapt this to your needs.
f<< sys/scratch.fs
-f<< lib/nfmt.fs
+f<< lib/fmt.fs
f<< lib/diag.fs
f<< sys/rdln.fs
: init S" Dusk OS\n" stype .free rdln$ stdio$ quit ;