commit 561f6b929c31118aa84d27e6c3a406fa89ebb549
parent b8b4fd38490dd5a04b10e354f0c2840b1dfe25a8
Author: Virgil Dupras <hsoft@hardcoded.net>
Date: Wed, 30 Nov 2022 12:38:14 -0500
ar/puff: output to stdout
I can't get rid of the output buffer because of the "dist" algorithm which needs
to look in the previously outputted bytes (up to 32K back). The code is still
in mode "full output buffer in memory", but I'll soon change it so that it uses
only enough memory to allow for this dist algo to work.
Diffstat:
M | fs/ar/puff.c | | | 136 | +++++++++++++++++++++++++++++++++++++------------------------------------------ |
M | fs/ar/ungz.fs | | | 10 | +--------- |
2 files changed, 65 insertions(+), 81 deletions(-)
diff --git a/fs/ar/puff.c b/fs/ar/puff.c
@@ -39,12 +39,12 @@
#define MAXCODES (MAXLCODES+MAXDCODES)
/* number of fixed literal/length codes */
#define FIXLCODES 288
+#define OUTBUFLEN $10000
/* input and output state */
struct state {
/* output state */
- unsigned char *out; /* output buffer */
- unsigned int outlen; /* available space at out */
+ unsigned char out[OUTBUFLEN]; /* output buffer */
unsigned int outcnt; /* bytes written to out so far */
/* input state */
@@ -55,6 +55,8 @@ struct state {
int bitcnt; /* number of bits in bit buffer */
};
+static state s;
+
/*
* Return need bits from the input stream. This always leaves less than
* eight bits in the buffer. bits() works properly for need == 0.
@@ -66,24 +68,24 @@ struct state {
* buffer, using shift right, and new bytes are appended to the top of the
* bit buffer, using shift left.
*/
-static int bits(state *s, int need)
+static int bits(int need)
{
int val; /* bit accumulator (can use up to 20 bits) */
/* load at least need bits into val */
- val = s->bitbuf;
- while (s->bitcnt < need) {
- if (s->incnt == s->inlen) {
+ val = s.bitbuf;
+ while (s.bitcnt < need) {
+ if (s.incnt == s.inlen) {
fputs("out of input\n", ConsoleOut());
abort();
}
- val |= (s->in[s->incnt++]) << s->bitcnt; /* load eight bits */
- s->bitcnt += 8;
+ val |= (s.in[s.incnt++]) << s.bitcnt; /* load eight bits */
+ s.bitcnt += 8;
}
/* drop need bits and update buffer, always zero to seven bits left */
- s->bitbuf = (val >> need);
- s->bitcnt -= need;
+ s.bitbuf = (val >> need);
+ s.bitcnt -= need;
/* return need bits, zeroing the bits above that */
return (val & ((1 << need) - 1));
@@ -106,35 +108,35 @@ static int bits(state *s, int need)
* - A stored block can have zero length. This is sometimes used to byte-align
* subsets of the compressed data for random access or partial recovery.
*/
-static int stored(state *s)
+static int stored()
{
unsigned int len; /* length of stored block */
/* discard leftover bits from current byte (assumes s->bitcnt < 8) */
- s->bitbuf = 0;
- s->bitcnt = 0;
+ s.bitbuf = 0;
+ s.bitcnt = 0;
/* get length and check against its one's complement */
- if (s->incnt + 4 > s->inlen)
+ if (s.incnt + 4 > s.inlen)
return 2; /* not enough input */
- len = s->in[s->incnt++];
- len |= s->in[s->incnt++] << 8;
- if (s->in[s->incnt++] != (~len & $ff) ||
- s->in[s->incnt++] != ((~len >> 8) & $ff))
+ len = s.in[s.incnt++];
+ len |= s.in[s.incnt++] << 8;
+ if (s.in[s.incnt++] != (~len & $ff) ||
+ s.in[s.incnt++] != ((~len >> 8) & $ff))
return -2; /* didn't match complement! */
/* copy len bytes from in to out */
- if (s->incnt + len > s->inlen)
+ if (s.incnt + len > s.inlen)
return 2; /* not enough input */
- if (s->out != NULL) {
- if (s->outcnt + len > s->outlen)
+ if (s.out != NULL) {
+ if (s.outcnt + len > OUTBUFLEN)
return 1; /* not enough output space */
while (len--)
- s->out[s->outcnt++] = s->in[s->incnt++];
+ s.out[s.outcnt++] = s.in[s.incnt++];
}
else { /* just scanning */
- s->outcnt += len;
- s->incnt += len;
+ s.outcnt += len;
+ s.incnt += len;
}
/* done with a valid stored block */
@@ -176,7 +178,7 @@ struct huffman {
* - Incomplete codes are handled by this decoder, since they are permitted
* in the deflate format. See the format notes for fixed() and dynamic().
*/
-static int decode(state *s, huffman *h)
+static int decode(huffman *h)
{
int len; /* current number of bits in code */
int code; /* len bits being decoded */
@@ -186,7 +188,7 @@ static int decode(state *s, huffman *h)
code = (first = (index = 0));
for (len = 1; len <= MAXBITS; len++) {
- code |= bits(s, 1); /* get next bit */
+ code |= bits(1); /* get next bit */
count = h->count[len];
if (code - count < first) /* if length len, return symbol */
return h->symbol[index + (code - first)];
@@ -340,7 +342,7 @@ static short dext[30] = { /* Extra bits for distance codes 0..29 */
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13};
-static int codes(state *s, huffman *lencode, huffman *distcode)
+static int codes(huffman *lencode, huffman *distcode)
{
int symbol; /* decoded symbol */
int len; /* length for copy */
@@ -348,45 +350,45 @@ static int codes(state *s, huffman *lencode, huffman *distcode)
/* decode literals and length/distance pairs */
do {
- symbol = decode(s, lencode);
+ symbol = decode(lencode);
if (symbol < 0)
return symbol; /* invalid symbol */
if (symbol < 256) { /* literal: symbol is the byte */
/* write out the literal */
- if (s->out != NULL) {
- if (s->outcnt == s->outlen)
+ if (s.out != NULL) {
+ if (s.outcnt == OUTBUFLEN)
return 1;
- s->out[s->outcnt] = symbol;
+ s.out[s.outcnt] = symbol;
}
- s->outcnt++;
+ s.outcnt++;
}
else if (symbol > 256) { /* length */
/* get and compute length */
symbol -= 257;
if (symbol >= 29)
return -10; /* invalid fixed code */
- len = lens[symbol] + bits(s, lext[symbol]);
+ len = lens[symbol] + bits(lext[symbol]);
/* get and check distance */
- symbol = decode(s, distcode);
+ symbol = decode(distcode);
if (symbol < 0)
return symbol; /* invalid symbol */
- dist = dists[symbol] + bits(s, dext[symbol]);
- if (dist > s->outcnt)
+ dist = dists[symbol] + bits(dext[symbol]);
+ if (dist > s.outcnt)
return -11; /* distance too far back */
/* copy length bytes from distance bytes back */
- if (s->out != NULL) {
- if (s->outcnt + len > s->outlen)
+ if (s.out != NULL) {
+ if (s.outcnt + len > OUTBUFLEN)
return 1;
while (len--) {
- s->out[s->outcnt] = dist > s->outcnt ?
- 0 : s->out[s->outcnt - dist];
- s->outcnt++;
+ s.out[s.outcnt] = dist > s.outcnt ?
+ 0 : s.out[s.outcnt - dist];
+ s.outcnt++;
}
}
else
- s->outcnt += len;
+ s.outcnt += len;
}
} while (symbol != 256); /* end of block symbol */
@@ -424,7 +426,7 @@ static short lencnt[MAXBITS+1], lensym[FIXLCODES];
static short distcnt[MAXBITS+1], distsym[MAXDCODES];
static huffman lencode, distcode;
-static int fixed(state *s)
+static int fixed()
{
int symbol;
short lengths[FIXLCODES];
@@ -458,7 +460,7 @@ static int fixed(state *s)
}
/* decode data until end-of-block code */
- return codes(s, &lencode, &distcode);
+ return codes(&lencode, &distcode);
}
/*
@@ -550,7 +552,7 @@ static int fixed(state *s)
*/
static short order[19] = /* permutation of code length codes */
{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-static int dynamic(state *s)
+static int dynamic()
{
int nlen, ndist, ncode; /* number of lengths in descriptor */
int index; /* index of lengths[] */
@@ -569,15 +571,15 @@ static int dynamic(state *s)
distcode.symbol = distsym;
/* get number of lengths in each table, check lengths */
- nlen = bits(s, 5) + 257;
- ndist = bits(s, 5) + 1;
- ncode = bits(s, 4) + 4;
+ nlen = bits(5) + 257;
+ ndist = bits(5) + 1;
+ ncode = bits(4) + 4;
if (nlen > MAXLCODES || ndist > MAXDCODES)
return -3; /* bad counts */
/* read code length code lengths (really), missing lengths are zero */
for (index = 0; index < ncode; index++)
- lengths[order[index]] = bits(s, 3);
+ lengths[order[index]] = bits(3);
for (; index < 19; index++)
lengths[order[index]] = 0;
@@ -589,7 +591,7 @@ static int dynamic(state *s)
/* read length/literal and distance code length tables */
index = 0;
while (index < nlen + ndist) {
- symbol = decode(s, &lencode);
+ symbol = decode(&lencode);
if (symbol < 0)
return symbol; /* invalid symbol */
if (symbol < 16) /* length in 0..15 */
@@ -600,12 +602,12 @@ static int dynamic(state *s)
if (index == 0)
return -5; /* no last length! */
len = lengths[index - 1]; /* last length */
- symbol = 3 + bits(s, 2);
+ symbol = 3 + bits(2);
}
else if (symbol == 17) /* repeat zero 3..10 times */
- symbol = 3 + bits(s, 3);
+ symbol = 3 + bits(3);
else /* == 18, repeat zero 11..138 times */
- symbol = 11 + bits(s, 7);
+ symbol = 11 + bits(7);
if (index + symbol > nlen + ndist)
return -6; /* too many lengths! */
while (symbol--) /* repeat last or zero symbol times */
@@ -628,7 +630,7 @@ static int dynamic(state *s)
return -8; /* incomplete code ok only for single length 1 code */
/* decode data until end-of-block code */
- return codes(s, &lencode, &distcode);
+ return codes(&lencode, &distcode);
}
/*
@@ -675,34 +677,25 @@ static int dynamic(state *s)
* block (if it was a fixed or dynamic block) are undefined and have no
* expected values to check.
*/
-int puff(unsigned char *dest, /* pointer to destination pointer */
- unsigned int *destlen, /* amount of output space */
- unsigned char *source, /* pointer to source data pointer */
- unsigned int *sourcelen) /* amount of input available */
+int puff(unsigned char *source, unsigned int sourcelen)
{
- state s; /* input/output state */
int last, type; /* block information */
int err; /* return value */
- /* initialize output state */
- s.out = dest;
- s.outlen = *destlen; /* ignored if dest is NULL */
s.outcnt = 0;
-
- /* initialize input state */
s.in = source;
- s.inlen = *sourcelen;
+ s.inlen = sourcelen;
s.incnt = 0;
s.bitbuf = 0;
s.bitcnt = 0;
/* process blocks until last block or error */
do {
- last = bits(&s, 1); /* one if last block */
- type = bits(&s, 2); /* block type 0..3 */
- if (type == 0) err = stored(&s);
- else if (type == 1) err = fixed(&s);
- else if (type == 2) err = dynamic(&s);
+ last = bits(1); /* one if last block */
+ type = bits(2); /* block type 0..3 */
+ if (type == 0) err = stored();
+ else if (type == 1) err = fixed();
+ else if (type == 2) err = dynamic();
else err = -1;
if (err != 0)
break; /* return with error */
@@ -710,8 +703,7 @@ int puff(unsigned char *dest, /* pointer to destination pointer */
/* update the lengths and return */
if (err <= 0) {
- *destlen = s.outcnt;
- *sourcelen = s.incnt;
+ fwrite(s.out, s.outcnt, StdOut());
}
return err;
}
diff --git a/fs/ar/ungz.fs b/fs/ar/ungz.fs
@@ -11,13 +11,8 @@ cc<< /ar/puff.c
\ Creating huge buffers for gzip is ugly, but I just want to make it work.
\ Streaming is coming soon...
$1000 const INBUFSZ
-$4000 const OUTBUFSZ
create _inbuf INBUFSZ allot
-create _outbuf OUTBUFSZ allot
-create _inlen CELLSZ allot
-create _outlen CELLSZ allot
-." our addresses " _inbuf .x spc> _outbuf .x spc> _inlen .x spc> _outlen .x nl>
\ Take a gzip file from stdin and spit the deflated version on stdout
: ungz ( -- err )
stdin $1f = _assert stdin $8b = _assert \ ID1+ID2
@@ -33,7 +28,4 @@ create _outlen CELLSZ allot
r> $10 and if \ FLG.COMMENT
_skiptonull then
_inbuf StdIn IO :readall
- OUTBUFSZ _outlen !
- INBUFSZ _inlen !
- _outbuf _outlen _inbuf _outlen puff
- _outbuf _outlen @ StdOut IO :write ;
+ _inbuf INBUFSZ puff ;