Fossil SCM

fossil-scm / compat / zlib / contrib / puff / puff.c
Source Blame History 840 lines
7ef7284… drh 1 /*
7ef7284… drh 2 * puff.c
bb4776e… jan.nijtmans 3 * Copyright (C) 2002-2013 Mark Adler
7ef7284… drh 4 * For conditions of distribution and use, see copyright notice in puff.h
bb4776e… jan.nijtmans 5 * version 2.3, 21 Jan 2013
7ef7284… drh 6 *
7ef7284… drh 7 * puff.c is a simple inflate written to be an unambiguous way to specify the
7ef7284… drh 8 * deflate format. It is not written for speed but rather simplicity. As a
7ef7284… drh 9 * side benefit, this code might actually be useful when small code is more
7ef7284… drh 10 * important than speed, such as bootstrap applications. For typical deflate
7ef7284… drh 11 * data, zlib's inflate() is about four times as fast as puff(). zlib's
7ef7284… drh 12 * inflate compiles to around 20K on my machine, whereas puff.c compiles to
7ef7284… drh 13 * around 4K on my machine (a PowerPC using GNU cc). If the faster decode()
7ef7284… drh 14 * function here is used, then puff() is only twice as slow as zlib's
7ef7284… drh 15 * inflate().
7ef7284… drh 16 *
7ef7284… drh 17 * All dynamically allocated memory comes from the stack. The stack required
7ef7284… drh 18 * is less than 2K bytes. This code is compatible with 16-bit int's and
7ef7284… drh 19 * assumes that long's are at least 32 bits. puff.c uses the short data type,
e38d5e1… jan.nijtmans 20 * assumed to be 16 bits, for arrays in order to conserve memory. The code
7ef7284… drh 21 * works whether integers are stored big endian or little endian.
7ef7284… drh 22 *
7ef7284… drh 23 * In the comments below are "Format notes" that describe the inflate process
7ef7284… drh 24 * and document some of the less obvious aspects of the format. This source
7ef7284… drh 25 * code is meant to supplement RFC 1951, which formally describes the deflate
7ef7284… drh 26 * format:
7ef7284… drh 27 *
6ea30fb… florian 28 * https://datatracker.ietf.org/doc/html/rfc1951
7ef7284… drh 29 */
7ef7284… drh 30
7ef7284… drh 31 /*
7ef7284… drh 32 * Change history:
7ef7284… drh 33 *
7ef7284… drh 34 * 1.0 10 Feb 2002 - First version
7ef7284… drh 35 * 1.1 17 Feb 2002 - Clarifications of some comments and notes
7ef7284… drh 36 * - Update puff() dest and source pointers on negative
7ef7284… drh 37 * errors to facilitate debugging deflators
7ef7284… drh 38 * - Remove longest from struct huffman -- not needed
7ef7284… drh 39 * - Simplify offs[] index in construct()
7ef7284… drh 40 * - Add input size and checking, using longjmp() to
7ef7284… drh 41 * maintain easy readability
7ef7284… drh 42 * - Use short data type for large arrays
7ef7284… drh 43 * - Use pointers instead of long to specify source and
7ef7284… drh 44 * destination sizes to avoid arbitrary 4 GB limits
7ef7284… drh 45 * 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!),
a9e589c… florian 46 * but leave simple version for readability
7ef7284… drh 47 * - Make sure invalid distances detected if pointers
7ef7284… drh 48 * are 16 bits
7ef7284… drh 49 * - Fix fixed codes table error
7ef7284… drh 50 * - Provide a scanning mode for determining size of
7ef7284… drh 51 * uncompressed data
7ef7284… drh 52 * 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Gailly]
7ef7284… drh 53 * - Add a puff.h file for the interface
7ef7284… drh 54 * - Add braces in puff() for else do [Gailly]
7ef7284… drh 55 * - Use indexes instead of pointers for readability
7ef7284… drh 56 * 1.4 31 Mar 2002 - Simplify construct() code set check
7ef7284… drh 57 * - Fix some comments
7ef7284… drh 58 * - Add FIXLCODES #define
7ef7284… drh 59 * 1.5 6 Apr 2002 - Minor comment fixes
7ef7284… drh 60 * 1.6 7 Aug 2002 - Minor format changes
7ef7284… drh 61 * 1.7 3 Mar 2003 - Added test code for distribution
7ef7284… drh 62 * - Added zlib-like license
7ef7284… drh 63 * 1.8 9 Jan 2004 - Added some comments on no distance codes case
7ef7284… drh 64 * 1.9 21 Feb 2008 - Fix bug on 16-bit integer architectures [Pohland]
7ef7284… drh 65 * - Catch missing end-of-block symbol error
7ef7284… drh 66 * 2.0 25 Jul 2008 - Add #define to permit distance too far back
7ef7284… drh 67 * - Add option in TEST code for puff to write the data
7ef7284… drh 68 * - Add option in TEST code to skip input bytes
7ef7284… drh 69 * - Allow TEST code to read from piped stdin
7ef7284… drh 70 * 2.1 4 Apr 2010 - Avoid variable initialization for happier compilers
7ef7284… drh 71 * - Avoid unsigned comparisons for even happier compilers
7ef7284… drh 72 * 2.2 25 Apr 2010 - Fix bug in variable initializations [Oberhumer]
7ef7284… drh 73 * - Add const where appropriate [Oberhumer]
7ef7284… drh 74 * - Split if's and ?'s for coverage testing
7ef7284… drh 75 * - Break out test code to separate file
7ef7284… drh 76 * - Move NIL to puff.h
7ef7284… drh 77 * - Allow incomplete code only if single code length is 1
7ef7284… drh 78 * - Add full code coverage test to Makefile
bb4776e… jan.nijtmans 79 * 2.3 21 Jan 2013 - Check for invalid code length codes in dynamic blocks
7ef7284… drh 80 */
7ef7284… drh 81
7ef7284… drh 82 #include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */
7ef7284… drh 83 #include "puff.h" /* prototype for puff() */
7ef7284… drh 84
7ef7284… drh 85 #define local static /* for local function definitions */
7ef7284… drh 86
7ef7284… drh 87 /*
7ef7284… drh 88 * Maximums for allocations and loops. It is not useful to change these --
7ef7284… drh 89 * they are fixed by the deflate format.
7ef7284… drh 90 */
7ef7284… drh 91 #define MAXBITS 15 /* maximum bits in a code */
7ef7284… drh 92 #define MAXLCODES 286 /* maximum number of literal/length codes */
7ef7284… drh 93 #define MAXDCODES 30 /* maximum number of distance codes */
7ef7284… drh 94 #define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */
7ef7284… drh 95 #define FIXLCODES 288 /* number of fixed literal/length codes */
7ef7284… drh 96
7ef7284… drh 97 /* input and output state */
7ef7284… drh 98 struct state {
7ef7284… drh 99 /* output state */
7ef7284… drh 100 unsigned char *out; /* output buffer */
7ef7284… drh 101 unsigned long outlen; /* available space at out */
7ef7284… drh 102 unsigned long outcnt; /* bytes written to out so far */
7ef7284… drh 103
7ef7284… drh 104 /* input state */
7ef7284… drh 105 const unsigned char *in; /* input buffer */
7ef7284… drh 106 unsigned long inlen; /* available input at in */
7ef7284… drh 107 unsigned long incnt; /* bytes read so far */
7ef7284… drh 108 int bitbuf; /* bit buffer */
7ef7284… drh 109 int bitcnt; /* number of bits in bit buffer */
7ef7284… drh 110
7ef7284… drh 111 /* input limit error return state for bits() and decode() */
7ef7284… drh 112 jmp_buf env;
7ef7284… drh 113 };
7ef7284… drh 114
7ef7284… drh 115 /*
7ef7284… drh 116 * Return need bits from the input stream. This always leaves less than
7ef7284… drh 117 * eight bits in the buffer. bits() works properly for need == 0.
7ef7284… drh 118 *
7ef7284… drh 119 * Format notes:
7ef7284… drh 120 *
7ef7284… drh 121 * - Bits are stored in bytes from the least significant bit to the most
7ef7284… drh 122 * significant bit. Therefore bits are dropped from the bottom of the bit
7ef7284… drh 123 * buffer, using shift right, and new bytes are appended to the top of the
7ef7284… drh 124 * bit buffer, using shift left.
7ef7284… drh 125 */
7ef7284… drh 126 local int bits(struct state *s, int need)
7ef7284… drh 127 {
7ef7284… drh 128 long val; /* bit accumulator (can use up to 20 bits) */
7ef7284… drh 129
7ef7284… drh 130 /* load at least need bits into val */
7ef7284… drh 131 val = s->bitbuf;
7ef7284… drh 132 while (s->bitcnt < need) {
7ef7284… drh 133 if (s->incnt == s->inlen)
7ef7284… drh 134 longjmp(s->env, 1); /* out of input */
7ef7284… drh 135 val |= (long)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */
7ef7284… drh 136 s->bitcnt += 8;
7ef7284… drh 137 }
7ef7284… drh 138
7ef7284… drh 139 /* drop need bits and update buffer, always zero to seven bits left */
7ef7284… drh 140 s->bitbuf = (int)(val >> need);
7ef7284… drh 141 s->bitcnt -= need;
7ef7284… drh 142
7ef7284… drh 143 /* return need bits, zeroing the bits above that */
7ef7284… drh 144 return (int)(val & ((1L << need) - 1));
7ef7284… drh 145 }
7ef7284… drh 146
7ef7284… drh 147 /*
7ef7284… drh 148 * Process a stored block.
7ef7284… drh 149 *
7ef7284… drh 150 * Format notes:
7ef7284… drh 151 *
7ef7284… drh 152 * - After the two-bit stored block type (00), the stored block length and
7ef7284… drh 153 * stored bytes are byte-aligned for fast copying. Therefore any leftover
7ef7284… drh 154 * bits in the byte that has the last bit of the type, as many as seven, are
7ef7284… drh 155 * discarded. The value of the discarded bits are not defined and should not
7ef7284… drh 156 * be checked against any expectation.
7ef7284… drh 157 *
7ef7284… drh 158 * - The second inverted copy of the stored block length does not have to be
7ef7284… drh 159 * checked, but it's probably a good idea to do so anyway.
7ef7284… drh 160 *
7ef7284… drh 161 * - A stored block can have zero length. This is sometimes used to byte-align
7ef7284… drh 162 * subsets of the compressed data for random access or partial recovery.
7ef7284… drh 163 */
7ef7284… drh 164 local int stored(struct state *s)
7ef7284… drh 165 {
7ef7284… drh 166 unsigned len; /* length of stored block */
7ef7284… drh 167
7ef7284… drh 168 /* discard leftover bits from current byte (assumes s->bitcnt < 8) */
7ef7284… drh 169 s->bitbuf = 0;
7ef7284… drh 170 s->bitcnt = 0;
7ef7284… drh 171
7ef7284… drh 172 /* get length and check against its one's complement */
7ef7284… drh 173 if (s->incnt + 4 > s->inlen)
7ef7284… drh 174 return 2; /* not enough input */
7ef7284… drh 175 len = s->in[s->incnt++];
7ef7284… drh 176 len |= s->in[s->incnt++] << 8;
7ef7284… drh 177 if (s->in[s->incnt++] != (~len & 0xff) ||
7ef7284… drh 178 s->in[s->incnt++] != ((~len >> 8) & 0xff))
7ef7284… drh 179 return -2; /* didn't match complement! */
7ef7284… drh 180
7ef7284… drh 181 /* copy len bytes from in to out */
7ef7284… drh 182 if (s->incnt + len > s->inlen)
7ef7284… drh 183 return 2; /* not enough input */
7ef7284… drh 184 if (s->out != NIL) {
7ef7284… drh 185 if (s->outcnt + len > s->outlen)
7ef7284… drh 186 return 1; /* not enough output space */
7ef7284… drh 187 while (len--)
7ef7284… drh 188 s->out[s->outcnt++] = s->in[s->incnt++];
7ef7284… drh 189 }
7ef7284… drh 190 else { /* just scanning */
7ef7284… drh 191 s->outcnt += len;
7ef7284… drh 192 s->incnt += len;
7ef7284… drh 193 }
7ef7284… drh 194
7ef7284… drh 195 /* done with a valid stored block */
7ef7284… drh 196 return 0;
7ef7284… drh 197 }
7ef7284… drh 198
7ef7284… drh 199 /*
7ef7284… drh 200 * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of
7ef7284… drh 201 * each length, which for a canonical code are stepped through in order.
7ef7284… drh 202 * symbol[] are the symbol values in canonical order, where the number of
7ef7284… drh 203 * entries is the sum of the counts in count[]. The decoding process can be
7ef7284… drh 204 * seen in the function decode() below.
7ef7284… drh 205 */
7ef7284… drh 206 struct huffman {
7ef7284… drh 207 short *count; /* number of symbols of each length */
7ef7284… drh 208 short *symbol; /* canonically ordered symbols */
7ef7284… drh 209 };
7ef7284… drh 210
7ef7284… drh 211 /*
7ef7284… drh 212 * Decode a code from the stream s using huffman table h. Return the symbol or
7ef7284… drh 213 * a negative value if there is an error. If all of the lengths are zero, i.e.
7ef7284… drh 214 * an empty code, or if the code is incomplete and an invalid code is received,
7ef7284… drh 215 * then -10 is returned after reading MAXBITS bits.
7ef7284… drh 216 *
7ef7284… drh 217 * Format notes:
7ef7284… drh 218 *
7ef7284… drh 219 * - The codes as stored in the compressed data are bit-reversed relative to
7ef7284… drh 220 * a simple integer ordering of codes of the same lengths. Hence below the
7ef7284… drh 221 * bits are pulled from the compressed data one at a time and used to
7ef7284… drh 222 * build the code value reversed from what is in the stream in order to
7ef7284… drh 223 * permit simple integer comparisons for decoding. A table-based decoding
7ef7284… drh 224 * scheme (as used in zlib) does not need to do this reversal.
7ef7284… drh 225 *
7ef7284… drh 226 * - The first code for the shortest length is all zeros. Subsequent codes of
7ef7284… drh 227 * the same length are simply integer increments of the previous code. When
7ef7284… drh 228 * moving up a length, a zero bit is appended to the code. For a complete
7ef7284… drh 229 * code, the last code of the longest length will be all ones.
7ef7284… drh 230 *
7ef7284… drh 231 * - Incomplete codes are handled by this decoder, since they are permitted
7ef7284… drh 232 * in the deflate format. See the format notes for fixed() and dynamic().
7ef7284… drh 233 */
7ef7284… drh 234 #ifdef SLOW
7ef7284… drh 235 local int decode(struct state *s, const struct huffman *h)
7ef7284… drh 236 {
7ef7284… drh 237 int len; /* current number of bits in code */
7ef7284… drh 238 int code; /* len bits being decoded */
7ef7284… drh 239 int first; /* first code of length len */
7ef7284… drh 240 int count; /* number of codes of length len */
7ef7284… drh 241 int index; /* index of first code of length len in symbol table */
7ef7284… drh 242
7ef7284… drh 243 code = first = index = 0;
7ef7284… drh 244 for (len = 1; len <= MAXBITS; len++) {
7ef7284… drh 245 code |= bits(s, 1); /* get next bit */
7ef7284… drh 246 count = h->count[len];
7ef7284… drh 247 if (code - count < first) /* if length len, return symbol */
7ef7284… drh 248 return h->symbol[index + (code - first)];
7ef7284… drh 249 index += count; /* else update for next length */
7ef7284… drh 250 first += count;
7ef7284… drh 251 first <<= 1;
7ef7284… drh 252 code <<= 1;
7ef7284… drh 253 }
7ef7284… drh 254 return -10; /* ran out of codes */
7ef7284… drh 255 }
7ef7284… drh 256
7ef7284… drh 257 /*
7ef7284… drh 258 * A faster version of decode() for real applications of this code. It's not
7ef7284… drh 259 * as readable, but it makes puff() twice as fast. And it only makes the code
7ef7284… drh 260 * a few percent larger.
7ef7284… drh 261 */
7ef7284… drh 262 #else /* !SLOW */
7ef7284… drh 263 local int decode(struct state *s, const struct huffman *h)
7ef7284… drh 264 {
7ef7284… drh 265 int len; /* current number of bits in code */
7ef7284… drh 266 int code; /* len bits being decoded */
7ef7284… drh 267 int first; /* first code of length len */
7ef7284… drh 268 int count; /* number of codes of length len */
7ef7284… drh 269 int index; /* index of first code of length len in symbol table */
7ef7284… drh 270 int bitbuf; /* bits from stream */
7ef7284… drh 271 int left; /* bits left in next or left to process */
7ef7284… drh 272 short *next; /* next number of codes */
7ef7284… drh 273
7ef7284… drh 274 bitbuf = s->bitbuf;
7ef7284… drh 275 left = s->bitcnt;
7ef7284… drh 276 code = first = index = 0;
7ef7284… drh 277 len = 1;
7ef7284… drh 278 next = h->count + 1;
7ef7284… drh 279 while (1) {
7ef7284… drh 280 while (left--) {
7ef7284… drh 281 code |= bitbuf & 1;
7ef7284… drh 282 bitbuf >>= 1;
7ef7284… drh 283 count = *next++;
7ef7284… drh 284 if (code - count < first) { /* if length len, return symbol */
7ef7284… drh 285 s->bitbuf = bitbuf;
7ef7284… drh 286 s->bitcnt = (s->bitcnt - len) & 7;
7ef7284… drh 287 return h->symbol[index + (code - first)];
7ef7284… drh 288 }
7ef7284… drh 289 index += count; /* else update for next length */
7ef7284… drh 290 first += count;
7ef7284… drh 291 first <<= 1;
7ef7284… drh 292 code <<= 1;
7ef7284… drh 293 len++;
7ef7284… drh 294 }
7ef7284… drh 295 left = (MAXBITS+1) - len;
7ef7284… drh 296 if (left == 0)
7ef7284… drh 297 break;
7ef7284… drh 298 if (s->incnt == s->inlen)
7ef7284… drh 299 longjmp(s->env, 1); /* out of input */
7ef7284… drh 300 bitbuf = s->in[s->incnt++];
7ef7284… drh 301 if (left > 8)
7ef7284… drh 302 left = 8;
7ef7284… drh 303 }
7ef7284… drh 304 return -10; /* ran out of codes */
7ef7284… drh 305 }
7ef7284… drh 306 #endif /* SLOW */
7ef7284… drh 307
7ef7284… drh 308 /*
7ef7284… drh 309 * Given the list of code lengths length[0..n-1] representing a canonical
7ef7284… drh 310 * Huffman code for n symbols, construct the tables required to decode those
7ef7284… drh 311 * codes. Those tables are the number of codes of each length, and the symbols
7ef7284… drh 312 * sorted by length, retaining their original order within each length. The
7ef7284… drh 313 * return value is zero for a complete code set, negative for an over-
7ef7284… drh 314 * subscribed code set, and positive for an incomplete code set. The tables
7ef7284… drh 315 * can be used if the return value is zero or positive, but they cannot be used
7ef7284… drh 316 * if the return value is negative. If the return value is zero, it is not
7ef7284… drh 317 * possible for decode() using that table to return an error--any stream of
7ef7284… drh 318 * enough bits will resolve to a symbol. If the return value is positive, then
7ef7284… drh 319 * it is possible for decode() using that table to return an error for received
7ef7284… drh 320 * codes past the end of the incomplete lengths.
7ef7284… drh 321 *
7ef7284… drh 322 * Not used by decode(), but used for error checking, h->count[0] is the number
7ef7284… drh 323 * of the n symbols not in the code. So n - h->count[0] is the number of
7ef7284… drh 324 * codes. This is useful for checking for incomplete codes that have more than
7ef7284… drh 325 * one symbol, which is an error in a dynamic block.
7ef7284… drh 326 *
7ef7284… drh 327 * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS
7ef7284… drh 328 * This is assured by the construction of the length arrays in dynamic() and
7ef7284… drh 329 * fixed() and is not verified by construct().
7ef7284… drh 330 *
7ef7284… drh 331 * Format notes:
7ef7284… drh 332 *
7ef7284… drh 333 * - Permitted and expected examples of incomplete codes are one of the fixed
7ef7284… drh 334 * codes and any code with a single symbol which in deflate is coded as one
7ef7284… drh 335 * bit instead of zero bits. See the format notes for fixed() and dynamic().
7ef7284… drh 336 *
7ef7284… drh 337 * - Within a given code length, the symbols are kept in ascending order for
7ef7284… drh 338 * the code bits definition.
7ef7284… drh 339 */
7ef7284… drh 340 local int construct(struct huffman *h, const short *length, int n)
7ef7284… drh 341 {
7ef7284… drh 342 int symbol; /* current symbol when stepping through length[] */
7ef7284… drh 343 int len; /* current length when stepping through h->count[] */
7ef7284… drh 344 int left; /* number of possible codes left of current length */
7ef7284… drh 345 short offs[MAXBITS+1]; /* offsets in symbol table for each length */
7ef7284… drh 346
7ef7284… drh 347 /* count number of codes of each length */
7ef7284… drh 348 for (len = 0; len <= MAXBITS; len++)
7ef7284… drh 349 h->count[len] = 0;
7ef7284… drh 350 for (symbol = 0; symbol < n; symbol++)
7ef7284… drh 351 (h->count[length[symbol]])++; /* assumes lengths are within bounds */
7ef7284… drh 352 if (h->count[0] == n) /* no codes! */
7ef7284… drh 353 return 0; /* complete, but decode() will fail */
7ef7284… drh 354
7ef7284… drh 355 /* check for an over-subscribed or incomplete set of lengths */
7ef7284… drh 356 left = 1; /* one possible code of zero length */
7ef7284… drh 357 for (len = 1; len <= MAXBITS; len++) {
7ef7284… drh 358 left <<= 1; /* one more bit, double codes left */
7ef7284… drh 359 left -= h->count[len]; /* deduct count from possible codes */
7ef7284… drh 360 if (left < 0)
7ef7284… drh 361 return left; /* over-subscribed--return negative */
7ef7284… drh 362 } /* left > 0 means incomplete */
7ef7284… drh 363
7ef7284… drh 364 /* generate offsets into symbol table for each length for sorting */
7ef7284… drh 365 offs[1] = 0;
7ef7284… drh 366 for (len = 1; len < MAXBITS; len++)
7ef7284… drh 367 offs[len + 1] = offs[len] + h->count[len];
7ef7284… drh 368
7ef7284… drh 369 /*
7ef7284… drh 370 * put symbols in table sorted by length, by symbol order within each
7ef7284… drh 371 * length
7ef7284… drh 372 */
7ef7284… drh 373 for (symbol = 0; symbol < n; symbol++)
7ef7284… drh 374 if (length[symbol] != 0)
7ef7284… drh 375 h->symbol[offs[length[symbol]]++] = symbol;
7ef7284… drh 376
7ef7284… drh 377 /* return zero for complete set, positive for incomplete set */
7ef7284… drh 378 return left;
7ef7284… drh 379 }
7ef7284… drh 380
7ef7284… drh 381 /*
7ef7284… drh 382 * Decode literal/length and distance codes until an end-of-block code.
7ef7284… drh 383 *
7ef7284… drh 384 * Format notes:
7ef7284… drh 385 *
7ef7284… drh 386 * - Compressed data that is after the block type if fixed or after the code
7ef7284… drh 387 * description if dynamic is a combination of literals and length/distance
7ef7284… drh 388 * pairs terminated by and end-of-block code. Literals are simply Huffman
7ef7284… drh 389 * coded bytes. A length/distance pair is a coded length followed by a
7ef7284… drh 390 * coded distance to represent a string that occurs earlier in the
7ef7284… drh 391 * uncompressed data that occurs again at the current location.
7ef7284… drh 392 *
7ef7284… drh 393 * - Literals, lengths, and the end-of-block code are combined into a single
7ef7284… drh 394 * code of up to 286 symbols. They are 256 literals (0..255), 29 length
7ef7284… drh 395 * symbols (257..285), and the end-of-block symbol (256).
7ef7284… drh 396 *
7ef7284… drh 397 * - There are 256 possible lengths (3..258), and so 29 symbols are not enough
7ef7284… drh 398 * to represent all of those. Lengths 3..10 and 258 are in fact represented
7ef7284… drh 399 * by just a length symbol. Lengths 11..257 are represented as a symbol and
7ef7284… drh 400 * some number of extra bits that are added as an integer to the base length
7ef7284… drh 401 * of the length symbol. The number of extra bits is determined by the base
7ef7284… drh 402 * length symbol. These are in the static arrays below, lens[] for the base
7ef7284… drh 403 * lengths and lext[] for the corresponding number of extra bits.
7ef7284… drh 404 *
7ef7284… drh 405 * - The reason that 258 gets its own symbol is that the longest length is used
7ef7284… drh 406 * often in highly redundant files. Note that 258 can also be coded as the
7ef7284… drh 407 * base value 227 plus the maximum extra value of 31. While a good deflate
7ef7284… drh 408 * should never do this, it is not an error, and should be decoded properly.
7ef7284… drh 409 *
7ef7284… drh 410 * - If a length is decoded, including its extra bits if any, then it is
7ef7284… drh 411 * followed a distance code. There are up to 30 distance symbols. Again
7ef7284… drh 412 * there are many more possible distances (1..32768), so extra bits are added
7ef7284… drh 413 * to a base value represented by the symbol. The distances 1..4 get their
7ef7284… drh 414 * own symbol, but the rest require extra bits. The base distances and
7ef7284… drh 415 * corresponding number of extra bits are below in the static arrays dist[]
7ef7284… drh 416 * and dext[].
7ef7284… drh 417 *
7ef7284… drh 418 * - Literal bytes are simply written to the output. A length/distance pair is
7ef7284… drh 419 * an instruction to copy previously uncompressed bytes to the output. The
7ef7284… drh 420 * copy is from distance bytes back in the output stream, copying for length
7ef7284… drh 421 * bytes.
7ef7284… drh 422 *
7ef7284… drh 423 * - Distances pointing before the beginning of the output data are not
7ef7284… drh 424 * permitted.
7ef7284… drh 425 *
7ef7284… drh 426 * - Overlapped copies, where the length is greater than the distance, are
7ef7284… drh 427 * allowed and common. For example, a distance of one and a length of 258
7ef7284… drh 428 * simply copies the last byte 258 times. A distance of four and a length of
7ef7284… drh 429 * twelve copies the last four bytes three times. A simple forward copy
7ef7284… drh 430 * ignoring whether the length is greater than the distance or not implements
7ef7284… drh 431 * this correctly. You should not use memcpy() since its behavior is not
7ef7284… drh 432 * defined for overlapped arrays. You should not use memmove() or bcopy()
7ef7284… drh 433 * since though their behavior -is- defined for overlapping arrays, it is
7ef7284… drh 434 * defined to do the wrong thing in this case.
7ef7284… drh 435 */
7ef7284… drh 436 local int codes(struct state *s,
7ef7284… drh 437 const struct huffman *lencode,
7ef7284… drh 438 const struct huffman *distcode)
7ef7284… drh 439 {
7ef7284… drh 440 int symbol; /* decoded symbol */
7ef7284… drh 441 int len; /* length for copy */
7ef7284… drh 442 unsigned dist; /* distance for copy */
7ef7284… drh 443 static const short lens[29] = { /* Size base for length codes 257..285 */
7ef7284… drh 444 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
7ef7284… drh 445 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
7ef7284… drh 446 static const short lext[29] = { /* Extra bits for length codes 257..285 */
7ef7284… drh 447 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
7ef7284… drh 448 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
7ef7284… drh 449 static const short dists[30] = { /* Offset base for distance codes 0..29 */
7ef7284… drh 450 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
7ef7284… drh 451 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
7ef7284… drh 452 8193, 12289, 16385, 24577};
7ef7284… drh 453 static const short dext[30] = { /* Extra bits for distance codes 0..29 */
7ef7284… drh 454 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7ef7284… drh 455 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
7ef7284… drh 456 12, 12, 13, 13};
7ef7284… drh 457
7ef7284… drh 458 /* decode literals and length/distance pairs */
7ef7284… drh 459 do {
7ef7284… drh 460 symbol = decode(s, lencode);
7ef7284… drh 461 if (symbol < 0)
7ef7284… drh 462 return symbol; /* invalid symbol */
7ef7284… drh 463 if (symbol < 256) { /* literal: symbol is the byte */
7ef7284… drh 464 /* write out the literal */
7ef7284… drh 465 if (s->out != NIL) {
7ef7284… drh 466 if (s->outcnt == s->outlen)
7ef7284… drh 467 return 1;
7ef7284… drh 468 s->out[s->outcnt] = symbol;
7ef7284… drh 469 }
7ef7284… drh 470 s->outcnt++;
7ef7284… drh 471 }
7ef7284… drh 472 else if (symbol > 256) { /* length */
7ef7284… drh 473 /* get and compute length */
7ef7284… drh 474 symbol -= 257;
7ef7284… drh 475 if (symbol >= 29)
7ef7284… drh 476 return -10; /* invalid fixed code */
7ef7284… drh 477 len = lens[symbol] + bits(s, lext[symbol]);
7ef7284… drh 478
7ef7284… drh 479 /* get and check distance */
7ef7284… drh 480 symbol = decode(s, distcode);
7ef7284… drh 481 if (symbol < 0)
7ef7284… drh 482 return symbol; /* invalid symbol */
7ef7284… drh 483 dist = dists[symbol] + bits(s, dext[symbol]);
7ef7284… drh 484 #ifndef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
7ef7284… drh 485 if (dist > s->outcnt)
7ef7284… drh 486 return -11; /* distance too far back */
7ef7284… drh 487 #endif
7ef7284… drh 488
7ef7284… drh 489 /* copy length bytes from distance bytes back */
7ef7284… drh 490 if (s->out != NIL) {
7ef7284… drh 491 if (s->outcnt + len > s->outlen)
7ef7284… drh 492 return 1;
7ef7284… drh 493 while (len--) {
7ef7284… drh 494 s->out[s->outcnt] =
7ef7284… drh 495 #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
7ef7284… drh 496 dist > s->outcnt ?
7ef7284… drh 497 0 :
7ef7284… drh 498 #endif
7ef7284… drh 499 s->out[s->outcnt - dist];
7ef7284… drh 500 s->outcnt++;
7ef7284… drh 501 }
7ef7284… drh 502 }
7ef7284… drh 503 else
7ef7284… drh 504 s->outcnt += len;
7ef7284… drh 505 }
7ef7284… drh 506 } while (symbol != 256); /* end of block symbol */
7ef7284… drh 507
7ef7284… drh 508 /* done with a valid fixed or dynamic block */
7ef7284… drh 509 return 0;
7ef7284… drh 510 }
7ef7284… drh 511
7ef7284… drh 512 /*
7ef7284… drh 513 * Process a fixed codes block.
7ef7284… drh 514 *
7ef7284… drh 515 * Format notes:
7ef7284… drh 516 *
7ef7284… drh 517 * - This block type can be useful for compressing small amounts of data for
7ef7284… drh 518 * which the size of the code descriptions in a dynamic block exceeds the
7ef7284… drh 519 * benefit of custom codes for that block. For fixed codes, no bits are
7ef7284… drh 520 * spent on code descriptions. Instead the code lengths for literal/length
7ef7284… drh 521 * codes and distance codes are fixed. The specific lengths for each symbol
7ef7284… drh 522 * can be seen in the "for" loops below.
7ef7284… drh 523 *
7ef7284… drh 524 * - The literal/length code is complete, but has two symbols that are invalid
7ef7284… drh 525 * and should result in an error if received. This cannot be implemented
7ef7284… drh 526 * simply as an incomplete code since those two symbols are in the "middle"
7ef7284… drh 527 * of the code. They are eight bits long and the longest literal/length\
7ef7284… drh 528 * code is nine bits. Therefore the code must be constructed with those
7ef7284… drh 529 * symbols, and the invalid symbols must be detected after decoding.
7ef7284… drh 530 *
7ef7284… drh 531 * - The fixed distance codes also have two invalid symbols that should result
7ef7284… drh 532 * in an error if received. Since all of the distance codes are the same
7ef7284… drh 533 * length, this can be implemented as an incomplete code. Then the invalid
7ef7284… drh 534 * codes are detected while decoding.
7ef7284… drh 535 */
7ef7284… drh 536 local int fixed(struct state *s)
7ef7284… drh 537 {
7ef7284… drh 538 static int virgin = 1;
7ef7284… drh 539 static short lencnt[MAXBITS+1], lensym[FIXLCODES];
7ef7284… drh 540 static short distcnt[MAXBITS+1], distsym[MAXDCODES];
7ef7284… drh 541 static struct huffman lencode, distcode;
7ef7284… drh 542
7ef7284… drh 543 /* build fixed huffman tables if first call (may not be thread safe) */
7ef7284… drh 544 if (virgin) {
7ef7284… drh 545 int symbol;
7ef7284… drh 546 short lengths[FIXLCODES];
7ef7284… drh 547
7ef7284… drh 548 /* construct lencode and distcode */
7ef7284… drh 549 lencode.count = lencnt;
7ef7284… drh 550 lencode.symbol = lensym;
7ef7284… drh 551 distcode.count = distcnt;
7ef7284… drh 552 distcode.symbol = distsym;
7ef7284… drh 553
7ef7284… drh 554 /* literal/length table */
7ef7284… drh 555 for (symbol = 0; symbol < 144; symbol++)
7ef7284… drh 556 lengths[symbol] = 8;
7ef7284… drh 557 for (; symbol < 256; symbol++)
7ef7284… drh 558 lengths[symbol] = 9;
7ef7284… drh 559 for (; symbol < 280; symbol++)
7ef7284… drh 560 lengths[symbol] = 7;
7ef7284… drh 561 for (; symbol < FIXLCODES; symbol++)
7ef7284… drh 562 lengths[symbol] = 8;
7ef7284… drh 563 construct(&lencode, lengths, FIXLCODES);
7ef7284… drh 564
7ef7284… drh 565 /* distance table */
7ef7284… drh 566 for (symbol = 0; symbol < MAXDCODES; symbol++)
7ef7284… drh 567 lengths[symbol] = 5;
7ef7284… drh 568 construct(&distcode, lengths, MAXDCODES);
7ef7284… drh 569
7ef7284… drh 570 /* do this just once */
7ef7284… drh 571 virgin = 0;
7ef7284… drh 572 }
7ef7284… drh 573
7ef7284… drh 574 /* decode data until end-of-block code */
7ef7284… drh 575 return codes(s, &lencode, &distcode);
7ef7284… drh 576 }
7ef7284… drh 577
7ef7284… drh 578 /*
7ef7284… drh 579 * Process a dynamic codes block.
7ef7284… drh 580 *
7ef7284… drh 581 * Format notes:
7ef7284… drh 582 *
7ef7284… drh 583 * - A dynamic block starts with a description of the literal/length and
7ef7284… drh 584 * distance codes for that block. New dynamic blocks allow the compressor to
7ef7284… drh 585 * rapidly adapt to changing data with new codes optimized for that data.
7ef7284… drh 586 *
7ef7284… drh 587 * - The codes used by the deflate format are "canonical", which means that
7ef7284… drh 588 * the actual bits of the codes are generated in an unambiguous way simply
7ef7284… drh 589 * from the number of bits in each code. Therefore the code descriptions
7ef7284… drh 590 * are simply a list of code lengths for each symbol.
7ef7284… drh 591 *
7ef7284… drh 592 * - The code lengths are stored in order for the symbols, so lengths are
7ef7284… drh 593 * provided for each of the literal/length symbols, and for each of the
7ef7284… drh 594 * distance symbols.
7ef7284… drh 595 *
64ce68d… drh 596 * - If a symbol is not used in the block, this is represented by a zero as the
64ce68d… drh 597 * code length. This does not mean a zero-length code, but rather that no
64ce68d… drh 598 * code should be created for this symbol. There is no way in the deflate
64ce68d… drh 599 * format to represent a zero-length code.
7ef7284… drh 600 *
7ef7284… drh 601 * - The maximum number of bits in a code is 15, so the possible lengths for
7ef7284… drh 602 * any code are 1..15.
7ef7284… drh 603 *
7ef7284… drh 604 * - The fact that a length of zero is not permitted for a code has an
7ef7284… drh 605 * interesting consequence. Normally if only one symbol is used for a given
7ef7284… drh 606 * code, then in fact that code could be represented with zero bits. However
7ef7284… drh 607 * in deflate, that code has to be at least one bit. So for example, if
7ef7284… drh 608 * only a single distance base symbol appears in a block, then it will be
7ef7284… drh 609 * represented by a single code of length one, in particular one 0 bit. This
7ef7284… drh 610 * is an incomplete code, since if a 1 bit is received, it has no meaning,
7ef7284… drh 611 * and should result in an error. So incomplete distance codes of one symbol
7ef7284… drh 612 * should be permitted, and the receipt of invalid codes should be handled.
7ef7284… drh 613 *
7ef7284… drh 614 * - It is also possible to have a single literal/length code, but that code
7ef7284… drh 615 * must be the end-of-block code, since every dynamic block has one. This
7ef7284… drh 616 * is not the most efficient way to create an empty block (an empty fixed
7ef7284… drh 617 * block is fewer bits), but it is allowed by the format. So incomplete
7ef7284… drh 618 * literal/length codes of one symbol should also be permitted.
7ef7284… drh 619 *
7ef7284… drh 620 * - If there are only literal codes and no lengths, then there are no distance
7ef7284… drh 621 * codes. This is represented by one distance code with zero bits.
7ef7284… drh 622 *
7ef7284… drh 623 * - The list of up to 286 length/literal lengths and up to 30 distance lengths
7ef7284… drh 624 * are themselves compressed using Huffman codes and run-length encoding. In
7ef7284… drh 625 * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means
7ef7284… drh 626 * that length, and the symbols 16, 17, and 18 are run-length instructions.
a9e589c… florian 627 * Each of 16, 17, and 18 are followed by extra bits to define the length of
7ef7284… drh 628 * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10
7ef7284… drh 629 * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols
7ef7284… drh 630 * are common, hence the special coding for zero lengths.
7ef7284… drh 631 *
7ef7284… drh 632 * - The symbols for 0..18 are Huffman coded, and so that code must be
7ef7284… drh 633 * described first. This is simply a sequence of up to 19 three-bit values
7ef7284… drh 634 * representing no code (0) or the code length for that symbol (1..7).
7ef7284… drh 635 *
7ef7284… drh 636 * - A dynamic block starts with three fixed-size counts from which is computed
7ef7284… drh 637 * the number of literal/length code lengths, the number of distance code
7ef7284… drh 638 * lengths, and the number of code length code lengths (ok, you come up with
7ef7284… drh 639 * a better name!) in the code descriptions. For the literal/length and
7ef7284… drh 640 * distance codes, lengths after those provided are considered zero, i.e. no
7ef7284… drh 641 * code. The code length code lengths are received in a permuted order (see
7ef7284… drh 642 * the order[] array below) to make a short code length code length list more
7ef7284… drh 643 * likely. As it turns out, very short and very long codes are less likely
7ef7284… drh 644 * to be seen in a dynamic code description, hence what may appear initially
7ef7284… drh 645 * to be a peculiar ordering.
7ef7284… drh 646 *
7ef7284… drh 647 * - Given the number of literal/length code lengths (nlen) and distance code
7ef7284… drh 648 * lengths (ndist), then they are treated as one long list of nlen + ndist
7ef7284… drh 649 * code lengths. Therefore run-length coding can and often does cross the
7ef7284… drh 650 * boundary between the two sets of lengths.
7ef7284… drh 651 *
7ef7284… drh 652 * - So to summarize, the code description at the start of a dynamic block is
7ef7284… drh 653 * three counts for the number of code lengths for the literal/length codes,
7ef7284… drh 654 * the distance codes, and the code length codes. This is followed by the
7ef7284… drh 655 * code length code lengths, three bits each. This is used to construct the
7ef7284… drh 656 * code length code which is used to read the remainder of the lengths. Then
7ef7284… drh 657 * the literal/length code lengths and distance lengths are read as a single
7ef7284… drh 658 * set of lengths using the code length codes. Codes are constructed from
7ef7284… drh 659 * the resulting two sets of lengths, and then finally you can start
7ef7284… drh 660 * decoding actual compressed data in the block.
7ef7284… drh 661 *
7ef7284… drh 662 * - For reference, a "typical" size for the code description in a dynamic
7ef7284… drh 663 * block is around 80 bytes.
7ef7284… drh 664 */
7ef7284… drh 665 local int dynamic(struct state *s)
7ef7284… drh 666 {
7ef7284… drh 667 int nlen, ndist, ncode; /* number of lengths in descriptor */
7ef7284… drh 668 int index; /* index of lengths[] */
7ef7284… drh 669 int err; /* construct() return value */
7ef7284… drh 670 short lengths[MAXCODES]; /* descriptor code lengths */
7ef7284… drh 671 short lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */
7ef7284… drh 672 short distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */
7ef7284… drh 673 struct huffman lencode, distcode; /* length and distance codes */
7ef7284… drh 674 static const short order[19] = /* permutation of code length codes */
7ef7284… drh 675 {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
7ef7284… drh 676
7ef7284… drh 677 /* construct lencode and distcode */
7ef7284… drh 678 lencode.count = lencnt;
7ef7284… drh 679 lencode.symbol = lensym;
7ef7284… drh 680 distcode.count = distcnt;
7ef7284… drh 681 distcode.symbol = distsym;
7ef7284… drh 682
7ef7284… drh 683 /* get number of lengths in each table, check lengths */
7ef7284… drh 684 nlen = bits(s, 5) + 257;
7ef7284… drh 685 ndist = bits(s, 5) + 1;
7ef7284… drh 686 ncode = bits(s, 4) + 4;
7ef7284… drh 687 if (nlen > MAXLCODES || ndist > MAXDCODES)
7ef7284… drh 688 return -3; /* bad counts */
7ef7284… drh 689
7ef7284… drh 690 /* read code length code lengths (really), missing lengths are zero */
7ef7284… drh 691 for (index = 0; index < ncode; index++)
7ef7284… drh 692 lengths[order[index]] = bits(s, 3);
7ef7284… drh 693 for (; index < 19; index++)
7ef7284… drh 694 lengths[order[index]] = 0;
7ef7284… drh 695
7ef7284… drh 696 /* build huffman table for code lengths codes (use lencode temporarily) */
7ef7284… drh 697 err = construct(&lencode, lengths, 19);
7ef7284… drh 698 if (err != 0) /* require complete code set here */
7ef7284… drh 699 return -4;
7ef7284… drh 700
7ef7284… drh 701 /* read length/literal and distance code length tables */
7ef7284… drh 702 index = 0;
7ef7284… drh 703 while (index < nlen + ndist) {
7ef7284… drh 704 int symbol; /* decoded value */
7ef7284… drh 705 int len; /* last length to repeat */
7ef7284… drh 706
7ef7284… drh 707 symbol = decode(s, &lencode);
bb4776e… jan.nijtmans 708 if (symbol < 0)
bb4776e… jan.nijtmans 709 return symbol; /* invalid symbol */
7ef7284… drh 710 if (symbol < 16) /* length in 0..15 */
7ef7284… drh 711 lengths[index++] = symbol;
7ef7284… drh 712 else { /* repeat instruction */
7ef7284… drh 713 len = 0; /* assume repeating zeros */
7ef7284… drh 714 if (symbol == 16) { /* repeat last length 3..6 times */
7ef7284… drh 715 if (index == 0)
7ef7284… drh 716 return -5; /* no last length! */
7ef7284… drh 717 len = lengths[index - 1]; /* last length */
7ef7284… drh 718 symbol = 3 + bits(s, 2);
7ef7284… drh 719 }
7ef7284… drh 720 else if (symbol == 17) /* repeat zero 3..10 times */
7ef7284… drh 721 symbol = 3 + bits(s, 3);
7ef7284… drh 722 else /* == 18, repeat zero 11..138 times */
7ef7284… drh 723 symbol = 11 + bits(s, 7);
7ef7284… drh 724 if (index + symbol > nlen + ndist)
7ef7284… drh 725 return -6; /* too many lengths! */
7ef7284… drh 726 while (symbol--) /* repeat last or zero symbol times */
7ef7284… drh 727 lengths[index++] = len;
7ef7284… drh 728 }
7ef7284… drh 729 }
7ef7284… drh 730
7ef7284… drh 731 /* check for end-of-block code -- there better be one! */
7ef7284… drh 732 if (lengths[256] == 0)
7ef7284… drh 733 return -9;
7ef7284… drh 734
7ef7284… drh 735 /* build huffman table for literal/length codes */
7ef7284… drh 736 err = construct(&lencode, lengths, nlen);
7ef7284… drh 737 if (err && (err < 0 || nlen != lencode.count[0] + lencode.count[1]))
7ef7284… drh 738 return -7; /* incomplete code ok only for single length 1 code */
7ef7284… drh 739
7ef7284… drh 740 /* build huffman table for distance codes */
7ef7284… drh 741 err = construct(&distcode, lengths + nlen, ndist);
7ef7284… drh 742 if (err && (err < 0 || ndist != distcode.count[0] + distcode.count[1]))
7ef7284… drh 743 return -8; /* incomplete code ok only for single length 1 code */
7ef7284… drh 744
7ef7284… drh 745 /* decode data until end-of-block code */
7ef7284… drh 746 return codes(s, &lencode, &distcode);
7ef7284… drh 747 }
7ef7284… drh 748
7ef7284… drh 749 /*
7ef7284… drh 750 * Inflate source to dest. On return, destlen and sourcelen are updated to the
7ef7284… drh 751 * size of the uncompressed data and the size of the deflate data respectively.
7ef7284… drh 752 * On success, the return value of puff() is zero. If there is an error in the
7ef7284… drh 753 * source data, i.e. it is not in the deflate format, then a negative value is
7ef7284… drh 754 * returned. If there is not enough input available or there is not enough
7ef7284… drh 755 * output space, then a positive error is returned. In that case, destlen and
7ef7284… drh 756 * sourcelen are not updated to facilitate retrying from the beginning with the
7ef7284… drh 757 * provision of more input data or more output space. In the case of invalid
7ef7284… drh 758 * inflate data (a negative error), the dest and source pointers are updated to
7ef7284… drh 759 * facilitate the debugging of deflators.
7ef7284… drh 760 *
7ef7284… drh 761 * puff() also has a mode to determine the size of the uncompressed output with
7ef7284… drh 762 * no output written. For this dest must be (unsigned char *)0. In this case,
7ef7284… drh 763 * the input value of *destlen is ignored, and on return *destlen is set to the
7ef7284… drh 764 * size of the uncompressed output.
7ef7284… drh 765 *
7ef7284… drh 766 * The return codes are:
7ef7284… drh 767 *
7ef7284… drh 768 * 2: available inflate data did not terminate
7ef7284… drh 769 * 1: output space exhausted before completing inflate
7ef7284… drh 770 * 0: successful inflate
7ef7284… drh 771 * -1: invalid block type (type == 3)
7ef7284… drh 772 * -2: stored block length did not match one's complement
7ef7284… drh 773 * -3: dynamic block code description: too many length or distance codes
7ef7284… drh 774 * -4: dynamic block code description: code lengths codes incomplete
7ef7284… drh 775 * -5: dynamic block code description: repeat lengths with no first length
7ef7284… drh 776 * -6: dynamic block code description: repeat more than specified lengths
7ef7284… drh 777 * -7: dynamic block code description: invalid literal/length code lengths
7ef7284… drh 778 * -8: dynamic block code description: invalid distance code lengths
7ef7284… drh 779 * -9: dynamic block code description: missing end-of-block code
7ef7284… drh 780 * -10: invalid literal/length or distance code in fixed or dynamic block
7ef7284… drh 781 * -11: distance is too far back in fixed or dynamic block
7ef7284… drh 782 *
7ef7284… drh 783 * Format notes:
7ef7284… drh 784 *
7ef7284… drh 785 * - Three bits are read for each block to determine the kind of block and
7ef7284… drh 786 * whether or not it is the last block. Then the block is decoded and the
7ef7284… drh 787 * process repeated if it was not the last block.
7ef7284… drh 788 *
7ef7284… drh 789 * - The leftover bits in the last byte of the deflate data after the last
7ef7284… drh 790 * block (if it was a fixed or dynamic block) are undefined and have no
7ef7284… drh 791 * expected values to check.
7ef7284… drh 792 */
7ef7284… drh 793 int puff(unsigned char *dest, /* pointer to destination pointer */
7ef7284… drh 794 unsigned long *destlen, /* amount of output space */
7ef7284… drh 795 const unsigned char *source, /* pointer to source data pointer */
7ef7284… drh 796 unsigned long *sourcelen) /* amount of input available */
7ef7284… drh 797 {
7ef7284… drh 798 struct state s; /* input/output state */
7ef7284… drh 799 int last, type; /* block information */
7ef7284… drh 800 int err; /* return value */
7ef7284… drh 801
7ef7284… drh 802 /* initialize output state */
7ef7284… drh 803 s.out = dest;
7ef7284… drh 804 s.outlen = *destlen; /* ignored if dest is NIL */
7ef7284… drh 805 s.outcnt = 0;
7ef7284… drh 806
7ef7284… drh 807 /* initialize input state */
7ef7284… drh 808 s.in = source;
7ef7284… drh 809 s.inlen = *sourcelen;
7ef7284… drh 810 s.incnt = 0;
7ef7284… drh 811 s.bitbuf = 0;
7ef7284… drh 812 s.bitcnt = 0;
7ef7284… drh 813
7ef7284… drh 814 /* return if bits() or decode() tries to read past available input */
7ef7284… drh 815 if (setjmp(s.env) != 0) /* if came back here via longjmp() */
7ef7284… drh 816 err = 2; /* then skip do-loop, return error */
7ef7284… drh 817 else {
7ef7284… drh 818 /* process blocks until last block or error */
7ef7284… drh 819 do {
7ef7284… drh 820 last = bits(&s, 1); /* one if last block */
7ef7284… drh 821 type = bits(&s, 2); /* block type 0..3 */
7ef7284… drh 822 err = type == 0 ?
7ef7284… drh 823 stored(&s) :
7ef7284… drh 824 (type == 1 ?
7ef7284… drh 825 fixed(&s) :
7ef7284… drh 826 (type == 2 ?
7ef7284… drh 827 dynamic(&s) :
7ef7284… drh 828 -1)); /* type == 3, invalid */
7ef7284… drh 829 if (err != 0)
7ef7284… drh 830 break; /* return with error */
7ef7284… drh 831 } while (!last);
7ef7284… drh 832 }
7ef7284… drh 833
7ef7284… drh 834 /* update the lengths and return */
7ef7284… drh 835 if (err <= 0) {
7ef7284… drh 836 *destlen = s.outcnt;
7ef7284… drh 837 *sourcelen = s.incnt;
7ef7284… drh 838 }
7ef7284… drh 839 return err;
7ef7284… drh 840 }

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button