Fossil SCM

fossil-scm / compat / zlib / contrib / puff / puff.c

Blame History Raw 841 lines

1	`/*`
2	`* puff.c`
3	`* Copyright (C) 2002-2013 Mark Adler`
4	`* For conditions of distribution and use, see copyright notice in puff.h`
5	`* version 2.3, 21 Jan 2013`
6	`*`
7	`* puff.c is a simple inflate written to be an unambiguous way to specify the`
8	`* deflate format. It is not written for speed but rather simplicity. As a`
9	`* side benefit, this code might actually be useful when small code is more`
10	`* important than speed, such as bootstrap applications. For typical deflate`
11	`* data, zlib's inflate() is about four times as fast as puff(). zlib's`
12	`* inflate compiles to around 20K on my machine, whereas puff.c compiles to`
13	`* around 4K on my machine (a PowerPC using GNU cc). If the faster decode()`
14	`* function here is used, then puff() is only twice as slow as zlib's`
15	`* inflate().`
16	`*`
17	`* All dynamically allocated memory comes from the stack. The stack required`
18	`* is less than 2K bytes. This code is compatible with 16-bit int's and`
19	`* assumes that long's are at least 32 bits. puff.c uses the short data type,`
20	`* assumed to be 16 bits, for arrays in order to conserve memory. The code`
21	`* works whether integers are stored big endian or little endian.`
22	`*`
23	`* In the comments below are "Format notes" that describe the inflate process`
24	`* and document some of the less obvious aspects of the format. This source`
25	`* code is meant to supplement RFC 1951, which formally describes the deflate`
26	`* format:`
27	`*`
28	`* https://datatracker.ietf.org/doc/html/rfc1951`
29	`*/`
30
31	`/*`
32	`* Change history:`
33	`*`
34	`* 1.0 10 Feb 2002 - First version`
35	`* 1.1 17 Feb 2002 - Clarifications of some comments and notes`
36	`* - Update puff() dest and source pointers on negative`
37	`* errors to facilitate debugging deflators`
38	`* - Remove longest from struct huffman -- not needed`
39	`* - Simplify offs[] index in construct()`
40	`* - Add input size and checking, using longjmp() to`
41	`* maintain easy readability`
42	`* - Use short data type for large arrays`
43	`* - Use pointers instead of long to specify source and`
44	`* destination sizes to avoid arbitrary 4 GB limits`
45	`* 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!),`
46	`* but leave simple version for readability`
47	`* - Make sure invalid distances detected if pointers`
48	`* are 16 bits`
49	`* - Fix fixed codes table error`
50	`* - Provide a scanning mode for determining size of`
51	`* uncompressed data`
52	`* 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Gailly]`
53	`* - Add a puff.h file for the interface`
54	`* - Add braces in puff() for else do [Gailly]`
55	`* - Use indexes instead of pointers for readability`
56	`* 1.4 31 Mar 2002 - Simplify construct() code set check`
57	`* - Fix some comments`
58	`* - Add FIXLCODES #define`
59	`* 1.5 6 Apr 2002 - Minor comment fixes`
60	`* 1.6 7 Aug 2002 - Minor format changes`
61	`* 1.7 3 Mar 2003 - Added test code for distribution`
62	`* - Added zlib-like license`
63	`* 1.8 9 Jan 2004 - Added some comments on no distance codes case`
64	`* 1.9 21 Feb 2008 - Fix bug on 16-bit integer architectures [Pohland]`
65	`* - Catch missing end-of-block symbol error`
66	`* 2.0 25 Jul 2008 - Add #define to permit distance too far back`
67	`* - Add option in TEST code for puff to write the data`
68	`* - Add option in TEST code to skip input bytes`
69	`* - Allow TEST code to read from piped stdin`
70	`* 2.1 4 Apr 2010 - Avoid variable initialization for happier compilers`
71	`* - Avoid unsigned comparisons for even happier compilers`
72	`* 2.2 25 Apr 2010 - Fix bug in variable initializations [Oberhumer]`
73	`* - Add const where appropriate [Oberhumer]`
74	`* - Split if's and ?'s for coverage testing`
75	`* - Break out test code to separate file`
76	`* - Move NIL to puff.h`
77	`* - Allow incomplete code only if single code length is 1`
78	`* - Add full code coverage test to Makefile`
79	`* 2.3 21 Jan 2013 - Check for invalid code length codes in dynamic blocks`
80	`*/`
81
82	`#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */`
83	`#include "puff.h" /* prototype for puff() */`
84
85	`#define local static /* for local function definitions */`
86
87	`/*`
88	`* Maximums for allocations and loops. It is not useful to change these --`
89	`* they are fixed by the deflate format.`
90	`*/`
91	`#define MAXBITS 15 /* maximum bits in a code */`
92	`#define MAXLCODES 286 /* maximum number of literal/length codes */`
93	`#define MAXDCODES 30 /* maximum number of distance codes */`
94	`#define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */`
95	`#define FIXLCODES 288 /* number of fixed literal/length codes */`
96
97	`/* input and output state */`
98	`struct state {`
99	`/* output state */`
100	`unsigned char out; / output buffer */`
101	`unsigned long outlen; /* available space at out */`
102	`unsigned long outcnt; /* bytes written to out so far */`
103
104	`/* input state */`
105	`const unsigned char in; / input buffer */`
106	`unsigned long inlen; /* available input at in */`
107	`unsigned long incnt; /* bytes read so far */`
108	`int bitbuf; /* bit buffer */`
109	`int bitcnt; /* number of bits in bit buffer */`
110
111	`/* input limit error return state for bits() and decode() */`
112	`jmp_buf env;`
113	`};`
114
115	`/*`
116	`* Return need bits from the input stream. This always leaves less than`
117	`* eight bits in the buffer. bits() works properly for need == 0.`
118	`*`
119	`* Format notes:`
120	`*`
121	`* - Bits are stored in bytes from the least significant bit to the most`
122	`* significant bit. Therefore bits are dropped from the bottom of the bit`
123	`* buffer, using shift right, and new bytes are appended to the top of the`
124	`* bit buffer, using shift left.`
125	`*/`
126	`local int bits(struct state *s, int need)`
127	`{`
128	`long val; /* bit accumulator (can use up to 20 bits) */`
129
130	`/* load at least need bits into val */`
131	`val = s->bitbuf;`
132	`while (s->bitcnt < need) {`
133	`if (s->incnt == s->inlen)`
134	`longjmp(s->env, 1); /* out of input */`
135	`val \|= (long)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */`
136	`s->bitcnt += 8;`
137	`}`
138
139	`/* drop need bits and update buffer, always zero to seven bits left */`
140	`s->bitbuf = (int)(val >> need);`
141	`s->bitcnt -= need;`
142
143	`/* return need bits, zeroing the bits above that */`
144	`return (int)(val & ((1L << need) - 1));`
145	`}`
146
147	`/*`
148	`* Process a stored block.`
149	`*`
150	`* Format notes:`
151	`*`
152	`* - After the two-bit stored block type (00), the stored block length and`
153	`* stored bytes are byte-aligned for fast copying. Therefore any leftover`
154	`* bits in the byte that has the last bit of the type, as many as seven, are`
155	`* discarded. The value of the discarded bits are not defined and should not`
156	`* be checked against any expectation.`
157	`*`
158	`* - The second inverted copy of the stored block length does not have to be`
159	`* checked, but it's probably a good idea to do so anyway.`
160	`*`
161	`* - A stored block can have zero length. This is sometimes used to byte-align`
162	`* subsets of the compressed data for random access or partial recovery.`
163	`*/`
164	`local int stored(struct state *s)`
165	`{`
166	`unsigned len; /* length of stored block */`
167
168	`/* discard leftover bits from current byte (assumes s->bitcnt < 8) */`
169	`s->bitbuf = 0;`
170	`s->bitcnt = 0;`
171
172	`/* get length and check against its one's complement */`
173	`if (s->incnt + 4 > s->inlen)`
174	`return 2; /* not enough input */`
175	`len = s->in[s->incnt++];`
176	`len \|= s->in[s->incnt++] << 8;`
177	`if (s->in[s->incnt++] != (~len & 0xff) \|\|`
178	`s->in[s->incnt++] != ((~len >> 8) & 0xff))`
179	`return -2; /* didn't match complement! */`
180
181	`/* copy len bytes from in to out */`
182	`if (s->incnt + len > s->inlen)`
183	`return 2; /* not enough input */`
184	`if (s->out != NIL) {`
185	`if (s->outcnt + len > s->outlen)`
186	`return 1; /* not enough output space */`
187	`while (len--)`
188	`s->out[s->outcnt++] = s->in[s->incnt++];`
189	`}`
190	`else { /* just scanning */`
191	`s->outcnt += len;`
192	`s->incnt += len;`
193	`}`
194
195	`/* done with a valid stored block */`
196	`return 0;`
197	`}`
198
199	`/*`
200	`* Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of`
201	`* each length, which for a canonical code are stepped through in order.`
202	`* symbol[] are the symbol values in canonical order, where the number of`
203	`* entries is the sum of the counts in count[]. The decoding process can be`
204	`* seen in the function decode() below.`
205	`*/`
206	`struct huffman {`
207	`short count; / number of symbols of each length */`
208	`short symbol; / canonically ordered symbols */`
209	`};`
210
211	`/*`
212	`* Decode a code from the stream s using huffman table h. Return the symbol or`
213	`* a negative value if there is an error. If all of the lengths are zero, i.e.`
214	`* an empty code, or if the code is incomplete and an invalid code is received,`
215	`* then -10 is returned after reading MAXBITS bits.`
216	`*`
217	`* Format notes:`
218	`*`
219	`* - The codes as stored in the compressed data are bit-reversed relative to`
220	`* a simple integer ordering of codes of the same lengths. Hence below the`
221	`* bits are pulled from the compressed data one at a time and used to`
222	`* build the code value reversed from what is in the stream in order to`
223	`* permit simple integer comparisons for decoding. A table-based decoding`
224	`* scheme (as used in zlib) does not need to do this reversal.`
225	`*`
226	`* - The first code for the shortest length is all zeros. Subsequent codes of`
227	`* the same length are simply integer increments of the previous code. When`
228	`* moving up a length, a zero bit is appended to the code. For a complete`
229	`* code, the last code of the longest length will be all ones.`
230	`*`
231	`* - Incomplete codes are handled by this decoder, since they are permitted`
232	`* in the deflate format. See the format notes for fixed() and dynamic().`
233	`*/`
234	`#ifdef SLOW`
235	`local int decode(struct state s, const struct huffman h)`
236	`{`
237	`int len; /* current number of bits in code */`
238	`int code; /* len bits being decoded */`
239	`int first; /* first code of length len */`
240	`int count; /* number of codes of length len */`
241	`int index; /* index of first code of length len in symbol table */`
242
243	`code = first = index = 0;`
244	`for (len = 1; len <= MAXBITS; len++) {`
245	`code \|= bits(s, 1); /* get next bit */`
246	`count = h->count[len];`
247	`if (code - count < first) /* if length len, return symbol */`
248	`return h->symbol[index + (code - first)];`
249	`index += count; /* else update for next length */`
250	`first += count;`
251	`first <<= 1;`
252	`code <<= 1;`
253	`}`
254	`return -10; /* ran out of codes */`
255	`}`
256
257	`/*`
258	`* A faster version of decode() for real applications of this code. It's not`
259	`* as readable, but it makes puff() twice as fast. And it only makes the code`
260	`* a few percent larger.`
261	`*/`
262	`#else /* !SLOW */`
263	`local int decode(struct state s, const struct huffman h)`
264	`{`
265	`int len; /* current number of bits in code */`
266	`int code; /* len bits being decoded */`
267	`int first; /* first code of length len */`
268	`int count; /* number of codes of length len */`
269	`int index; /* index of first code of length len in symbol table */`
270	`int bitbuf; /* bits from stream */`
271	`int left; /* bits left in next or left to process */`
272	`short next; / next number of codes */`
273
274	`bitbuf = s->bitbuf;`
275	`left = s->bitcnt;`
276	`code = first = index = 0;`
277	`len = 1;`
278	`next = h->count + 1;`
279	`while (1) {`
280	`while (left--) {`
281	`code \|= bitbuf & 1;`
282	`bitbuf >>= 1;`
283	`count = *next++;`
284	`if (code - count < first) { /* if length len, return symbol */`
285	`s->bitbuf = bitbuf;`
286	`s->bitcnt = (s->bitcnt - len) & 7;`
287	`return h->symbol[index + (code - first)];`
288	`}`
289	`index += count; /* else update for next length */`
290	`first += count;`
291	`first <<= 1;`
292	`code <<= 1;`
293	`len++;`
294	`}`
295	`left = (MAXBITS+1) - len;`
296	`if (left == 0)`
297	`break;`
298	`if (s->incnt == s->inlen)`
299	`longjmp(s->env, 1); /* out of input */`
300	`bitbuf = s->in[s->incnt++];`
301	`if (left > 8)`
302	`left = 8;`
303	`}`
304	`return -10; /* ran out of codes */`
305	`}`
306	`#endif /* SLOW */`
307
308	`/*`
309	`* Given the list of code lengths length[0..n-1] representing a canonical`
310	`* Huffman code for n symbols, construct the tables required to decode those`
311	`* codes. Those tables are the number of codes of each length, and the symbols`
312	`* sorted by length, retaining their original order within each length. The`
313	`* return value is zero for a complete code set, negative for an over-`
314	`* subscribed code set, and positive for an incomplete code set. The tables`
315	`* can be used if the return value is zero or positive, but they cannot be used`
316	`* if the return value is negative. If the return value is zero, it is not`
317	`* possible for decode() using that table to return an error--any stream of`
318	`* enough bits will resolve to a symbol. If the return value is positive, then`
319	`* it is possible for decode() using that table to return an error for received`
320	`* codes past the end of the incomplete lengths.`
321	`*`
322	`* Not used by decode(), but used for error checking, h->count[0] is the number`
323	`* of the n symbols not in the code. So n - h->count[0] is the number of`
324	`* codes. This is useful for checking for incomplete codes that have more than`
325	`* one symbol, which is an error in a dynamic block.`
326	`*`
327	`* Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS`
328	`* This is assured by the construction of the length arrays in dynamic() and`
329	`* fixed() and is not verified by construct().`
330	`*`
331	`* Format notes:`
332	`*`
333	`* - Permitted and expected examples of incomplete codes are one of the fixed`
334	`* codes and any code with a single symbol which in deflate is coded as one`
335	`* bit instead of zero bits. See the format notes for fixed() and dynamic().`
336	`*`
337	`* - Within a given code length, the symbols are kept in ascending order for`
338	`* the code bits definition.`
339	`*/`
340	`local int construct(struct huffman h, const short length, int n)`
341	`{`
342	`int symbol; /* current symbol when stepping through length[] */`
343	`int len; /* current length when stepping through h->count[] */`
344	`int left; /* number of possible codes left of current length */`
345	`short offs[MAXBITS+1]; /* offsets in symbol table for each length */`
346
347	`/* count number of codes of each length */`
348	`for (len = 0; len <= MAXBITS; len++)`
349	`h->count[len] = 0;`
350	`for (symbol = 0; symbol < n; symbol++)`
351	`(h->count[length[symbol]])++; /* assumes lengths are within bounds */`
352	`if (h->count[0] == n) /* no codes! */`
353	`return 0; /* complete, but decode() will fail */`
354
355	`/* check for an over-subscribed or incomplete set of lengths */`
356	`left = 1; /* one possible code of zero length */`
357	`for (len = 1; len <= MAXBITS; len++) {`
358	`left <<= 1; /* one more bit, double codes left */`
359	`left -= h->count[len]; /* deduct count from possible codes */`
360	`if (left < 0)`
361	`return left; /* over-subscribed--return negative */`
362	`} /* left > 0 means incomplete */`
363
364	`/* generate offsets into symbol table for each length for sorting */`
365	`offs[1] = 0;`
366	`for (len = 1; len < MAXBITS; len++)`
367	`offs[len + 1] = offs[len] + h->count[len];`
368
369	`/*`
370	`* put symbols in table sorted by length, by symbol order within each`
371	`* length`
372	`*/`
373	`for (symbol = 0; symbol < n; symbol++)`
374	`if (length[symbol] != 0)`
375	`h->symbol[offs[length[symbol]]++] = symbol;`
376
377	`/* return zero for complete set, positive for incomplete set */`
378	`return left;`
379	`}`
380
381	`/*`
382	`* Decode literal/length and distance codes until an end-of-block code.`
383	`*`
384	`* Format notes:`
385	`*`
386	`* - Compressed data that is after the block type if fixed or after the code`
387	`* description if dynamic is a combination of literals and length/distance`
388	`* pairs terminated by and end-of-block code. Literals are simply Huffman`
389	`* coded bytes. A length/distance pair is a coded length followed by a`
390	`* coded distance to represent a string that occurs earlier in the`
391	`* uncompressed data that occurs again at the current location.`
392	`*`
393	`* - Literals, lengths, and the end-of-block code are combined into a single`
394	`* code of up to 286 symbols. They are 256 literals (0..255), 29 length`
395	`* symbols (257..285), and the end-of-block symbol (256).`
396	`*`
397	`* - There are 256 possible lengths (3..258), and so 29 symbols are not enough`
398	`* to represent all of those. Lengths 3..10 and 258 are in fact represented`
399	`* by just a length symbol. Lengths 11..257 are represented as a symbol and`
400	`* some number of extra bits that are added as an integer to the base length`
401	`* of the length symbol. The number of extra bits is determined by the base`
402	`* length symbol. These are in the static arrays below, lens[] for the base`
403	`* lengths and lext[] for the corresponding number of extra bits.`
404	`*`
405	`* - The reason that 258 gets its own symbol is that the longest length is used`
406	`* often in highly redundant files. Note that 258 can also be coded as the`
407	`* base value 227 plus the maximum extra value of 31. While a good deflate`
408	`* should never do this, it is not an error, and should be decoded properly.`
409	`*`
410	`* - If a length is decoded, including its extra bits if any, then it is`
411	`* followed a distance code. There are up to 30 distance symbols. Again`
412	`* there are many more possible distances (1..32768), so extra bits are added`
413	`* to a base value represented by the symbol. The distances 1..4 get their`
414	`* own symbol, but the rest require extra bits. The base distances and`
415	`* corresponding number of extra bits are below in the static arrays dist[]`
416	`* and dext[].`
417	`*`
418	`* - Literal bytes are simply written to the output. A length/distance pair is`
419	`* an instruction to copy previously uncompressed bytes to the output. The`
420	`* copy is from distance bytes back in the output stream, copying for length`
421	`* bytes.`
422	`*`
423	`* - Distances pointing before the beginning of the output data are not`
424	`* permitted.`
425	`*`
426	`* - Overlapped copies, where the length is greater than the distance, are`
427	`* allowed and common. For example, a distance of one and a length of 258`
428	`* simply copies the last byte 258 times. A distance of four and a length of`
429	`* twelve copies the last four bytes three times. A simple forward copy`
430	`* ignoring whether the length is greater than the distance or not implements`
431	`* this correctly. You should not use memcpy() since its behavior is not`
432	`* defined for overlapped arrays. You should not use memmove() or bcopy()`
433	`* since though their behavior -is- defined for overlapping arrays, it is`
434	`* defined to do the wrong thing in this case.`
435	`*/`
436	`local int codes(struct state *s,`
437	`const struct huffman *lencode,`
438	`const struct huffman *distcode)`
439	`{`
440	`int symbol; /* decoded symbol */`
441	`int len; /* length for copy */`
442	`unsigned dist; /* distance for copy */`
443	`static const short lens[29] = { /* Size base for length codes 257..285 */`
444	`3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,`
445	`35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};`
446	`static const short lext[29] = { /* Extra bits for length codes 257..285 */`
447	`0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,`
448	`3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};`
449	`static const short dists[30] = { /* Offset base for distance codes 0..29 */`
450	`1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,`
451	`257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,`
452	`8193, 12289, 16385, 24577};`
453	`static const short dext[30] = { /* Extra bits for distance codes 0..29 */`
454	`0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,`
455	`7, 7, 8, 8, 9, 9, 10, 10, 11, 11,`
456	`12, 12, 13, 13};`
457
458	`/* decode literals and length/distance pairs */`
459	`do {`
460	`symbol = decode(s, lencode);`
461	`if (symbol < 0)`
462	`return symbol; /* invalid symbol */`
463	`if (symbol < 256) { /* literal: symbol is the byte */`
464	`/* write out the literal */`
465	`if (s->out != NIL) {`
466	`if (s->outcnt == s->outlen)`
467	`return 1;`
468	`s->out[s->outcnt] = symbol;`
469	`}`
470	`s->outcnt++;`
471	`}`
472	`else if (symbol > 256) { /* length */`
473	`/* get and compute length */`
474	`symbol -= 257;`
475	`if (symbol >= 29)`
476	`return -10; /* invalid fixed code */`
477	`len = lens[symbol] + bits(s, lext[symbol]);`
478
479	`/* get and check distance */`
480	`symbol = decode(s, distcode);`
481	`if (symbol < 0)`
482	`return symbol; /* invalid symbol */`
483	`dist = dists[symbol] + bits(s, dext[symbol]);`
484	`#ifndef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR`
485	`if (dist > s->outcnt)`
486	`return -11; /* distance too far back */`
487	`#endif`
488
489	`/* copy length bytes from distance bytes back */`
490	`if (s->out != NIL) {`
491	`if (s->outcnt + len > s->outlen)`
492	`return 1;`
493	`while (len--) {`
494	`s->out[s->outcnt] =`
495	`#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR`
496	`dist > s->outcnt ?`
497	`0 :`
498	`#endif`
499	`s->out[s->outcnt - dist];`
500	`s->outcnt++;`
501	`}`
502	`}`
503	`else`
504	`s->outcnt += len;`
505	`}`
506	`} while (symbol != 256); /* end of block symbol */`
507
508	`/* done with a valid fixed or dynamic block */`
509	`return 0;`
510	`}`
511
512	`/*`
513	`* Process a fixed codes block.`
514	`*`
515	`* Format notes:`
516	`*`
517	`* - This block type can be useful for compressing small amounts of data for`
518	`* which the size of the code descriptions in a dynamic block exceeds the`
519	`* benefit of custom codes for that block. For fixed codes, no bits are`
520	`* spent on code descriptions. Instead the code lengths for literal/length`
521	`* codes and distance codes are fixed. The specific lengths for each symbol`
522	`* can be seen in the "for" loops below.`
523	`*`
524	`* - The literal/length code is complete, but has two symbols that are invalid`
525	`* and should result in an error if received. This cannot be implemented`
526	`* simply as an incomplete code since those two symbols are in the "middle"`
527	`* of the code. They are eight bits long and the longest literal/length\`
528	`* code is nine bits. Therefore the code must be constructed with those`
529	`* symbols, and the invalid symbols must be detected after decoding.`
530	`*`
531	`* - The fixed distance codes also have two invalid symbols that should result`
532	`* in an error if received. Since all of the distance codes are the same`
533	`* length, this can be implemented as an incomplete code. Then the invalid`
534	`* codes are detected while decoding.`
535	`*/`
536	`local int fixed(struct state *s)`
537	`{`
538	`static int virgin = 1;`
539	`static short lencnt[MAXBITS+1], lensym[FIXLCODES];`
540	`static short distcnt[MAXBITS+1], distsym[MAXDCODES];`
541	`static struct huffman lencode, distcode;`
542
543	`/* build fixed huffman tables if first call (may not be thread safe) */`
544	`if (virgin) {`
545	`int symbol;`
546	`short lengths[FIXLCODES];`
547
548	`/* construct lencode and distcode */`
549	`lencode.count = lencnt;`
550	`lencode.symbol = lensym;`
551	`distcode.count = distcnt;`
552	`distcode.symbol = distsym;`
553
554	`/* literal/length table */`
555	`for (symbol = 0; symbol < 144; symbol++)`
556	`lengths[symbol] = 8;`
557	`for (; symbol < 256; symbol++)`
558	`lengths[symbol] = 9;`
559	`for (; symbol < 280; symbol++)`
560	`lengths[symbol] = 7;`
561	`for (; symbol < FIXLCODES; symbol++)`
562	`lengths[symbol] = 8;`
563	`construct(&lencode, lengths, FIXLCODES);`
564
565	`/* distance table */`
566	`for (symbol = 0; symbol < MAXDCODES; symbol++)`
567	`lengths[symbol] = 5;`
568	`construct(&distcode, lengths, MAXDCODES);`
569
570	`/* do this just once */`
571	`virgin = 0;`
572	`}`
573
574	`/* decode data until end-of-block code */`
575	`return codes(s, &lencode, &distcode);`
576	`}`
577
578	`/*`
579	`* Process a dynamic codes block.`
580	`*`
581	`* Format notes:`
582	`*`
583	`* - A dynamic block starts with a description of the literal/length and`
584	`* distance codes for that block. New dynamic blocks allow the compressor to`
585	`* rapidly adapt to changing data with new codes optimized for that data.`
586	`*`
587	`* - The codes used by the deflate format are "canonical", which means that`
588	`* the actual bits of the codes are generated in an unambiguous way simply`
589	`* from the number of bits in each code. Therefore the code descriptions`
590	`* are simply a list of code lengths for each symbol.`
591	`*`
592	`* - The code lengths are stored in order for the symbols, so lengths are`
593	`* provided for each of the literal/length symbols, and for each of the`
594	`* distance symbols.`
595	`*`
596	`* - If a symbol is not used in the block, this is represented by a zero as the`
597	`* code length. This does not mean a zero-length code, but rather that no`
598	`* code should be created for this symbol. There is no way in the deflate`
599	`* format to represent a zero-length code.`
600	`*`
601	`* - The maximum number of bits in a code is 15, so the possible lengths for`
602	`* any code are 1..15.`
603	`*`
604	`* - The fact that a length of zero is not permitted for a code has an`
605	`* interesting consequence. Normally if only one symbol is used for a given`
606	`* code, then in fact that code could be represented with zero bits. However`
607	`* in deflate, that code has to be at least one bit. So for example, if`
608	`* only a single distance base symbol appears in a block, then it will be`
609	`* represented by a single code of length one, in particular one 0 bit. This`
610	`* is an incomplete code, since if a 1 bit is received, it has no meaning,`
611	`* and should result in an error. So incomplete distance codes of one symbol`
612	`* should be permitted, and the receipt of invalid codes should be handled.`
613	`*`
614	`* - It is also possible to have a single literal/length code, but that code`
615	`* must be the end-of-block code, since every dynamic block has one. This`
616	`* is not the most efficient way to create an empty block (an empty fixed`
617	`* block is fewer bits), but it is allowed by the format. So incomplete`
618	`* literal/length codes of one symbol should also be permitted.`
619	`*`
620	`* - If there are only literal codes and no lengths, then there are no distance`
621	`* codes. This is represented by one distance code with zero bits.`
622	`*`
623	`* - The list of up to 286 length/literal lengths and up to 30 distance lengths`
624	`* are themselves compressed using Huffman codes and run-length encoding. In`
625	`* the list of code lengths, a 0 symbol means no code, a 1..15 symbol means`
626	`* that length, and the symbols 16, 17, and 18 are run-length instructions.`
627	`* Each of 16, 17, and 18 are followed by extra bits to define the length of`
628	`* the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10`
629	`* zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols`
630	`* are common, hence the special coding for zero lengths.`
631	`*`
632	`* - The symbols for 0..18 are Huffman coded, and so that code must be`
633	`* described first. This is simply a sequence of up to 19 three-bit values`
634	`* representing no code (0) or the code length for that symbol (1..7).`
635	`*`
636	`* - A dynamic block starts with three fixed-size counts from which is computed`
637	`* the number of literal/length code lengths, the number of distance code`
638	`* lengths, and the number of code length code lengths (ok, you come up with`
639	`* a better name!) in the code descriptions. For the literal/length and`
640	`* distance codes, lengths after those provided are considered zero, i.e. no`
641	`* code. The code length code lengths are received in a permuted order (see`
642	`* the order[] array below) to make a short code length code length list more`
643	`* likely. As it turns out, very short and very long codes are less likely`
644	`* to be seen in a dynamic code description, hence what may appear initially`
645	`* to be a peculiar ordering.`
646	`*`
647	`* - Given the number of literal/length code lengths (nlen) and distance code`
648	`* lengths (ndist), then they are treated as one long list of nlen + ndist`
649	`* code lengths. Therefore run-length coding can and often does cross the`
650	`* boundary between the two sets of lengths.`
651	`*`
652	`* - So to summarize, the code description at the start of a dynamic block is`
653	`* three counts for the number of code lengths for the literal/length codes,`
654	`* the distance codes, and the code length codes. This is followed by the`
655	`* code length code lengths, three bits each. This is used to construct the`
656	`* code length code which is used to read the remainder of the lengths. Then`
657	`* the literal/length code lengths and distance lengths are read as a single`
658	`* set of lengths using the code length codes. Codes are constructed from`
659	`* the resulting two sets of lengths, and then finally you can start`
660	`* decoding actual compressed data in the block.`
661	`*`
662	`* - For reference, a "typical" size for the code description in a dynamic`
663	`* block is around 80 bytes.`
664	`*/`
665	`local int dynamic(struct state *s)`
666	`{`
667	`int nlen, ndist, ncode; /* number of lengths in descriptor */`
668	`int index; /* index of lengths[] */`
669	`int err; /* construct() return value */`
670	`short lengths[MAXCODES]; /* descriptor code lengths */`
671	`short lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */`
672	`short distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */`
673	`struct huffman lencode, distcode; /* length and distance codes */`
674	`static const short order[19] = /* permutation of code length codes */`
675	`{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};`
676
677	`/* construct lencode and distcode */`
678	`lencode.count = lencnt;`
679	`lencode.symbol = lensym;`
680	`distcode.count = distcnt;`
681	`distcode.symbol = distsym;`
682
683	`/* get number of lengths in each table, check lengths */`
684	`nlen = bits(s, 5) + 257;`
685	`ndist = bits(s, 5) + 1;`
686	`ncode = bits(s, 4) + 4;`
687	`if (nlen > MAXLCODES \|\| ndist > MAXDCODES)`
688	`return -3; /* bad counts */`
689
690	`/* read code length code lengths (really), missing lengths are zero */`
691	`for (index = 0; index < ncode; index++)`
692	`lengths[order[index]] = bits(s, 3);`
693	`for (; index < 19; index++)`
694	`lengths[order[index]] = 0;`
695
696	`/* build huffman table for code lengths codes (use lencode temporarily) */`
697	`err = construct(&lencode, lengths, 19);`
698	`if (err != 0) /* require complete code set here */`
699	`return -4;`
700
701	`/* read length/literal and distance code length tables */`
702	`index = 0;`
703	`while (index < nlen + ndist) {`
704	`int symbol; /* decoded value */`
705	`int len; /* last length to repeat */`
706
707	`symbol = decode(s, &lencode);`
708	`if (symbol < 0)`
709	`return symbol; /* invalid symbol */`
710	`if (symbol < 16) /* length in 0..15 */`
711	`lengths[index++] = symbol;`
712	`else { /* repeat instruction */`
713	`len = 0; /* assume repeating zeros */`
714	`if (symbol == 16) { /* repeat last length 3..6 times */`
715	`if (index == 0)`
716	`return -5; /* no last length! */`
717	`len = lengths[index - 1]; /* last length */`
718	`symbol = 3 + bits(s, 2);`
719	`}`
720	`else if (symbol == 17) /* repeat zero 3..10 times */`
721	`symbol = 3 + bits(s, 3);`
722	`else /* == 18, repeat zero 11..138 times */`
723	`symbol = 11 + bits(s, 7);`
724	`if (index + symbol > nlen + ndist)`
725	`return -6; /* too many lengths! */`
726	`while (symbol--) /* repeat last or zero symbol times */`
727	`lengths[index++] = len;`
728	`}`
729	`}`
730
731	`/* check for end-of-block code -- there better be one! */`
732	`if (lengths[256] == 0)`
733	`return -9;`
734
735	`/* build huffman table for literal/length codes */`
736	`err = construct(&lencode, lengths, nlen);`
737	`if (err && (err < 0 \|\| nlen != lencode.count[0] + lencode.count[1]))`
738	`return -7; /* incomplete code ok only for single length 1 code */`
739
740	`/* build huffman table for distance codes */`
741	`err = construct(&distcode, lengths + nlen, ndist);`
742	`if (err && (err < 0 \|\| ndist != distcode.count[0] + distcode.count[1]))`
743	`return -8; /* incomplete code ok only for single length 1 code */`
744
745	`/* decode data until end-of-block code */`
746	`return codes(s, &lencode, &distcode);`
747	`}`
748
749	`/*`
750	`* Inflate source to dest. On return, destlen and sourcelen are updated to the`
751	`* size of the uncompressed data and the size of the deflate data respectively.`
752	`* On success, the return value of puff() is zero. If there is an error in the`
753	`* source data, i.e. it is not in the deflate format, then a negative value is`
754	`* returned. If there is not enough input available or there is not enough`
755	`* output space, then a positive error is returned. In that case, destlen and`
756	`* sourcelen are not updated to facilitate retrying from the beginning with the`
757	`* provision of more input data or more output space. In the case of invalid`
758	`* inflate data (a negative error), the dest and source pointers are updated to`
759	`* facilitate the debugging of deflators.`
760	`*`
761	`* puff() also has a mode to determine the size of the uncompressed output with`
762	`* no output written. For this dest must be (unsigned char *)0. In this case,`
763	`* the input value of destlen is ignored, and on return destlen is set to the`
764	`* size of the uncompressed output.`
765	`*`
766	`* The return codes are:`
767	`*`
768	`* 2: available inflate data did not terminate`
769	`* 1: output space exhausted before completing inflate`
770	`* 0: successful inflate`
771	`* -1: invalid block type (type == 3)`
772	`* -2: stored block length did not match one's complement`
773	`* -3: dynamic block code description: too many length or distance codes`
774	`* -4: dynamic block code description: code lengths codes incomplete`
775	`* -5: dynamic block code description: repeat lengths with no first length`
776	`* -6: dynamic block code description: repeat more than specified lengths`
777	`* -7: dynamic block code description: invalid literal/length code lengths`
778	`* -8: dynamic block code description: invalid distance code lengths`
779	`* -9: dynamic block code description: missing end-of-block code`
780	`* -10: invalid literal/length or distance code in fixed or dynamic block`
781	`* -11: distance is too far back in fixed or dynamic block`
782	`*`
783	`* Format notes:`
784	`*`
785	`* - Three bits are read for each block to determine the kind of block and`
786	`* whether or not it is the last block. Then the block is decoded and the`
787	`* process repeated if it was not the last block.`
788	`*`
789	`* - The leftover bits in the last byte of the deflate data after the last`
790	`* block (if it was a fixed or dynamic block) are undefined and have no`
791	`* expected values to check.`
792	`*/`
793	`int puff(unsigned char dest, / pointer to destination pointer */`
794	`unsigned long destlen, / amount of output space */`
795	`const unsigned char source, / pointer to source data pointer */`
796	`unsigned long sourcelen) / amount of input available */`
797	`{`
798	`struct state s; /* input/output state */`
799	`int last, type; /* block information */`
800	`int err; /* return value */`
801
802	`/* initialize output state */`
803	`s.out = dest;`
804	`s.outlen = destlen; / ignored if dest is NIL */`
805	`s.outcnt = 0;`
806
807	`/* initialize input state */`
808	`s.in = source;`
809	`s.inlen = *sourcelen;`
810	`s.incnt = 0;`
811	`s.bitbuf = 0;`
812	`s.bitcnt = 0;`
813
814	`/* return if bits() or decode() tries to read past available input */`
815	`if (setjmp(s.env) != 0) /* if came back here via longjmp() */`
816	`err = 2; /* then skip do-loop, return error */`
817	`else {`
818	`/* process blocks until last block or error */`
819	`do {`
820	`last = bits(&s, 1); /* one if last block */`
821	`type = bits(&s, 2); /* block type 0..3 */`
822	`err = type == 0 ?`
823	`stored(&s) :`
824	`(type == 1 ?`
825	`fixed(&s) :`
826	`(type == 2 ?`
827	`dynamic(&s) :`
828	`-1)); /* type == 3, invalid */`
829	`if (err != 0)`
830	`break; /* return with error */`
831	`} while (!last);`
832	`}`
833
834	`/* update the lengths and return */`
835	`if (err <= 0) {`
836	`*destlen = s.outcnt;`
837	`*sourcelen = s.incnt;`
838	`}`
839	`return err;`
840	`}`
841

Fossil SCM

Keyboard Shortcuts