Fossil SCM

fossil-scm / compat / zlib / contrib / blast / blast.c

Blame History Raw 423 lines

1	`/* blast.c`
2	`* Copyright (C) 2003, 2012, 2013 Mark Adler`
3	`* For conditions of distribution and use, see copyright notice in blast.h`
4	`* version 1.3, 24 Aug 2013`
5	`*`
6	`* blast.c decompresses data compressed by the PKWare Compression Library.`
7	`* This function provides functionality similar to the explode() function of`
8	`* the PKWare library, hence the name "blast".`
9	`*`
10	`* This decompressor is based on the excellent format description provided by`
11	`* Ben Rudiak-Gould in comp.compression on August 13, 2001. Interestingly, the`
12	`* example Ben provided in the post is incorrect. The distance 110001 should`
13	`* instead be 111000. When corrected, the example byte stream becomes:`
14	`*`
15	`* 00 04 82 24 25 8f 80 7f`
16	`*`
17	`* which decompresses to "AIAIAIAIAIAIA" (without the quotes).`
18	`*/`
19
20	`/*`
21	`* Change history:`
22	`*`
23	`* 1.0 12 Feb 2003 - First version`
24	`* 1.1 16 Feb 2003 - Fixed distance check for > 4 GB uncompressed data`
25	`* 1.2 24 Oct 2012 - Add note about using binary mode in stdio`
26	`* - Fix comparisons of differently signed integers`
27	`* 1.3 24 Aug 2013 - Return unused input from blast()`
28	`* - Fix test code to correctly report unused input`
29	`* - Enable the provision of initial input to blast()`
30	`*/`
31
32	`#include <stddef.h> /* for NULL */`
33	`#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */`
34	`#include "blast.h" /* prototype for blast() */`
35
36	`#define MAXBITS 13 /* maximum code length */`
37	`#define MAXWIN 4096 /* maximum window size */`
38
39	`/* input and output state */`
40	`struct state {`
41	`/* input state */`
42	`blast_in infun; /* input function provided by user */`
43	`void inhow; / opaque information passed to infun() */`
44	`unsigned char in; / next input location */`
45	`unsigned left; /* available input at in */`
46	`int bitbuf; /* bit buffer */`
47	`int bitcnt; /* number of bits in bit buffer */`
48
49	`/* input limit error return state for bits() and decode() */`
50	`jmp_buf env;`
51
52	`/* output state */`
53	`blast_out outfun; /* output function provided by user */`
54	`void outhow; / opaque information passed to outfun() */`
55	`unsigned next; /* index of next write location in out[] */`
56	`int first; /* true to check distances (for first 4K) */`
57	`unsigned char out[MAXWIN]; /* output buffer and sliding window */`
58	`};`
59
60	`/*`
61	`* Return need bits from the input stream. This always leaves less than`
62	`* eight bits in the buffer. bits() works properly for need == 0.`
63	`*`
64	`* Format notes:`
65	`*`
66	`* - Bits are stored in bytes from the least significant bit to the most`
67	`* significant bit. Therefore bits are dropped from the bottom of the bit`
68	`* buffer, using shift right, and new bytes are appended to the top of the`
69	`* bit buffer, using shift left.`
70	`*/`
71	`local int bits(struct state *s, int need)`
72	`{`
73	`int val; /* bit accumulator */`
74
75	`/* load at least need bits into val */`
76	`val = s->bitbuf;`
77	`while (s->bitcnt < need) {`
78	`if (s->left == 0) {`
79	`s->left = s->infun(s->inhow, &(s->in));`
80	`if (s->left == 0) longjmp(s->env, 1); /* out of input */`
81	`}`
82	`val \|= (int)((s->in)++) << s->bitcnt; / load eight bits */`
83	`s->left--;`
84	`s->bitcnt += 8;`
85	`}`
86
87	`/* drop need bits and update buffer, always zero to seven bits left */`
88	`s->bitbuf = val >> need;`
89	`s->bitcnt -= need;`
90
91	`/* return need bits, zeroing the bits above that */`
92	`return val & ((1 << need) - 1);`
93	`}`
94
95	`/*`
96	`* Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of`
97	`* each length, which for a canonical code are stepped through in order.`
98	`* symbol[] are the symbol values in canonical order, where the number of`
99	`* entries is the sum of the counts in count[]. The decoding process can be`
100	`* seen in the function decode() below.`
101	`*/`
102	`struct huffman {`
103	`short count; / number of symbols of each length */`
104	`short symbol; / canonically ordered symbols */`
105	`};`
106
107	`/*`
108	`* Decode a code from the stream s using huffman table h. Return the symbol or`
109	`* a negative value if there is an error. If all of the lengths are zero, i.e.`
110	`* an empty code, or if the code is incomplete and an invalid code is received,`
111	`* then -9 is returned after reading MAXBITS bits.`
112	`*`
113	`* Format notes:`
114	`*`
115	`* - The codes as stored in the compressed data are bit-reversed relative to`
116	`* a simple integer ordering of codes of the same lengths. Hence below the`
117	`* bits are pulled from the compressed data one at a time and used to`
118	`* build the code value reversed from what is in the stream in order to`
119	`* permit simple integer comparisons for decoding.`
120	`*`
121	`* - The first code for the shortest length is all ones. Subsequent codes of`
122	`* the same length are simply integer decrements of the previous code. When`
123	`* moving up a length, a one bit is appended to the code. For a complete`
124	`* code, the last code of the longest length will be all zeros. To support`
125	`* this ordering, the bits pulled during decoding are inverted to apply the`
126	`* more "natural" ordering starting with all zeros and incrementing.`
127	`*/`
128	`local int decode(struct state s, struct huffman h)`
129	`{`
130	`int len; /* current number of bits in code */`
131	`int code; /* len bits being decoded */`
132	`int first; /* first code of length len */`
133	`int count; /* number of codes of length len */`
134	`int index; /* index of first code of length len in symbol table */`
135	`int bitbuf; /* bits from stream */`
136	`int left; /* bits left in next or left to process */`
137	`short next; / next number of codes */`
138
139	`bitbuf = s->bitbuf;`
140	`left = s->bitcnt;`
141	`code = first = index = 0;`
142	`len = 1;`
143	`next = h->count + 1;`
144	`while (1) {`
145	`while (left--) {`
146	`code \|= (bitbuf & 1) ^ 1; /* invert code */`
147	`bitbuf >>= 1;`
148	`count = *next++;`
149	`if (code < first + count) { /* if length len, return symbol */`
150	`s->bitbuf = bitbuf;`
151	`s->bitcnt = (s->bitcnt - len) & 7;`
152	`return h->symbol[index + (code - first)];`
153	`}`
154	`index += count; /* else update for next length */`
155	`first += count;`
156	`first <<= 1;`
157	`code <<= 1;`
158	`len++;`
159	`}`
160	`left = (MAXBITS+1) - len;`
161	`if (left == 0) break;`
162	`if (s->left == 0) {`
163	`s->left = s->infun(s->inhow, &(s->in));`
164	`if (s->left == 0) longjmp(s->env, 1); /* out of input */`
165	`}`
166	`bitbuf = *(s->in)++;`
167	`s->left--;`
168	`if (left > 8) left = 8;`
169	`}`
170	`return -9; /* ran out of codes */`
171	`}`
172
173	`/*`
174	`* Given a list of repeated code lengths rep[0..n-1], where each byte is a`
175	`* count (high four bits + 1) and a code length (low four bits), generate the`
176	`* list of code lengths. This compaction reduces the size of the object code.`
177	`* Then given the list of code lengths length[0..n-1] representing a canonical`
178	`* Huffman code for n symbols, construct the tables required to decode those`
179	`* codes. Those tables are the number of codes of each length, and the symbols`
180	`* sorted by length, retaining their original order within each length. The`
181	`* return value is zero for a complete code set, negative for an over-`
182	`* subscribed code set, and positive for an incomplete code set. The tables`
183	`* can be used if the return value is zero or positive, but they cannot be used`
184	`* if the return value is negative. If the return value is zero, it is not`
185	`* possible for decode() using that table to return an error--any stream of`
186	`* enough bits will resolve to a symbol. If the return value is positive, then`
187	`* it is possible for decode() using that table to return an error for received`
188	`* codes past the end of the incomplete lengths.`
189	`*/`
190	`local int construct(struct huffman h, const unsigned char rep, int n)`
191	`{`
192	`int symbol; /* current symbol when stepping through length[] */`
193	`int len; /* current length when stepping through h->count[] */`
194	`int left; /* number of possible codes left of current length */`
195	`short offs[MAXBITS+1]; /* offsets in symbol table for each length */`
196	`short length[256]; /* code lengths */`
197
198	`/* convert compact repeat counts into symbol bit length list */`
199	`symbol = 0;`
200	`do {`
201	`len = *rep++;`
202	`left = (len >> 4) + 1;`
203	`len &= 15;`
204	`do {`
205	`length[symbol++] = len;`
206	`} while (--left);`
207	`} while (--n);`
208	`n = symbol;`
209
210	`/* count number of codes of each length */`
211	`for (len = 0; len <= MAXBITS; len++)`
212	`h->count[len] = 0;`
213	`for (symbol = 0; symbol < n; symbol++)`
214	`(h->count[length[symbol]])++; /* assumes lengths are within bounds */`
215	`if (h->count[0] == n) /* no codes! */`
216	`return 0; /* complete, but decode() will fail */`
217
218	`/* check for an over-subscribed or incomplete set of lengths */`
219	`left = 1; /* one possible code of zero length */`
220	`for (len = 1; len <= MAXBITS; len++) {`
221	`left <<= 1; /* one more bit, double codes left */`
222	`left -= h->count[len]; /* deduct count from possible codes */`
223	`if (left < 0) return left; /* over-subscribed--return negative */`
224	`} /* left > 0 means incomplete */`
225
226	`/* generate offsets into symbol table for each length for sorting */`
227	`offs[1] = 0;`
228	`for (len = 1; len < MAXBITS; len++)`
229	`offs[len + 1] = offs[len] + h->count[len];`
230
231	`/*`
232	`* put symbols in table sorted by length, by symbol order within each`
233	`* length`
234	`*/`
235	`for (symbol = 0; symbol < n; symbol++)`
236	`if (length[symbol] != 0)`
237	`h->symbol[offs[length[symbol]]++] = symbol;`
238
239	`/* return zero for complete set, positive for incomplete set */`
240	`return left;`
241	`}`
242
243	`/*`
244	`* Decode PKWare Compression Library stream.`
245	`*`
246	`* Format notes:`
247	`*`
248	`* - First byte is 0 if literals are uncoded or 1 if they are coded. Second`
249	`* byte is 4, 5, or 6 for the number of extra bits in the distance code.`
250	`* This is the base-2 logarithm of the dictionary size minus six.`
251	`*`
252	`* - Compressed data is a combination of literals and length/distance pairs`
253	`* terminated by an end code. Literals are either Huffman coded or`
254	`* uncoded bytes. A length/distance pair is a coded length followed by a`
255	`* coded distance to represent a string that occurs earlier in the`
256	`* uncompressed data that occurs again at the current location.`
257	`*`
258	`* - A bit preceding a literal or length/distance pair indicates which comes`
259	`* next, 0 for literals, 1 for length/distance.`
260	`*`
261	`* - If literals are uncoded, then the next eight bits are the literal, in the`
262	`* normal bit order in the stream, i.e. no bit-reversal is needed. Similarly,`
263	`* no bit reversal is needed for either the length extra bits or the distance`
264	`* extra bits.`
265	`*`
266	`* - Literal bytes are simply written to the output. A length/distance pair is`
267	`* an instruction to copy previously uncompressed bytes to the output. The`
268	`* copy is from distance bytes back in the output stream, copying for length`
269	`* bytes.`
270	`*`
271	`* - Distances pointing before the beginning of the output data are not`
272	`* permitted.`
273	`*`
274	`* - Overlapped copies, where the length is greater than the distance, are`
275	`* allowed and common. For example, a distance of one and a length of 518`
276	`* simply copies the last byte 518 times. A distance of four and a length of`
277	`* twelve copies the last four bytes three times. A simple forward copy`
278	`* ignoring whether the length is greater than the distance or not implements`
279	`* this correctly.`
280	`*/`
281	`local int decomp(struct state *s)`
282	`{`
283	`int lit; /* true if literals are coded */`
284	`int dict; /* log2(dictionary size) - 6 */`
285	`int symbol; /* decoded symbol, extra bits for distance */`
286	`int len; /* length for copy */`
287	`unsigned dist; /* distance for copy */`
288	`int copy; /* copy counter */`
289	`unsigned char from, to; /* copy pointers */`
290	`static int virgin = 1; /* build tables once */`
291	`static short litcnt[MAXBITS+1], litsym[256]; /* litcode memory */`
292	`static short lencnt[MAXBITS+1], lensym[16]; /* lencode memory */`
293	`static short distcnt[MAXBITS+1], distsym[64]; /* distcode memory */`
294	`static struct huffman litcode = {litcnt, litsym}; /* length code */`
295	`static struct huffman lencode = {lencnt, lensym}; /* length code */`
296	`static struct huffman distcode = {distcnt, distsym};/* distance code */`
297	`/* bit lengths of literal codes */`
298	`static const unsigned char litlen[] = {`
299	`11, 124, 8, 7, 28, 7, 188, 13, 76, 4, 10, 8, 12, 10, 12, 10, 8, 23, 8,`
300	`9, 7, 6, 7, 8, 7, 6, 55, 8, 23, 24, 12, 11, 7, 9, 11, 12, 6, 7, 22, 5,`
301	`7, 24, 6, 11, 9, 6, 7, 22, 7, 11, 38, 7, 9, 8, 25, 11, 8, 11, 9, 12,`
302	`8, 12, 5, 38, 5, 38, 5, 11, 7, 5, 6, 21, 6, 10, 53, 8, 7, 24, 10, 27,`
303	`44, 253, 253, 253, 252, 252, 252, 13, 12, 45, 12, 45, 12, 61, 12, 45,`
304	`44, 173};`
305	`/* bit lengths of length codes 0..15 */`
306	`static const unsigned char lenlen[] = {2, 35, 36, 53, 38, 23};`
307	`/* bit lengths of distance codes 0..63 */`
308	`static const unsigned char distlen[] = {2, 20, 53, 230, 247, 151, 248};`
309	`static const short base[16] = { /* base for length codes */`
310	`3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264};`
311	`static const char extra[16] = { /* extra bits for length codes */`
312	`0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8};`
313
314	`/* set up decoding tables (once--might not be thread-safe) */`
315	`if (virgin) {`
316	`construct(&litcode, litlen, sizeof(litlen));`
317	`construct(&lencode, lenlen, sizeof(lenlen));`
318	`construct(&distcode, distlen, sizeof(distlen));`
319	`virgin = 0;`
320	`}`
321
322	`/* read header */`
323	`lit = bits(s, 8);`
324	`if (lit > 1) return -1;`
325	`dict = bits(s, 8);`
326	`if (dict < 4 \|\| dict > 6) return -2;`
327
328	`/* decode literals and length/distance pairs */`
329	`do {`
330	`if (bits(s, 1)) {`
331	`/* get length */`
332	`symbol = decode(s, &lencode);`
333	`len = base[symbol] + bits(s, extra[symbol]);`
334	`if (len == 519) break; /* end code */`
335
336	`/* get distance */`
337	`symbol = len == 2 ? 2 : dict;`
338	`dist = decode(s, &distcode) << symbol;`
339	`dist += bits(s, symbol);`
340	`dist++;`
341	`if (s->first && dist > s->next)`
342	`return -3; /* distance too far back */`
343
344	`/* copy length bytes from distance bytes back */`
345	`do {`
346	`to = s->out + s->next;`
347	`from = to - dist;`
348	`copy = MAXWIN;`
349	`if (s->next < dist) {`
350	`from += copy;`
351	`copy = dist;`
352	`}`
353	`copy -= s->next;`
354	`if (copy > len) copy = len;`
355	`len -= copy;`
356	`s->next += copy;`
357	`do {`
358	`to++ = from++;`
359	`} while (--copy);`
360	`if (s->next == MAXWIN) {`
361	`if (s->outfun(s->outhow, s->out, s->next)) return 1;`
362	`s->next = 0;`
363	`s->first = 0;`
364	`}`
365	`} while (len != 0);`
366	`}`
367	`else {`
368	`/* get literal and write it */`
369	`symbol = lit ? decode(s, &litcode) : bits(s, 8);`
370	`s->out[s->next++] = symbol;`
371	`if (s->next == MAXWIN) {`
372	`if (s->outfun(s->outhow, s->out, s->next)) return 1;`
373	`s->next = 0;`
374	`s->first = 0;`
375	`}`
376	`}`
377	`} while (1);`
378	`return 0;`
379	`}`
380
381	`/* See comments in blast.h */`
382	`int blast(blast_in infun, void inhow, blast_out outfun, void outhow,`
383	`unsigned left, unsigned char *in)`
384	`{`
385	`struct state s; /* input/output state */`
386	`int err; /* return value */`
387
388	`/* initialize input state */`
389	`s.infun = infun;`
390	`s.inhow = inhow;`
391	`if (left != NULL && *left) {`
392	`s.left = *left;`
393	`s.in = *in;`
394	`}`
395	`else`
396	`s.left = 0;`
397	`s.bitbuf = 0;`
398	`s.bitcnt = 0;`
399
400	`/* initialize output state */`
401	`s.outfun = outfun;`
402	`s.outhow = outhow;`
403	`s.next = 0;`
404	`s.first = 1;`
405
406	`/* return if bits() or decode() tries to read past available input */`
407	`if (setjmp(s.env) != 0) /* if came back here via longjmp(), */`
408	`err = 2; /* then skip decomp(), return error */`
409	`else`
410	`err = decomp(&s); /* decompress */`
411
412	`/* return unused input */`
413	`if (left != NULL)`
414	`*left = s.left;`
415	`if (in != NULL)`
416	`*in = s.left ? s.in : NULL;`
417
418	`/* write any leftover output and update the error code if needed */`
419	`if (err != 1 && s.next && s.outfun(s.outhow, s.out, s.next) && err == 0)`
420	`err = 1;`
421	`return err;`
422	`}`
423

Fossil SCM

Keyboard Shortcuts