Fossil SCM

fossil-scm / compat / zlib / deflate.h
Blame History Raw 384 lines
1
/* deflate.h -- internal compression state
2
* Copyright (C) 1995-2026 Jean-loup Gailly
3
* For conditions of distribution and use, see copyright notice in zlib.h
4
*/
5
6
/* WARNING: this file should *not* be used by applications. It is
7
part of the implementation of the compression library and is
8
subject to change. Applications should only use zlib.h.
9
*/
10
11
/* @(#) $Id$ */
12
13
#ifndef DEFLATE_H
14
#define DEFLATE_H
15
16
#include "zutil.h"
17
18
/* Define NO_GZIP when compiling if you want to disable gzip header and
   trailer creation by deflate().  NO_GZIP would be used to avoid linking in
   the crc code when it is not needed.  For shared libraries, gzip encoding
   should be left enabled.  GZIP is therefore defined unless NO_GZIP is. */
#ifndef NO_GZIP
#  define GZIP
#endif
25
26
/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
27
the cost of a larger memory footprint */
28
/* #define LIT_MEM */
29
30
/* ===========================================================================
31
* Internal compression state.
32
*/
33
34
#define LENGTH_CODES 29
/* number of length codes, not counting the special END_BLOCK code */

#define LITERALS  256
/* number of literal bytes 0..255 */

#define L_CODES (LITERALS+1+LENGTH_CODES)
/* number of Literal or Length codes, including the END_BLOCK code */

#define D_CODES   30
/* number of distance codes */

#define BL_CODES  19
/* number of codes used to transfer the bit lengths */

#define HEAP_SIZE (2*L_CODES+1)
/* maximum heap size */

#define MAX_BITS 15
/* All codes must not exceed MAX_BITS bits */

#define Buf_size 16
/* size of bit buffer in bi_buf */
57
58
/* Stream status: values stored in deflate_state.status.  The arrow in each
 * comment names the state(s) that follow. */
#define INIT_STATE      42   /* zlib header -> BUSY_STATE */
#ifdef GZIP
#  define GZIP_STATE    57   /* gzip header -> BUSY_STATE | EXTRA_STATE */
#endif
#define EXTRA_STATE     69   /* gzip extra block -> NAME_STATE */
#define NAME_STATE      73   /* gzip file name -> COMMENT_STATE */
#define COMMENT_STATE   91   /* gzip comment -> HCRC_STATE */
#define HCRC_STATE     103   /* gzip header CRC -> BUSY_STATE */
#define BUSY_STATE     113   /* deflate -> FINISH_STATE */
#define FINISH_STATE   666   /* stream complete */
69
70
71
/* Data structure describing a single value and its code string. */
72
typedef struct ct_data_s {
73
union {
74
ush freq; /* frequency count */
75
ush code; /* bit string */
76
} fc;
77
union {
78
ush dad; /* father node in Huffman tree */
79
ush len; /* length of bit string */
80
} dl;
81
} FAR ct_data;
82
83
#define Freq fc.freq
84
#define Code fc.code
85
#define Dad dl.dad
86
#define Len dl.len
87
88
typedef struct static_tree_desc_s static_tree_desc;
89
90
typedef struct tree_desc_s {
91
ct_data *dyn_tree; /* the dynamic tree */
92
int max_code; /* largest code with non zero frequency */
93
const static_tree_desc *stat_desc; /* the corresponding static tree */
94
} FAR tree_desc;
95
96
typedef ush Pos;
97
typedef Pos FAR Posf;
98
typedef unsigned IPos;
99
100
/* A Pos is an index in the character window. We use short instead of int to
101
* save space in the various tables. IPos is used only for parameter passing.
102
*/
103
104
typedef struct internal_state {
105
z_streamp strm; /* pointer back to this zlib stream */
106
int status; /* as the name implies */
107
Bytef *pending_buf; /* output still pending */
108
ulg pending_buf_size; /* size of pending_buf */
109
Bytef *pending_out; /* next pending byte to output to the stream */
110
ulg pending; /* nb of bytes in the pending buffer */
111
int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
112
gz_headerp gzhead; /* gzip header information to write */
113
ulg gzindex; /* where in extra, name, or comment */
114
Byte method; /* can only be DEFLATED */
115
int last_flush; /* value of flush param for previous deflate call */
116
117
/* used by deflate.c: */
118
119
uInt w_size; /* LZ77 window size (32K by default) */
120
uInt w_bits; /* log2(w_size) (8..16) */
121
uInt w_mask; /* w_size - 1 */
122
123
Bytef *window;
124
/* Sliding window. Input bytes are read into the second half of the window,
125
* and move to the first half later to keep a dictionary of at least wSize
126
* bytes. With this organization, matches are limited to a distance of
127
* wSize-MAX_MATCH bytes, but this ensures that IO is always
128
* performed with a length multiple of the block size. Also, it limits
129
* the window size to 64K, which is quite useful on MSDOS.
130
* To do: use the user input buffer as sliding window.
131
*/
132
133
ulg window_size;
134
/* Actual size of window: 2*wSize, except when the user input buffer
135
* is directly used as sliding window.
136
*/
137
138
Posf *prev;
139
/* Link to older string with same hash index. To limit the size of this
140
* array to 64K, this link is maintained only for the last 32K strings.
141
* An index in this array is thus a window index modulo 32K.
142
*/
143
144
Posf *head; /* Heads of the hash chains or NIL. */
145
146
uInt ins_h; /* hash index of string to be inserted */
147
uInt hash_size; /* number of elements in hash table */
148
uInt hash_bits; /* log2(hash_size) */
149
uInt hash_mask; /* hash_size-1 */
150
151
uInt hash_shift;
152
/* Number of bits by which ins_h must be shifted at each input
153
* step. It must be such that after MIN_MATCH steps, the oldest
154
* byte no longer takes part in the hash key, that is:
155
* hash_shift * MIN_MATCH >= hash_bits
156
*/
157
158
long block_start;
159
/* Window position at the beginning of the current output block. Gets
160
* negative when the window is moved backwards.
161
*/
162
163
uInt match_length; /* length of best match */
164
IPos prev_match; /* previous match */
165
int match_available; /* set if previous match exists */
166
uInt strstart; /* start of string to insert */
167
uInt match_start; /* start of matching string */
168
uInt lookahead; /* number of valid bytes ahead in window */
169
170
uInt prev_length;
171
/* Length of the best match at previous step. Matches not greater than this
172
* are discarded. This is used in the lazy match evaluation.
173
*/
174
175
uInt max_chain_length;
176
/* To speed up deflation, hash chains are never searched beyond this
177
* length. A higher limit improves compression ratio but degrades the
178
* speed.
179
*/
180
181
uInt max_lazy_match;
182
/* Attempt to find a better match only when the current match is strictly
183
* smaller than this value. This mechanism is used only for compression
184
* levels >= 4.
185
*/
186
# define max_insert_length max_lazy_match
187
/* Insert new strings in the hash table only if the match length is not
188
* greater than this length. This saves time but degrades compression.
189
* max_insert_length is used only for compression levels <= 3.
190
*/
191
192
int level; /* compression level (1..9) */
193
int strategy; /* favor or force Huffman coding*/
194
195
uInt good_match;
196
/* Use a faster search when the previous match is longer than this */
197
198
int nice_match; /* Stop searching when current match exceeds this */
199
200
/* used by trees.c: */
201
/* Didn't use ct_data typedef below to suppress compiler warning */
202
struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
203
struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
204
struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
205
206
struct tree_desc_s l_desc; /* desc. for literal tree */
207
struct tree_desc_s d_desc; /* desc. for distance tree */
208
struct tree_desc_s bl_desc; /* desc. for bit length tree */
209
210
ush bl_count[MAX_BITS+1];
211
/* number of codes at each bit length for an optimal tree */
212
213
int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
214
int heap_len; /* number of elements in the heap */
215
int heap_max; /* element of largest frequency */
216
/* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
217
* The same heap array is used to build all trees.
218
*/
219
220
uch depth[2*L_CODES+1];
221
/* Depth of each subtree used as tie breaker for trees of equal frequency
222
*/
223
224
#ifdef LIT_MEM
225
# define LIT_BUFS 5
226
ushf *d_buf; /* buffer for distances */
227
uchf *l_buf; /* buffer for literals/lengths */
228
#else
229
# define LIT_BUFS 4
230
uchf *sym_buf; /* buffer for distances and literals/lengths */
231
#endif
232
233
uInt lit_bufsize;
234
/* Size of match buffer for literals/lengths. There are 4 reasons for
235
* limiting lit_bufsize to 64K:
236
* - frequencies can be kept in 16 bit counters
237
* - if compression is not successful for the first block, all input
238
* data is still in the window so we can still emit a stored block even
239
* when input comes from standard input. (This can also be done for
240
* all blocks if lit_bufsize is not greater than 32K.)
241
* - if compression is not successful for a file smaller than 64K, we can
242
* even emit a stored file instead of a stored block (saving 5 bytes).
243
* This is applicable only for zip (not gzip or zlib).
244
* - creating new Huffman trees less frequently may not provide fast
245
* adaptation to changes in the input data statistics. (Take for
246
* example a binary file with poorly compressible code followed by
247
* a highly compressible string table.) Smaller buffer sizes give
248
* fast adaptation but have of course the overhead of transmitting
249
* trees more frequently.
250
* - I can't count above 4
251
*/
252
253
uInt sym_next; /* running index in symbol buffer */
254
uInt sym_end; /* symbol table full when sym_next reaches this */
255
256
ulg opt_len; /* bit length of current block with optimal trees */
257
ulg static_len; /* bit length of current block with static trees */
258
uInt matches; /* number of string matches in current block */
259
uInt insert; /* bytes at end of window left to insert */
260
261
#ifdef ZLIB_DEBUG
262
ulg compressed_len; /* total bit length of compressed file mod 2^32 */
263
ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
264
#endif
265
266
ush bi_buf;
267
/* Output buffer. bits are inserted starting at the bottom (least
268
* significant bits).
269
*/
270
int bi_valid;
271
/* Number of valid bits in bi_buf. All bits above the last valid bit
272
* are always zero.
273
*/
274
int bi_used;
275
/* Last number of used bits when going to a byte boundary.
276
*/
277
278
ulg high_water;
279
/* High water mark offset in window for initialized bytes -- bytes above
280
* this are set to zero in order to avoid memory check warnings when
281
* longest match routines access bytes past the input. This is then
282
* updated to the new high water mark.
283
*/
284
285
int slid;
286
/* True if the hash table has been slid since it was cleared. */
287
288
} FAR deflate_state;
289
290
/* Output a byte on the stream: store c, converted to Bytef, at
 * pending_buf[pending] and advance pending by one.
 * IN assertion: there is enough room in pending_buf.
 * The macro expands to a single statement and must be followed by a
 * semicolon; s is evaluated twice, so it must not have side effects.
 */
#define put_byte(s, c) \
    do { \
        (s)->pending_buf[(s)->pending++] = (Bytef)(c); \
    } while (0)
294
295
296
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
/* Minimum amount of lookahead, except at the end of the input file.
 * See deflate.c for comments about the MIN_MATCH+1.
 */

#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
/* In order to simplify the code, particularly on 16 bit machines, match
 * distances are limited to MAX_DIST instead of WSIZE.
 */

#define WIN_INIT MAX_MATCH
/* Number of bytes after end of data in window to initialize in order to avoid
   memory checker errors from longest match routines */
309
310
/* in trees.c */
311
void ZLIB_INTERNAL _tr_init(deflate_state *s);
312
int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
313
void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
314
ulg stored_len, int last);
315
void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
316
void ZLIB_INTERNAL _tr_align(deflate_state *s);
317
void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
318
ulg stored_len, int last);
319
320
#define d_code(dist) \
   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
/* Mapping from a distance to a distance code. dist is the distance - 1 and
 * must not have side effects (it is evaluated twice). Values below 256 index
 * _dist_code directly; larger values index it at 256 + (dist >> 7).
 * _dist_code[256] and _dist_code[257] are never used.
 */
326
327
#ifndef ZLIB_DEBUG
328
/* Inline versions of _tr_tally for speed: */
329
330
#if defined(GEN_TREES_H) || !defined(STDC)
331
extern uch ZLIB_INTERNAL _length_code[];
332
extern uch ZLIB_INTERNAL _dist_code[];
333
#else
334
extern const uch ZLIB_INTERNAL _length_code[];
335
extern const uch ZLIB_INTERNAL _dist_code[];
336
#endif
337
338
#ifdef LIT_MEM
339
/* LIT_MEM form: record literal byte c.  Stores 0 in d_buf and c in l_buf at
 * sym_next, advances sym_next by one, bumps the literal's frequency, and
 * sets flush to whether sym_next has reached sym_end.  Expands to a single
 * statement; s is evaluated several times. */
# define _tr_tally_lit(s, c, flush) \
  do { \
    uch cc = (uch)(c); \
    (s)->d_buf[(s)->sym_next] = 0; \
    (s)->l_buf[(s)->sym_next++] = cc; \
    (s)->dyn_ltree[cc].Freq++; \
    flush = ((s)->sym_next == (s)->sym_end); \
  } while (0)
346
/* LIT_MEM form: record a match.  Stores distance in d_buf and length in
 * l_buf at sym_next, advances sym_next by one, bumps the frequencies of the
 * length code and of the distance code for distance - 1, and sets flush to
 * whether sym_next has reached sym_end.  Expands to a single statement; s is
 * evaluated several times. */
# define _tr_tally_dist(s, distance, length, flush) \
  do { \
    uch len = (uch)(length); \
    ush dist = (ush)(distance); \
    (s)->d_buf[(s)->sym_next] = dist; \
    (s)->l_buf[(s)->sym_next++] = len; \
    dist--; \
    (s)->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
    (s)->dyn_dtree[d_code(dist)].Freq++; \
    flush = ((s)->sym_next == (s)->sym_end); \
  } while (0)
356
#else
357
/* Record literal byte c in sym_buf as three bytes (0, 0, c), advancing
 * sym_next by three, bump the literal's frequency, and set flush to whether
 * sym_next has reached sym_end.  Expands to a single statement; s is
 * evaluated several times. */
# define _tr_tally_lit(s, c, flush) \
  do { \
    uch cc = (uch)(c); \
    (s)->sym_buf[(s)->sym_next++] = 0; \
    (s)->sym_buf[(s)->sym_next++] = 0; \
    (s)->sym_buf[(s)->sym_next++] = cc; \
    (s)->dyn_ltree[cc].Freq++; \
    flush = ((s)->sym_next == (s)->sym_end); \
  } while (0)
365
/* Record a match in sym_buf as three bytes (distance low byte, distance high
 * byte, length), advancing sym_next by three, bump the frequencies of the
 * length code and of the distance code for distance - 1, and set flush to
 * whether sym_next has reached sym_end.  Expands to a single statement; s is
 * evaluated several times. */
# define _tr_tally_dist(s, distance, length, flush) \
  do { \
    uch len = (uch)(length); \
    ush dist = (ush)(distance); \
    (s)->sym_buf[(s)->sym_next++] = (uch)dist; \
    (s)->sym_buf[(s)->sym_next++] = (uch)(dist >> 8); \
    (s)->sym_buf[(s)->sym_next++] = len; \
    dist--; \
    (s)->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
    (s)->dyn_dtree[d_code(dist)].Freq++; \
    flush = ((s)->sym_next == (s)->sym_end); \
  } while (0)
376
#endif
377
#else
378
/* ZLIB_DEBUG build: no inline tallying; both macros call _tr_tally() and
 * store its result in flush.  A literal is passed with a distance of 0. */
# define _tr_tally_lit(s, c, flush) ((flush) = _tr_tally((s), 0, (c)))
# define _tr_tally_dist(s, distance, length, flush) \
              ((flush) = _tr_tally((s), (distance), (length)))
381
#endif
382
383
#endif /* DEFLATE_H */
384

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button