Fossil SCM

fossil-scm / compat / zlib / deflate.h
Source Blame History 383 lines
7ef7284… drh 1 /* deflate.h -- internal compression state
6ea30fb… florian 2 * Copyright (C) 1995-2026 Jean-loup Gailly
7ef7284… drh 3 * For conditions of distribution and use, see copyright notice in zlib.h
7ef7284… drh 4 */
7ef7284… drh 5
7ef7284… drh 6 /* WARNING: this file should *not* be used by applications. It is
7ef7284… drh 7 part of the implementation of the compression library and is
7ef7284… drh 8 subject to change. Applications should only use zlib.h.
7ef7284… drh 9 */
7ef7284… drh 10
7ef7284… drh 11 /* @(#) $Id$ */
7ef7284… drh 12
7ef7284… drh 13 #ifndef DEFLATE_H
7ef7284… drh 14 #define DEFLATE_H
7ef7284… drh 15
7ef7284… drh 16 #include "zutil.h"
7ef7284… drh 17
7ef7284… drh 18 /* define NO_GZIP when compiling if you want to disable gzip header and
7ef7284… drh 19 trailer creation by deflate(). NO_GZIP would be used to avoid linking in
7ef7284… drh 20 the crc code when it is not needed. For shared libraries, gzip encoding
7ef7284… drh 21 should be left enabled. */
7ef7284… drh 22 #ifndef NO_GZIP
7ef7284… drh 23 # define GZIP
7ef7284… drh 24 #endif
64ce68d… drh 25
64ce68d… drh 26 /* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
64ce68d… drh 27 the cost of a larger memory footprint */
64ce68d… drh 28 /* #define LIT_MEM */
7ef7284… drh 29
7ef7284… drh 30 /* ===========================================================================
7ef7284… drh 31 * Internal compression state.
7ef7284… drh 32 */
7ef7284… drh 33
7ef7284… drh 34 #define LENGTH_CODES 29
7ef7284… drh 35 /* number of length codes, not counting the special END_BLOCK code */
7ef7284… drh 36
7ef7284… drh 37 #define LITERALS 256
7ef7284… drh 38 /* number of literal bytes 0..255 */
7ef7284… drh 39
7ef7284… drh 40 #define L_CODES (LITERALS+1+LENGTH_CODES)
7ef7284… drh 41 /* number of Literal or Length codes, including the END_BLOCK code */
7ef7284… drh 42
7ef7284… drh 43 #define D_CODES 30
7ef7284… drh 44 /* number of distance codes */
7ef7284… drh 45
7ef7284… drh 46 #define BL_CODES 19
7ef7284… drh 47 /* number of codes used to transfer the bit lengths */
7ef7284… drh 48
7ef7284… drh 49 #define HEAP_SIZE (2*L_CODES+1)
7ef7284… drh 50 /* maximum heap size */
7ef7284… drh 51
7ef7284… drh 52 #define MAX_BITS 15
7ef7284… drh 53 /* All codes must not exceed MAX_BITS bits */
7ef7284… drh 54
7ef7284… drh 55 #define Buf_size 16
7ef7284… drh 56 /* size of bit buffer in bi_buf */
7ef7284… drh 57
e38d5e1… jan.nijtmans 58 #define INIT_STATE 42 /* zlib header -> BUSY_STATE */
e38d5e1… jan.nijtmans 59 #ifdef GZIP
e38d5e1… jan.nijtmans 60 # define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */
e38d5e1… jan.nijtmans 61 #endif
e38d5e1… jan.nijtmans 62 #define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */
e38d5e1… jan.nijtmans 63 #define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */
e38d5e1… jan.nijtmans 64 #define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */
e38d5e1… jan.nijtmans 65 #define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */
e38d5e1… jan.nijtmans 66 #define BUSY_STATE 113 /* deflate -> FINISH_STATE */
e38d5e1… jan.nijtmans 67 #define FINISH_STATE 666 /* stream complete */
7ef7284… drh 68 /* Stream status */
7ef7284… drh 69
7ef7284… drh 70
7ef7284… drh 71 /* Data structure describing a single value and its code string. */
7ef7284… drh 72 typedef struct ct_data_s {
7ef7284… drh 73 union {
7ef7284… drh 74 ush freq; /* frequency count */
7ef7284… drh 75 ush code; /* bit string */
7ef7284… drh 76 } fc;
7ef7284… drh 77 union {
7ef7284… drh 78 ush dad; /* father node in Huffman tree */
7ef7284… drh 79 ush len; /* length of bit string */
7ef7284… drh 80 } dl;
7ef7284… drh 81 } FAR ct_data;
7ef7284… drh 82
7ef7284… drh 83 #define Freq fc.freq
7ef7284… drh 84 #define Code fc.code
7ef7284… drh 85 #define Dad dl.dad
7ef7284… drh 86 #define Len dl.len
7ef7284… drh 87
7ef7284… drh 88 typedef struct static_tree_desc_s static_tree_desc;
7ef7284… drh 89
7ef7284… drh 90 typedef struct tree_desc_s {
7ef7284… drh 91 ct_data *dyn_tree; /* the dynamic tree */
7ef7284… drh 92 int max_code; /* largest code with non zero frequency */
e38d5e1… jan.nijtmans 93 const static_tree_desc *stat_desc; /* the corresponding static tree */
7ef7284… drh 94 } FAR tree_desc;
7ef7284… drh 95
7ef7284… drh 96 typedef ush Pos;
7ef7284… drh 97 typedef Pos FAR Posf;
7ef7284… drh 98 typedef unsigned IPos;
7ef7284… drh 99
7ef7284… drh 100 /* A Pos is an index in the character window. We use short instead of int to
7ef7284… drh 101 * save space in the various tables. IPos is used only for parameter passing.
7ef7284… drh 102 */
7ef7284… drh 103
7ef7284… drh 104 typedef struct internal_state {
7ef7284… drh 105 z_streamp strm; /* pointer back to this zlib stream */
7ef7284… drh 106 int status; /* as the name implies */
7ef7284… drh 107 Bytef *pending_buf; /* output still pending */
7ef7284… drh 108 ulg pending_buf_size; /* size of pending_buf */
7ef7284… drh 109 Bytef *pending_out; /* next pending byte to output to the stream */
e38d5e1… jan.nijtmans 110 ulg pending; /* nb of bytes in the pending buffer */
7ef7284… drh 111 int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
7ef7284… drh 112 gz_headerp gzhead; /* gzip header information to write */
e38d5e1… jan.nijtmans 113 ulg gzindex; /* where in extra, name, or comment */
bb4776e… jan.nijtmans 114 Byte method; /* can only be DEFLATED */
7ef7284… drh 115 int last_flush; /* value of flush param for previous deflate call */
7ef7284… drh 116
7ef7284… drh 117 /* used by deflate.c: */
7ef7284… drh 118
7ef7284… drh 119 uInt w_size; /* LZ77 window size (32K by default) */
7ef7284… drh 120 uInt w_bits; /* log2(w_size) (8..16) */
7ef7284… drh 121 uInt w_mask; /* w_size - 1 */
7ef7284… drh 122
7ef7284… drh 123 Bytef *window;
7ef7284… drh 124 /* Sliding window. Input bytes are read into the second half of the window,
7ef7284… drh 125 * and move to the first half later to keep a dictionary of at least wSize
7ef7284… drh 126 * bytes. With this organization, matches are limited to a distance of
7ef7284… drh 127 * wSize-MAX_MATCH bytes, but this ensures that IO is always
7ef7284… drh 128 * performed with a length multiple of the block size. Also, it limits
7ef7284… drh 129 * the window size to 64K, which is quite useful on MSDOS.
7ef7284… drh 130 * To do: use the user input buffer as sliding window.
7ef7284… drh 131 */
7ef7284… drh 132
7ef7284… drh 133 ulg window_size;
7ef7284… drh 134 /* Actual size of window: 2*wSize, except when the user input buffer
7ef7284… drh 135 * is directly used as sliding window.
7ef7284… drh 136 */
7ef7284… drh 137
7ef7284… drh 138 Posf *prev;
7ef7284… drh 139 /* Link to older string with same hash index. To limit the size of this
7ef7284… drh 140 * array to 64K, this link is maintained only for the last 32K strings.
7ef7284… drh 141 * An index in this array is thus a window index modulo 32K.
7ef7284… drh 142 */
7ef7284… drh 143
7ef7284… drh 144 Posf *head; /* Heads of the hash chains or NIL. */
7ef7284… drh 145
7ef7284… drh 146 uInt ins_h; /* hash index of string to be inserted */
7ef7284… drh 147 uInt hash_size; /* number of elements in hash table */
7ef7284… drh 148 uInt hash_bits; /* log2(hash_size) */
7ef7284… drh 149 uInt hash_mask; /* hash_size-1 */
7ef7284… drh 150
7ef7284… drh 151 uInt hash_shift;
7ef7284… drh 152 /* Number of bits by which ins_h must be shifted at each input
7ef7284… drh 153 * step. It must be such that after MIN_MATCH steps, the oldest
7ef7284… drh 154 * byte no longer takes part in the hash key, that is:
7ef7284… drh 155 * hash_shift * MIN_MATCH >= hash_bits
7ef7284… drh 156 */
7ef7284… drh 157
7ef7284… drh 158 long block_start;
7ef7284… drh 159 /* Window position at the beginning of the current output block. Gets
7ef7284… drh 160 * negative when the window is moved backwards.
7ef7284… drh 161 */
7ef7284… drh 162
7ef7284… drh 163 uInt match_length; /* length of best match */
7ef7284… drh 164 IPos prev_match; /* previous match */
7ef7284… drh 165 int match_available; /* set if previous match exists */
7ef7284… drh 166 uInt strstart; /* start of string to insert */
7ef7284… drh 167 uInt match_start; /* start of matching string */
7ef7284… drh 168 uInt lookahead; /* number of valid bytes ahead in window */
7ef7284… drh 169
7ef7284… drh 170 uInt prev_length;
7ef7284… drh 171 /* Length of the best match at previous step. Matches not greater than this
7ef7284… drh 172 * are discarded. This is used in the lazy match evaluation.
7ef7284… drh 173 */
7ef7284… drh 174
7ef7284… drh 175 uInt max_chain_length;
7ef7284… drh 176 /* To speed up deflation, hash chains are never searched beyond this
7ef7284… drh 177 * length. A higher limit improves compression ratio but degrades the
7ef7284… drh 178 * speed.
7ef7284… drh 179 */
7ef7284… drh 180
7ef7284… drh 181 uInt max_lazy_match;
7ef7284… drh 182 /* Attempt to find a better match only when the current match is strictly
7ef7284… drh 183 * smaller than this value. This mechanism is used only for compression
7ef7284… drh 184 * levels >= 4.
7ef7284… drh 185 */
7ef7284… drh 186 # define max_insert_length max_lazy_match
7ef7284… drh 187 /* Insert new strings in the hash table only if the match length is not
7ef7284… drh 188 * greater than this length. This saves time but degrades compression.
7ef7284… drh 189 * max_insert_length is used only for compression levels <= 3.
7ef7284… drh 190 */
7ef7284… drh 191
7ef7284… drh 192 int level; /* compression level (1..9) */
7ef7284… drh 193 int strategy; /* favor or force Huffman coding*/
7ef7284… drh 194
7ef7284… drh 195 uInt good_match;
7ef7284… drh 196 /* Use a faster search when the previous match is longer than this */
7ef7284… drh 197
7ef7284… drh 198 int nice_match; /* Stop searching when current match exceeds this */
7ef7284… drh 199
7ef7284… drh 200 /* used by trees.c: */
7ef7284… drh 201 /* Didn't use ct_data typedef below to suppress compiler warning */
7ef7284… drh 202 struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
7ef7284… drh 203 struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
7ef7284… drh 204 struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
7ef7284… drh 205
7ef7284… drh 206 struct tree_desc_s l_desc; /* desc. for literal tree */
7ef7284… drh 207 struct tree_desc_s d_desc; /* desc. for distance tree */
7ef7284… drh 208 struct tree_desc_s bl_desc; /* desc. for bit length tree */
7ef7284… drh 209
7ef7284… drh 210 ush bl_count[MAX_BITS+1];
7ef7284… drh 211 /* number of codes at each bit length for an optimal tree */
7ef7284… drh 212
7ef7284… drh 213 int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
7ef7284… drh 214 int heap_len; /* number of elements in the heap */
7ef7284… drh 215 int heap_max; /* element of largest frequency */
7ef7284… drh 216 /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
7ef7284… drh 217 * The same heap array is used to build all trees.
7ef7284… drh 218 */
7ef7284… drh 219
7ef7284… drh 220 uch depth[2*L_CODES+1];
7ef7284… drh 221 /* Depth of each subtree used as tie breaker for trees of equal frequency
7ef7284… drh 222 */
7ef7284… drh 223
64ce68d… drh 224 #ifdef LIT_MEM
64ce68d… drh 225 # define LIT_BUFS 5
64ce68d… drh 226 ushf *d_buf; /* buffer for distances */
64ce68d… drh 227 uchf *l_buf; /* buffer for literals/lengths */
64ce68d… drh 228 #else
64ce68d… drh 229 # define LIT_BUFS 4
adb9e8e… drh 230 uchf *sym_buf; /* buffer for distances and literals/lengths */
64ce68d… drh 231 #endif
7ef7284… drh 232
7ef7284… drh 233 uInt lit_bufsize;
7ef7284… drh 234 /* Size of match buffer for literals/lengths. There are 4 reasons for
7ef7284… drh 235 * limiting lit_bufsize to 64K:
7ef7284… drh 236 * - frequencies can be kept in 16 bit counters
7ef7284… drh 237 * - if compression is not successful for the first block, all input
7ef7284… drh 238 * data is still in the window so we can still emit a stored block even
7ef7284… drh 239 * when input comes from standard input. (This can also be done for
7ef7284… drh 240 * all blocks if lit_bufsize is not greater than 32K.)
7ef7284… drh 241 * - if compression is not successful for a file smaller than 64K, we can
7ef7284… drh 242 * even emit a stored file instead of a stored block (saving 5 bytes).
7ef7284… drh 243 * This is applicable only for zip (not gzip or zlib).
7ef7284… drh 244 * - creating new Huffman trees less frequently may not provide fast
7ef7284… drh 245 * adaptation to changes in the input data statistics. (Take for
7ef7284… drh 246 * example a binary file with poorly compressible code followed by
7ef7284… drh 247 * a highly compressible string table.) Smaller buffer sizes give
7ef7284… drh 248 * fast adaptation but have of course the overhead of transmitting
7ef7284… drh 249 * trees more frequently.
7ef7284… drh 250 * - I can't count above 4
7ef7284… drh 251 */
7ef7284… drh 252
64ce68d… drh 253 uInt sym_next; /* running index in symbol buffer */
adb9e8e… drh 254 uInt sym_end; /* symbol table full when sym_next reaches this */
7ef7284… drh 255
7ef7284… drh 256 ulg opt_len; /* bit length of current block with optimal trees */
7ef7284… drh 257 ulg static_len; /* bit length of current block with static trees */
7ef7284… drh 258 uInt matches; /* number of string matches in current block */
7ef7284… drh 259 uInt insert; /* bytes at end of window left to insert */
7ef7284… drh 260
e38d5e1… jan.nijtmans 261 #ifdef ZLIB_DEBUG
7ef7284… drh 262 ulg compressed_len; /* total bit length of compressed file mod 2^32 */
7ef7284… drh 263 ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
7ef7284… drh 264 #endif
7ef7284… drh 265
7ef7284… drh 266 ush bi_buf;
7ef7284… drh 267 /* Output buffer. bits are inserted starting at the bottom (least
7ef7284… drh 268 * significant bits).
7ef7284… drh 269 */
7ef7284… drh 270 int bi_valid;
7ef7284… drh 271 /* Number of valid bits in bi_buf. All bits above the last valid bit
7ef7284… drh 272 * are always zero.
7ef7284… drh 273 */
6ea30fb… florian 274 int bi_used;
6ea30fb… florian 275 /* Last number of used bits when going to a byte boundary.
6ea30fb… florian 276 */
7ef7284… drh 277
7ef7284… drh 278 ulg high_water;
7ef7284… drh 279 /* High water mark offset in window for initialized bytes -- bytes above
7ef7284… drh 280 * this are set to zero in order to avoid memory check warnings when
7ef7284… drh 281 * longest match routines access bytes past the input. This is then
7ef7284… drh 282 * updated to the new high water mark.
7ef7284… drh 283 */
6ea30fb… florian 284
6ea30fb… florian 285 int slid;
6ea30fb… florian 286 /* True if the hash table has been slid since it was cleared. */
e38d5e1… jan.nijtmans 287
7ef7284… drh 288 } FAR deflate_state;
7ef7284… drh 289
7ef7284… drh 290 /* Output a byte on the stream.
7ef7284… drh 291 * IN assertion: there is enough room in pending_buf.
7ef7284… drh 292 */
e38d5e1… jan.nijtmans 293 #define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);}
7ef7284… drh 294
7ef7284… drh 295
7ef7284… drh 296 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
7ef7284… drh 297 /* Minimum amount of lookahead, except at the end of the input file.
7ef7284… drh 298 * See deflate.c for comments about the MIN_MATCH+1.
7ef7284… drh 299 */
7ef7284… drh 300
7ef7284… drh 301 #define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
7ef7284… drh 302 /* In order to simplify the code, particularly on 16 bit machines, match
7ef7284… drh 303 * distances are limited to MAX_DIST instead of WSIZE.
7ef7284… drh 304 */
7ef7284… drh 305
7ef7284… drh 306 #define WIN_INIT MAX_MATCH
7ef7284… drh 307 /* Number of bytes after end of data in window to initialize in order to avoid
7ef7284… drh 308 memory checker errors from longest match routines */
7ef7284… drh 309
7ef7284… drh 310 /* in trees.c */
f1f1d6c… drh 311 void ZLIB_INTERNAL _tr_init(deflate_state *s);
f1f1d6c… drh 312 int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
f1f1d6c… drh 313 void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
f1f1d6c… drh 314 ulg stored_len, int last);
f1f1d6c… drh 315 void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
f1f1d6c… drh 316 void ZLIB_INTERNAL _tr_align(deflate_state *s);
f1f1d6c… drh 317 void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
f1f1d6c… drh 318 ulg stored_len, int last);
7ef7284… drh 319
7ef7284… drh 320 #define d_code(dist) \
7ef7284… drh 321 ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
7ef7284… drh 322 /* Mapping from a distance to a distance code. dist is the distance - 1 and
7ef7284… drh 323 * must not have side effects. _dist_code[256] and _dist_code[257] are never
7ef7284… drh 324 * used.
7ef7284… drh 325 */
7ef7284… drh 326
e38d5e1… jan.nijtmans 327 #ifndef ZLIB_DEBUG
7ef7284… drh 328 /* Inline versions of _tr_tally for speed: */
7ef7284… drh 329
7ef7284… drh 330 #if defined(GEN_TREES_H) || !defined(STDC)
7ef7284… drh 331 extern uch ZLIB_INTERNAL _length_code[];
7ef7284… drh 332 extern uch ZLIB_INTERNAL _dist_code[];
7ef7284… drh 333 #else
7ef7284… drh 334 extern const uch ZLIB_INTERNAL _length_code[];
7ef7284… drh 335 extern const uch ZLIB_INTERNAL _dist_code[];
7ef7284… drh 336 #endif
7ef7284… drh 337
64ce68d… drh 338 #ifdef LIT_MEM
64ce68d… drh 339 # define _tr_tally_lit(s, c, flush) \
64ce68d… drh 340 { uch cc = (c); \
64ce68d… drh 341 s->d_buf[s->sym_next] = 0; \
64ce68d… drh 342 s->l_buf[s->sym_next++] = cc; \
64ce68d… drh 343 s->dyn_ltree[cc].Freq++; \
64ce68d… drh 344 flush = (s->sym_next == s->sym_end); \
64ce68d… drh 345 }
64ce68d… drh 346 # define _tr_tally_dist(s, distance, length, flush) \
64ce68d… drh 347 { uch len = (uch)(length); \
64ce68d… drh 348 ush dist = (ush)(distance); \
64ce68d… drh 349 s->d_buf[s->sym_next] = dist; \
64ce68d… drh 350 s->l_buf[s->sym_next++] = len; \
64ce68d… drh 351 dist--; \
64ce68d… drh 352 s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
64ce68d… drh 353 s->dyn_dtree[d_code(dist)].Freq++; \
64ce68d… drh 354 flush = (s->sym_next == s->sym_end); \
64ce68d… drh 355 }
64ce68d… drh 356 #else
7ef7284… drh 357 # define _tr_tally_lit(s, c, flush) \
7ef7284… drh 358 { uch cc = (c); \
adb9e8e… drh 359 s->sym_buf[s->sym_next++] = 0; \
adb9e8e… drh 360 s->sym_buf[s->sym_next++] = 0; \
adb9e8e… drh 361 s->sym_buf[s->sym_next++] = cc; \
7ef7284… drh 362 s->dyn_ltree[cc].Freq++; \
adb9e8e… drh 363 flush = (s->sym_next == s->sym_end); \
7ef7284… drh 364 }
7ef7284… drh 365 # define _tr_tally_dist(s, distance, length, flush) \
e38d5e1… jan.nijtmans 366 { uch len = (uch)(length); \
e38d5e1… jan.nijtmans 367 ush dist = (ush)(distance); \
a9e589c… florian 368 s->sym_buf[s->sym_next++] = (uch)dist; \
a9e589c… florian 369 s->sym_buf[s->sym_next++] = (uch)(dist >> 8); \
adb9e8e… drh 370 s->sym_buf[s->sym_next++] = len; \
7ef7284… drh 371 dist--; \
7ef7284… drh 372 s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
7ef7284… drh 373 s->dyn_dtree[d_code(dist)].Freq++; \
adb9e8e… drh 374 flush = (s->sym_next == s->sym_end); \
7ef7284… drh 375 }
64ce68d… drh 376 #endif
7ef7284… drh 377 #else
7ef7284… drh 378 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
7ef7284… drh 379 # define _tr_tally_dist(s, distance, length, flush) \
7ef7284… drh 380 flush = _tr_tally(s, distance, length)
7ef7284… drh 381 #endif
7ef7284… drh 382
7ef7284… drh 383 #endif /* DEFLATE_H */

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button