Fossil SCM

fossil-scm / compat / zlib / contrib / infback9 / inftree9.c
Source Blame History 319 lines
7ef7284… drh 1 /* inftree9.c -- generate Huffman trees for efficient decoding
6ea30fb… florian 2 * Copyright (C) 1995-2026 Mark Adler
7ef7284… drh 3 * For conditions of distribution and use, see copyright notice in zlib.h
7ef7284… drh 4 */
7ef7284… drh 5
7ef7284… drh 6 #include "zutil.h"
7ef7284… drh 7 #include "inftree9.h"
7ef7284… drh 8
7ef7284… drh 9 #define MAXBITS 15
7ef7284… drh 10
7ef7284… drh 11 const char inflate9_copyright[] =
6ea30fb… florian 12 " inflate9 1.3.2 Copyright 1995-2026 Mark Adler ";
7ef7284… drh 13 /*
7ef7284… drh 14 If you use the zlib library in a product, an acknowledgment is welcome
7ef7284… drh 15 in the documentation of your product. If for some reason you cannot
7ef7284… drh 16 include such an acknowledgment, I would appreciate that you keep this
7ef7284… drh 17 copyright string in the executable of your product.
7ef7284… drh 18 */
7ef7284… drh 19
7ef7284… drh 20 /*
7ef7284… drh 21 Build a set of tables to decode the provided canonical Huffman code.
7ef7284… drh 22 The code lengths are lens[0..codes-1]. The result starts at *table,
7ef7284… drh 23 whose indices are 0..2^bits-1. work is a writable array of at least
7ef7284… drh 24 lens shorts, which is used as a work area. type is the type of code
7ef7284… drh 25 to be generated, CODES, LENS, or DISTS. On return, zero is success,
7ef7284… drh 26 -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
7ef7284… drh 27 on return points to the next available entry's address. bits is the
7ef7284… drh 28 requested root table index bits, and on return it is the actual root
7ef7284… drh 29 table index bits. It will differ if the request is greater than the
7ef7284… drh 30 longest code or if it is less than the shortest code.
7ef7284… drh 31 */
f1f1d6c… drh 32 int inflate_table9(codetype type, unsigned short FAR *lens, unsigned codes,
f1f1d6c… drh 33 code FAR * FAR *table, unsigned FAR *bits,
f1f1d6c… drh 34 unsigned short FAR *work) {
7ef7284… drh 35 unsigned len; /* a code's length in bits */
7ef7284… drh 36 unsigned sym; /* index of code symbols */
7ef7284… drh 37 unsigned min, max; /* minimum and maximum code lengths */
7ef7284… drh 38 unsigned root; /* number of index bits for root table */
7ef7284… drh 39 unsigned curr; /* number of index bits for current table */
7ef7284… drh 40 unsigned drop; /* code bits to drop for sub-table */
7ef7284… drh 41 int left; /* number of prefix codes available */
7ef7284… drh 42 unsigned used; /* code entries in table used */
7ef7284… drh 43 unsigned huff; /* Huffman code */
7ef7284… drh 44 unsigned incr; /* for incrementing code, index */
7ef7284… drh 45 unsigned fill; /* index for replicating entries */
7ef7284… drh 46 unsigned low; /* low bits for current root entry */
7ef7284… drh 47 unsigned mask; /* mask for low root bits */
7ef7284… drh 48 code this; /* table entry for duplication */
7ef7284… drh 49 code FAR *next; /* next available space in table */
7ef7284… drh 50 const unsigned short FAR *base; /* base value table to use */
7ef7284… drh 51 const unsigned short FAR *extra; /* extra bits table to use */
7ef7284… drh 52 int end; /* use base and extra for symbol > end */
7ef7284… drh 53 unsigned short count[MAXBITS+1]; /* number of codes of each length */
7ef7284… drh 54 unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
7ef7284… drh 55 static const unsigned short lbase[31] = { /* Length codes 257..285 base */
7ef7284… drh 56 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17,
7ef7284… drh 57 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115,
7ef7284… drh 58 131, 163, 195, 227, 3, 0, 0};
7ef7284… drh 59 static const unsigned short lext[31] = { /* Length codes 257..285 extra */
7ef7284… drh 60 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
7ef7284… drh 61 130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132,
6ea30fb… florian 62 133, 133, 133, 133, 144, 199, 75};
7ef7284… drh 63 static const unsigned short dbase[32] = { /* Distance codes 0..31 base */
7ef7284… drh 64 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49,
7ef7284… drh 65 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073,
7ef7284… drh 66 4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153};
7ef7284… drh 67 static const unsigned short dext[32] = { /* Distance codes 0..31 extra */
7ef7284… drh 68 128, 128, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132,
7ef7284… drh 69 133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138,
7ef7284… drh 70 139, 139, 140, 140, 141, 141, 142, 142};
7ef7284… drh 71
7ef7284… drh 72 /*
7ef7284… drh 73 Process a set of code lengths to create a canonical Huffman code. The
7ef7284… drh 74 code lengths are lens[0..codes-1]. Each length corresponds to the
7ef7284… drh 75 symbols 0..codes-1. The Huffman code is generated by first sorting the
7ef7284… drh 76 symbols by length from short to long, and retaining the symbol order
7ef7284… drh 77 for codes with equal lengths. Then the code starts with all zero bits
7ef7284… drh 78 for the first code of the shortest length, and the codes are integer
7ef7284… drh 79 increments for the same length, and zeros are appended as the length
7ef7284… drh 80 increases. For the deflate format, these bits are stored backwards
7ef7284… drh 81 from their more natural integer increment ordering, and so when the
7ef7284… drh 82 decoding tables are built in the large loop below, the integer codes
7ef7284… drh 83 are incremented backwards.
7ef7284… drh 84
7ef7284… drh 85 This routine assumes, but does not check, that all of the entries in
7ef7284… drh 86 lens[] are in the range 0..MAXBITS. The caller must assure this.
7ef7284… drh 87 1..MAXBITS is interpreted as that code length. zero means that that
7ef7284… drh 88 symbol does not occur in this code.
7ef7284… drh 89
7ef7284… drh 90 The codes are sorted by computing a count of codes for each length,
7ef7284… drh 91 creating from that a table of starting indices for each length in the
7ef7284… drh 92 sorted table, and then entering the symbols in order in the sorted
7ef7284… drh 93 table. The sorted table is work[], with that space being provided by
7ef7284… drh 94 the caller.
7ef7284… drh 95
7ef7284… drh 96 The length counts are used for other purposes as well, i.e. finding
7ef7284… drh 97 the minimum and maximum length codes, determining if there are any
7ef7284… drh 98 codes at all, checking for a valid set of lengths, and looking ahead
7ef7284… drh 99 at length counts to determine sub-table sizes when building the
7ef7284… drh 100 decoding tables.
7ef7284… drh 101 */
7ef7284… drh 102
7ef7284… drh 103 /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
7ef7284… drh 104 for (len = 0; len <= MAXBITS; len++)
7ef7284… drh 105 count[len] = 0;
7ef7284… drh 106 for (sym = 0; sym < codes; sym++)
7ef7284… drh 107 count[lens[sym]]++;
7ef7284… drh 108
7ef7284… drh 109 /* bound code lengths, force root to be within code lengths */
7ef7284… drh 110 root = *bits;
7ef7284… drh 111 for (max = MAXBITS; max >= 1; max--)
7ef7284… drh 112 if (count[max] != 0) break;
7ef7284… drh 113 if (root > max) root = max;
7ef7284… drh 114 if (max == 0) return -1; /* no codes! */
7ef7284… drh 115 for (min = 1; min <= MAXBITS; min++)
7ef7284… drh 116 if (count[min] != 0) break;
7ef7284… drh 117 if (root < min) root = min;
7ef7284… drh 118
7ef7284… drh 119 /* check for an over-subscribed or incomplete set of lengths */
7ef7284… drh 120 left = 1;
7ef7284… drh 121 for (len = 1; len <= MAXBITS; len++) {
7ef7284… drh 122 left <<= 1;
7ef7284… drh 123 left -= count[len];
7ef7284… drh 124 if (left < 0) return -1; /* over-subscribed */
7ef7284… drh 125 }
7ef7284… drh 126 if (left > 0 && (type == CODES || max != 1))
7ef7284… drh 127 return -1; /* incomplete set */
7ef7284… drh 128
7ef7284… drh 129 /* generate offsets into symbol table for each length for sorting */
7ef7284… drh 130 offs[1] = 0;
7ef7284… drh 131 for (len = 1; len < MAXBITS; len++)
7ef7284… drh 132 offs[len + 1] = offs[len] + count[len];
7ef7284… drh 133
7ef7284… drh 134 /* sort symbols by length, by symbol order within each length */
7ef7284… drh 135 for (sym = 0; sym < codes; sym++)
7ef7284… drh 136 if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
7ef7284… drh 137
7ef7284… drh 138 /*
7ef7284… drh 139 Create and fill in decoding tables. In this loop, the table being
7ef7284… drh 140 filled is at next and has curr index bits. The code being used is huff
7ef7284… drh 141 with length len. That code is converted to an index by dropping drop
7ef7284… drh 142 bits off of the bottom. For codes where len is less than drop + curr,
7ef7284… drh 143 those top drop + curr - len bits are incremented through all values to
7ef7284… drh 144 fill the table with replicated entries.
7ef7284… drh 145
7ef7284… drh 146 root is the number of index bits for the root table. When len exceeds
7ef7284… drh 147 root, sub-tables are created pointed to by the root entry with an index
7ef7284… drh 148 of the low root bits of huff. This is saved in low to check for when a
7ef7284… drh 149 new sub-table should be started. drop is zero when the root table is
7ef7284… drh 150 being filled, and drop is root when sub-tables are being filled.
7ef7284… drh 151
7ef7284… drh 152 When a new sub-table is needed, it is necessary to look ahead in the
7ef7284… drh 153 code lengths to determine what size sub-table is needed. The length
7ef7284… drh 154 counts are used for this, and so count[] is decremented as codes are
7ef7284… drh 155 entered in the tables.
7ef7284… drh 156
7ef7284… drh 157 used keeps track of how many table entries have been allocated from the
7ef7284… drh 158 provided *table space. It is checked for LENS and DIST tables against
7ef7284… drh 159 the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
7ef7284… drh 160 the initial root table size constants. See the comments in inftree9.h
7ef7284… drh 161 for more information.
7ef7284… drh 162
7ef7284… drh 163 sym increments through all symbols, and the loop terminates when
7ef7284… drh 164 all codes of length max, i.e. all codes, have been processed. This
7ef7284… drh 165 routine permits incomplete codes, so another loop after this one fills
7ef7284… drh 166 in the rest of the decoding tables with invalid code markers.
7ef7284… drh 167 */
7ef7284… drh 168
7ef7284… drh 169 /* set up for code type */
7ef7284… drh 170 switch (type) {
7ef7284… drh 171 case CODES:
7ef7284… drh 172 base = extra = work; /* dummy value--not used */
7ef7284… drh 173 end = 19;
7ef7284… drh 174 break;
7ef7284… drh 175 case LENS:
7ef7284… drh 176 base = lbase;
7ef7284… drh 177 base -= 257;
7ef7284… drh 178 extra = lext;
7ef7284… drh 179 extra -= 257;
7ef7284… drh 180 end = 256;
7ef7284… drh 181 break;
7ef7284… drh 182 default: /* DISTS */
7ef7284… drh 183 base = dbase;
7ef7284… drh 184 extra = dext;
7ef7284… drh 185 end = -1;
7ef7284… drh 186 }
7ef7284… drh 187
7ef7284… drh 188 /* initialize state for loop */
7ef7284… drh 189 huff = 0; /* starting code */
7ef7284… drh 190 sym = 0; /* starting code symbol */
7ef7284… drh 191 len = min; /* starting code length */
7ef7284… drh 192 next = *table; /* current table to fill in */
7ef7284… drh 193 curr = root; /* current table index bits */
7ef7284… drh 194 drop = 0; /* current bits to drop from code for index */
7ef7284… drh 195 low = (unsigned)(-1); /* trigger new sub-table when len > root */
7ef7284… drh 196 used = 1U << root; /* use root table entries */
7ef7284… drh 197 mask = used - 1; /* mask for comparing low */
7ef7284… drh 198
7ef7284… drh 199 /* check available table space */
7ef7284… drh 200 if ((type == LENS && used >= ENOUGH_LENS) ||
7ef7284… drh 201 (type == DISTS && used >= ENOUGH_DISTS))
7ef7284… drh 202 return 1;
7ef7284… drh 203
7ef7284… drh 204 /* process all codes and make table entries */
7ef7284… drh 205 for (;;) {
7ef7284… drh 206 /* create table entry */
7ef7284… drh 207 this.bits = (unsigned char)(len - drop);
7ef7284… drh 208 if ((int)(work[sym]) < end) {
7ef7284… drh 209 this.op = (unsigned char)0;
7ef7284… drh 210 this.val = work[sym];
7ef7284… drh 211 }
7ef7284… drh 212 else if ((int)(work[sym]) > end) {
7ef7284… drh 213 this.op = (unsigned char)(extra[work[sym]]);
7ef7284… drh 214 this.val = base[work[sym]];
7ef7284… drh 215 }
7ef7284… drh 216 else {
7ef7284… drh 217 this.op = (unsigned char)(32 + 64); /* end of block */
7ef7284… drh 218 this.val = 0;
7ef7284… drh 219 }
7ef7284… drh 220
7ef7284… drh 221 /* replicate for those indices with low len bits equal to huff */
7ef7284… drh 222 incr = 1U << (len - drop);
7ef7284… drh 223 fill = 1U << curr;
7ef7284… drh 224 do {
7ef7284… drh 225 fill -= incr;
7ef7284… drh 226 next[(huff >> drop) + fill] = this;
7ef7284… drh 227 } while (fill != 0);
7ef7284… drh 228
7ef7284… drh 229 /* backwards increment the len-bit code huff */
7ef7284… drh 230 incr = 1U << (len - 1);
7ef7284… drh 231 while (huff & incr)
7ef7284… drh 232 incr >>= 1;
7ef7284… drh 233 if (incr != 0) {
7ef7284… drh 234 huff &= incr - 1;
7ef7284… drh 235 huff += incr;
7ef7284… drh 236 }
7ef7284… drh 237 else
7ef7284… drh 238 huff = 0;
7ef7284… drh 239
7ef7284… drh 240 /* go to next symbol, update count, len */
7ef7284… drh 241 sym++;
7ef7284… drh 242 if (--(count[len]) == 0) {
7ef7284… drh 243 if (len == max) break;
7ef7284… drh 244 len = lens[work[sym]];
7ef7284… drh 245 }
7ef7284… drh 246
7ef7284… drh 247 /* create new sub-table if needed */
7ef7284… drh 248 if (len > root && (huff & mask) != low) {
7ef7284… drh 249 /* if first time, transition to sub-tables */
7ef7284… drh 250 if (drop == 0)
7ef7284… drh 251 drop = root;
7ef7284… drh 252
7ef7284… drh 253 /* increment past last table */
7ef7284… drh 254 next += 1U << curr;
7ef7284… drh 255
7ef7284… drh 256 /* determine length of next table */
7ef7284… drh 257 curr = len - drop;
7ef7284… drh 258 left = (int)(1 << curr);
7ef7284… drh 259 while (curr + drop < max) {
7ef7284… drh 260 left -= count[curr + drop];
7ef7284… drh 261 if (left <= 0) break;
7ef7284… drh 262 curr++;
7ef7284… drh 263 left <<= 1;
7ef7284… drh 264 }
7ef7284… drh 265
7ef7284… drh 266 /* check for enough space */
7ef7284… drh 267 used += 1U << curr;
7ef7284… drh 268 if ((type == LENS && used >= ENOUGH_LENS) ||
7ef7284… drh 269 (type == DISTS && used >= ENOUGH_DISTS))
7ef7284… drh 270 return 1;
7ef7284… drh 271
7ef7284… drh 272 /* point entry in root table to sub-table */
7ef7284… drh 273 low = huff & mask;
7ef7284… drh 274 (*table)[low].op = (unsigned char)curr;
7ef7284… drh 275 (*table)[low].bits = (unsigned char)root;
7ef7284… drh 276 (*table)[low].val = (unsigned short)(next - *table);
7ef7284… drh 277 }
7ef7284… drh 278 }
7ef7284… drh 279
7ef7284… drh 280 /*
7ef7284… drh 281 Fill in rest of table for incomplete codes. This loop is similar to the
7ef7284… drh 282 loop above in incrementing huff for table indices. It is assumed that
7ef7284… drh 283 len is equal to curr + drop, so there is no loop needed to increment
7ef7284… drh 284 through high index bits. When the current sub-table is filled, the loop
7ef7284… drh 285 drops back to the root table to fill in any remaining entries there.
7ef7284… drh 286 */
7ef7284… drh 287 this.op = (unsigned char)64; /* invalid code marker */
7ef7284… drh 288 this.bits = (unsigned char)(len - drop);
7ef7284… drh 289 this.val = (unsigned short)0;
7ef7284… drh 290 while (huff != 0) {
7ef7284… drh 291 /* when done with sub-table, drop back to root table */
7ef7284… drh 292 if (drop != 0 && (huff & mask) != low) {
7ef7284… drh 293 drop = 0;
7ef7284… drh 294 len = root;
7ef7284… drh 295 next = *table;
7ef7284… drh 296 curr = root;
7ef7284… drh 297 this.bits = (unsigned char)len;
7ef7284… drh 298 }
7ef7284… drh 299
7ef7284… drh 300 /* put invalid code marker in table */
7ef7284… drh 301 next[huff >> drop] = this;
7ef7284… drh 302
7ef7284… drh 303 /* backwards increment the len-bit code huff */
7ef7284… drh 304 incr = 1U << (len - 1);
7ef7284… drh 305 while (huff & incr)
7ef7284… drh 306 incr >>= 1;
7ef7284… drh 307 if (incr != 0) {
7ef7284… drh 308 huff &= incr - 1;
7ef7284… drh 309 huff += incr;
7ef7284… drh 310 }
7ef7284… drh 311 else
7ef7284… drh 312 huff = 0;
7ef7284… drh 313 }
7ef7284… drh 314
7ef7284… drh 315 /* set return parameters */
7ef7284… drh 316 *table += used;
7ef7284… drh 317 *bits = root;
7ef7284… drh 318 return 0;
7ef7284… drh 319 }

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button