Fossil SCM

fossil-scm / compat / zlib / examples / gznorm.c
Source Blame History 474 lines
adb9e8e… drh 1 /* gznorm.c -- normalize a gzip stream
adb9e8e… drh 2 * Copyright (C) 2018 Mark Adler
adb9e8e… drh 3 * For conditions of distribution and use, see copyright notice in zlib.h
adb9e8e… drh 4 * Version 1.0 7 Oct 2018 Mark Adler */
adb9e8e… drh 5
adb9e8e… drh 6 // gznorm takes a gzip stream, potentially containing multiple members, and
adb9e8e… drh 7 // converts it to a gzip stream with a single member. In addition the gzip
adb9e8e… drh 8 // header is normalized, removing the file name and time stamp, and setting the
adb9e8e… drh 9 // other header contents (XFL, OS) to fixed values. gznorm does not recompress
adb9e8e… drh 10 // the data, so it is fast, but no advantage is gained from the history that
adb9e8e… drh 11 // could be available across member boundaries.
6ea30fb… florian 12
6ea30fb… florian 13 #if defined(_WIN32) && !defined(_CRT_NONSTDC_NO_DEPRECATE)
6ea30fb… florian 14 # define _CRT_NONSTDC_NO_DEPRECATE
6ea30fb… florian 15 #endif
adb9e8e… drh 16
adb9e8e… drh 17 #include <stdio.h> // fread, fwrite, putc, fflush, ferror, fprintf,
adb9e8e… drh 18 // vsnprintf, stdout, stderr, NULL, FILE
adb9e8e… drh 19 #include <stdlib.h> // malloc, free
adb9e8e… drh 20 #include <string.h> // strerror
adb9e8e… drh 21 #include <errno.h> // errno
adb9e8e… drh 22 #include <stdarg.h> // va_list, va_start, va_end
adb9e8e… drh 23 #include "zlib.h" // inflateInit2, inflate, inflateReset, inflateEnd,
adb9e8e… drh 24 // z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK,
adb9e8e… drh 25 // Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR,
adb9e8e… drh 26 // Z_MEM_ERROR
adb9e8e… drh 27
adb9e8e… drh 28 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
adb9e8e… drh 29 # include <fcntl.h>
adb9e8e… drh 30 # include <io.h>
adb9e8e… drh 31 # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
adb9e8e… drh 32 #else
adb9e8e… drh 33 # define SET_BINARY_MODE(file)
adb9e8e… drh 34 #endif
adb9e8e… drh 35
#define local static

// printf to an allocated string. Return the string, or NULL if the printf or
// allocation fails. The caller owns the returned string and releases it with
// free(). fmt and the variable arguments follow the printf() conventions.
local char *aprintf(const char *fmt, ...) {
    // Get the length of the result of the printf, not counting the
    // terminating nul.
    va_list args;
    va_start(args, fmt);
    int len = vsnprintf(NULL, 0, fmt, args);
    va_end(args);
    if (len < 0)
        return NULL;

    // Allocate the required space, including the terminating nul.
    char *str = malloc((size_t)len + 1);
    if (str == NULL)
        return NULL;

    // printf to the allocated space. The argument list has to be restarted,
    // since the first vsnprintf() consumed it.
    va_start(args, fmt);
    int wrote = vsnprintf(str, (size_t)len + 1, fmt, args);
    va_end(args);
    if (wrote != len) {
        // The second pass failed or disagreed with the measured length, so
        // the buffer contents cannot be trusted.
        free(str);
        return NULL;
    }
    return str;
}
adb9e8e… drh 58
adb9e8e… drh 59 // Return with an error, putting an allocated error message in *err. Doing an
adb9e8e… drh 60 // inflateEnd() on an already ended state, or one with state set to Z_NULL, is
adb9e8e… drh 61 // permitted.
adb9e8e… drh 62 #define BYE(...) \
adb9e8e… drh 63 do { \
adb9e8e… drh 64 inflateEnd(&strm); \
adb9e8e… drh 65 *err = aprintf(__VA_ARGS__); \
adb9e8e… drh 66 return 1; \
adb9e8e… drh 67 } while (0)
adb9e8e… drh 68
adb9e8e… drh 69 // Chunk size for buffered reads and for decompression. Twice this many bytes
adb9e8e… drh 70 // will be allocated on the stack by gzip_normalize(). Must fit in an unsigned.
adb9e8e… drh 71 #define CHUNK 16384
adb9e8e… drh 72
adb9e8e… drh 73 // Read a gzip stream from in and write an equivalent normalized gzip stream to
adb9e8e… drh 74 // out. If given no input, an empty gzip stream will be written. If successful,
adb9e8e… drh 75 // 0 is returned, and *err is set to NULL. On error, 1 is returned, where the
adb9e8e… drh 76 // details of the error are returned in *err, a pointer to an allocated string.
adb9e8e… drh 77 //
adb9e8e… drh 78 // The input may be a stream with multiple gzip members, which is converted to
adb9e8e… drh 79 // a single gzip member on the output. Each gzip member is decompressed at the
adb9e8e… drh 80 // level of deflate blocks. This enables clearing the last-block bit, shifting
adb9e8e… drh 81 // the compressed data to concatenate to the previous member's compressed data,
adb9e8e… drh 82 // which can end at an arbitrary bit boundary, and identifying stored blocks in
adb9e8e… drh 83 // order to resynchronize those to byte boundaries. The deflate compressed data
adb9e8e… drh 84 // is terminated with a 10-bit empty fixed block. If any members on the input
adb9e8e… drh 85 // end with a 10-bit empty fixed block, then that block is excised from the
adb9e8e… drh 86 // stream. This avoids appending empty fixed blocks for every normalization,
adb9e8e… drh 87 // and assures that gzip_normalize applied a second time will not change the
adb9e8e… drh 88 // input. The pad bits after stored block headers and after the final deflate
adb9e8e… drh 89 // block are all forced to zeros.
adb9e8e… drh 90 local int gzip_normalize(FILE *in, FILE *out, char **err) {
adb9e8e… drh 91 // initialize the inflate engine to process a gzip member
adb9e8e… drh 92 z_stream strm;
adb9e8e… drh 93 strm.zalloc = Z_NULL;
adb9e8e… drh 94 strm.zfree = Z_NULL;
adb9e8e… drh 95 strm.opaque = Z_NULL;
adb9e8e… drh 96 strm.avail_in = 0;
adb9e8e… drh 97 strm.next_in = Z_NULL;
adb9e8e… drh 98 if (inflateInit2(&strm, 15 + 16) != Z_OK)
adb9e8e… drh 99 BYE("out of memory");
adb9e8e… drh 100
adb9e8e… drh 101 // State while processing the input gzip stream.
adb9e8e… drh 102 enum { // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ...
adb9e8e… drh 103 BETWEEN, // between gzip members (must end in this state)
adb9e8e… drh 104 HEAD, // reading a gzip header
adb9e8e… drh 105 BLOCK, // reading deflate blocks
adb9e8e… drh 106 TAIL // reading a gzip trailer
adb9e8e… drh 107 } state = BETWEEN; // current component being processed
adb9e8e… drh 108 unsigned long crc = 0; // accumulated CRC of uncompressed data
adb9e8e… drh 109 unsigned long len = 0; // accumulated length of uncompressed data
adb9e8e… drh 110 unsigned long buf = 0; // deflate stream bit buffer of num bits
adb9e8e… drh 111 int num = 0; // number of bits in buf (at bottom)
adb9e8e… drh 112
adb9e8e… drh 113 // Write a canonical gzip header (no mod time, file name, comment, extra
adb9e8e… drh 114 // block, or extra flags, and OS is marked as unknown).
adb9e8e… drh 115 fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
adb9e8e… drh 116
adb9e8e… drh 117 // Process the gzip stream from in until reaching the end of the input,
adb9e8e… drh 118 // encountering invalid input, or experiencing an i/o error.
adb9e8e… drh 119 int more; // true if not at the end of the input
adb9e8e… drh 120 do {
adb9e8e… drh 121 // State inside this loop.
adb9e8e… drh 122 unsigned char *put; // next input buffer location to process
adb9e8e… drh 123 int prev; // number of bits from previous block in
adb9e8e… drh 124 // the bit buffer, or -1 if not at the
adb9e8e… drh 125 // start of a block
adb9e8e… drh 126 unsigned long long memb; // uncompressed length of member
adb9e8e… drh 127 size_t tail; // number of trailer bytes read (0..8)
adb9e8e… drh 128 unsigned long part; // accumulated trailer component
adb9e8e… drh 129
adb9e8e… drh 130 // Get the next chunk of input from in.
adb9e8e… drh 131 unsigned char dat[CHUNK];
adb9e8e… drh 132 strm.avail_in = fread(dat, 1, CHUNK, in);
adb9e8e… drh 133 if (strm.avail_in == 0)
adb9e8e… drh 134 break;
adb9e8e… drh 135 more = strm.avail_in == CHUNK;
adb9e8e… drh 136 strm.next_in = put = dat;
adb9e8e… drh 137
adb9e8e… drh 138 // Run that chunk of input through the inflate engine to exhaustion.
adb9e8e… drh 139 do {
adb9e8e… drh 140 // At this point it is assured that strm.avail_in > 0.
adb9e8e… drh 141
adb9e8e… drh 142 // Inflate until the end of a gzip component (header, deflate
adb9e8e… drh 143 // block, trailer) is reached, or until all of the chunk is
adb9e8e… drh 144 // consumed. The resulting decompressed data is discarded, though
adb9e8e… drh 145 // the total size of the decompressed data in each member is
adb9e8e… drh 146 // tracked, for the calculation of the total CRC.
adb9e8e… drh 147 do {
adb9e8e… drh 148 // inflate and handle any errors
adb9e8e… drh 149 unsigned char scrap[CHUNK];
adb9e8e… drh 150 strm.avail_out = CHUNK;
adb9e8e… drh 151 strm.next_out = scrap;
adb9e8e… drh 152 int ret = inflate(&strm, Z_BLOCK);
adb9e8e… drh 153 if (ret == Z_MEM_ERROR)
adb9e8e… drh 154 BYE("out of memory");
adb9e8e… drh 155 if (ret == Z_DATA_ERROR)
adb9e8e… drh 156 BYE("input invalid: %s", strm.msg);
adb9e8e… drh 157 if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END)
adb9e8e… drh 158 BYE("internal error");
adb9e8e… drh 159
adb9e8e… drh 160 // Update the number of uncompressed bytes generated in this
adb9e8e… drh 161 // member. The actual count (not modulo 2^32) is required to
adb9e8e… drh 162 // correctly compute the total CRC.
adb9e8e… drh 163 unsigned got = CHUNK - strm.avail_out;
adb9e8e… drh 164 memb += got;
adb9e8e… drh 165 if (memb < got)
adb9e8e… drh 166 BYE("overflow error");
adb9e8e… drh 167
adb9e8e… drh 168 // Continue to process this chunk until it is consumed, or
adb9e8e… drh 169 // until the end of a component (header, deflate block, or
adb9e8e… drh 170 // trailer) is reached.
adb9e8e… drh 171 } while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0);
adb9e8e… drh 172
adb9e8e… drh 173 // Since strm.avail_in was > 0 for the inflate call, some input was
adb9e8e… drh 174 // just consumed. It is therefore assured that put < strm.next_in.
adb9e8e… drh 175
adb9e8e… drh 176 // Disposition the consumed component or part of a component.
adb9e8e… drh 177 switch (state) {
adb9e8e… drh 178 case BETWEEN:
adb9e8e… drh 179 state = HEAD;
adb9e8e… drh 180 // Fall through to HEAD when some or all of the header is
adb9e8e… drh 181 // processed.
adb9e8e… drh 182
adb9e8e… drh 183 case HEAD:
adb9e8e… drh 184 // Discard the header.
adb9e8e… drh 185 if (strm.data_type & 0x80) {
adb9e8e… drh 186 // End of header reached -- deflate blocks follow.
adb9e8e… drh 187 put = strm.next_in;
adb9e8e… drh 188 prev = num;
adb9e8e… drh 189 memb = 0;
adb9e8e… drh 190 state = BLOCK;
adb9e8e… drh 191 }
adb9e8e… drh 192 break;
adb9e8e… drh 193
adb9e8e… drh 194 case BLOCK:
adb9e8e… drh 195 // Copy the deflate stream to the output, but with the
adb9e8e… drh 196 // last-block-bit cleared. Re-synchronize stored block
adb9e8e… drh 197 // headers to the output byte boundaries. The bytes at
adb9e8e… drh 198 // put..strm.next_in-1 is the compressed data that has been
adb9e8e… drh 199 // processed and is ready to be copied to the output.
adb9e8e… drh 200
adb9e8e… drh 201 // At this point, it is assured that new compressed data is
adb9e8e… drh 202 // available, i.e., put < strm.next_in. If prev is -1, then
adb9e8e… drh 203 // that compressed data starts in the middle of a deflate
adb9e8e… drh 204 // block. If prev is not -1, then the bits in the bit
adb9e8e… drh 205 // buffer, possibly combined with the bits in *put, contain
adb9e8e… drh 206 // the three-bit header of the new deflate block. In that
adb9e8e… drh 207 // case, prev is the number of bits from the previous block
adb9e8e… drh 208 // that remain in the bit buffer. Since num is the number
adb9e8e… drh 209 // of bits in the bit buffer, we have that num - prev is
adb9e8e… drh 210 // the number of bits from the new block currently in the
adb9e8e… drh 211 // bit buffer.
adb9e8e… drh 212
adb9e8e… drh 213 // If strm.data_type & 0xc0 is 0x80, then the last byte of
adb9e8e… drh 214 // the available compressed data includes the last bits of
adb9e8e… drh 215 // the end of a deflate block. In that case, that last byte
adb9e8e… drh 216 // also has strm.data_type & 0x1f bits of the next deflate
adb9e8e… drh 217 // block, in the range 0..7. If strm.data_type & 0xc0 is
adb9e8e… drh 218 // 0xc0, then the last byte of the compressed data is the
adb9e8e… drh 219 // end of the deflate stream, followed by strm.data_type &
adb9e8e… drh 220 // 0x1f pad bits, also in the range 0..7.
adb9e8e… drh 221
adb9e8e… drh 222 // Set bits to the number of bits not yet consumed from the
adb9e8e… drh 223 // last byte. If we are at the end of the block, bits is
adb9e8e… drh 224 // either the number of bits in the last byte belonging to
adb9e8e… drh 225 // the next block, or the number of pad bits after the
adb9e8e… drh 226 // final block. In either of those cases, bits is in the
adb9e8e… drh 227 // range 0..7.
adb9e8e… drh 228 ; // (required due to C syntax oddity)
adb9e8e… drh 229 int bits = strm.data_type & 0x1f;
adb9e8e… drh 230
adb9e8e… drh 231 if (prev != -1) {
adb9e8e… drh 232 // We are at the start of a new block. Clear the last
adb9e8e… drh 233 // block bit, and check for special cases. If it is a
adb9e8e… drh 234 // stored block, then emit the header and pad to the
adb9e8e… drh 235 // next byte boundary. If it is a final, empty fixed
adb9e8e… drh 236 // block, then excise it.
adb9e8e… drh 237
adb9e8e… drh 238 // Some or all of the three header bits for this block
adb9e8e… drh 239 // may already be in the bit buffer. Load any remaining
adb9e8e… drh 240 // header bits into the bit buffer.
adb9e8e… drh 241 if (num - prev < 3) {
adb9e8e… drh 242 buf += (unsigned long)*put++ << num;
adb9e8e… drh 243 num += 8;
adb9e8e… drh 244 }
adb9e8e… drh 245
adb9e8e… drh 246 // Set last to have a 1 in the position of the last
adb9e8e… drh 247 // block bit in the bit buffer.
adb9e8e… drh 248 unsigned long last = (unsigned long)1 << prev;
adb9e8e… drh 249
adb9e8e… drh 250 if (((buf >> prev) & 7) == 3) {
adb9e8e… drh 251 // This is a final fixed block. Load at least ten
adb9e8e… drh 252 // bits from this block, including the header, into
adb9e8e… drh 253 // the bit buffer. We already have at least three,
adb9e8e… drh 254 // so at most one more byte needs to be loaded.
adb9e8e… drh 255 if (num - prev < 10) {
adb9e8e… drh 256 if (put == strm.next_in)
adb9e8e… drh 257 // Need to go get and process more input.
adb9e8e… drh 258 // We'll end up back here to finish this.
adb9e8e… drh 259 break;
adb9e8e… drh 260 buf += (unsigned long)*put++ << num;
adb9e8e… drh 261 num += 8;
adb9e8e… drh 262 }
adb9e8e… drh 263 if (((buf >> prev) & 0x3ff) == 3) {
adb9e8e… drh 264 // That final fixed block is empty. Delete it
adb9e8e… drh 265 // to avoid adding an empty block every time a
adb9e8e… drh 266 // gzip stream is normalized.
adb9e8e… drh 267 num = prev;
adb9e8e… drh 268 buf &= last - 1; // zero the pad bits
adb9e8e… drh 269 }
adb9e8e… drh 270 }
adb9e8e… drh 271 else if (((buf >> prev) & 6) == 0) {
adb9e8e… drh 272 // This is a stored block. Flush to the next
adb9e8e… drh 273 // byte boundary after the three-bit header.
adb9e8e… drh 274 num = (prev + 10) & ~7;
adb9e8e… drh 275 buf &= last - 1; // zero the pad bits
adb9e8e… drh 276 }
adb9e8e… drh 277
adb9e8e… drh 278 // Clear the last block bit.
adb9e8e… drh 279 buf &= ~last;
adb9e8e… drh 280
adb9e8e… drh 281 // Write out complete bytes in the bit buffer.
adb9e8e… drh 282 while (num >= 8) {
adb9e8e… drh 283 putc(buf, out);
adb9e8e… drh 284 buf >>= 8;
adb9e8e… drh 285 num -= 8;
adb9e8e… drh 286 }
adb9e8e… drh 287
adb9e8e… drh 288 // If no more bytes left to process, then we have
adb9e8e… drh 289 // consumed the byte that had bits from the next block.
adb9e8e… drh 290 if (put == strm.next_in)
adb9e8e… drh 291 bits = 0;
adb9e8e… drh 292 }
adb9e8e… drh 293
adb9e8e… drh 294 // We are done handling the deflate block header. Now copy
adb9e8e… drh 295 // all or almost all of the remaining compressed data that
adb9e8e… drh 296 // has been processed so far. Don't copy one byte at the
adb9e8e… drh 297 // end if it contains bits from the next deflate block or
adb9e8e… drh 298 // pad bits at the end of a deflate block.
adb9e8e… drh 299
adb9e8e… drh 300 // mix is 1 if we are at the end of a deflate block, and if
adb9e8e… drh 301 // some of the bits in the last byte follow this block. mix
adb9e8e… drh 302 // is 0 if we are in the middle of a deflate block, if the
adb9e8e… drh 303 // deflate block ended on a byte boundary, or if all of the
adb9e8e… drh 304 // compressed data processed so far has been consumed.
adb9e8e… drh 305 int mix = (strm.data_type & 0x80) && bits;
adb9e8e… drh 306
adb9e8e… drh 307 // Copy all of the processed compressed data to the output,
adb9e8e… drh 308 // except for the last byte if it contains bits from the
adb9e8e… drh 309 // next deflate block or pad bits at the end of the deflate
adb9e8e… drh 310 // stream. Copy the data after shifting in num bits from
adb9e8e… drh 311 // buf in front of it, leaving num bits from the end of the
adb9e8e… drh 312 // compressed data in buf when done.
adb9e8e… drh 313 unsigned char *end = strm.next_in - mix;
adb9e8e… drh 314 if (put < end) {
adb9e8e… drh 315 if (num)
adb9e8e… drh 316 // Insert num bits from buf before the data being
adb9e8e… drh 317 // copied.
adb9e8e… drh 318 do {
adb9e8e… drh 319 buf += (unsigned)(*put++) << num;
adb9e8e… drh 320 putc(buf, out);
adb9e8e… drh 321 buf >>= 8;
adb9e8e… drh 322 } while (put < end);
adb9e8e… drh 323 else {
adb9e8e… drh 324 // No shifting needed -- write directly.
adb9e8e… drh 325 fwrite(put, 1, end - put, out);
adb9e8e… drh 326 put = end;
adb9e8e… drh 327 }
adb9e8e… drh 328 }
adb9e8e… drh 329
adb9e8e… drh 330 // Process the last processed byte if it wasn't written.
adb9e8e… drh 331 if (mix) {
adb9e8e… drh 332 // Load the last byte into the bit buffer.
adb9e8e… drh 333 buf += (unsigned)(*put++) << num;
adb9e8e… drh 334 num += 8;
adb9e8e… drh 335
adb9e8e… drh 336 if (strm.data_type & 0x40) {
adb9e8e… drh 337 // We are at the end of the deflate stream and
adb9e8e… drh 338 // there are bits pad bits. Discard the pad bits
adb9e8e… drh 339 // and write a byte to the output, if available.
adb9e8e… drh 340 // Leave the num bits left over in buf to prepend
adb9e8e… drh 341 // to the next deflate stream.
adb9e8e… drh 342 num -= bits;
adb9e8e… drh 343 if (num >= 8) {
adb9e8e… drh 344 putc(buf, out);
adb9e8e… drh 345 num -= 8;
adb9e8e… drh 346 buf >>= 8;
adb9e8e… drh 347 }
adb9e8e… drh 348
adb9e8e… drh 349 // Force the pad bits in the bit buffer to zeros.
adb9e8e… drh 350 buf &= ((unsigned long)1 << num) - 1;
adb9e8e… drh 351
adb9e8e… drh 352 // Don't need to set prev here since going to TAIL.
adb9e8e… drh 353 }
adb9e8e… drh 354 else
adb9e8e… drh 355 // At the end of an internal deflate block. Leave
adb9e8e… drh 356 // the last byte in the bit buffer to examine on
adb9e8e… drh 357 // the next entry to BLOCK, when more bits from the
adb9e8e… drh 358 // next block will be available.
adb9e8e… drh 359 prev = num - bits; // number of bits in buffer
adb9e8e… drh 360 // from current block
adb9e8e… drh 361 }
adb9e8e… drh 362
adb9e8e… drh 363 // Don't have a byte left over, so we are in the middle of
adb9e8e… drh 364 // a deflate block, or the deflate block ended on a byte
adb9e8e… drh 365 // boundary. Set prev appropriately for the next entry into
adb9e8e… drh 366 // BLOCK.
adb9e8e… drh 367 else if (strm.data_type & 0x80)
adb9e8e… drh 368 // The block ended on a byte boundary, so no header
adb9e8e… drh 369 // bits are in the bit buffer.
adb9e8e… drh 370 prev = num;
adb9e8e… drh 371 else
adb9e8e… drh 372 // In the middle of a deflate block, so no header here.
adb9e8e… drh 373 prev = -1;
adb9e8e… drh 374
adb9e8e… drh 375 // Check for the end of the deflate stream.
adb9e8e… drh 376 if ((strm.data_type & 0xc0) == 0xc0) {
adb9e8e… drh 377 // That ends the deflate stream on the input side, the
adb9e8e… drh 378 // pad bits were discarded, and any remaining bits from
adb9e8e… drh 379 // the last block in the stream are saved in the bit
adb9e8e… drh 380 // buffer to prepend to the next stream. Process the
adb9e8e… drh 381 // gzip trailer next.
adb9e8e… drh 382 tail = 0;
adb9e8e… drh 383 part = 0;
adb9e8e… drh 384 state = TAIL;
adb9e8e… drh 385 }
adb9e8e… drh 386 break;
adb9e8e… drh 387
adb9e8e… drh 388 case TAIL:
adb9e8e… drh 389 // Accumulate available trailer bytes to update the total
adb9e8e… drh 390 // CRC and the total uncompressed length.
adb9e8e… drh 391 do {
adb9e8e… drh 392 part = (part >> 8) + ((unsigned long)(*put++) << 24);
adb9e8e… drh 393 tail++;
adb9e8e… drh 394 if (tail == 4) {
adb9e8e… drh 395 // Update the total CRC.
adb9e8e… drh 396 z_off_t len2 = memb;
adb9e8e… drh 397 if (len2 < 0 || (unsigned long long)len2 != memb)
adb9e8e… drh 398 BYE("overflow error");
adb9e8e… drh 399 crc = crc ? crc32_combine(crc, part, len2) : part;
adb9e8e… drh 400 part = 0;
adb9e8e… drh 401 }
adb9e8e… drh 402 else if (tail == 8) {
adb9e8e… drh 403 // Update the total uncompressed length. (It's ok
adb9e8e… drh 404 // if this sum is done modulo 2^32.)
adb9e8e… drh 405 len += part;
adb9e8e… drh 406
adb9e8e… drh 407 // At the end of a member. Set up to inflate an
adb9e8e… drh 408 // immediately following gzip member. (If we made
adb9e8e… drh 409 // it this far, then the trailer was valid.)
adb9e8e… drh 410 if (inflateReset(&strm) != Z_OK)
adb9e8e… drh 411 BYE("internal error");
adb9e8e… drh 412 state = BETWEEN;
adb9e8e… drh 413 break;
adb9e8e… drh 414 }
adb9e8e… drh 415 } while (put < strm.next_in);
adb9e8e… drh 416 break;
adb9e8e… drh 417 }
adb9e8e… drh 418
adb9e8e… drh 419 // Process the input buffer until completely consumed.
adb9e8e… drh 420 } while (strm.avail_in > 0);
adb9e8e… drh 421
adb9e8e… drh 422 // Process input until end of file, invalid input, or i/o error.
adb9e8e… drh 423 } while (more);
adb9e8e… drh 424
adb9e8e… drh 425 // Done with the inflate engine.
adb9e8e… drh 426 inflateEnd(&strm);
adb9e8e… drh 427
adb9e8e… drh 428 // Verify the validity of the input.
adb9e8e… drh 429 if (state != BETWEEN)
adb9e8e… drh 430 BYE("input invalid: incomplete gzip stream");
adb9e8e… drh 431
adb9e8e… drh 432 // Write the remaining deflate stream bits, followed by a terminating
adb9e8e… drh 433 // deflate fixed block.
adb9e8e… drh 434 buf += (unsigned long)3 << num;
adb9e8e… drh 435 putc(buf, out);
adb9e8e… drh 436 putc(buf >> 8, out);
adb9e8e… drh 437 if (num > 6)
adb9e8e… drh 438 putc(0, out);
adb9e8e… drh 439
adb9e8e… drh 440 // Write the gzip trailer, which is the CRC and the uncompressed length
adb9e8e… drh 441 // modulo 2^32, both in little-endian order.
adb9e8e… drh 442 putc(crc, out);
adb9e8e… drh 443 putc(crc >> 8, out);
adb9e8e… drh 444 putc(crc >> 16, out);
adb9e8e… drh 445 putc(crc >> 24, out);
adb9e8e… drh 446 putc(len, out);
adb9e8e… drh 447 putc(len >> 8, out);
adb9e8e… drh 448 putc(len >> 16, out);
adb9e8e… drh 449 putc(len >> 24, out);
adb9e8e… drh 450 fflush(out);
adb9e8e… drh 451
adb9e8e… drh 452 // Check for any i/o errors.
adb9e8e… drh 453 if (ferror(in) || ferror(out))
adb9e8e… drh 454 BYE("i/o error: %s", strerror(errno));
adb9e8e… drh 455
adb9e8e… drh 456 // All good!
adb9e8e… drh 457 *err = NULL;
adb9e8e… drh 458 return 0;
adb9e8e… drh 459 }
adb9e8e… drh 460
// Normalize the gzip stream on stdin, writing the result to stdout. The exit
// status is 0 on success, or 1 on error with a message written to stderr.
int main(void) {
    // Avoid end-of-line conversions on evil operating systems.
    SET_BINARY_MODE(stdin);
    SET_BINARY_MODE(stdout);

    // Normalize from stdin to stdout, returning 1 on error, 0 if ok.
    char *err = NULL;
    int ret = gzip_normalize(stdin, stdout, &err);
    if (ret)
        // err is NULL if the error message itself could not be allocated.
        // Passing NULL for %s is undefined, so substitute a fixed string.
        fprintf(stderr, "gznorm error: %s\n", err ? err : "unknown error");
    free(err);
    return ret;
}

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button