|
adb9e8e…
|
drh
|
1 |
/* gznorm.c -- normalize a gzip stream |
|
adb9e8e…
|
drh
|
2 |
* Copyright (C) 2018 Mark Adler |
|
adb9e8e…
|
drh
|
3 |
* For conditions of distribution and use, see copyright notice in zlib.h |
|
adb9e8e…
|
drh
|
4 |
* Version 1.0 7 Oct 2018 Mark Adler */ |
|
adb9e8e…
|
drh
|
5 |
|
|
adb9e8e…
|
drh
|
6 |
// gznorm takes a gzip stream, potentially containing multiple members, and |
|
adb9e8e…
|
drh
|
7 |
// converts it to a gzip stream with a single member. In addition the gzip |
|
adb9e8e…
|
drh
|
8 |
// header is normalized, removing the file name and time stamp, and setting the |
|
adb9e8e…
|
drh
|
9 |
// other header contents (XFL, OS) to fixed values. gznorm does not recompress |
|
adb9e8e…
|
drh
|
10 |
// the data, so it is fast, but no advantage is gained from the history that |
|
adb9e8e…
|
drh
|
11 |
// could be available across member boundaries. |
|
6ea30fb…
|
florian
|
12 |
|
|
6ea30fb…
|
florian
|
13 |
#if defined(_WIN32) && !defined(_CRT_NONSTDC_NO_DEPRECATE) |
|
6ea30fb…
|
florian
|
14 |
# define _CRT_NONSTDC_NO_DEPRECATE |
|
6ea30fb…
|
florian
|
15 |
#endif |
|
adb9e8e…
|
drh
|
16 |
|
|
adb9e8e…
|
drh
|
17 |
#include <stdio.h> // fread, fwrite, putc, fflush, ferror, fprintf, |
|
adb9e8e…
|
drh
|
18 |
// vsnprintf, stdout, stderr, NULL, FILE |
|
adb9e8e…
|
drh
|
19 |
#include <stdlib.h> // malloc, free |
|
adb9e8e…
|
drh
|
20 |
#include <string.h> // strerror |
|
adb9e8e…
|
drh
|
21 |
#include <errno.h> // errno |
|
adb9e8e…
|
drh
|
22 |
#include <stdarg.h> // va_list, va_start, va_end |
|
adb9e8e…
|
drh
|
23 |
#include "zlib.h" // inflateInit2, inflate, inflateReset, inflateEnd, |
|
adb9e8e…
|
drh
|
24 |
// z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK, |
|
adb9e8e…
|
drh
|
25 |
// Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR, |
|
adb9e8e…
|
drh
|
26 |
// Z_MEM_ERROR |
|
adb9e8e…
|
drh
|
27 |
|
|
adb9e8e…
|
drh
|
28 |
// SET_BINARY_MODE(file) switches a stdio stream to binary mode on platforms
// that distinguish text streams from binary streams, and expands to nothing
// everywhere else. _WIN32 is tested in addition to WIN32, since this file
// already keys its Windows-specific setup on _WIN32, and a compiler that
// defines only _WIN32 would otherwise leave the streams in text mode.
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || \
    defined(__CYGWIN__)
#  include <fcntl.h>
#  include <io.h>
#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
#  define SET_BINARY_MODE(file)
#endif
|
adb9e8e…
|
drh
|
35 |
|
|
adb9e8e…
|
drh
|
36 |
#define local static |
|
adb9e8e…
|
drh
|
37 |
|
|
adb9e8e…
|
drh
|
38 |
// printf to an allocated string. fmt and the arguments that follow it are
// interpreted as for printf(). Returns the formatted, null-terminated string,
// which the caller must free(), or NULL if either formatting pass or the
// allocation fails.
static char *aprintf(const char *fmt, ...) {
    // First pass: get the length of the result, not counting the terminator.
    va_list args;
    va_start(args, fmt);
    int len = vsnprintf(NULL, 0, fmt, args);
    va_end(args);
    if (len < 0)
        return NULL;

    // Allocate the required space. The size is computed as a size_t so that
    // adding one for the terminator cannot overflow an int.
    size_t size = (size_t)len + 1;
    char *str = malloc(size);
    if (str == NULL)
        return NULL;

    // Second pass: printf into the allocated space. The argument list has to
    // be restarted, since the first pass consumed it.
    va_start(args, fmt);
    int wrote = vsnprintf(str, size, fmt, args);
    va_end(args);
    if (wrote < 0 || (size_t)wrote >= size) {
        // The second pass failed or disagreed with the first. Don't return a
        // string with unspecified or truncated contents.
        free(str);
        return NULL;
    }
    return str;
}
|
adb9e8e…
|
drh
|
58 |
|
|
adb9e8e…
|
drh
|
59 |
// Leave the enclosing function with an error. The inflate state in strm is
// released, a newly allocated message formatted from the printf-style
// arguments is stored in *err, and 1 is returned. The macro expects strm and
// err to be in scope where it is used. It is permitted to call inflateEnd() on
// a state that was already ended, or on one with state set to Z_NULL.
#define BYE(...)                        \
    do {                                \
        (void)inflateEnd(&strm);        \
        *err = aprintf(__VA_ARGS__);    \
        return 1;                       \
    } while (0)
|
adb9e8e…
|
drh
|
68 |
|
|
adb9e8e…
|
drh
|
69 |
// Chunk size for buffered reads and for decompression. Twice this many bytes |
|
adb9e8e…
|
drh
|
70 |
// will be allocated on the stack by gzip_normalize(). Must fit in an unsigned. |
|
adb9e8e…
|
drh
|
71 |
#define CHUNK 16384 |
|
adb9e8e…
|
drh
|
72 |
|
|
adb9e8e…
|
drh
|
73 |
// Read a gzip stream from in and write an equivalent normalized gzip stream to |
|
adb9e8e…
|
drh
|
74 |
// out. If given no input, an empty gzip stream will be written. If successful, |
|
adb9e8e…
|
drh
|
75 |
// 0 is returned, and *err is set to NULL. On error, 1 is returned, where the |
|
adb9e8e…
|
drh
|
76 |
// details of the error are returned in *err, a pointer to an allocated string. |
|
adb9e8e…
|
drh
|
77 |
// |
|
adb9e8e…
|
drh
|
78 |
// The input may be a stream with multiple gzip members, which is converted to |
|
adb9e8e…
|
drh
|
79 |
// a single gzip member on the output. Each gzip member is decompressed at the |
|
adb9e8e…
|
drh
|
80 |
// level of deflate blocks. This enables clearing the last-block bit, shifting |
|
adb9e8e…
|
drh
|
81 |
// the compressed data to concatenate to the previous member's compressed data, |
|
adb9e8e…
|
drh
|
82 |
// which can end at an arbitrary bit boundary, and identifying stored blocks in |
|
adb9e8e…
|
drh
|
83 |
// order to resynchronize those to byte boundaries. The deflate compressed data |
|
adb9e8e…
|
drh
|
84 |
// is terminated with a 10-bit empty fixed block. If any members on the input |
|
adb9e8e…
|
drh
|
85 |
// end with a 10-bit empty fixed block, then that block is excised from the |
|
adb9e8e…
|
drh
|
86 |
// stream. This avoids appending empty fixed blocks for every normalization, |
|
adb9e8e…
|
drh
|
87 |
// and assures that gzip_normalize applied a second time will not change the |
|
adb9e8e…
|
drh
|
88 |
// input. The pad bits after stored block headers and after the final deflate |
|
adb9e8e…
|
drh
|
89 |
// block are all forced to zeros. |
|
adb9e8e…
|
drh
|
90 |
local int gzip_normalize(FILE *in, FILE *out, char **err) { |
|
adb9e8e…
|
drh
|
91 |
// initialize the inflate engine to process a gzip member |
|
adb9e8e…
|
drh
|
92 |
z_stream strm; |
|
adb9e8e…
|
drh
|
93 |
strm.zalloc = Z_NULL; |
|
adb9e8e…
|
drh
|
94 |
strm.zfree = Z_NULL; |
|
adb9e8e…
|
drh
|
95 |
strm.opaque = Z_NULL; |
|
adb9e8e…
|
drh
|
96 |
strm.avail_in = 0; |
|
adb9e8e…
|
drh
|
97 |
strm.next_in = Z_NULL; |
|
adb9e8e…
|
drh
|
98 |
if (inflateInit2(&strm, 15 + 16) != Z_OK) |
|
adb9e8e…
|
drh
|
99 |
BYE("out of memory"); |
|
adb9e8e…
|
drh
|
100 |
|
|
adb9e8e…
|
drh
|
101 |
// State while processing the input gzip stream. |
|
adb9e8e…
|
drh
|
102 |
enum { // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ... |
|
adb9e8e…
|
drh
|
103 |
BETWEEN, // between gzip members (must end in this state) |
|
adb9e8e…
|
drh
|
104 |
HEAD, // reading a gzip header |
|
adb9e8e…
|
drh
|
105 |
BLOCK, // reading deflate blocks |
|
adb9e8e…
|
drh
|
106 |
TAIL // reading a gzip trailer |
|
adb9e8e…
|
drh
|
107 |
} state = BETWEEN; // current component being processed |
|
adb9e8e…
|
drh
|
108 |
unsigned long crc = 0; // accumulated CRC of uncompressed data |
|
adb9e8e…
|
drh
|
109 |
unsigned long len = 0; // accumulated length of uncompressed data |
|
adb9e8e…
|
drh
|
110 |
unsigned long buf = 0; // deflate stream bit buffer of num bits |
|
adb9e8e…
|
drh
|
111 |
int num = 0; // number of bits in buf (at bottom) |
|
adb9e8e…
|
drh
|
112 |
|
|
adb9e8e…
|
drh
|
113 |
// Write a canonical gzip header (no mod time, file name, comment, extra |
|
adb9e8e…
|
drh
|
114 |
// block, or extra flags, and OS is marked as unknown). |
|
adb9e8e…
|
drh
|
115 |
fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); |
|
adb9e8e…
|
drh
|
116 |
|
|
adb9e8e…
|
drh
|
117 |
// Process the gzip stream from in until reaching the end of the input, |
|
adb9e8e…
|
drh
|
118 |
// encountering invalid input, or experiencing an i/o error. |
|
adb9e8e…
|
drh
|
119 |
int more; // true if not at the end of the input |
|
adb9e8e…
|
drh
|
120 |
do { |
|
adb9e8e…
|
drh
|
121 |
// State inside this loop. |
|
adb9e8e…
|
drh
|
122 |
unsigned char *put; // next input buffer location to process |
|
adb9e8e…
|
drh
|
123 |
int prev; // number of bits from previous block in |
|
adb9e8e…
|
drh
|
124 |
// the bit buffer, or -1 if not at the |
|
adb9e8e…
|
drh
|
125 |
// start of a block |
|
adb9e8e…
|
drh
|
126 |
unsigned long long memb; // uncompressed length of member |
|
adb9e8e…
|
drh
|
127 |
size_t tail; // number of trailer bytes read (0..8) |
|
adb9e8e…
|
drh
|
128 |
unsigned long part; // accumulated trailer component |
|
adb9e8e…
|
drh
|
129 |
|
|
adb9e8e…
|
drh
|
130 |
// Get the next chunk of input from in. |
|
adb9e8e…
|
drh
|
131 |
unsigned char dat[CHUNK]; |
|
adb9e8e…
|
drh
|
132 |
strm.avail_in = fread(dat, 1, CHUNK, in); |
|
adb9e8e…
|
drh
|
133 |
if (strm.avail_in == 0) |
|
adb9e8e…
|
drh
|
134 |
break; |
|
adb9e8e…
|
drh
|
135 |
more = strm.avail_in == CHUNK; |
|
adb9e8e…
|
drh
|
136 |
strm.next_in = put = dat; |
|
adb9e8e…
|
drh
|
137 |
|
|
adb9e8e…
|
drh
|
138 |
// Run that chunk of input through the inflate engine to exhaustion. |
|
adb9e8e…
|
drh
|
139 |
do { |
|
adb9e8e…
|
drh
|
140 |
// At this point it is assured that strm.avail_in > 0. |
|
adb9e8e…
|
drh
|
141 |
|
|
adb9e8e…
|
drh
|
142 |
// Inflate until the end of a gzip component (header, deflate |
|
adb9e8e…
|
drh
|
143 |
// block, trailer) is reached, or until all of the chunk is |
|
adb9e8e…
|
drh
|
144 |
// consumed. The resulting decompressed data is discarded, though |
|
adb9e8e…
|
drh
|
145 |
// the total size of the decompressed data in each member is |
|
adb9e8e…
|
drh
|
146 |
// tracked, for the calculation of the total CRC. |
|
adb9e8e…
|
drh
|
147 |
do { |
|
adb9e8e…
|
drh
|
148 |
// inflate and handle any errors |
|
adb9e8e…
|
drh
|
149 |
unsigned char scrap[CHUNK]; |
|
adb9e8e…
|
drh
|
150 |
strm.avail_out = CHUNK; |
|
adb9e8e…
|
drh
|
151 |
strm.next_out = scrap; |
|
adb9e8e…
|
drh
|
152 |
int ret = inflate(&strm, Z_BLOCK); |
|
adb9e8e…
|
drh
|
153 |
if (ret == Z_MEM_ERROR) |
|
adb9e8e…
|
drh
|
154 |
BYE("out of memory"); |
|
adb9e8e…
|
drh
|
155 |
if (ret == Z_DATA_ERROR) |
|
adb9e8e…
|
drh
|
156 |
BYE("input invalid: %s", strm.msg); |
|
adb9e8e…
|
drh
|
157 |
if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END) |
|
adb9e8e…
|
drh
|
158 |
BYE("internal error"); |
|
adb9e8e…
|
drh
|
159 |
|
|
adb9e8e…
|
drh
|
160 |
// Update the number of uncompressed bytes generated in this |
|
adb9e8e…
|
drh
|
161 |
// member. The actual count (not modulo 2^32) is required to |
|
adb9e8e…
|
drh
|
162 |
// correctly compute the total CRC. |
|
adb9e8e…
|
drh
|
163 |
unsigned got = CHUNK - strm.avail_out; |
|
adb9e8e…
|
drh
|
164 |
memb += got; |
|
adb9e8e…
|
drh
|
165 |
if (memb < got) |
|
adb9e8e…
|
drh
|
166 |
BYE("overflow error"); |
|
adb9e8e…
|
drh
|
167 |
|
|
adb9e8e…
|
drh
|
168 |
// Continue to process this chunk until it is consumed, or |
|
adb9e8e…
|
drh
|
169 |
// until the end of a component (header, deflate block, or |
|
adb9e8e…
|
drh
|
170 |
// trailer) is reached. |
|
adb9e8e…
|
drh
|
171 |
} while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0); |
|
adb9e8e…
|
drh
|
172 |
|
|
adb9e8e…
|
drh
|
173 |
// Since strm.avail_in was > 0 for the inflate call, some input was |
|
adb9e8e…
|
drh
|
174 |
// just consumed. It is therefore assured that put < strm.next_in. |
|
adb9e8e…
|
drh
|
175 |
|
|
adb9e8e…
|
drh
|
176 |
// Disposition the consumed component or part of a component. |
|
adb9e8e…
|
drh
|
177 |
switch (state) { |
|
adb9e8e…
|
drh
|
178 |
case BETWEEN: |
|
adb9e8e…
|
drh
|
179 |
state = HEAD; |
|
adb9e8e…
|
drh
|
180 |
// Fall through to HEAD when some or all of the header is |
|
adb9e8e…
|
drh
|
181 |
// processed. |
|
adb9e8e…
|
drh
|
182 |
|
|
adb9e8e…
|
drh
|
183 |
case HEAD: |
|
adb9e8e…
|
drh
|
184 |
// Discard the header. |
|
adb9e8e…
|
drh
|
185 |
if (strm.data_type & 0x80) { |
|
adb9e8e…
|
drh
|
186 |
// End of header reached -- deflate blocks follow. |
|
adb9e8e…
|
drh
|
187 |
put = strm.next_in; |
|
adb9e8e…
|
drh
|
188 |
prev = num; |
|
adb9e8e…
|
drh
|
189 |
memb = 0; |
|
adb9e8e…
|
drh
|
190 |
state = BLOCK; |
|
adb9e8e…
|
drh
|
191 |
} |
|
adb9e8e…
|
drh
|
192 |
break; |
|
adb9e8e…
|
drh
|
193 |
|
|
adb9e8e…
|
drh
|
194 |
case BLOCK: |
|
adb9e8e…
|
drh
|
195 |
// Copy the deflate stream to the output, but with the |
|
adb9e8e…
|
drh
|
196 |
// last-block-bit cleared. Re-synchronize stored block |
|
adb9e8e…
|
drh
|
197 |
// headers to the output byte boundaries. The bytes at |
|
adb9e8e…
|
drh
|
198 |
// put..strm.next_in-1 is the compressed data that has been |
|
adb9e8e…
|
drh
|
199 |
// processed and is ready to be copied to the output. |
|
adb9e8e…
|
drh
|
200 |
|
|
adb9e8e…
|
drh
|
201 |
// At this point, it is assured that new compressed data is |
|
adb9e8e…
|
drh
|
202 |
// available, i.e., put < strm.next_in. If prev is -1, then |
|
adb9e8e…
|
drh
|
203 |
// that compressed data starts in the middle of a deflate |
|
adb9e8e…
|
drh
|
204 |
// block. If prev is not -1, then the bits in the bit |
|
adb9e8e…
|
drh
|
205 |
// buffer, possibly combined with the bits in *put, contain |
|
adb9e8e…
|
drh
|
206 |
// the three-bit header of the new deflate block. In that |
|
adb9e8e…
|
drh
|
207 |
// case, prev is the number of bits from the previous block |
|
adb9e8e…
|
drh
|
208 |
// that remain in the bit buffer. Since num is the number |
|
adb9e8e…
|
drh
|
209 |
// of bits in the bit buffer, we have that num - prev is |
|
adb9e8e…
|
drh
|
210 |
// the number of bits from the new block currently in the |
|
adb9e8e…
|
drh
|
211 |
// bit buffer. |
|
adb9e8e…
|
drh
|
212 |
|
|
adb9e8e…
|
drh
|
213 |
// If strm.data_type & 0xc0 is 0x80, then the last byte of |
|
adb9e8e…
|
drh
|
214 |
// the available compressed data includes the last bits of |
|
adb9e8e…
|
drh
|
215 |
// the end of a deflate block. In that case, that last byte |
|
adb9e8e…
|
drh
|
216 |
// also has strm.data_type & 0x1f bits of the next deflate |
|
adb9e8e…
|
drh
|
217 |
// block, in the range 0..7. If strm.data_type & 0xc0 is |
|
adb9e8e…
|
drh
|
218 |
// 0xc0, then the last byte of the compressed data is the |
|
adb9e8e…
|
drh
|
219 |
// end of the deflate stream, followed by strm.data_type & |
|
adb9e8e…
|
drh
|
220 |
// 0x1f pad bits, also in the range 0..7. |
|
adb9e8e…
|
drh
|
221 |
|
|
adb9e8e…
|
drh
|
222 |
// Set bits to the number of bits not yet consumed from the |
|
adb9e8e…
|
drh
|
223 |
// last byte. If we are at the end of the block, bits is |
|
adb9e8e…
|
drh
|
224 |
// either the number of bits in the last byte belonging to |
|
adb9e8e…
|
drh
|
225 |
// the next block, or the number of pad bits after the |
|
adb9e8e…
|
drh
|
226 |
// final block. In either of those cases, bits is in the |
|
adb9e8e…
|
drh
|
227 |
// range 0..7. |
|
adb9e8e…
|
drh
|
228 |
; // (required due to C syntax oddity) |
|
adb9e8e…
|
drh
|
229 |
int bits = strm.data_type & 0x1f; |
|
adb9e8e…
|
drh
|
230 |
|
|
adb9e8e…
|
drh
|
231 |
if (prev != -1) { |
|
adb9e8e…
|
drh
|
232 |
// We are at the start of a new block. Clear the last |
|
adb9e8e…
|
drh
|
233 |
// block bit, and check for special cases. If it is a |
|
adb9e8e…
|
drh
|
234 |
// stored block, then emit the header and pad to the |
|
adb9e8e…
|
drh
|
235 |
// next byte boundary. If it is a final, empty fixed |
|
adb9e8e…
|
drh
|
236 |
// block, then excise it. |
|
adb9e8e…
|
drh
|
237 |
|
|
adb9e8e…
|
drh
|
238 |
// Some or all of the three header bits for this block |
|
adb9e8e…
|
drh
|
239 |
// may already be in the bit buffer. Load any remaining |
|
adb9e8e…
|
drh
|
240 |
// header bits into the bit buffer. |
|
adb9e8e…
|
drh
|
241 |
if (num - prev < 3) { |
|
adb9e8e…
|
drh
|
242 |
buf += (unsigned long)*put++ << num; |
|
adb9e8e…
|
drh
|
243 |
num += 8; |
|
adb9e8e…
|
drh
|
244 |
} |
|
adb9e8e…
|
drh
|
245 |
|
|
adb9e8e…
|
drh
|
246 |
// Set last to have a 1 in the position of the last |
|
adb9e8e…
|
drh
|
247 |
// block bit in the bit buffer. |
|
adb9e8e…
|
drh
|
248 |
unsigned long last = (unsigned long)1 << prev; |
|
adb9e8e…
|
drh
|
249 |
|
|
adb9e8e…
|
drh
|
250 |
if (((buf >> prev) & 7) == 3) { |
|
adb9e8e…
|
drh
|
251 |
// This is a final fixed block. Load at least ten |
|
adb9e8e…
|
drh
|
252 |
// bits from this block, including the header, into |
|
adb9e8e…
|
drh
|
253 |
// the bit buffer. We already have at least three, |
|
adb9e8e…
|
drh
|
254 |
// so at most one more byte needs to be loaded. |
|
adb9e8e…
|
drh
|
255 |
if (num - prev < 10) { |
|
adb9e8e…
|
drh
|
256 |
if (put == strm.next_in) |
|
adb9e8e…
|
drh
|
257 |
// Need to go get and process more input. |
|
adb9e8e…
|
drh
|
258 |
// We'll end up back here to finish this. |
|
adb9e8e…
|
drh
|
259 |
break; |
|
adb9e8e…
|
drh
|
260 |
buf += (unsigned long)*put++ << num; |
|
adb9e8e…
|
drh
|
261 |
num += 8; |
|
adb9e8e…
|
drh
|
262 |
} |
|
adb9e8e…
|
drh
|
263 |
if (((buf >> prev) & 0x3ff) == 3) { |
|
adb9e8e…
|
drh
|
264 |
// That final fixed block is empty. Delete it |
|
adb9e8e…
|
drh
|
265 |
// to avoid adding an empty block every time a |
|
adb9e8e…
|
drh
|
266 |
// gzip stream is normalized. |
|
adb9e8e…
|
drh
|
267 |
num = prev; |
|
adb9e8e…
|
drh
|
268 |
buf &= last - 1; // zero the pad bits |
|
adb9e8e…
|
drh
|
269 |
} |
|
adb9e8e…
|
drh
|
270 |
} |
|
adb9e8e…
|
drh
|
271 |
else if (((buf >> prev) & 6) == 0) { |
|
adb9e8e…
|
drh
|
272 |
// This is a stored block. Flush to the next |
|
adb9e8e…
|
drh
|
273 |
// byte boundary after the three-bit header. |
|
adb9e8e…
|
drh
|
274 |
num = (prev + 10) & ~7; |
|
adb9e8e…
|
drh
|
275 |
buf &= last - 1; // zero the pad bits |
|
adb9e8e…
|
drh
|
276 |
} |
|
adb9e8e…
|
drh
|
277 |
|
|
adb9e8e…
|
drh
|
278 |
// Clear the last block bit. |
|
adb9e8e…
|
drh
|
279 |
buf &= ~last; |
|
adb9e8e…
|
drh
|
280 |
|
|
adb9e8e…
|
drh
|
281 |
// Write out complete bytes in the bit buffer. |
|
adb9e8e…
|
drh
|
282 |
while (num >= 8) { |
|
adb9e8e…
|
drh
|
283 |
putc(buf, out); |
|
adb9e8e…
|
drh
|
284 |
buf >>= 8; |
|
adb9e8e…
|
drh
|
285 |
num -= 8; |
|
adb9e8e…
|
drh
|
286 |
} |
|
adb9e8e…
|
drh
|
287 |
|
|
adb9e8e…
|
drh
|
288 |
// If no more bytes left to process, then we have |
|
adb9e8e…
|
drh
|
289 |
// consumed the byte that had bits from the next block. |
|
adb9e8e…
|
drh
|
290 |
if (put == strm.next_in) |
|
adb9e8e…
|
drh
|
291 |
bits = 0; |
|
adb9e8e…
|
drh
|
292 |
} |
|
adb9e8e…
|
drh
|
293 |
|
|
adb9e8e…
|
drh
|
294 |
// We are done handling the deflate block header. Now copy |
|
adb9e8e…
|
drh
|
295 |
// all or almost all of the remaining compressed data that |
|
adb9e8e…
|
drh
|
296 |
// has been processed so far. Don't copy one byte at the |
|
adb9e8e…
|
drh
|
297 |
// end if it contains bits from the next deflate block or |
|
adb9e8e…
|
drh
|
298 |
// pad bits at the end of a deflate block. |
|
adb9e8e…
|
drh
|
299 |
|
|
adb9e8e…
|
drh
|
300 |
// mix is 1 if we are at the end of a deflate block, and if |
|
adb9e8e…
|
drh
|
301 |
// some of the bits in the last byte follow this block. mix |
|
adb9e8e…
|
drh
|
302 |
// is 0 if we are in the middle of a deflate block, if the |
|
adb9e8e…
|
drh
|
303 |
// deflate block ended on a byte boundary, or if all of the |
|
adb9e8e…
|
drh
|
304 |
// compressed data processed so far has been consumed. |
|
adb9e8e…
|
drh
|
305 |
int mix = (strm.data_type & 0x80) && bits; |
|
adb9e8e…
|
drh
|
306 |
|
|
adb9e8e…
|
drh
|
307 |
// Copy all of the processed compressed data to the output, |
|
adb9e8e…
|
drh
|
308 |
// except for the last byte if it contains bits from the |
|
adb9e8e…
|
drh
|
309 |
// next deflate block or pad bits at the end of the deflate |
|
adb9e8e…
|
drh
|
310 |
// stream. Copy the data after shifting in num bits from |
|
adb9e8e…
|
drh
|
311 |
// buf in front of it, leaving num bits from the end of the |
|
adb9e8e…
|
drh
|
312 |
// compressed data in buf when done. |
|
adb9e8e…
|
drh
|
313 |
unsigned char *end = strm.next_in - mix; |
|
adb9e8e…
|
drh
|
314 |
if (put < end) { |
|
adb9e8e…
|
drh
|
315 |
if (num) |
|
adb9e8e…
|
drh
|
316 |
// Insert num bits from buf before the data being |
|
adb9e8e…
|
drh
|
317 |
// copied. |
|
adb9e8e…
|
drh
|
318 |
do { |
|
adb9e8e…
|
drh
|
319 |
buf += (unsigned)(*put++) << num; |
|
adb9e8e…
|
drh
|
320 |
putc(buf, out); |
|
adb9e8e…
|
drh
|
321 |
buf >>= 8; |
|
adb9e8e…
|
drh
|
322 |
} while (put < end); |
|
adb9e8e…
|
drh
|
323 |
else { |
|
adb9e8e…
|
drh
|
324 |
// No shifting needed -- write directly. |
|
adb9e8e…
|
drh
|
325 |
fwrite(put, 1, end - put, out); |
|
adb9e8e…
|
drh
|
326 |
put = end; |
|
adb9e8e…
|
drh
|
327 |
} |
|
adb9e8e…
|
drh
|
328 |
} |
|
adb9e8e…
|
drh
|
329 |
|
|
adb9e8e…
|
drh
|
330 |
// Process the last processed byte if it wasn't written. |
|
adb9e8e…
|
drh
|
331 |
if (mix) { |
|
adb9e8e…
|
drh
|
332 |
// Load the last byte into the bit buffer. |
|
adb9e8e…
|
drh
|
333 |
buf += (unsigned)(*put++) << num; |
|
adb9e8e…
|
drh
|
334 |
num += 8; |
|
adb9e8e…
|
drh
|
335 |
|
|
adb9e8e…
|
drh
|
336 |
if (strm.data_type & 0x40) { |
|
adb9e8e…
|
drh
|
337 |
// We are at the end of the deflate stream and |
|
adb9e8e…
|
drh
|
338 |
// there are bits pad bits. Discard the pad bits |
|
adb9e8e…
|
drh
|
339 |
// and write a byte to the output, if available. |
|
adb9e8e…
|
drh
|
340 |
// Leave the num bits left over in buf to prepend |
|
adb9e8e…
|
drh
|
341 |
// to the next deflate stream. |
|
adb9e8e…
|
drh
|
342 |
num -= bits; |
|
adb9e8e…
|
drh
|
343 |
if (num >= 8) { |
|
adb9e8e…
|
drh
|
344 |
putc(buf, out); |
|
adb9e8e…
|
drh
|
345 |
num -= 8; |
|
adb9e8e…
|
drh
|
346 |
buf >>= 8; |
|
adb9e8e…
|
drh
|
347 |
} |
|
adb9e8e…
|
drh
|
348 |
|
|
adb9e8e…
|
drh
|
349 |
// Force the pad bits in the bit buffer to zeros. |
|
adb9e8e…
|
drh
|
350 |
buf &= ((unsigned long)1 << num) - 1; |
|
adb9e8e…
|
drh
|
351 |
|
|
adb9e8e…
|
drh
|
352 |
// Don't need to set prev here since going to TAIL. |
|
adb9e8e…
|
drh
|
353 |
} |
|
adb9e8e…
|
drh
|
354 |
else |
|
adb9e8e…
|
drh
|
355 |
// At the end of an internal deflate block. Leave |
|
adb9e8e…
|
drh
|
356 |
// the last byte in the bit buffer to examine on |
|
adb9e8e…
|
drh
|
357 |
// the next entry to BLOCK, when more bits from the |
|
adb9e8e…
|
drh
|
358 |
// next block will be available. |
|
adb9e8e…
|
drh
|
359 |
prev = num - bits; // number of bits in buffer |
|
adb9e8e…
|
drh
|
360 |
// from current block |
|
adb9e8e…
|
drh
|
361 |
} |
|
adb9e8e…
|
drh
|
362 |
|
|
adb9e8e…
|
drh
|
363 |
// Don't have a byte left over, so we are in the middle of |
|
adb9e8e…
|
drh
|
364 |
// a deflate block, or the deflate block ended on a byte |
|
adb9e8e…
|
drh
|
365 |
// boundary. Set prev appropriately for the next entry into |
|
adb9e8e…
|
drh
|
366 |
// BLOCK. |
|
adb9e8e…
|
drh
|
367 |
else if (strm.data_type & 0x80) |
|
adb9e8e…
|
drh
|
368 |
// The block ended on a byte boundary, so no header |
|
adb9e8e…
|
drh
|
369 |
// bits are in the bit buffer. |
|
adb9e8e…
|
drh
|
370 |
prev = num; |
|
adb9e8e…
|
drh
|
371 |
else |
|
adb9e8e…
|
drh
|
372 |
// In the middle of a deflate block, so no header here. |
|
adb9e8e…
|
drh
|
373 |
prev = -1; |
|
adb9e8e…
|
drh
|
374 |
|
|
adb9e8e…
|
drh
|
375 |
// Check for the end of the deflate stream. |
|
adb9e8e…
|
drh
|
376 |
if ((strm.data_type & 0xc0) == 0xc0) { |
|
adb9e8e…
|
drh
|
377 |
// That ends the deflate stream on the input side, the |
|
adb9e8e…
|
drh
|
378 |
// pad bits were discarded, and any remaining bits from |
|
adb9e8e…
|
drh
|
379 |
// the last block in the stream are saved in the bit |
|
adb9e8e…
|
drh
|
380 |
// buffer to prepend to the next stream. Process the |
|
adb9e8e…
|
drh
|
381 |
// gzip trailer next. |
|
adb9e8e…
|
drh
|
382 |
tail = 0; |
|
adb9e8e…
|
drh
|
383 |
part = 0; |
|
adb9e8e…
|
drh
|
384 |
state = TAIL; |
|
adb9e8e…
|
drh
|
385 |
} |
|
adb9e8e…
|
drh
|
386 |
break; |
|
adb9e8e…
|
drh
|
387 |
|
|
adb9e8e…
|
drh
|
388 |
case TAIL: |
|
adb9e8e…
|
drh
|
389 |
// Accumulate available trailer bytes to update the total |
|
adb9e8e…
|
drh
|
390 |
// CRC and the total uncompressed length. |
|
adb9e8e…
|
drh
|
391 |
do { |
|
adb9e8e…
|
drh
|
392 |
part = (part >> 8) + ((unsigned long)(*put++) << 24); |
|
adb9e8e…
|
drh
|
393 |
tail++; |
|
adb9e8e…
|
drh
|
394 |
if (tail == 4) { |
|
adb9e8e…
|
drh
|
395 |
// Update the total CRC. |
|
adb9e8e…
|
drh
|
396 |
z_off_t len2 = memb; |
|
adb9e8e…
|
drh
|
397 |
if (len2 < 0 || (unsigned long long)len2 != memb) |
|
adb9e8e…
|
drh
|
398 |
BYE("overflow error"); |
|
adb9e8e…
|
drh
|
399 |
crc = crc ? crc32_combine(crc, part, len2) : part; |
|
adb9e8e…
|
drh
|
400 |
part = 0; |
|
adb9e8e…
|
drh
|
401 |
} |
|
adb9e8e…
|
drh
|
402 |
else if (tail == 8) { |
|
adb9e8e…
|
drh
|
403 |
// Update the total uncompressed length. (It's ok |
|
adb9e8e…
|
drh
|
404 |
// if this sum is done modulo 2^32.) |
|
adb9e8e…
|
drh
|
405 |
len += part; |
|
adb9e8e…
|
drh
|
406 |
|
|
adb9e8e…
|
drh
|
407 |
// At the end of a member. Set up to inflate an |
|
adb9e8e…
|
drh
|
408 |
// immediately following gzip member. (If we made |
|
adb9e8e…
|
drh
|
409 |
// it this far, then the trailer was valid.) |
|
adb9e8e…
|
drh
|
410 |
if (inflateReset(&strm) != Z_OK) |
|
adb9e8e…
|
drh
|
411 |
BYE("internal error"); |
|
adb9e8e…
|
drh
|
412 |
state = BETWEEN; |
|
adb9e8e…
|
drh
|
413 |
break; |
|
adb9e8e…
|
drh
|
414 |
} |
|
adb9e8e…
|
drh
|
415 |
} while (put < strm.next_in); |
|
adb9e8e…
|
drh
|
416 |
break; |
|
adb9e8e…
|
drh
|
417 |
} |
|
adb9e8e…
|
drh
|
418 |
|
|
adb9e8e…
|
drh
|
419 |
// Process the input buffer until completely consumed. |
|
adb9e8e…
|
drh
|
420 |
} while (strm.avail_in > 0); |
|
adb9e8e…
|
drh
|
421 |
|
|
adb9e8e…
|
drh
|
422 |
// Process input until end of file, invalid input, or i/o error. |
|
adb9e8e…
|
drh
|
423 |
} while (more); |
|
adb9e8e…
|
drh
|
424 |
|
|
adb9e8e…
|
drh
|
425 |
// Done with the inflate engine. |
|
adb9e8e…
|
drh
|
426 |
inflateEnd(&strm); |
|
adb9e8e…
|
drh
|
427 |
|
|
adb9e8e…
|
drh
|
428 |
// Verify the validity of the input. |
|
adb9e8e…
|
drh
|
429 |
if (state != BETWEEN) |
|
adb9e8e…
|
drh
|
430 |
BYE("input invalid: incomplete gzip stream"); |
|
adb9e8e…
|
drh
|
431 |
|
|
adb9e8e…
|
drh
|
432 |
// Write the remaining deflate stream bits, followed by a terminating |
|
adb9e8e…
|
drh
|
433 |
// deflate fixed block. |
|
adb9e8e…
|
drh
|
434 |
buf += (unsigned long)3 << num; |
|
adb9e8e…
|
drh
|
435 |
putc(buf, out); |
|
adb9e8e…
|
drh
|
436 |
putc(buf >> 8, out); |
|
adb9e8e…
|
drh
|
437 |
if (num > 6) |
|
adb9e8e…
|
drh
|
438 |
putc(0, out); |
|
adb9e8e…
|
drh
|
439 |
|
|
adb9e8e…
|
drh
|
440 |
// Write the gzip trailer, which is the CRC and the uncompressed length |
|
adb9e8e…
|
drh
|
441 |
// modulo 2^32, both in little-endian order. |
|
adb9e8e…
|
drh
|
442 |
putc(crc, out); |
|
adb9e8e…
|
drh
|
443 |
putc(crc >> 8, out); |
|
adb9e8e…
|
drh
|
444 |
putc(crc >> 16, out); |
|
adb9e8e…
|
drh
|
445 |
putc(crc >> 24, out); |
|
adb9e8e…
|
drh
|
446 |
putc(len, out); |
|
adb9e8e…
|
drh
|
447 |
putc(len >> 8, out); |
|
adb9e8e…
|
drh
|
448 |
putc(len >> 16, out); |
|
adb9e8e…
|
drh
|
449 |
putc(len >> 24, out); |
|
adb9e8e…
|
drh
|
450 |
fflush(out); |
|
adb9e8e…
|
drh
|
451 |
|
|
adb9e8e…
|
drh
|
452 |
// Check for any i/o errors. |
|
adb9e8e…
|
drh
|
453 |
if (ferror(in) || ferror(out)) |
|
adb9e8e…
|
drh
|
454 |
BYE("i/o error: %s", strerror(errno)); |
|
adb9e8e…
|
drh
|
455 |
|
|
adb9e8e…
|
drh
|
456 |
// All good! |
|
adb9e8e…
|
drh
|
457 |
*err = NULL; |
|
adb9e8e…
|
drh
|
458 |
return 0; |
|
adb9e8e…
|
drh
|
459 |
} |
|
adb9e8e…
|
drh
|
460 |
|
|
adb9e8e…
|
drh
|
461 |
// Normalize the gzip stream on stdin, writing the result to stdout. The exit
// status is 0 on success, or 1 if there was an error, in which case a message
// is written to stderr.
int main(void) {
    // Avoid end-of-line conversions on evil operating systems.
    SET_BINARY_MODE(stdin);
    SET_BINARY_MODE(stdout);

    // Normalize from stdin to stdout, returning 1 on error, 0 if ok.
    char *err = NULL;
    int ret = gzip_normalize(stdin, stdout, &err);
    if (ret)
        // err is NULL if the error message itself could not be allocated or
        // formatted. Passing NULL for %s would be undefined behavior.
        fprintf(stderr, "gznorm error: %s\n",
                err != NULL ? err : "out of memory");
    free(err);
    return ret;
}