Fossil SCM

fossil-scm / compat / zlib / examples / gzjoin.c
Blame History Raw 450 lines
1
/* gzjoin -- command to join gzip files into one gzip file
2
3
Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
4
version 1.2, 14 Aug 2012
5
6
This software is provided 'as-is', without any express or implied
7
warranty. In no event will the author be held liable for any damages
8
arising from the use of this software.
9
10
Permission is granted to anyone to use this software for any purpose,
11
including commercial applications, and to alter it and redistribute it
12
freely, subject to the following restrictions:
13
14
1. The origin of this software must not be misrepresented; you must not
15
claim that you wrote the original software. If you use this software
16
in a product, an acknowledgment in the product documentation would be
17
appreciated but is not required.
18
2. Altered source versions must be plainly marked as such, and must not be
19
misrepresented as being the original software.
20
3. This notice may not be removed or altered from any source distribution.
21
22
Mark Adler [email protected]
23
*/
24
25
/*
26
* Change history:
27
*
28
* 1.0 11 Dec 2004 - First version
29
* 1.1 12 Jun 2005 - Changed ssize_t to long for portability
30
* 1.2 14 Aug 2012 - Clean up for z_const usage
31
*/
32
33
/*
   gzjoin takes one or more gzip files on the command line and writes out a
   single gzip file that will uncompress to the concatenation of the
   uncompressed data from the individual gzip files.  gzjoin does this without
   having to recompress any of the data and without having to calculate a new
   crc32 for the concatenated uncompressed data.  gzjoin does however have to
   decompress all of the input data in order to find the bits in the compressed
   data that need to be modified to concatenate the streams.

   gzjoin does not do an integrity check on the input gzip files other than
   checking the gzip header and decompressing the compressed data.  They are
   otherwise assumed to be complete and correct.

   Each joint between gzip files removes at least 18 bytes of previous trailer
   and subsequent header, and inserts an average of about three bytes into the
   compressed data in order to connect the streams.  The output gzip file
   has a minimal ten-byte gzip header with no file name or modification time.

   This program was written to illustrate the use of the Z_BLOCK option of
   inflate() and the crc32_combine() function.  gzjoin will not compile with
   versions of zlib earlier than 1.2.3.
 */
55
56
#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
57
#include <stdlib.h> /* exit(), malloc(), free() */
58
#include <fcntl.h> /* open() */
59
#include <unistd.h> /* close(), read(), lseek() */
60
#include "zlib.h"
61
/* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
62
63
#define local static
64
65
/* exit with an error (return a value to allow use in an expression) */
66
local int bail(char *why1, char *why2)
67
{
68
fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
69
exit(1);
70
return 0;
71
}
72
73
/* -- simple buffered file input with access to the buffer -- */
74
75
#define CHUNK 32768 /* must be a power of two and fit in unsigned */
76
77
/* State of one buffered input file.  The buffer is exposed so that callers can
   read, and even patch, the bytes that have been loaded. */
typedef struct {
    /* file name, used when reporting errors */
    char *name;

    /* descriptor the buffer is filled from */
    int fd;

    /* number of unread bytes available starting at next */
    unsigned left;

    /* the next unread byte */
    unsigned char *next;

    /* allocated buffer of CHUNK bytes */
    unsigned char *buf;
} bin;
85
86
/* close a buffered file and free allocated memory */
87
local void bclose(bin *in)
88
{
89
if (in != NULL) {
90
if (in->fd != -1)
91
close(in->fd);
92
if (in->buf != NULL)
93
free(in->buf);
94
free(in);
95
}
96
}
97
98
/* open a buffered file for input, return a pointer to type bin, or NULL on
99
failure */
100
local bin *bopen(char *name)
101
{
102
bin *in;
103
104
in = malloc(sizeof(bin));
105
if (in == NULL)
106
return NULL;
107
in->buf = malloc(CHUNK);
108
in->fd = open(name, O_RDONLY, 0);
109
if (in->buf == NULL || in->fd == -1) {
110
bclose(in);
111
return NULL;
112
}
113
in->left = 0;
114
in->next = in->buf;
115
in->name = name;
116
return in;
117
}
118
119
/* load buffer from file, return -1 on read error, 0 or 1 on success, with
120
1 indicating that end-of-file was reached */
121
local int bload(bin *in)
122
{
123
long len;
124
125
if (in == NULL)
126
return -1;
127
if (in->left != 0)
128
return 0;
129
in->next = in->buf;
130
do {
131
len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
132
if (len < 0)
133
return -1;
134
in->left += (unsigned)len;
135
} while (len != 0 && in->left < CHUNK);
136
return len == 0 ? 1 : 0;
137
}
138
139
/* get a byte from the file, bail if end of file -- the buffer is reloaded
   first when it is empty; the argument is parenthesized in the expansion so
   any pointer expression can be passed, but it is evaluated more than once
   and so must not have side effects */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                      bail("unexpected end of file on ", (in)->name))
143
144
/* get a four-byte little-endian unsigned integer from file */
145
local unsigned long bget4(bin *in)
146
{
147
unsigned long val;
148
149
val = bget(in);
150
val += (unsigned long)(bget(in)) << 8;
151
val += (unsigned long)(bget(in)) << 16;
152
val += (unsigned long)(bget(in)) << 24;
153
return val;
154
}
155
156
/* skip bytes in file */
157
local void bskip(bin *in, unsigned skip)
158
{
159
/* check pointer */
160
if (in == NULL)
161
return;
162
163
/* easy case -- skip bytes in buffer */
164
if (skip <= in->left) {
165
in->left -= skip;
166
in->next += skip;
167
return;
168
}
169
170
/* skip what's in buffer, discard buffer contents */
171
skip -= in->left;
172
in->left = 0;
173
174
/* seek past multiples of CHUNK bytes */
175
if (skip > CHUNK) {
176
unsigned left;
177
178
left = skip & (CHUNK - 1);
179
if (left == 0) {
180
/* exact number of chunks: seek all the way minus one byte to check
181
for end-of-file with a read */
182
lseek(in->fd, skip - 1, SEEK_CUR);
183
if (read(in->fd, in->buf, 1) != 1)
184
bail("unexpected end of file on ", in->name);
185
return;
186
}
187
188
/* skip the integral chunks, update skip with remainder */
189
lseek(in->fd, skip - left, SEEK_CUR);
190
skip = left;
191
}
192
193
/* read more input and skip remainder */
194
bload(in);
195
if (skip > in->left)
196
bail("unexpected end of file on ", in->name);
197
in->left -= skip;
198
in->next += skip;
199
}
200
201
/* -- end of buffered input functions -- */
202
203
/* skip the gzip header from file in */
204
local void gzhead(bin *in)
205
{
206
int flags;
207
208
/* verify gzip magic header and compression method */
209
if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
210
bail(in->name, " is not a valid gzip file");
211
212
/* get and verify flags */
213
flags = bget(in);
214
if ((flags & 0xe0) != 0)
215
bail("unknown reserved bits set in ", in->name);
216
217
/* skip modification time, extra flags, and os */
218
bskip(in, 6);
219
220
/* skip extra field if present */
221
if (flags & 4) {
222
unsigned len;
223
224
len = bget(in);
225
len += (unsigned)(bget(in)) << 8;
226
bskip(in, len);
227
}
228
229
/* skip file name if present */
230
if (flags & 8)
231
while (bget(in) != 0)
232
;
233
234
/* skip comment if present */
235
if (flags & 16)
236
while (bget(in) != 0)
237
;
238
239
/* skip header crc if present */
240
if (flags & 2)
241
bskip(in, 2);
242
}
243
244
/* write a four-byte little-endian unsigned integer to out */
245
local void put4(unsigned long val, FILE *out)
246
{
247
putc(val & 0xff, out);
248
putc((val >> 8) & 0xff, out);
249
putc((val >> 16) & 0xff, out);
250
putc((val >> 24) & 0xff, out);
251
}
252
253
/* Load up zlib stream from buffered input, bail if end of file */
254
local void zpull(z_streamp strm, bin *in)
255
{
256
if (in->left == 0)
257
bload(in);
258
if (in->left == 0)
259
bail("unexpected end of file on ", in->name);
260
strm->avail_in = in->left;
261
strm->next_in = in->next;
262
}
263
264
/* Write header for gzip file to out and initialize trailer. */
265
local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
266
{
267
fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
268
*crc = crc32(0L, Z_NULL, 0);
269
*tot = 0;
270
}
271
272
/* Copy the compressed data from name, zeroing the last block bit of the last
273
block if clr is true, and adding empty blocks as needed to get to a byte
274
boundary. If clr is false, then the last block becomes the last block of
275
the output, and the gzip trailer is written. crc and tot maintains the
276
crc and length (modulo 2^32) of the output for the trailer. The resulting
277
gzip file is written to out. gzinit() must be called before the first call
278
of gzcopy() to write the gzip header and to initialize crc and tot. */
279
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
280
FILE *out)
281
{
282
int ret; /* return value from zlib functions */
283
int pos; /* where the "last block" bit is in byte */
284
int last; /* true if processing the last block */
285
bin *in; /* buffered input file */
286
unsigned char *start; /* start of compressed data in buffer */
287
unsigned char *junk; /* buffer for uncompressed data -- discarded */
288
z_off_t len; /* length of uncompressed data (support > 4 GB) */
289
z_stream strm; /* zlib inflate stream */
290
291
/* open gzip file and skip header */
292
in = bopen(name);
293
if (in == NULL)
294
bail("could not open ", name);
295
gzhead(in);
296
297
/* allocate buffer for uncompressed data and initialize raw inflate
298
stream */
299
junk = malloc(CHUNK);
300
strm.zalloc = Z_NULL;
301
strm.zfree = Z_NULL;
302
strm.opaque = Z_NULL;
303
strm.avail_in = 0;
304
strm.next_in = Z_NULL;
305
ret = inflateInit2(&strm, -15);
306
if (junk == NULL || ret != Z_OK)
307
bail("out of memory", "");
308
309
/* inflate and copy compressed data, clear last-block bit if requested */
310
len = 0;
311
zpull(&strm, in);
312
start = in->next;
313
last = start[0] & 1;
314
if (last && clr)
315
start[0] &= ~1;
316
strm.avail_out = 0;
317
for (;;) {
318
/* if input used and output done, write used input and get more */
319
if (strm.avail_in == 0 && strm.avail_out != 0) {
320
fwrite(start, 1, strm.next_in - start, out);
321
start = in->buf;
322
in->left = 0;
323
zpull(&strm, in);
324
}
325
326
/* decompress -- return early when end-of-block reached */
327
strm.avail_out = CHUNK;
328
strm.next_out = junk;
329
ret = inflate(&strm, Z_BLOCK);
330
switch (ret) {
331
case Z_MEM_ERROR:
332
bail("out of memory", "");
333
case Z_DATA_ERROR:
334
bail("invalid compressed data in ", in->name);
335
}
336
337
/* update length of uncompressed data */
338
len += CHUNK - strm.avail_out;
339
340
/* check for block boundary (only get this when block copied out) */
341
if (strm.data_type & 128) {
342
/* if that was the last block, then done */
343
if (last)
344
break;
345
346
/* number of unused bits in last byte */
347
pos = strm.data_type & 7;
348
349
/* find the next last-block bit */
350
if (pos != 0) {
351
/* next last-block bit is in last used byte */
352
pos = 0x100 >> pos;
353
last = strm.next_in[-1] & pos;
354
if (last && clr)
355
in->buf[strm.next_in - in->buf - 1] &= ~pos;
356
}
357
else {
358
/* next last-block bit is in next unused byte */
359
if (strm.avail_in == 0) {
360
/* don't have that byte yet -- get it */
361
fwrite(start, 1, strm.next_in - start, out);
362
start = in->buf;
363
in->left = 0;
364
zpull(&strm, in);
365
}
366
last = strm.next_in[0] & 1;
367
if (last && clr)
368
in->buf[strm.next_in - in->buf] &= ~1;
369
}
370
}
371
}
372
373
/* update buffer with unused input */
374
in->left = strm.avail_in;
375
in->next = in->buf + (strm.next_in - in->buf);
376
377
/* copy used input, write empty blocks to get to byte boundary */
378
pos = strm.data_type & 7;
379
fwrite(start, 1, in->next - start - 1, out);
380
last = in->next[-1];
381
if (pos == 0 || !clr)
382
/* already at byte boundary, or last file: write last byte */
383
putc(last, out);
384
else {
385
/* append empty blocks to last byte */
386
last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */
387
if (pos & 1) {
388
/* odd -- append an empty stored block */
389
putc(last, out);
390
if (pos == 1)
391
putc(0, out); /* two more bits in block header */
392
fwrite("\0\0\xff\xff", 1, 4, out);
393
}
394
else {
395
/* even -- append 1, 2, or 3 empty fixed blocks */
396
switch (pos) {
397
case 6:
398
putc(last | 8, out);
399
last = 0;
400
case 4:
401
putc(last | 0x20, out);
402
last = 0;
403
case 2:
404
putc(last | 0x80, out);
405
putc(0, out);
406
}
407
}
408
}
409
410
/* update crc and tot */
411
*crc = crc32_combine(*crc, bget4(in), len);
412
*tot += (unsigned long)len;
413
414
/* clean up */
415
inflateEnd(&strm);
416
free(junk);
417
bclose(in);
418
419
/* write trailer if this is the last gzip file */
420
if (!clr) {
421
put4(*crc, out);
422
put4(*tot, out);
423
}
424
}
425
426
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.  With no file arguments a usage message is
   printed on stderr and 0 is returned.  Returns 0 on success; any failure,
   including a failure to write the output, ends the program with status 1
   through bail(). */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* show usage if no arguments (also covers an empty argument vector, where
       there is not even a command name to skip) */
    if (argc < 2) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* skip command name */
    argc--;
    argv++;

    /* join gzip files on command line and write to stdout; argc, already
       decremented when passed, is the number of files still to come, so it is
       zero (clr false) only for the last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* none of the individual writes are checked, so push out any buffered
       data and look at the stream's error state before reporting success */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}
450

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button