Fossil SCM

fossil-scm / compat / zlib / examples / gzappend.c
Blame History Raw 505 lines
/* gzappend -- command to append to a gzip file

  Copyright (C) 2003, 2012 Mark Adler, all rights reserved
  version 1.2, 11 Oct 2012

  This software is provided 'as-is', without any express or implied
  warranty. In no event will the author be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Mark Adler    madler@alumni.caltech.edu
 */

/*
 * Change history:
 *
 * 1.0  19 Oct 2003     - First version
 * 1.1   4 Nov 2003     - Expand and clarify some comments and notes
 *                      - Add version and copyright to help
 *                      - Send help to stdout instead of stderr
 *                      - Add some preemptive typecasts
 *                      - Add L to constants in lseek() calls
 *                      - Remove some debugging information in error messages
 *                      - Use new data_type definition for zlib 1.2.1
 *                      - Simplify and unify file operations
 *                      - Finish off gzip file in gztack()
 *                      - Use deflatePrime() instead of adding empty blocks
 *                      - Keep gzip file clean on appended file read errors
 *                      - Use in-place rotate instead of auxiliary buffer
 *                        (Why you ask?  Because it was fun to write!)
 * 1.2  11 Oct 2012     - Fix for proper z_const usage
 *                      - Check for input buffer malloc failure
 */

/*
   gzappend takes a gzip file and appends to it, compressing files from the
   command line or data from stdin. The gzip file is written to directly, to
   avoid copying that file, in case it's large. Note that this results in the
   unfriendly behavior that if gzappend fails, the gzip file is corrupted.

   This program was written to illustrate the use of the new Z_BLOCK option of
   zlib 1.2.x's inflate() function. This option returns from inflate() at each
   block boundary to facilitate locating and modifying the last block bit at
   the start of the final deflate block. Also whether using Z_BLOCK or not,
   another required feature of zlib 1.2.x is that inflate() now provides the
   number of unused bits in the last input byte used. gzappend will not work
   with versions of zlib earlier than 1.2.1.

   gzappend first decompresses the gzip file internally, discarding all but
   the last 32K of uncompressed data, and noting the location of the last block
   bit and the number of unused bits in the last byte of the compressed data.
   The gzip trailer containing the CRC-32 and length of the uncompressed data
   is verified. This trailer will be later overwritten.

   Then the last block bit is cleared by seeking back in the file and rewriting
   the byte that contains it. Seeking forward, the last byte of the compressed
   data is saved along with the number of unused bits to initialize deflate.

   A deflate process is initialized, using the last 32K of the uncompressed
   data from the gzip file to initialize the dictionary. If the total
   uncompressed data was less than 32K, then all of it is used to initialize
   the dictionary. The deflate output bit buffer is also initialized with the
   last bits from the original deflate stream. From here on, the data to
   append is simply compressed using deflate, and written to the gzip file.
   When that is complete, the new CRC-32 and uncompressed length are written
   as the trailer of the gzip file.
 */

#include <stdio.h>
81
#include <stdlib.h>
82
#include <string.h>
83
#include <fcntl.h>
84
#include <unistd.h>
85
#include "zlib.h"
86
87
#define local static            /* marks file-private functions */
#define LGCHUNK 14              /* log2 of the file I/O buffer size */
#define CHUNK (1U << LGCHUNK)   /* file I/O buffer size in bytes (16K) */
#define DSIZE 32768U            /* sliding window / dictionary size (32K) */

/* print an error message and terminate with extreme prejudice -- writes
   "gzappend error: " followed by msg1 and msg2 (joined with no separator, so
   the caller supplies any needed space) and a newline to stderr, then exits
   with status 1; this function does not return */
static void bye(const char *msg1, const char *msg2)
{
    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
    exit(1);
}

/* return the greatest common divisor of a and b using Euclid's algorithm,
   modified to be fast when one argument much greater than the other, and
   coded to avoid unnecessary swapping -- returns the non-zero argument if
   the other is zero, and zero if both are zero */
static unsigned gcd(unsigned a, unsigned b)
{
    unsigned c;

    while (a && b) {
        if (a > b) {
            /* subtract the largest b * 2^k that does not exceed a; the
               doubling cannot overflow since it only happens when 2c <= a */
            c = b;
            while (a - c >= c)
                c <<= 1;
            a -= c;
        }
        else {
            c = a;
            while (b - c >= c)
                c <<= 1;
            b -= c;
        }
    }
    return a + b;               /* one of them is zero here */
}

/* rotate list[0..len-1] left by rot positions, in place -- rot is reduced
   modulo len, and lists shorter than two entries are left untouched */
static void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles;
    unsigned char *start, *last, *to, *from;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* pointer to last entry in list */
    last = list + (len - 1);

    /* do simple left shift by one */
    if (rot == 1) {
        tmp = *list;
        memmove(list, list + 1, len - 1);
        *last = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = *last;
        memmove(list + 1, list, len - 1);
        *list = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = list + cycles;   /* start index is arbitrary */
        tmp = *from;                    /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */

            /* go right rot positions, wrapping around the end of the list;
               the wrap is decided before moving so that the pointer never
               leaves the list (forming a pointer beyond one past the end
               is undefined behavior) */
            if ((unsigned)(last - from) < rot)
                from -= len - rot;      /* same as from + rot - len */
            else
                from += rot;
            if (from == start) break;   /* all but one shifted */
            *to = *from;                /* shift left */
        }
        *to = tmp;                      /* complete the circle */
    } while (--cycles);
}

/* structure for gzip file read operations */
170
typedef struct {
171
int fd; /* file descriptor */
172
int size; /* 1 << size is bytes in buf */
173
unsigned left; /* bytes available at next */
174
unsigned char *buf; /* buffer */
175
z_const unsigned char *next; /* next byte in buffer */
176
char *name; /* file name for error messages */
177
} file;
178
179
/* reload buffer */
180
local int readin(file *in)
181
{
182
int len;
183
184
len = read(in->fd, in->buf, 1 << in->size);
185
if (len == -1) bye("error reading ", in->name);
186
in->left = (unsigned)len;
187
in->next = in->buf;
188
return len;
189
}
190
191
/* read from file in, exit if end-of-file */
192
local int readmore(file *in)
193
{
194
if (readin(in) == 0) bye("unexpected end of ", in->name);
195
return 0;
196
}
197
198
/* read1(in) -- return the next byte of file *in, reloading the buffer with
   readmore() first if it is empty (which exits at end of file); the argument
   is evaluated more than once, so it must not have side effects */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)

/* skip over n bytes of in */
202
local void skip(file *in, unsigned n)
203
{
204
unsigned bypass;
205
206
if (n > in->left) {
207
n -= in->left;
208
bypass = n & ~((1U << in->size) - 1);
209
if (bypass) {
210
if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
211
bye("seeking ", in->name);
212
n -= bypass;
213
}
214
readmore(in);
215
if (n > in->left)
216
bye("unexpected end of ", in->name);
217
}
218
in->left -= n;
219
in->next += n;
220
}
221
222
/* read a four-byte unsigned integer, little-endian, from in */
223
unsigned long read4(file *in)
224
{
225
unsigned long val;
226
227
val = read1(in);
228
val += (unsigned)read1(in) << 8;
229
val += (unsigned long)read1(in) << 16;
230
val += (unsigned long)read1(in) << 24;
231
return val;
232
}
233
234
/* skip over gzip header */
235
local void gzheader(file *in)
236
{
237
int flags;
238
unsigned n;
239
240
if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
241
if (read1(in) != 8) bye("unknown compression method in", in->name);
242
flags = read1(in);
243
if (flags & 0xe0) bye("unknown header flags set in", in->name);
244
skip(in, 6);
245
if (flags & 4) {
246
n = read1(in);
247
n += (unsigned)(read1(in)) << 8;
248
skip(in, n);
249
}
250
if (flags & 8) while (read1(in) != 0) ;
251
if (flags & 16) while (read1(in) != 0) ;
252
if (flags & 2) skip(in, 2);
253
}
254
255
/* decompress gzip file "name", return strm with a deflate stream ready to
256
continue compression of the data in the gzip file, and return a file
257
descriptor pointing to where to write the compressed data -- the deflate
258
stream is initialized to compress using level "level" */
259
local int gzscan(char *name, z_stream *strm, int level)
260
{
261
int ret, lastbit, left, full;
262
unsigned have;
263
unsigned long crc, tot;
264
unsigned char *window;
265
off_t lastoff, end;
266
file gz;
267
268
/* open gzip file */
269
gz.name = name;
270
gz.fd = open(name, O_RDWR, 0);
271
if (gz.fd == -1) bye("cannot open ", name);
272
gz.buf = malloc(CHUNK);
273
if (gz.buf == NULL) bye("out of memory", "");
274
gz.size = LGCHUNK;
275
gz.left = 0;
276
277
/* skip gzip header */
278
gzheader(&gz);
279
280
/* prepare to decompress */
281
window = malloc(DSIZE);
282
if (window == NULL) bye("out of memory", "");
283
strm->zalloc = Z_NULL;
284
strm->zfree = Z_NULL;
285
strm->opaque = Z_NULL;
286
ret = inflateInit2(strm, -15);
287
if (ret != Z_OK) bye("out of memory", " or library mismatch");
288
289
/* decompress the deflate stream, saving append information */
290
lastbit = 0;
291
lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
292
left = 0;
293
strm->avail_in = gz.left;
294
strm->next_in = gz.next;
295
crc = crc32(0L, Z_NULL, 0);
296
have = full = 0;
297
do {
298
/* if needed, get more input */
299
if (strm->avail_in == 0) {
300
readmore(&gz);
301
strm->avail_in = gz.left;
302
strm->next_in = gz.next;
303
}
304
305
/* set up output to next available section of sliding window */
306
strm->avail_out = DSIZE - have;
307
strm->next_out = window + have;
308
309
/* inflate and check for errors */
310
ret = inflate(strm, Z_BLOCK);
311
if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
312
if (ret == Z_MEM_ERROR) bye("out of memory", "");
313
if (ret == Z_DATA_ERROR)
314
bye("invalid compressed data--format violated in", name);
315
316
/* update crc and sliding window pointer */
317
crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
318
if (strm->avail_out)
319
have = DSIZE - strm->avail_out;
320
else {
321
have = 0;
322
full = 1;
323
}
324
325
/* process end of block */
326
if (strm->data_type & 128) {
327
if (strm->data_type & 64)
328
left = strm->data_type & 0x1f;
329
else {
330
lastbit = strm->data_type & 0x1f;
331
lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
332
}
333
}
334
} while (ret != Z_STREAM_END);
335
inflateEnd(strm);
336
gz.left = strm->avail_in;
337
gz.next = strm->next_in;
338
339
/* save the location of the end of the compressed data */
340
end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
341
342
/* check gzip trailer and save total for deflate */
343
if (crc != read4(&gz))
344
bye("invalid compressed data--crc mismatch in ", name);
345
tot = strm->total_out;
346
if ((tot & 0xffffffffUL) != read4(&gz))
347
bye("invalid compressed data--length mismatch in", name);
348
349
/* if not at end of file, warn */
350
if (gz.left || readin(&gz))
351
fprintf(stderr,
352
"gzappend warning: junk at end of gzip file overwritten\n");
353
354
/* clear last block bit */
355
lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
356
if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
357
*gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
358
lseek(gz.fd, -1L, SEEK_CUR);
359
if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
360
361
/* if window wrapped, build dictionary from window by rotating */
362
if (full) {
363
rotate(window, DSIZE, have);
364
have = DSIZE;
365
}
366
367
/* set up deflate stream with window, crc, total_in, and leftover bits */
368
ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
369
if (ret != Z_OK) bye("out of memory", "");
370
deflateSetDictionary(strm, window, have);
371
strm->adler = crc;
372
strm->total_in = tot;
373
if (left) {
374
lseek(gz.fd, --end, SEEK_SET);
375
if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
376
deflatePrime(strm, 8 - left, *gz.buf);
377
}
378
lseek(gz.fd, end, SEEK_SET);
379
380
/* clean up and return */
381
free(window);
382
free(gz.buf);
383
return gz.fd;
384
}
385
386
/* append file "name" to gzip file gd using deflate stream strm -- if last
387
is true, then finish off the deflate stream at the end */
388
local void gztack(char *name, int gd, z_stream *strm, int last)
389
{
390
int fd, len, ret;
391
unsigned left;
392
unsigned char *in, *out;
393
394
/* open file to compress and append */
395
fd = 0;
396
if (name != NULL) {
397
fd = open(name, O_RDONLY, 0);
398
if (fd == -1)
399
fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
400
name);
401
}
402
403
/* allocate buffers */
404
in = malloc(CHUNK);
405
out = malloc(CHUNK);
406
if (in == NULL || out == NULL) bye("out of memory", "");
407
408
/* compress input file and append to gzip file */
409
do {
410
/* get more input */
411
len = read(fd, in, CHUNK);
412
if (len == -1) {
413
fprintf(stderr,
414
"gzappend warning: error reading %s, skipping rest ...\n",
415
name);
416
len = 0;
417
}
418
strm->avail_in = (unsigned)len;
419
strm->next_in = in;
420
if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
421
422
/* compress and write all available output */
423
do {
424
strm->avail_out = CHUNK;
425
strm->next_out = out;
426
ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
427
left = CHUNK - strm->avail_out;
428
while (left) {
429
len = write(gd, out + CHUNK - strm->avail_out - left, left);
430
if (len == -1) bye("writing gzip file", "");
431
left -= (unsigned)len;
432
}
433
} while (strm->avail_out == 0 && ret != Z_STREAM_END);
434
} while (len != 0);
435
436
/* write trailer after last entry */
437
if (last) {
438
deflateEnd(strm);
439
out[0] = (unsigned char)(strm->adler);
440
out[1] = (unsigned char)(strm->adler >> 8);
441
out[2] = (unsigned char)(strm->adler >> 16);
442
out[3] = (unsigned char)(strm->adler >> 24);
443
out[4] = (unsigned char)(strm->total_in);
444
out[5] = (unsigned char)(strm->total_in >> 8);
445
out[6] = (unsigned char)(strm->total_in >> 16);
446
out[7] = (unsigned char)(strm->total_in >> 24);
447
len = 8;
448
do {
449
ret = write(gd, out + 8 - len, len);
450
if (ret == -1) bye("writing gzip file", "");
451
len -= ret;
452
} while (len);
453
close(gd);
454
}
455
456
/* clean up and return */
457
free(out);
458
free(in);
459
if (fd > 0) close(fd);
460
}
461
462
/* process the compression level option if present, scan the gzip file, and
463
append the specified files, or append the data from stdin if no other file
464
names are provided on the command line -- the gzip file must be writable
465
and seekable */
466
int main(int argc, char **argv)
467
{
468
int gd, level;
469
z_stream strm;
470
471
/* ignore command name */
472
argc--; argv++;
473
474
/* provide usage if no arguments */
475
if (*argv == NULL) {
476
printf(
477
"gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n"
478
);
479
printf(
480
"usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
481
return 0;
482
}
483
484
/* set compression level */
485
level = Z_DEFAULT_COMPRESSION;
486
if (argv[0][0] == '-') {
487
if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
488
bye("invalid compression level", "");
489
level = argv[0][1] - '0';
490
if (*++argv == NULL) bye("no gzip file name after options", "");
491
}
492
493
/* prepare to append to gzip file */
494
gd = gzscan(*argv++, &strm, level);
495
496
/* append files on command line, or from stdin if none */
497
if (*argv == NULL)
498
gztack(NULL, gd, &strm, 1);
499
else
500
do {
501
gztack(*argv, gd, &strm, argv[1] == NULL);
502
} while (*++argv != NULL);
503
return 0;
504
}
505

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button