Fossil SCM

fossil-scm / compat / zlib / gzread.c
Blame History Raw 669 lines
1
/* gzread.c -- zlib functions for reading gzip files
2
* Copyright (C) 2004-2026 Mark Adler
3
* For conditions of distribution and use, see copyright notice in zlib.h
4
*/
5
6
#include "gzguts.h"
7
8
/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
9
state->fd, and update state->eof, state->err, and state->msg as appropriate.
10
This function needs to loop on read(), since read() is not guaranteed to
11
read the number of bytes requested, depending on the type of descriptor. It
12
also needs to loop to manage the fact that read() returns an int. If the
13
descriptor is non-blocking and read() returns with no data in order to avoid
14
blocking, then gz_load() will return 0 if some data has been read, or -1 if
15
no data has been read. Either way, state->again is set true to indicate a
16
non-blocking event. If errno is non-zero on return, then there was an error
17
signaled from read(). *have is set to the number of bytes read. */
18
local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
19
unsigned *have) {
20
int ret;
21
unsigned get, max = ((unsigned)-1 >> 2) + 1;
22
23
state->again = 0;
24
errno = 0;
25
*have = 0;
26
do {
27
get = len - *have;
28
if (get > max)
29
get = max;
30
ret = (int)read(state->fd, buf + *have, get);
31
if (ret <= 0)
32
break;
33
*have += (unsigned)ret;
34
} while (*have < len);
35
if (ret < 0) {
36
if (errno == EAGAIN || errno == EWOULDBLOCK) {
37
state->again = 1;
38
if (*have != 0)
39
return 0;
40
}
41
gz_error(state, Z_ERRNO, zstrerror());
42
return -1;
43
}
44
if (ret == 0)
45
state->eof = 1;
46
return 0;
47
}
48
49
/* Load up input buffer and set eof flag if last data loaded -- return -1 on
50
error, 0 otherwise. Note that the eof flag is set when the end of the input
51
file is reached, even though there may be unused data in the buffer. Once
52
that data has been used, no more attempts will be made to read the file.
53
If strm->avail_in != 0, then the current data is moved to the beginning of
54
the input buffer, and then the remainder of the buffer is loaded with the
55
available data from the input file. */
56
local int gz_avail(gz_statep state) {
57
unsigned got;
58
z_streamp strm = &(state->strm);
59
60
if (state->err != Z_OK && state->err != Z_BUF_ERROR)
61
return -1;
62
if (state->eof == 0) {
63
if (strm->avail_in) { /* copy what's there to the start */
64
unsigned char *p = state->in;
65
unsigned const char *q = strm->next_in;
66
67
if (q != p) {
68
unsigned n = strm->avail_in;
69
70
do {
71
*p++ = *q++;
72
} while (--n);
73
}
74
}
75
if (gz_load(state, state->in + strm->avail_in,
76
state->size - strm->avail_in, &got) == -1)
77
return -1;
78
strm->avail_in += got;
79
strm->next_in = state->in;
80
}
81
return 0;
82
}
83
84
/* Look for gzip header, set up for inflate or copy. state->x.have must be 0.
85
If this is the first time in, allocate required memory. state->how will be
86
left unchanged if there is no more input data available, will be set to COPY
87
if there is no gzip header and direct copying will be performed, or it will
88
be set to GZIP for decompression. If direct copying, then leftover input
89
data from the input buffer will be copied to the output buffer. In that
90
case, all further file reads will be directly to either the output buffer or
91
a user buffer. If decompressing, the inflate state will be initialized.
92
gz_look() will return 0 on success or -1 on failure. */
93
local int gz_look(gz_statep state) {
94
z_streamp strm = &(state->strm);
95
96
/* allocate read buffers and inflate memory */
97
if (state->size == 0) {
98
/* allocate buffers */
99
state->in = (unsigned char *)malloc(state->want);
100
state->out = (unsigned char *)malloc(state->want << 1);
101
if (state->in == NULL || state->out == NULL) {
102
free(state->out);
103
free(state->in);
104
gz_error(state, Z_MEM_ERROR, "out of memory");
105
return -1;
106
}
107
state->size = state->want;
108
109
/* allocate inflate memory */
110
state->strm.zalloc = Z_NULL;
111
state->strm.zfree = Z_NULL;
112
state->strm.opaque = Z_NULL;
113
state->strm.avail_in = 0;
114
state->strm.next_in = Z_NULL;
115
if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */
116
free(state->out);
117
free(state->in);
118
state->size = 0;
119
gz_error(state, Z_MEM_ERROR, "out of memory");
120
return -1;
121
}
122
}
123
124
/* if transparent reading is disabled, which would only be at the start, or
125
if we're looking for a gzip member after the first one, which is not at
126
the start, then proceed directly to look for a gzip member next */
127
if (state->direct == -1 || state->junk == 0) {
128
inflateReset(strm);
129
state->how = GZIP;
130
state->junk = state->junk != -1;
131
state->direct = 0;
132
return 0;
133
}
134
135
/* otherwise we're at the start with auto-detect -- we check to see if the
136
first four bytes could be gzip header in order to decide whether or not
137
this will be a transparent read */
138
139
/* load any header bytes into the input buffer -- if the input is empty,
140
then it's not an error as this is a transparent read of zero bytes */
141
if (gz_avail(state) == -1)
142
return -1;
143
if (strm->avail_in == 0 || (state->again && strm->avail_in < 4))
144
/* if non-blocking input stalled before getting four bytes, then
145
return and wait until a later call has accumulated enough */
146
return 0;
147
148
/* see if this is (likely) gzip input -- if the first four bytes are
149
consistent with a gzip header, then go look for the first gzip member,
150
otherwise proceed to copy the input transparently */
151
if (strm->avail_in > 3 &&
152
strm->next_in[0] == 31 && strm->next_in[1] == 139 &&
153
strm->next_in[2] == 8 && strm->next_in[3] < 32) {
154
inflateReset(strm);
155
state->how = GZIP;
156
state->junk = 1;
157
state->direct = 0;
158
return 0;
159
}
160
161
/* doing raw i/o: copy any leftover input to output -- this assumes that
162
the output buffer is larger than the input buffer, which also assures
163
space for gzungetc() */
164
state->x.next = state->out;
165
memcpy(state->x.next, strm->next_in, strm->avail_in);
166
state->x.have = strm->avail_in;
167
strm->avail_in = 0;
168
state->how = COPY;
169
return 0;
170
}
171
172
/* Decompress from input to the provided next_out and avail_out in the state.
173
On return, state->x.have and state->x.next point to the just decompressed
174
data. If the gzip stream completes, state->how is reset to LOOK to look for
175
the next gzip stream or raw data, once state->x.have is depleted. Returns 0
176
on success, -1 on failure. If EOF is reached when looking for more input to
177
complete the gzip member, then an unexpected end of file error is raised.
178
If there is no more input, but state->again is true, then EOF has not been
179
reached, and no error is raised. */
180
local int gz_decomp(gz_statep state) {
181
int ret = Z_OK;
182
unsigned had;
183
z_streamp strm = &(state->strm);
184
185
/* fill output buffer up to end of deflate stream */
186
had = strm->avail_out;
187
do {
188
/* get more input for inflate() */
189
if (strm->avail_in == 0 && gz_avail(state) == -1) {
190
ret = state->err;
191
break;
192
}
193
if (strm->avail_in == 0) {
194
if (!state->again)
195
gz_error(state, Z_BUF_ERROR, "unexpected end of file");
196
break;
197
}
198
199
/* decompress and handle errors */
200
ret = inflate(strm, Z_NO_FLUSH);
201
if (strm->avail_out < had)
202
/* any decompressed data marks this as a real gzip stream */
203
state->junk = 0;
204
if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
205
gz_error(state, Z_STREAM_ERROR,
206
"internal error: inflate stream corrupt");
207
break;
208
}
209
if (ret == Z_MEM_ERROR) {
210
gz_error(state, Z_MEM_ERROR, "out of memory");
211
break;
212
}
213
if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
214
if (state->junk == 1) { /* trailing garbage is ok */
215
strm->avail_in = 0;
216
state->eof = 1;
217
state->how = LOOK;
218
ret = Z_OK;
219
break;
220
}
221
gz_error(state, Z_DATA_ERROR,
222
strm->msg == NULL ? "compressed data error" : strm->msg);
223
break;
224
}
225
} while (strm->avail_out && ret != Z_STREAM_END);
226
227
/* update available output */
228
state->x.have = had - strm->avail_out;
229
state->x.next = strm->next_out - state->x.have;
230
231
/* if the gzip stream completed successfully, look for another */
232
if (ret == Z_STREAM_END) {
233
state->junk = 0;
234
state->how = LOOK;
235
return 0;
236
}
237
238
/* return decompression status */
239
return ret != Z_OK ? -1 : 0;
240
}
241
242
/* Fetch data and put it in the output buffer. Assumes state->x.have is 0.
243
Data is either copied from the input file or decompressed from the input
244
file depending on state->how. If state->how is LOOK, then a gzip header is
245
looked for to determine whether to copy or decompress. Returns -1 on error,
246
otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
247
end of the input file has been reached and all data has been processed. */
248
local int gz_fetch(gz_statep state) {
249
z_streamp strm = &(state->strm);
250
251
do {
252
switch(state->how) {
253
case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */
254
if (gz_look(state) == -1)
255
return -1;
256
if (state->how == LOOK)
257
return 0;
258
break;
259
case COPY: /* -> COPY */
260
if (gz_load(state, state->out, state->size << 1, &(state->x.have))
261
== -1)
262
return -1;
263
state->x.next = state->out;
264
return 0;
265
case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */
266
strm->avail_out = state->size << 1;
267
strm->next_out = state->out;
268
if (gz_decomp(state) == -1)
269
return -1;
270
break;
271
default:
272
gz_error(state, Z_STREAM_ERROR, "state corrupt");
273
return -1;
274
}
275
} while (state->x.have == 0 && (!state->eof || strm->avail_in));
276
return 0;
277
}
278
279
/* Skip state->skip (> 0) uncompressed bytes of output. Return -1 on error, 0
280
on success. */
281
local int gz_skip(gz_statep state) {
282
unsigned n;
283
284
/* skip over len bytes or reach end-of-file, whichever comes first */
285
do {
286
/* skip over whatever is in output buffer */
287
if (state->x.have) {
288
n = GT_OFF(state->x.have) ||
289
(z_off64_t)state->x.have > state->skip ?
290
(unsigned)state->skip : state->x.have;
291
state->x.have -= n;
292
state->x.next += n;
293
state->x.pos += n;
294
state->skip -= n;
295
}
296
297
/* output buffer empty -- return if we're at the end of the input */
298
else if (state->eof && state->strm.avail_in == 0)
299
break;
300
301
/* need more data to skip -- load up output buffer */
302
else {
303
/* get more output, looking for header if required */
304
if (gz_fetch(state) == -1)
305
return -1;
306
}
307
} while (state->skip);
308
return 0;
309
}
310
311
/* Read len bytes into buf from file, or less than len up to the end of the
312
input. Return the number of bytes read. If zero is returned, either the end
313
of file was reached, or there was an error. state->err must be consulted in
314
that case to determine which. If there was an error, but some uncompressed
315
bytes were read before the error, then that count is returned. The error is
316
still recorded, and so is deferred until the next call. */
317
local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
318
z_size_t got;
319
unsigned n;
320
int err;
321
322
/* if len is zero, avoid unnecessary operations */
323
if (len == 0)
324
return 0;
325
326
/* process a skip request */
327
if (state->skip && gz_skip(state) == -1)
328
return 0;
329
330
/* get len bytes to buf, or less than len if at the end */
331
got = 0;
332
err = 0;
333
do {
334
/* set n to the maximum amount of len that fits in an unsigned int */
335
n = (unsigned)-1;
336
if (n > len)
337
n = (unsigned)len;
338
339
/* first just try copying data from the output buffer */
340
if (state->x.have) {
341
if (state->x.have < n)
342
n = state->x.have;
343
memcpy(buf, state->x.next, n);
344
state->x.next += n;
345
state->x.have -= n;
346
if (state->err != Z_OK)
347
/* caught deferred error from gz_fetch() */
348
err = -1;
349
}
350
351
/* output buffer empty -- return if we're at the end of the input */
352
else if (state->eof && state->strm.avail_in == 0)
353
break;
354
355
/* need output data -- for small len or new stream load up our output
356
buffer, so that gzgetc() can be fast */
357
else if (state->how == LOOK || n < (state->size << 1)) {
358
/* get more output, looking for header if required */
359
if (gz_fetch(state) == -1 && state->x.have == 0)
360
/* if state->x.have != 0, error will be caught after copy */
361
err = -1;
362
continue; /* no progress yet -- go back to copy above */
363
/* the copy above assures that we will leave with space in the
364
output buffer, allowing at least one gzungetc() to succeed */
365
}
366
367
/* large len -- read directly into user buffer */
368
else if (state->how == COPY) /* read directly */
369
err = gz_load(state, (unsigned char *)buf, n, &n);
370
371
/* large len -- decompress directly into user buffer */
372
else { /* state->how == GZIP */
373
state->strm.avail_out = n;
374
state->strm.next_out = (unsigned char *)buf;
375
err = gz_decomp(state);
376
n = state->x.have;
377
state->x.have = 0;
378
}
379
380
/* update progress */
381
len -= n;
382
buf = (char *)buf + n;
383
got += n;
384
state->x.pos += n;
385
} while (len && !err);
386
387
/* note read past eof */
388
if (len && state->eof)
389
state->past = 1;
390
391
/* return number of bytes read into user buffer */
392
return got;
393
}
394
395
/* -- see zlib.h -- */
396
int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
397
gz_statep state;
398
399
/* get internal structure and check that it's for reading */
400
if (file == NULL)
401
return -1;
402
state = (gz_statep)file;
403
if (state->mode != GZ_READ)
404
return -1;
405
406
/* check that there was no (serious) error */
407
if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
408
return -1;
409
gz_error(state, Z_OK, NULL);
410
411
/* since an int is returned, make sure len fits in one, otherwise return
412
with an error (this avoids a flaw in the interface) */
413
if ((int)len < 0) {
414
gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
415
return -1;
416
}
417
418
/* read len or fewer bytes to buf */
419
len = (unsigned)gz_read(state, buf, len);
420
421
/* check for an error */
422
if (len == 0) {
423
if (state->err != Z_OK && state->err != Z_BUF_ERROR)
424
return -1;
425
if (state->again) {
426
/* non-blocking input stalled after some input was read, but no
427
uncompressed bytes were produced -- let the application know
428
this isn't EOF */
429
gz_error(state, Z_ERRNO, zstrerror());
430
return -1;
431
}
432
}
433
434
/* return the number of bytes read */
435
return (int)len;
436
}
437
438
/* -- see zlib.h -- */
439
z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems,
440
gzFile file) {
441
z_size_t len;
442
gz_statep state;
443
444
/* get internal structure and check that it's for reading */
445
if (file == NULL)
446
return 0;
447
state = (gz_statep)file;
448
if (state->mode != GZ_READ)
449
return 0;
450
451
/* check that there was no (serious) error */
452
if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
453
return 0;
454
gz_error(state, Z_OK, NULL);
455
456
/* compute bytes to read -- error on overflow */
457
len = nitems * size;
458
if (size && len / size != nitems) {
459
gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
460
return 0;
461
}
462
463
/* read len or fewer bytes to buf, return the number of full items read */
464
return len ? gz_read(state, buf, len) / size : 0;
465
}
466
467
/* -- see zlib.h -- */
468
#ifdef Z_PREFIX_SET
469
# undef z_gzgetc
470
#else
471
# undef gzgetc
472
#endif
473
int ZEXPORT gzgetc(gzFile file) {
474
unsigned char buf[1];
475
gz_statep state;
476
477
/* get internal structure and check that it's for reading */
478
if (file == NULL)
479
return -1;
480
state = (gz_statep)file;
481
if (state->mode != GZ_READ)
482
return -1;
483
484
/* check that there was no (serious) error */
485
if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
486
return -1;
487
gz_error(state, Z_OK, NULL);
488
489
/* try output buffer (no need to check for skip request) */
490
if (state->x.have) {
491
state->x.have--;
492
state->x.pos++;
493
return *(state->x.next)++;
494
}
495
496
/* nothing there -- try gz_read() */
497
return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
498
}
499
500
/* Same as gzgetc(): forwards file to it and returns its result unchanged. */
int ZEXPORT gzgetc_(gzFile file) {
    int ch = gzgetc(file);

    return ch;
}
503
504
/* -- see zlib.h -- */
505
int ZEXPORT gzungetc(int c, gzFile file) {
506
gz_statep state;
507
508
/* get internal structure and check that it's for reading */
509
if (file == NULL)
510
return -1;
511
state = (gz_statep)file;
512
if (state->mode != GZ_READ)
513
return -1;
514
515
/* in case this was just opened, set up the input buffer */
516
if (state->how == LOOK && state->x.have == 0)
517
(void)gz_look(state);
518
519
/* check that there was no (serious) error */
520
if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
521
return -1;
522
gz_error(state, Z_OK, NULL);
523
524
/* process a skip request */
525
if (state->skip && gz_skip(state) == -1)
526
return -1;
527
528
/* can't push EOF */
529
if (c < 0)
530
return -1;
531
532
/* if output buffer empty, put byte at end (allows more pushing) */
533
if (state->x.have == 0) {
534
state->x.have = 1;
535
state->x.next = state->out + (state->size << 1) - 1;
536
state->x.next[0] = (unsigned char)c;
537
state->x.pos--;
538
state->past = 0;
539
return c;
540
}
541
542
/* if no room, give up (must have already done a gzungetc()) */
543
if (state->x.have == (state->size << 1)) {
544
gz_error(state, Z_DATA_ERROR, "out of room to push characters");
545
return -1;
546
}
547
548
/* slide output data if needed and insert byte before existing data */
549
if (state->x.next == state->out) {
550
unsigned char *src = state->out + state->x.have;
551
unsigned char *dest = state->out + (state->size << 1);
552
553
while (src > state->out)
554
*--dest = *--src;
555
state->x.next = dest;
556
}
557
state->x.have++;
558
state->x.next--;
559
state->x.next[0] = (unsigned char)c;
560
state->x.pos--;
561
state->past = 0;
562
return c;
563
}
564
565
/* -- see zlib.h -- */
566
char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
567
unsigned left, n;
568
char *str;
569
unsigned char *eol;
570
gz_statep state;
571
572
/* check parameters, get internal structure, and check that it's for
573
reading */
574
if (file == NULL || buf == NULL || len < 1)
575
return NULL;
576
state = (gz_statep)file;
577
if (state->mode != GZ_READ)
578
return NULL;
579
580
/* check that there was no (serious) error */
581
if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
582
return NULL;
583
gz_error(state, Z_OK, NULL);
584
585
/* process a skip request */
586
if (state->skip && gz_skip(state) == -1)
587
return NULL;
588
589
/* copy output up to a new line, len-1 bytes, or there is no more output,
590
whichever comes first */
591
str = buf;
592
left = (unsigned)len - 1;
593
if (left) do {
594
/* assure that something is in the output buffer */
595
if (state->x.have == 0 && gz_fetch(state) == -1)
596
break; /* error */
597
if (state->x.have == 0) { /* end of file */
598
state->past = 1; /* read past end */
599
break; /* return what we have */
600
}
601
602
/* look for end-of-line in current output buffer */
603
n = state->x.have > left ? left : state->x.have;
604
eol = (unsigned char *)memchr(state->x.next, '\n', n);
605
if (eol != NULL)
606
n = (unsigned)(eol - state->x.next) + 1;
607
608
/* copy through end-of-line, or remainder if not found */
609
memcpy(buf, state->x.next, n);
610
state->x.have -= n;
611
state->x.next += n;
612
state->x.pos += n;
613
left -= n;
614
buf += n;
615
} while (left && eol == NULL);
616
617
/* append a terminating zero to the string (we don't check for a zero in
618
the contents, let the user worry about that) -- return the terminated
619
string, or if nothing was read, NULL */
620
if (buf == str)
621
return NULL;
622
buf[0] = 0;
623
return str;
624
}
625
626
/* -- see zlib.h -- */
627
int ZEXPORT gzdirect(gzFile file) {
628
gz_statep state;
629
630
/* get internal structure */
631
if (file == NULL)
632
return 0;
633
state = (gz_statep)file;
634
635
/* if the state is not known, but we can find out, then do so (this is
636
mainly for right after a gzopen() or gzdopen()) */
637
if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
638
(void)gz_look(state);
639
640
/* return 1 if transparent, 0 if processing a gzip stream */
641
return state->direct == 1;
642
}
643
644
/* -- see zlib.h -- */
645
int ZEXPORT gzclose_r(gzFile file) {
646
int ret, err;
647
gz_statep state;
648
649
/* get internal structure and check that it's for reading */
650
if (file == NULL)
651
return Z_STREAM_ERROR;
652
state = (gz_statep)file;
653
if (state->mode != GZ_READ)
654
return Z_STREAM_ERROR;
655
656
/* free memory and close file */
657
if (state->size) {
658
inflateEnd(&(state->strm));
659
free(state->out);
660
free(state->in);
661
}
662
err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
663
gz_error(state, Z_OK, NULL);
664
free(state->path);
665
ret = close(state->fd);
666
free(state);
667
return ret ? Z_ERRNO : err;
668
}
669

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button