Fossil SCM

Improvements to tar generation. Uses the format documented in Posix.1-2008 to handle long file names and UTF-8.

ge 2011-07-24 00:36 UTC trunk
Commit 2ef37b3b2a0c2dd48f4f902644fdb85175eae86a
1 file changed +322 -29
+322 -29
--- src/tar.c
+++ src/tar.c
@@ -27,31 +27,296 @@
2727
*/
2828
static struct tarball_t {
2929
unsigned char *aHdr; /* Space for building headers */
3030
char *zSpaces; /* Spaces for padding */
3131
char *zPrevDir; /* Name of directory for previous entry */
32
+ int nPrevDirAlloc; /* size of zPrevDir */
33
+ char *pScratch; /* scratch buffer used to build PAX data */
34
+ int nScratchUsed; /* part of buffer containing data */
35
+ int nScratchAlloc; /* size of buffer */
3236
} tball;
37
+
38
+
39
+/*
40
+** field lengths of 'ustar' name and prefix fields.
41
+*/
42
+#define USTAR_NAME_LEN 100
43
+#define USTAR_PREFIX_LEN 155
44
+
3345
3446
/*
3547
** Begin the process of generating a tarball.
3648
**
3749
** Initialize the GZIP compressor and the table of directory names.
3850
*/
3951
static void tar_begin(void){
4052
assert( tball.aHdr==0 );
41
- tball.aHdr = fossil_malloc(512+512+256);
42
- memset(tball.aHdr, 0, 512+512+256);
53
+ tball.aHdr = fossil_malloc(512+512);
54
+ memset(tball.aHdr, 0, 512+512);
4355
tball.zSpaces = (char*)&tball.aHdr[512];
44
- tball.zPrevDir = (char*)&tball.zSpaces[512];
56
+ /* zPrevDir init */
57
+ tball.zPrevDir = NULL;
58
+ tball.nPrevDirAlloc = 0;
59
+ /* scratch buffer init */
60
+ tball.pScratch = NULL;
61
+ tball.nScratchUsed = 0;
62
+ tball.nScratchAlloc = 0;
63
+
4564
memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
4665
memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
47
- memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */
66
+ memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */
67
+ memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */
68
+ memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */
4869
gzip_begin();
4970
db_multi_exec(
5071
"CREATE TEMP TABLE dir(name UNIQUE);"
5172
);
5273
}
74
+
75
+
76
+/*
77
+** print to the scratch buffer
78
+**
79
+** used to build the Pax Interchange Format data, and create
80
+** pseudo-file names for the header data.
81
+**
82
+** The buffer is grown automatically to accommodate the data.
83
+*/
84
+static int scratch_printf(
85
+ const char *fmt,
86
+ ...
87
+){
88
+ for(;;){
89
+ int newSize, minSpace, n;
90
+ /* calculate space in buffer */
91
+ int space = tball.nScratchAlloc - tball.nScratchUsed;
92
+ /* format the string */
93
+ va_list vl;
94
+ va_start(vl, fmt);
95
+ n = vsnprintf(&tball.pScratch[tball.nScratchUsed], space, fmt, vl);
96
+ assert(n >= 0);
97
+ va_end(vl);
98
+ /* if it fit we're done */
99
+ if(n < space)
100
+ return n;
101
+ /* buffer too short: calculate reasonable new size */
102
+ minSpace = tball.nScratchUsed+n+1;
103
+ newSize = 2 * tball.nScratchAlloc;
104
+ if(newSize < minSpace)
105
+ newSize = minSpace;
106
+ /* grow the buffer */
107
+ tball.pScratch = fossil_realloc(tball.pScratch, newSize);
108
+ tball.nScratchAlloc = newSize;
109
+ /* loop to try again */
110
+ }
111
+}
112
+
113
+
114
+/*
115
+** verify that all characters in 'zName' are in the
116
+** ISO646 (=ASCII) character set.
117
+*/
118
+static int is_iso646_name(
119
+ const char *zName, /* file path */
120
+ int nName /* path length */
121
+){
122
+ int i;
123
+ for(i = 0; i < nName; i++){
124
+ unsigned char c = (unsigned char)zName[i];
125
+ if(c > 0x7e)
126
+ return 0;
127
+ }
128
+ return 1;
129
+}
130
+
131
+
132
+/*
133
+** copy string pSrc into pDst, truncating or padding with 0 if necessary
134
+*/
135
+static void padded_copy(
136
+ char *pDest,
137
+ int nDest,
138
+ const char *pSrc,
139
+ int nSrc
140
+){
141
+ if(nSrc >= nDest){
142
+ memcpy(pDest, pSrc, nDest);
143
+ }else{
144
+ memcpy(pDest, pSrc, nSrc);
145
+ memset(&pDest[nSrc], 0, nDest - nSrc);
146
+ }
147
+}
148
+
149
+
150
+
151
+/******************************************************************************
152
+**
153
+** The 'tar' format has evolved over time. Initially the name was stored
154
+** in a 100 byte null-terminated field 'name'. File path names were
155
+** limited to 99 bytes.
156
+**
157
+** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
158
+** for up to 255 characters to be stored. The full file path is formed by
159
+** concatenating the field 'prefix', a slash, and the field 'name'. This
160
+** gives some measure of compatibility with programs that only understand
161
+** the oldest format.
162
+**
163
+** The latest Posix extension is called the 'pax Interchange Format'.
164
+** It removes all the limitations of the previous two formats by allowing
165
+** the storage of arbitrary-length attributes in a separate object that looks
166
+** like a file to programs that do not understand this extension. So the
167
+** contents of the 'name' and 'prefix' fields should contain values that allow
168
+** versions of tar that do not understand this extension to still do
169
+** something useful.
170
+**
171
+******************************************************************************/
172
+
173
+/*
174
+** The position we use to split a file path into the 'name' and 'prefix'
175
+** fields needs to meet the following criteria:
176
+**
177
+** - not at the beginning or end of the string
178
+** - the position must contain a slash
179
+** - no more than 100 characters follow the slash
180
+** - no more than 155 characters precede it
181
+**
182
+** The routine 'find_split_pos' finds a split position. It will meet the
183
+** criteria listed above if such a position exists. If no such
184
+** position exists it generates one that is useful for generating the
185
+** values used for backward compatibility.
186
+*/
187
+static int find_split_pos(
188
+ const char *zName, /* file path */
189
+ int nName /* path length */
190
+){
191
+ int i, split = 0;
192
+ /* only search if the string needs splitting */
193
+ if(nName > USTAR_NAME_LEN){
194
+ for(i = 1; i+1 < nName; i++)
195
+ if(zName[i] == '/'){
196
+ split = i+1;
197
+ /* if the split position is within USTAR_NAME_LEN bytes from
198
+ * the end we can quit */
199
+ if(nName - split <= USTAR_NAME_LEN)
200
+ break;
201
+ }
202
+ }
203
+ return split;
204
+}
205
+
206
+
207
+/*
208
+** attempt to split the file name path to meet 'ustar' header
209
+** criteria.
210
+*/
211
+static int tar_split_path(
212
+ const char *zName, /* path */
213
+ int nName, /* path length */
214
+ char *pName, /* name field */
215
+ char *pPrefix /* prefix field */
216
+){
217
+ int split = find_split_pos(zName, nName);
218
+ /* check whether both pieces fit */
219
+ if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1)
220
+ return 0; /* no */
221
+
222
+ /* extract name */
223
+ padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
224
+
225
+ /* extract prefix */
226
+ padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
227
+
228
+ return 1; /* success */
229
+}
230
+
231
+
232
+/*
233
+** When using an extension header we still need to put something
234
+** reasonable in the name and prefix fields. This is probably as
235
+** good as it gets.
236
+*/
237
+static void approximate_split_path(
238
+ const char *zName, /* path */
239
+ int nName, /* path length */
240
+ char *pName, /* name field */
241
+ char *pPrefix, /* prefix field */
242
+ int bHeader /* is this a 'x' type tar header? */
243
+){
244
+ int split;
245
+
246
+ /* if this is a Pax Interchange header prepend "PaxHeader/"
247
+ * so we can tell files apart from metadata */
248
+ if(bHeader){
249
+ int n;
250
+ tball.nScratchUsed = 0;
251
+ n = scratch_printf("PaxHeader/%*.*s", nName, nName, zName);
252
+ zName = tball.pScratch;
253
+ nName = n;
254
+ }
255
+
256
+ /* find the split position */
257
+ split = find_split_pos(zName, nName);
258
+
259
+ /* extract a name, truncate if needed */
260
+ padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
261
+
262
+ /* extract a prefix field, truncate when needed */
263
+ padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
264
+}
265
+
266
+
267
+/*
268
+** add a Pax Interchange header to the scratch buffer
269
+**
270
+** format: <length> <key>=<value>\n
271
+** the tricky part is that each header contains its own
272
+** size in decimal, counting that length.
273
+*/
274
+static void add_pax_header(
275
+ const char *zField,
276
+ const char *zValue,
277
+ int nValue
278
+){
279
+ /* calculate length without length field */
280
+ int blen = strlen(zField) + nValue + 3;
281
+ /* calculate the length of the length field */
282
+ int next10 = 1;
283
+ int n;
284
+ for(n = blen; n > 0; ){
285
+ blen++; next10 *= 10;
286
+ n /= 10;
287
+ }
288
+ /* adding the length extended the length field? */
289
+ if(blen > next10)
290
+ blen++;
291
+ /* build the string */
292
+ n = scratch_printf("%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
293
+ /* this _must_ be right */
294
+ if(n != blen)
295
+ fossil_fatal("internal error: PAX tar header has bad length");
296
+ /* add length to scratch buffer */
297
+ tball.nScratchUsed += blen;
298
+}
299
+
300
+
301
+/*
302
+** set the header type, calculate the checksum and output
303
+** the header
304
+*/
305
+static void cksum_and_write_header(
306
+ char cType
307
+){
308
+ unsigned int cksum = 0;
309
+ int i;
310
+ memset(&tball.aHdr[148], ' ', 8);
311
+ tball.aHdr[156] = cType;
312
+ for(i=0; i<512; i++) cksum += tball.aHdr[i];
313
+ sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
314
+ tball.aHdr[155] = 0;
315
+ gzip_step((char*)tball.aHdr, 512);
316
+}
317
+
53318
54319
/*
55320
** Build a header for a file or directory and write that header
56321
** into the growing tarball.
57322
*/
@@ -59,33 +324,47 @@
59324
const char *zName, /* Name of the object */
60325
int nName, /* Number of characters in zName */
61326
int iMode, /* Mode. 0644 or 0755 */
62327
unsigned int mTime, /* File modification time */
63328
int iSize, /* Size of the object in bytes */
64
- int iType /* Type of object. 0==file. 5==directory */
329
+ char cType /* Type of object. '0'==file. '5'==directory */
65330
){
66
- unsigned int cksum = 0;
67
- int i;
68
- if( nName>100 ){
69
- memcpy(&tball.aHdr[345], zName, nName-100);
70
- memcpy(tball.aHdr, &zName[nName-100], 100);
71
- memset(&tball.aHdr[245+nName], 0, 267-nName);
72
- }else{
73
- memcpy(tball.aHdr, zName, nName);
74
- memset(&tball.aHdr[nName], 0, 100-nName);
75
- memset(&tball.aHdr[345], 0, 167);
76
- }
331
+ /* set mode and modification time */
77332
sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
78
- sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
79333
sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
80
- memset(&tball.aHdr[148], ' ', 8);
81
- tball.aHdr[156] = iType + '0';
82
- for(i=0; i<512; i++) cksum += tball.aHdr[i];
83
- sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
84
- tball.aHdr[154] = 0;
85
- gzip_step((char*)tball.aHdr, 512);
334
+
335
+ /* see if we need to output a Pax Interchange Header */
336
+ if( !is_iso646_name(zName, nName) ||
337
+ !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
338
+ int lastPage;
339
+ /* add a file name for interoperability with older programs */
340
+ approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
341
+
342
+ /* generate the Pax Interchange path header */
343
+ tball.nScratchUsed = 0;
344
+ add_pax_header("path", zName, nName);
345
+
346
+ /* set the header length, and write the header */
347
+ sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", tball.nScratchUsed);
348
+ cksum_and_write_header('x');
349
+
350
+ /* write the Pax Interchange data */
351
+ gzip_step(tball.pScratch, tball.nScratchUsed);
352
+ lastPage = tball.nScratchUsed % 512;
353
+ if( lastPage!=0 )
354
+ gzip_step(tball.zSpaces, 512 - lastPage);
355
+
356
+ /* generate an approximate path for the regular header */
357
+ approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
358
+ }
359
+ /* set the size */
360
+ sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
361
+
362
+ /* write the regular header */
363
+ cksum_and_write_header(cType);
86364
}
365
+
87366
88367
/*
89368
** Recursively add a directory entry for the given file if those
90369
** directories have not previously been seen.
91370
*/
@@ -95,18 +374,27 @@
95374
unsigned int mTime /* Modification time */
96375
){
97376
int i;
98377
for(i=nName-1; i>0 && zName[i]!='/'; i--){}
99378
if( i<=0 ) return;
100
- if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;
379
+ if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
380
+ memcmp(tball.zPrevDir, zName, i)==0 ) return;
101381
db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
102382
if( sqlite3_changes(g.db)==0 ) return;
103383
tar_add_directory_of(zName, i-1, mTime);
104
- tar_add_header(zName, i, 0755, mTime, 0, 5);
384
+ tar_add_header(zName, i, 0755, mTime, 0, '5');
385
+ if( i >= tball.nPrevDirAlloc ){
386
+ int nsize = tball.nPrevDirAlloc * 2;
387
+ if(i+1 > nsize)
388
+ nsize = i+1;
389
+ tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
390
+ tball.nPrevDirAlloc = nsize;
391
+ }
105392
memcpy(tball.zPrevDir, zName, i);
106393
tball.zPrevDir[i] = 0;
107394
}
395
+
108396
109397
/*
110398
** Add a single file to the growing tarball.
111399
*/
112400
static void tar_add_file(
@@ -117,15 +405,13 @@
117405
){
118406
int nName = strlen(zName);
119407
int n = blob_size(pContent);
120408
int lastPage;
121409
122
- if( nName>=250 ){
123
- fossil_fatal("name too long for ustar format: \"%s\"", zName);
124
- }
410
+ /* length check moved to tar_split_path */
125411
tar_add_directory_of(zName, nName, mTime);
126
- tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
412
+ tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
127413
if( n ){
128414
gzip_step(blob_buffer(pContent), n);
129415
lastPage = n % 512;
130416
if( lastPage!=0 ){
131417
gzip_step(tball.zSpaces, 512 - lastPage);
@@ -142,10 +428,17 @@
142428
gzip_step(tball.zSpaces, 512);
143429
gzip_step(tball.zSpaces, 512);
144430
gzip_finish(pOut);
145431
fossil_free(tball.aHdr);
146432
tball.aHdr = 0;
433
+ fossil_free(tball.zPrevDir);
434
+ tball.zPrevDir = NULL;
435
+ tball.nPrevDirAlloc = 0;
436
+ fossil_free(tball.pScratch);
437
+ tball.pScratch = NULL;
438
+ tball.nScratchUsed = 0;
439
+ tball.nScratchAlloc = 0;
147440
}
148441
149442
150443
/*
151444
** COMMAND: test-tarball
152445
--- src/tar.c
+++ src/tar.c
@@ -27,31 +27,296 @@
27 */
28 static struct tarball_t {
29 unsigned char *aHdr; /* Space for building headers */
30 char *zSpaces; /* Spaces for padding */
31 char *zPrevDir; /* Name of directory for previous entry */
 
 
 
 
32 } tball;
 
 
 
 
 
 
 
 
33
34 /*
35 ** Begin the process of generating a tarball.
36 **
37 ** Initialize the GZIP compressor and the table of directory names.
38 */
39 static void tar_begin(void){
40 assert( tball.aHdr==0 );
41 tball.aHdr = fossil_malloc(512+512+256);
42 memset(tball.aHdr, 0, 512+512+256);
43 tball.zSpaces = (char*)&tball.aHdr[512];
44 tball.zPrevDir = (char*)&tball.zSpaces[512];
 
 
 
 
 
 
 
45 memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
46 memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
47 memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */
 
 
48 gzip_begin();
49 db_multi_exec(
50 "CREATE TEMP TABLE dir(name UNIQUE);"
51 );
52 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
54 /*
55 ** Build a header for a file or directory and write that header
56 ** into the growing tarball.
57 */
@@ -59,33 +324,47 @@
59 const char *zName, /* Name of the object */
60 int nName, /* Number of characters in zName */
61 int iMode, /* Mode. 0644 or 0755 */
62 unsigned int mTime, /* File modification time */
63 int iSize, /* Size of the object in bytes */
64 int iType /* Type of object. 0==file. 5==directory */
65 ){
66 unsigned int cksum = 0;
67 int i;
68 if( nName>100 ){
69 memcpy(&tball.aHdr[345], zName, nName-100);
70 memcpy(tball.aHdr, &zName[nName-100], 100);
71 memset(&tball.aHdr[245+nName], 0, 267-nName);
72 }else{
73 memcpy(tball.aHdr, zName, nName);
74 memset(&tball.aHdr[nName], 0, 100-nName);
75 memset(&tball.aHdr[345], 0, 167);
76 }
77 sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
78 sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
79 sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
80 memset(&tball.aHdr[148], ' ', 8);
81 tball.aHdr[156] = iType + '0';
82 for(i=0; i<512; i++) cksum += tball.aHdr[i];
83 sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
84 tball.aHdr[154] = 0;
85 gzip_step((char*)tball.aHdr, 512);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86 }
 
87
88 /*
89 ** Recursively add an directory entry for the given file if those
90 ** directories have not previously been seen.
91 */
@@ -95,18 +374,27 @@
95 unsigned int mTime /* Modification time */
96 ){
97 int i;
98 for(i=nName-1; i>0 && zName[i]!='/'; i--){}
99 if( i<=0 ) return;
100 if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;
 
101 db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
102 if( sqlite3_changes(g.db)==0 ) return;
103 tar_add_directory_of(zName, i-1, mTime);
104 tar_add_header(zName, i, 0755, mTime, 0, 5);
 
 
 
 
 
 
 
105 memcpy(tball.zPrevDir, zName, i);
106 tball.zPrevDir[i] = 0;
107 }
 
108
109 /*
110 ** Add a single file to the growing tarball.
111 */
112 static void tar_add_file(
@@ -117,15 +405,13 @@
117 ){
118 int nName = strlen(zName);
119 int n = blob_size(pContent);
120 int lastPage;
121
122 if( nName>=250 ){
123 fossil_fatal("name too long for ustar format: \"%s\"", zName);
124 }
125 tar_add_directory_of(zName, nName, mTime);
126 tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
127 if( n ){
128 gzip_step(blob_buffer(pContent), n);
129 lastPage = n % 512;
130 if( lastPage!=0 ){
131 gzip_step(tball.zSpaces, 512 - lastPage);
@@ -142,10 +428,17 @@
142 gzip_step(tball.zSpaces, 512);
143 gzip_step(tball.zSpaces, 512);
144 gzip_finish(pOut);
145 fossil_free(tball.aHdr);
146 tball.aHdr = 0;
 
 
 
 
 
 
 
147 }
148
149
150 /*
151 ** COMMAND: test-tarball
152
--- src/tar.c
+++ src/tar.c
@@ -27,31 +27,296 @@
27 */
/*
** Working state for the tarball currently being generated.
*/
static struct tarball_t {
  unsigned char *aHdr;   /* Buffer in which each 512-byte header is assembled */
  char *zSpaces;         /* Block used to pad entries out to a 512-byte boundary */
  char *zPrevDir;        /* Directory name recorded for the previous entry */
  int nPrevDirAlloc;     /* Number of bytes allocated for zPrevDir */
  char *pScratch;        /* Growable scratch buffer holding PAX data */
  int nScratchUsed;      /* Number of bytes of pScratch that hold data */
  int nScratchAlloc;     /* Number of bytes allocated for pScratch */
} tball;
37
38
/*
** Sizes, in bytes, of the 'name' and 'prefix' fields of a 'ustar' header.
*/
#define USTAR_NAME_LEN 100
#define USTAR_PREFIX_LEN 155
44
45
46 /*
47 ** Begin the process of generating a tarball.
48 **
49 ** Initialize the GZIP compressor and the table of directory names.
50 */
51 static void tar_begin(void){
52 assert( tball.aHdr==0 );
53 tball.aHdr = fossil_malloc(512+512);
54 memset(tball.aHdr, 0, 512+512);
55 tball.zSpaces = (char*)&tball.aHdr[512];
56 /* zPrevDir init */
57 tball.zPrevDir = NULL;
58 tball.nPrevDirAlloc = 0;
59 /* scratch buffer init */
60 tball.pScratch = NULL;
61 tball.nScratchUsed = 0;
62 tball.nScratchAlloc = 0;
63
64 memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
65 memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
66 memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */
67 memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */
68 memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */
69 gzip_begin();
70 db_multi_exec(
71 "CREATE TEMP TABLE dir(name UNIQUE);"
72 );
73 }
74
75
76 /*
77 ** print to the scratch buffer
78 **
79 ** used to build the Pax Interchange Format data, and create
80 ** pseudo-file names for the header data.
81 **
82 ** The buffer is grown automatically to accommodate the data.
83 */
84 static int scratch_printf(
85 const char *fmt,
86 ...
87 ){
88 for(;;){
89 int newSize, minSpace, n;
90 /* calculate space in buffer */
91 int space = tball.nScratchAlloc - tball.nScratchUsed;
92 /* format the string */
93 va_list vl;
94 va_start(vl, fmt);
95 n = vsnprintf(&tball.pScratch[tball.nScratchUsed], space, fmt, vl);
96 assert(n >= 0);
97 va_end(vl);
98 /* if it fit we're done */
99 if(n < space)
100 return n;
101 /* buffer too short: calculate reasonable new size */
102 minSpace = tball.nScratchUsed+n+1;
103 newSize = 2 * tball.nScratchAlloc;
104 if(newSize < minSpace)
105 newSize = minSpace;
106 /* grow the buffer */
107 tball.pScratch = fossil_realloc(tball.pScratch, newSize);
108 tball.nScratchAlloc = newSize;
109 /* loop to try again */
110 }
111 }
112
113
/*
** Check whether all characters in 'zName' are in the
** ISO646 (=ASCII) character set.
**
** Examines the first nName bytes of zName.  Returns 1 when every byte
** is 0x7e or lower and 0 as soon as a byte of 0x7f or higher is found.
** An empty name (nName<=0) yields 1.
*/
static int is_iso646_name(
  const char *zName,     /* file path */
  int nName              /* path length */
){
  int k = 0;
  while( k<nName ){
    if( (unsigned char)zName[k] >= 0x7f ){
      return 0;
    }
    k++;
  }
  return 1;
}
130
131
/*
** Fill the nDest-byte field pDest from the nSrc-byte string pSrc.
**
** When the source is at least as long as the field, only its first
** nDest bytes are copied (no terminator is added).  Otherwise the whole
** source is copied and the remainder of the field is set to 0.
*/
static void padded_copy(
  char *pDest,
  int nDest,
  const char *pSrc,
  int nSrc
){
  /* number of source bytes that actually land in the field */
  int nCopy = nSrc<nDest ? nSrc : nDest;

  memcpy(pDest, pSrc, nCopy);
  if( nCopy<nDest ){
    memset(pDest + nCopy, 0, nDest - nCopy);
  }
}
148
149
150
151 /******************************************************************************
152 **
153 ** The 'tar' format has evolved over time. Initially the name was stored
154 ** in a 100 byte null-terminated field 'name'. File path names were
155 ** limited to 99 bytes.
156 **
157 ** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
158 ** for up to 255 characters to be stored. The full file path is formed by
159 ** concatenating the field 'prefix', a slash, and the field 'name'. This
160 ** gives some measure of compatibility with programs that only understand
161 ** the oldest format.
162 **
163 ** The latest Posix extension is called the 'pax Interchange Format'.
164 ** It removes all the limitations of the previous two formats by allowing
165 ** the storage of arbitrary-length attributes in a separate object that looks
166 ** like a file to programs that do not understand this extension. So the
167 ** contents of the 'name' and 'prefix' fields should contain values that allow
168 ** versions of tar that do not understand this extension to still do
169 ** something useful.
170 **
171 ******************************************************************************/
172
173 /*
174 ** The position we use to split a file path into the 'name' and 'prefix'
175 ** fields needs to meet the following criteria:
176 **
177 ** - not at the beginning or end of the string
178 ** - the position must contain a slash
179 ** - no more than 100 characters follow the slash
180 ** - no more than 155 characters precede it
181 **
182 ** The routine 'find_split_pos' finds a split position. It will meet the
183 ** criteria of listed above if such a position exists. If no such
184 ** position exists it generates one that useful for generating the
185 ** values used for backward compatibility.
186 */
187 static int find_split_pos(
188 const char *zName, /* file path */
189 int nName /* path length */
190 ){
191 int i, split = 0;
192 /* only search if the string needs splitting */
193 if(nName > USTAR_NAME_LEN){
194 for(i = 1; i+1 < nName; i++)
195 if(zName[i] == '/'){
196 split = i+1;
197 /* if the split position is within USTAR_NAME_LEN bytes from
198 * the end we can quit */
199 if(nName - split <= USTAR_NAME_LEN)
200 break;
201 }
202 }
203 return split;
204 }
205
206
207 /*
208 ** attempt to split the file name path to meet 'ustar' header
209 ** criteria.
210 */
211 static int tar_split_path(
212 const char *zName, /* path */
213 int nName, /* path length */
214 char *pName, /* name field */
215 char *pPrefix /* prefix field */
216 ){
217 int split = find_split_pos(zName, nName);
218 /* check whether both pieces fit */
219 if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1)
220 return 0; /* no */
221
222 /* extract name */
223 padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
224
225 /* extract prefix */
226 padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
227
228 return 1; /* success */
229 }
230
231
232 /*
233 ** When using an extension header we still need to put something
234 ** reasonable in the name and prefix fields. This is probably as
235 ** good as it gets.
236 */
237 static void approximate_split_path(
238 const char *zName, /* path */
239 int nName, /* path length */
240 char *pName, /* name field */
241 char *pPrefix, /* prefix field */
242 int bHeader /* is this a 'x' type tar header? */
243 ){
244 int split;
245
246 /* if this is a Pax Interchange header prepend "PaxHeader/"
247 * so we can tell files apart from metadata */
248 if(bHeader){
249 int n;
250 tball.nScratchUsed = 0;
251 n = scratch_printf("PaxHeader/%*.*s", nName, nName, zName);
252 zName = tball.pScratch;
253 nName = n;
254 }
255
256 /* find the split position */
257 split = find_split_pos(zName, nName);
258
259 /* extract a name, truncate if needed */
260 padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
261
262 /* extract a prefix field, truncate when needed */
263 padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
264 }
265
266
267 /*
268 ** add a Pax Interchange header to the scratch buffer
269 **
270 ** format: <length> <key>=<value>\n
271 ** the tricky part is that each header contains its own
272 ** size in decimal, counting that length.
273 */
274 static void add_pax_header(
275 const char *zField,
276 const char *zValue,
277 int nValue
278 ){
279 /* calculate length without length field */
280 int blen = strlen(zField) + nValue + 3;
281 /* calculate the length of the length field */
282 int next10 = 1;
283 int n;
284 for(n = blen; n > 0; ){
285 blen++; next10 *= 10;
286 n /= 10;
287 }
288 /* adding the length extended the length field? */
289 if(blen > next10)
290 blen++;
291 /* build the string */
292 n = scratch_printf("%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
293 /* this _must_ be right */
294 if(n != blen)
295 fossil_fatal("internal error: PAX tar header has bad length");
296 /* add length to scratch buffer */
297 tball.nScratchUsed += blen;
298 }
299
300
301 /*
302 ** set the header type, calculate the checksum and output
303 ** the header
304 */
305 static void cksum_and_write_header(
306 char cType
307 ){
308 unsigned int cksum = 0;
309 int i;
310 memset(&tball.aHdr[148], ' ', 8);
311 tball.aHdr[156] = cType;
312 for(i=0; i<512; i++) cksum += tball.aHdr[i];
313 sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
314 tball.aHdr[155] = 0;
315 gzip_step((char*)tball.aHdr, 512);
316 }
317
318
319 /*
320 ** Build a header for a file or directory and write that header
321 ** into the growing tarball.
322 */
@@ -59,33 +324,47 @@
324 const char *zName, /* Name of the object */
325 int nName, /* Number of characters in zName */
326 int iMode, /* Mode. 0644 or 0755 */
327 unsigned int mTime, /* File modification time */
328 int iSize, /* Size of the object in bytes */
329 char cType /* Type of object. '0'==file. '5'==directory */
330 ){
331 /* set mode and modification time */
 
 
 
 
 
 
 
 
 
 
332 sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
 
333 sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
334
335 /* see if we need to output a Pax Interchange Header */
336 if( !is_iso646_name(zName, nName) ||
337 !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
338 int lastPage;
339 /* add a file name for interoperability with older programs */
340 approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
341
342 /* generate the Pax Interchange path header */
343 tball.nScratchUsed = 0;
344 add_pax_header("path", zName, nName);
345
346 /* set the header length, and write the header */
347 sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", tball.nScratchUsed);
348 cksum_and_write_header('x');
349
350 /* write the Pax Interchange data */
351 gzip_step(tball.pScratch, tball.nScratchUsed);
352 lastPage = tball.nScratchUsed % 512;
353 if( lastPage!=0 )
354 gzip_step(tball.zSpaces, 512 - lastPage);
355
356 /* generate an approximate path for the regular header */
357 approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
358 }
359 /* set the size */
360 sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
361
362 /* write the regular header */
363 cksum_and_write_header(cType);
364 }
365
366
367 /*
368 ** Recursively add a directory entry for the given file if those
369 ** directories have not previously been seen.
370 */
@@ -95,18 +374,27 @@
374 unsigned int mTime /* Modification time */
375 ){
376 int i;
377 for(i=nName-1; i>0 && zName[i]!='/'; i--){}
378 if( i<=0 ) return;
379 if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
380 memcmp(tball.zPrevDir, zName, i)==0 ) return;
381 db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
382 if( sqlite3_changes(g.db)==0 ) return;
383 tar_add_directory_of(zName, i-1, mTime);
384 tar_add_header(zName, i, 0755, mTime, 0, '5');
385 if( i >= tball.nPrevDirAlloc ){
386 int nsize = tball.nPrevDirAlloc * 2;
387 if(i+1 > nsize)
388 nsize = i+1;
389 tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
390 tball.nPrevDirAlloc = nsize;
391 }
392 memcpy(tball.zPrevDir, zName, i);
393 tball.zPrevDir[i] = 0;
394 }
395
396
397 /*
398 ** Add a single file to the growing tarball.
399 */
400 static void tar_add_file(
@@ -117,15 +405,13 @@
405 ){
406 int nName = strlen(zName);
407 int n = blob_size(pContent);
408 int lastPage;
409
410 /* length check moved to tar_split_path */
 
 
411 tar_add_directory_of(zName, nName, mTime);
412 tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
413 if( n ){
414 gzip_step(blob_buffer(pContent), n);
415 lastPage = n % 512;
416 if( lastPage!=0 ){
417 gzip_step(tball.zSpaces, 512 - lastPage);
@@ -142,10 +428,17 @@
428 gzip_step(tball.zSpaces, 512);
429 gzip_step(tball.zSpaces, 512);
430 gzip_finish(pOut);
431 fossil_free(tball.aHdr);
432 tball.aHdr = 0;
433 fossil_free(tball.zPrevDir);
434 tball.zPrevDir = NULL;
435 tball.nPrevDirAlloc = 0;
436 fossil_free(tball.pScratch);
437 tball.pScratch = NULL;
438 tball.nScratchUsed = 0;
439 tball.nScratchAlloc = 0;
440 }
441
442
443 /*
444 ** COMMAND: test-tarball
445

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button