Fossil SCM

Merge the ge-tarfix changes into trunk. This fixes tarball generation for repos that have very long filenames.

drh 2011-07-25 11:21 trunk merge
Commit a26940c22ea353c4ea6b3e2577c1b491c27870a9
1 file changed +278 -29
+278 -29
--- src/tar.c
+++ src/tar.c
@@ -27,31 +27,253 @@
2727
*/
2828
static struct tarball_t {
2929
unsigned char *aHdr; /* Space for building headers */
3030
char *zSpaces; /* Spaces for padding */
3131
char *zPrevDir; /* Name of directory for previous entry */
32
+ int nPrevDirAlloc; /* size of zPrevDir */
33
+ Blob pax; /* PAX data */
3234
} tball;
35
+
36
+
37
+/*
38
+** field lengths of 'ustar' name and prefix fields.
39
+*/
40
+#define USTAR_NAME_LEN 100
41
+#define USTAR_PREFIX_LEN 155
42
+
3343
3444
/*
3545
** Begin the process of generating a tarball.
3646
**
3747
** Initialize the GZIP compressor and the table of directory names.
3848
*/
3949
static void tar_begin(void){
4050
assert( tball.aHdr==0 );
41
- tball.aHdr = fossil_malloc(512+512+256);
42
- memset(tball.aHdr, 0, 512+512+256);
51
+ tball.aHdr = fossil_malloc(512+512);
52
+ memset(tball.aHdr, 0, 512+512);
4353
tball.zSpaces = (char*)&tball.aHdr[512];
44
- tball.zPrevDir = (char*)&tball.zSpaces[512];
54
+ /* zPrevDir init */
55
+ tball.zPrevDir = NULL;
56
+ tball.nPrevDirAlloc = 0;
57
+ /* scratch buffer init */
58
+ blob_zero(&tball.pax);
59
+
4560
memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
4661
memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
47
- memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */
62
+ memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */
63
+ memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */
64
+ memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */
4865
gzip_begin();
4966
db_multi_exec(
5067
"CREATE TEMP TABLE dir(name UNIQUE);"
5168
);
5269
}
70
+
71
+
72
+/*
73
+** verify that all characters in 'zName' are in the
74
+** ISO646 (=ASCII) character set.
75
+*/
76
+static int is_iso646_name(
77
+ const char *zName, /* file path */
78
+ int nName /* path length */
79
+){
80
+ int i;
81
+ for(i = 0; i < nName; i++){
82
+ unsigned char c = (unsigned char)zName[i];
83
+ if( c>0x7e ) return 0;
84
+ }
85
+ return 1;
86
+}
87
+
88
+
89
+/*
90
+** copy string pSrc into pDest, truncating or padding with 0 if necessary
91
+*/
92
+static void padded_copy(
93
+ char *pDest,
94
+ int nDest,
95
+ const char *pSrc,
96
+ int nSrc
97
+){
98
+ if(nSrc >= nDest){
99
+ memcpy(pDest, pSrc, nDest);
100
+ }else{
101
+ memcpy(pDest, pSrc, nSrc);
102
+ memset(&pDest[nSrc], 0, nDest - nSrc);
103
+ }
104
+}
105
+
106
+
107
+
108
+/******************************************************************************
109
+**
110
+** The 'tar' format has evolved over time. Initially the name was stored
111
+** in a 100 byte null-terminated field 'name'. File path names were
112
+** limited to 99 bytes.
113
+**
114
+** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
115
+** for up to 255 characters to be stored. The full file path is formed by
116
+** concatenating the field 'prefix', a slash, and the field 'name'. This
117
+** gives some measure of compatibility with programs that only understand
118
+** the oldest format.
119
+**
120
+** The latest Posix extension is called the 'pax Interchange Format'.
121
+** It removes all the limitations of the previous two formats by allowing
122
+** the storage of arbitrary-length attributes in a separate object that looks
123
+** like a file to programs that do not understand this extension. So the
124
+** contents of the 'name' and 'prefix' fields should contain values that allow
125
+** versions of tar that do not understand this extension to still do
126
+** something useful.
127
+**
128
+******************************************************************************/
129
+
130
+/*
131
+** The position we use to split a file path into the 'name' and 'prefix'
132
+** fields needs to meet the following criteria:
133
+**
134
+** - not at the beginning or end of the string
135
+** - the position must contain a slash
136
+** - no more than 100 characters follow the slash
137
+** - no more than 155 characters precede it
138
+**
139
+** The routine 'find_split_pos' finds a split position. It will meet the
140
+** criteria listed above if such a position exists. If no such
141
+** position exists it generates one that is useful for generating the
142
+** values used for backward compatibility.
143
+*/
144
+static int find_split_pos(
145
+ const char *zName, /* file path */
146
+ int nName /* path length */
147
+){
148
+ int i, split = 0;
149
+ /* only search if the string needs splitting */
150
+ if(nName > USTAR_NAME_LEN){
151
+ for(i = 1; i+1 < nName; i++)
152
+ if(zName[i] == '/'){
153
+ split = i+1;
154
+ /* if the split position is within USTAR_NAME_LEN bytes from
155
+ * the end we can quit */
156
+ if(nName - split <= USTAR_NAME_LEN) break;
157
+ }
158
+ }
159
+ return split;
160
+}
161
+
162
+
163
+/*
164
+** attempt to split the file name path to meet 'ustar' header
165
+** criteria.
166
+*/
167
+static int tar_split_path(
168
+ const char *zName, /* path */
169
+ int nName, /* path length */
170
+ char *pName, /* name field */
171
+ char *pPrefix /* prefix field */
172
+){
173
+ int split = find_split_pos(zName, nName);
174
+ /* check whether both pieces fit */
175
+ if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){
176
+ return 0; /* no */
177
+ }
178
+
179
+ /* extract name */
180
+ padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
181
+
182
+ /* extract prefix */
183
+ padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
184
+
185
+ return 1; /* success */
186
+}
187
+
188
+
189
+/*
190
+** When using an extension header we still need to put something
191
+** reasonable in the name and prefix fields. This is probably as
192
+** good as it gets.
193
+*/
194
+static void approximate_split_path(
195
+ const char *zName, /* path */
196
+ int nName, /* path length */
197
+ char *pName, /* name field */
198
+ char *pPrefix, /* prefix field */
199
+ int bHeader /* is this a 'x' type tar header? */
200
+){
201
+ int split;
202
+
203
+ /* if this is a Pax Interchange header prepend "PaxHeader/"
204
+ ** so we can tell files apart from metadata */
205
+ if( bHeader ){
206
+ int n;
207
+ blob_reset(&tball.pax);
208
+ blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName);
209
+ zName = blob_buffer(&tball.pax);
210
+ nName = blob_size(&tball.pax);
211
+ }
212
+
213
+ /* find the split position */
214
+ split = find_split_pos(zName, nName);
215
+
216
+ /* extract a name, truncate if needed */
217
+ padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
218
+
219
+ /* extract a prefix field, truncate when needed */
220
+ padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
221
+}
222
+
223
+
224
+/*
225
+** add a Pax Interchange header to the scratch buffer
226
+**
227
+** format: <length> <key>=<value>\n
228
+** the tricky part is that each header contains its own
229
+** size in decimal, counting that length.
230
+*/
231
+static void add_pax_header(
232
+ const char *zField,
233
+ const char *zValue,
234
+ int nValue
235
+){
236
+ /* calculate length without length field */
237
+ int blen = strlen(zField) + nValue + 3;
238
+ /* calculate the length of the length field */
239
+ int next10 = 1;
240
+ int n;
241
+ for(n = blen; n > 0; ){
242
+ blen++; next10 *= 10;
243
+ n /= 10;
244
+ }
245
+ /* adding the length extended the length field? */
246
+ if(blen > next10){
247
+ blen++;
248
+ }
249
+ /* build the string */
250
+ blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
251
+ /* this _must_ be right */
252
+ if(blob_size(&tball.pax) != blen){
253
+ fossil_fatal("internal error: PAX tar header has bad length");
254
+ }
255
+}
256
+
257
+
258
+/*
259
+** set the header type, calculate the checksum and output
260
+** the header
261
+*/
262
+static void cksum_and_write_header(
263
+ char cType
264
+){
265
+ unsigned int cksum = 0;
266
+ int i;
267
+ memset(&tball.aHdr[148], ' ', 8);
268
+ tball.aHdr[156] = cType;
269
+ for(i=0; i<512; i++) cksum += tball.aHdr[i];
270
+ sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
271
+ tball.aHdr[155] = 0;
272
+ gzip_step((char*)tball.aHdr, 512);
273
+}
274
+
53275
54276
/*
55277
** Build a header for a file or directory and write that header
56278
** into the growing tarball.
57279
*/
@@ -59,33 +281,49 @@
59281
const char *zName, /* Name of the object */
60282
int nName, /* Number of characters in zName */
61283
int iMode, /* Mode. 0644 or 0755 */
62284
unsigned int mTime, /* File modification time */
63285
int iSize, /* Size of the object in bytes */
64
- int iType /* Type of object. 0==file. 5==directory */
286
+ char cType /* Type of object. '0'==file. '5'==directory */
65287
){
66
- unsigned int cksum = 0;
67
- int i;
68
- if( nName>100 ){
69
- memcpy(&tball.aHdr[345], zName, nName-100);
70
- memcpy(tball.aHdr, &zName[nName-100], 100);
71
- memset(&tball.aHdr[245+nName], 0, 267-nName);
72
- }else{
73
- memcpy(tball.aHdr, zName, nName);
74
- memset(&tball.aHdr[nName], 0, 100-nName);
75
- memset(&tball.aHdr[345], 0, 167);
76
- }
288
+ /* set mode and modification time */
77289
sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
78
- sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
79290
sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
80
- memset(&tball.aHdr[148], ' ', 8);
81
- tball.aHdr[156] = iType + '0';
82
- for(i=0; i<512; i++) cksum += tball.aHdr[i];
83
- sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
84
- tball.aHdr[154] = 0;
85
- gzip_step((char*)tball.aHdr, 512);
291
+
292
+ /* see if we need to output a Pax Interchange Header */
293
+ if( !is_iso646_name(zName, nName) ||
294
+ !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
295
+ int lastPage;
296
+ /* add a file name for interoperability with older programs */
297
+ approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
298
+
299
+ /* generate the Pax Interchange path header */
300
+ blob_reset(&tball.pax);
301
+ add_pax_header("path", zName, nName);
302
+
303
+ /* set the header length, and write the header */
304
+ sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o",
305
+ blob_size(&tball.pax));
306
+ cksum_and_write_header('x');
307
+
308
+ /* write the Pax Interchange data */
309
+ gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax));
310
+ lastPage = blob_size(&tball.pax) % 512;
311
+ if( lastPage!=0 ){
312
+ gzip_step(tball.zSpaces, 512 - lastPage);
313
+ }
314
+
315
+ /* generate an approximate path for the regular header */
316
+ approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
317
+ }
318
+ /* set the size */
319
+ sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
320
+
321
+ /* write the regular header */
322
+ cksum_and_write_header(cType);
86323
}
324
+
87325
88326
/*
89327
** Recursively add a directory entry for the given file if those
90328
** directories have not previously been seen.
91329
*/
@@ -95,18 +333,27 @@
95333
unsigned int mTime /* Modification time */
96334
){
97335
int i;
98336
for(i=nName-1; i>0 && zName[i]!='/'; i--){}
99337
if( i<=0 ) return;
100
- if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;
338
+ if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
339
+ memcmp(tball.zPrevDir, zName, i)==0 ) return;
101340
db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
102341
if( sqlite3_changes(g.db)==0 ) return;
103342
tar_add_directory_of(zName, i-1, mTime);
104
- tar_add_header(zName, i, 0755, mTime, 0, 5);
343
+ tar_add_header(zName, i, 0755, mTime, 0, '5');
344
+ if( i >= tball.nPrevDirAlloc ){
345
+ int nsize = tball.nPrevDirAlloc * 2;
346
+ if(i+1 > nsize)
347
+ nsize = i+1;
348
+ tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
349
+ tball.nPrevDirAlloc = nsize;
350
+ }
105351
memcpy(tball.zPrevDir, zName, i);
106352
tball.zPrevDir[i] = 0;
107353
}
354
+
108355
109356
/*
110357
** Add a single file to the growing tarball.
111358
*/
112359
static void tar_add_file(
@@ -117,15 +364,13 @@
117364
){
118365
int nName = strlen(zName);
119366
int n = blob_size(pContent);
120367
int lastPage;
121368
122
- if( nName>=250 ){
123
- fossil_fatal("name too long for ustar format: \"%s\"", zName);
124
- }
369
+ /* length check moved to tar_split_path */
125370
tar_add_directory_of(zName, nName, mTime);
126
- tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
371
+ tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
127372
if( n ){
128373
gzip_step(blob_buffer(pContent), n);
129374
lastPage = n % 512;
130375
if( lastPage!=0 ){
131376
gzip_step(tball.zSpaces, 512 - lastPage);
@@ -142,10 +387,14 @@
142387
gzip_step(tball.zSpaces, 512);
143388
gzip_step(tball.zSpaces, 512);
144389
gzip_finish(pOut);
145390
fossil_free(tball.aHdr);
146391
tball.aHdr = 0;
392
+ fossil_free(tball.zPrevDir);
393
+ tball.zPrevDir = NULL;
394
+ tball.nPrevDirAlloc = 0;
395
+ blob_reset(&tball.pax);
147396
}
148397
149398
150399
/*
151400
** COMMAND: test-tarball
152401
--- src/tar.c
+++ src/tar.c
@@ -27,31 +27,253 @@
27 */
28 static struct tarball_t {
29 unsigned char *aHdr; /* Space for building headers */
30 char *zSpaces; /* Spaces for padding */
31 char *zPrevDir; /* Name of directory for previous entry */
 
 
32 } tball;
 
 
 
 
 
 
 
 
33
34 /*
35 ** Begin the process of generating a tarball.
36 **
37 ** Initialize the GZIP compressor and the table of directory names.
38 */
39 static void tar_begin(void){
40 assert( tball.aHdr==0 );
41 tball.aHdr = fossil_malloc(512+512+256);
42 memset(tball.aHdr, 0, 512+512+256);
43 tball.zSpaces = (char*)&tball.aHdr[512];
44 tball.zPrevDir = (char*)&tball.zSpaces[512];
 
 
 
 
 
45 memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
46 memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
47 memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */
 
 
48 gzip_begin();
49 db_multi_exec(
50 "CREATE TEMP TABLE dir(name UNIQUE);"
51 );
52 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
54 /*
55 ** Build a header for a file or directory and write that header
56 ** into the growing tarball.
57 */
@@ -59,33 +281,49 @@
59 const char *zName, /* Name of the object */
60 int nName, /* Number of characters in zName */
61 int iMode, /* Mode. 0644 or 0755 */
62 unsigned int mTime, /* File modification time */
63 int iSize, /* Size of the object in bytes */
64 int iType /* Type of object. 0==file. 5==directory */
65 ){
66 unsigned int cksum = 0;
67 int i;
68 if( nName>100 ){
69 memcpy(&tball.aHdr[345], zName, nName-100);
70 memcpy(tball.aHdr, &zName[nName-100], 100);
71 memset(&tball.aHdr[245+nName], 0, 267-nName);
72 }else{
73 memcpy(tball.aHdr, zName, nName);
74 memset(&tball.aHdr[nName], 0, 100-nName);
75 memset(&tball.aHdr[345], 0, 167);
76 }
77 sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
78 sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
79 sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
80 memset(&tball.aHdr[148], ' ', 8);
81 tball.aHdr[156] = iType + '0';
82 for(i=0; i<512; i++) cksum += tball.aHdr[i];
83 sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
84 tball.aHdr[154] = 0;
85 gzip_step((char*)tball.aHdr, 512);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86 }
 
87
88 /*
89 ** Recursively add a directory entry for the given file if those
90 ** directories have not previously been seen.
91 */
@@ -95,18 +333,27 @@
95 unsigned int mTime /* Modification time */
96 ){
97 int i;
98 for(i=nName-1; i>0 && zName[i]!='/'; i--){}
99 if( i<=0 ) return;
100 if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;
 
101 db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
102 if( sqlite3_changes(g.db)==0 ) return;
103 tar_add_directory_of(zName, i-1, mTime);
104 tar_add_header(zName, i, 0755, mTime, 0, 5);
 
 
 
 
 
 
 
105 memcpy(tball.zPrevDir, zName, i);
106 tball.zPrevDir[i] = 0;
107 }
 
108
109 /*
110 ** Add a single file to the growing tarball.
111 */
112 static void tar_add_file(
@@ -117,15 +364,13 @@
117 ){
118 int nName = strlen(zName);
119 int n = blob_size(pContent);
120 int lastPage;
121
122 if( nName>=250 ){
123 fossil_fatal("name too long for ustar format: \"%s\"", zName);
124 }
125 tar_add_directory_of(zName, nName, mTime);
126 tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
127 if( n ){
128 gzip_step(blob_buffer(pContent), n);
129 lastPage = n % 512;
130 if( lastPage!=0 ){
131 gzip_step(tball.zSpaces, 512 - lastPage);
@@ -142,10 +387,14 @@
142 gzip_step(tball.zSpaces, 512);
143 gzip_step(tball.zSpaces, 512);
144 gzip_finish(pOut);
145 fossil_free(tball.aHdr);
146 tball.aHdr = 0;
 
 
 
 
147 }
148
149
150 /*
151 ** COMMAND: test-tarball
152
--- src/tar.c
+++ src/tar.c
@@ -27,31 +27,253 @@
27 */
28 static struct tarball_t {
29 unsigned char *aHdr; /* Space for building headers */
30 char *zSpaces; /* Spaces for padding */
31 char *zPrevDir; /* Name of directory for previous entry */
32 int nPrevDirAlloc; /* size of zPrevDir */
33 Blob pax; /* PAX data */
34 } tball;
35
36
37 /*
38 ** field lengths of 'ustar' name and prefix fields.
39 */
40 #define USTAR_NAME_LEN 100
41 #define USTAR_PREFIX_LEN 155
42
43
44 /*
45 ** Begin the process of generating a tarball.
46 **
47 ** Initialize the GZIP compressor and the table of directory names.
48 */
49 static void tar_begin(void){
50 assert( tball.aHdr==0 );
51 tball.aHdr = fossil_malloc(512+512);
52 memset(tball.aHdr, 0, 512+512);
53 tball.zSpaces = (char*)&tball.aHdr[512];
54 /* zPrevDir init */
55 tball.zPrevDir = NULL;
56 tball.nPrevDirAlloc = 0;
57 /* scratch buffer init */
58 blob_zero(&tball.pax);
59
60 memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
61 memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
62 memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */
63 memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */
64 memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */
65 gzip_begin();
66 db_multi_exec(
67 "CREATE TEMP TABLE dir(name UNIQUE);"
68 );
69 }
70
71
72 /*
73 ** verify that all characters in 'zName' are in the
74 ** ISO646 (=ASCII) character set.
75 */
76 static int is_iso646_name(
77 const char *zName, /* file path */
78 int nName /* path length */
79 ){
80 int i;
81 for(i = 0; i < nName; i++){
82 unsigned char c = (unsigned char)zName[i];
83 if( c>0x7e ) return 0;
84 }
85 return 1;
86 }
87
88
89 /*
90 ** copy string pSrc into pDest, truncating or padding with 0 if necessary
91 */
92 static void padded_copy(
93 char *pDest,
94 int nDest,
95 const char *pSrc,
96 int nSrc
97 ){
98 if(nSrc >= nDest){
99 memcpy(pDest, pSrc, nDest);
100 }else{
101 memcpy(pDest, pSrc, nSrc);
102 memset(&pDest[nSrc], 0, nDest - nSrc);
103 }
104 }
105
106
107
108 /******************************************************************************
109 **
110 ** The 'tar' format has evolved over time. Initially the name was stored
111 ** in a 100 byte null-terminated field 'name'. File path names were
112 ** limited to 99 bytes.
113 **
114 ** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
115 ** for up to 255 characters to be stored. The full file path is formed by
116 ** concatenating the field 'prefix', a slash, and the field 'name'. This
117 ** gives some measure of compatibility with programs that only understand
118 ** the oldest format.
119 **
120 ** The latest Posix extension is called the 'pax Interchange Format'.
121 ** It removes all the limitations of the previous two formats by allowing
122 ** the storage of arbitrary-length attributes in a separate object that looks
123 ** like a file to programs that do not understand this extension. So the
124 ** contents of the 'name' and 'prefix' fields should contain values that allow
125 ** versions of tar that do not understand this extension to still do
126 ** something useful.
127 **
128 ******************************************************************************/
129
130 /*
131 ** The position we use to split a file path into the 'name' and 'prefix'
132 ** fields needs to meet the following criteria:
133 **
134 ** - not at the beginning or end of the string
135 ** - the position must contain a slash
136 ** - no more than 100 characters follow the slash
137 ** - no more than 155 characters precede it
138 **
139 ** The routine 'find_split_pos' finds a split position. It will meet the
140 ** criteria listed above if such a position exists. If no such
141 ** position exists it generates one that is useful for generating the
142 ** values used for backward compatibility.
143 */
144 static int find_split_pos(
145 const char *zName, /* file path */
146 int nName /* path length */
147 ){
148 int i, split = 0;
149 /* only search if the string needs splitting */
150 if(nName > USTAR_NAME_LEN){
151 for(i = 1; i+1 < nName; i++)
152 if(zName[i] == '/'){
153 split = i+1;
154 /* if the split position is within USTAR_NAME_LEN bytes from
155 * the end we can quit */
156 if(nName - split <= USTAR_NAME_LEN) break;
157 }
158 }
159 return split;
160 }
161
162
163 /*
164 ** attempt to split the file name path to meet 'ustar' header
165 ** criteria.
166 */
167 static int tar_split_path(
168 const char *zName, /* path */
169 int nName, /* path length */
170 char *pName, /* name field */
171 char *pPrefix /* prefix field */
172 ){
173 int split = find_split_pos(zName, nName);
174 /* check whether both pieces fit */
175 if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){
176 return 0; /* no */
177 }
178
179 /* extract name */
180 padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
181
182 /* extract prefix */
183 padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
184
185 return 1; /* success */
186 }
187
188
189 /*
190 ** When using an extension header we still need to put something
191 ** reasonable in the name and prefix fields. This is probably as
192 ** good as it gets.
193 */
194 static void approximate_split_path(
195 const char *zName, /* path */
196 int nName, /* path length */
197 char *pName, /* name field */
198 char *pPrefix, /* prefix field */
199 int bHeader /* is this a 'x' type tar header? */
200 ){
201 int split;
202
203 /* if this is a Pax Interchange header prepend "PaxHeader/"
204 ** so we can tell files apart from metadata */
205 if( bHeader ){
206 int n;
207 blob_reset(&tball.pax);
208 blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName);
209 zName = blob_buffer(&tball.pax);
210 nName = blob_size(&tball.pax);
211 }
212
213 /* find the split position */
214 split = find_split_pos(zName, nName);
215
216 /* extract a name, truncate if needed */
217 padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
218
219 /* extract a prefix field, truncate when needed */
220 padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
221 }
222
223
224 /*
225 ** add a Pax Interchange header to the scratch buffer
226 **
227 ** format: <length> <key>=<value>\n
228 ** the tricky part is that each header contains its own
229 ** size in decimal, counting that length.
230 */
231 static void add_pax_header(
232 const char *zField,
233 const char *zValue,
234 int nValue
235 ){
236 /* calculate length without length field */
237 int blen = strlen(zField) + nValue + 3;
238 /* calculate the length of the length field */
239 int next10 = 1;
240 int n;
241 for(n = blen; n > 0; ){
242 blen++; next10 *= 10;
243 n /= 10;
244 }
245 /* adding the length extended the length field? */
246 if(blen > next10){
247 blen++;
248 }
249 /* build the string */
250 blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
251 /* this _must_ be right */
252 if(blob_size(&tball.pax) != blen){
253 fossil_fatal("internal error: PAX tar header has bad length");
254 }
255 }
256
257
258 /*
259 ** set the header type, calculate the checksum and output
260 ** the header
261 */
262 static void cksum_and_write_header(
263 char cType
264 ){
265 unsigned int cksum = 0;
266 int i;
267 memset(&tball.aHdr[148], ' ', 8);
268 tball.aHdr[156] = cType;
269 for(i=0; i<512; i++) cksum += tball.aHdr[i];
270 sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
271 tball.aHdr[155] = 0;
272 gzip_step((char*)tball.aHdr, 512);
273 }
274
275
276 /*
277 ** Build a header for a file or directory and write that header
278 ** into the growing tarball.
279 */
@@ -59,33 +281,49 @@
281 const char *zName, /* Name of the object */
282 int nName, /* Number of characters in zName */
283 int iMode, /* Mode. 0644 or 0755 */
284 unsigned int mTime, /* File modification time */
285 int iSize, /* Size of the object in bytes */
286 char cType /* Type of object. '0'==file. '5'==directory */
287 ){
288 /* set mode and modification time */
 
 
 
 
 
 
 
 
 
 
289 sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
 
290 sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
291
292 /* see if we need to output a Pax Interchange Header */
293 if( !is_iso646_name(zName, nName) ||
294 !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
295 int lastPage;
296 /* add a file name for interoperability with older programs */
297 approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
298
299 /* generate the Pax Interchange path header */
300 blob_reset(&tball.pax);
301 add_pax_header("path", zName, nName);
302
303 /* set the header length, and write the header */
304 sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o",
305 blob_size(&tball.pax));
306 cksum_and_write_header('x');
307
308 /* write the Pax Interchange data */
309 gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax));
310 lastPage = blob_size(&tball.pax) % 512;
311 if( lastPage!=0 ){
312 gzip_step(tball.zSpaces, 512 - lastPage);
313 }
314
315 /* generate an approximate path for the regular header */
316 approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
317 }
318 /* set the size */
319 sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
320
321 /* write the regular header */
322 cksum_and_write_header(cType);
323 }
324
325
326 /*
327 ** Recursively add a directory entry for the given file if those
328 ** directories have not previously been seen.
329 */
@@ -95,18 +333,27 @@
333 unsigned int mTime /* Modification time */
334 ){
335 int i;
336 for(i=nName-1; i>0 && zName[i]!='/'; i--){}
337 if( i<=0 ) return;
338 if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
339 memcmp(tball.zPrevDir, zName, i)==0 ) return;
340 db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
341 if( sqlite3_changes(g.db)==0 ) return;
342 tar_add_directory_of(zName, i-1, mTime);
343 tar_add_header(zName, i, 0755, mTime, 0, '5');
344 if( i >= tball.nPrevDirAlloc ){
345 int nsize = tball.nPrevDirAlloc * 2;
346 if(i+1 > nsize)
347 nsize = i+1;
348 tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
349 tball.nPrevDirAlloc = nsize;
350 }
351 memcpy(tball.zPrevDir, zName, i);
352 tball.zPrevDir[i] = 0;
353 }
354
355
356 /*
357 ** Add a single file to the growing tarball.
358 */
359 static void tar_add_file(
@@ -117,15 +364,13 @@
364 ){
365 int nName = strlen(zName);
366 int n = blob_size(pContent);
367 int lastPage;
368
369 /* length check moved to tar_split_path */
 
 
370 tar_add_directory_of(zName, nName, mTime);
371 tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
372 if( n ){
373 gzip_step(blob_buffer(pContent), n);
374 lastPage = n % 512;
375 if( lastPage!=0 ){
376 gzip_step(tball.zSpaces, 512 - lastPage);
@@ -142,10 +387,14 @@
387 gzip_step(tball.zSpaces, 512);
388 gzip_step(tball.zSpaces, 512);
389 gzip_finish(pOut);
390 fossil_free(tball.aHdr);
391 tball.aHdr = 0;
392 fossil_free(tball.zPrevDir);
393 tball.zPrevDir = NULL;
394 tball.nPrevDirAlloc = 0;
395 blob_reset(&tball.pax);
396 }
397
398
399 /*
400 ** COMMAND: test-tarball
401

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button