Fossil SCM
Improvements to tar generation. Uses the format documented in Posix.1-2008 to handle long file names and UTF-8.
Commit
2ef37b3b2a0c2dd48f4f902644fdb85175eae86a
Parent
ba15af450d33b2f…
1 file changed
+322
-29
+322
-29
| --- src/tar.c | ||
| +++ src/tar.c | ||
| @@ -27,31 +27,296 @@ | ||
| 27 | 27 | */ |
| 28 | 28 | static struct tarball_t { |
| 29 | 29 | unsigned char *aHdr; /* Space for building headers */ |
| 30 | 30 | char *zSpaces; /* Spaces for padding */ |
| 31 | 31 | char *zPrevDir; /* Name of directory for previous entry */ |
| 32 | + int nPrevDirAlloc; /* size of zPrevDir */ | |
| 33 | + char *pScratch; /* scratch buffer used to build PAX data */ | |
| 34 | + int nScratchUsed; /* part of buffer containing data */ | |
| 35 | + int nScratchAlloc; /* size of buffer */ | |
| 32 | 36 | } tball; |
| 37 | + | |
| 38 | + | |
| 39 | +/* | |
| 40 | +** field lengths of 'ustar' name and prefix fields. | |
| 41 | +*/ | |
| 42 | +#define USTAR_NAME_LEN 100 | |
| 43 | +#define USTAR_PREFIX_LEN 155 | |
| 44 | + | |
| 33 | 45 | |
| 34 | 46 | /* |
| 35 | 47 | ** Begin the process of generating a tarball. |
| 36 | 48 | ** |
| 37 | 49 | ** Initialize the GZIP compressor and the table of directory names. |
| 38 | 50 | */ |
| 39 | 51 | static void tar_begin(void){ |
| 40 | 52 | assert( tball.aHdr==0 ); |
| 41 | - tball.aHdr = fossil_malloc(512+512+256); | |
| 42 | - memset(tball.aHdr, 0, 512+512+256); | |
| 53 | + tball.aHdr = fossil_malloc(512+512); | |
| 54 | + memset(tball.aHdr, 0, 512+512); | |
| 43 | 55 | tball.zSpaces = (char*)&tball.aHdr[512]; |
| 44 | - tball.zPrevDir = (char*)&tball.zSpaces[512]; | |
| 56 | + /* zPrevDir init */ | |
| 57 | + tball.zPrevDir = NULL; | |
| 58 | + tball.nPrevDirAlloc = 0; | |
| 59 | + /* scratch buffer init */ | |
| 60 | + tball.pScratch = NULL; | |
| 61 | + tball.nScratchUsed = 0; | |
| 62 | + tball.nScratchAlloc = 0; | |
| 63 | + | |
| 45 | 64 | memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */ |
| 46 | 65 | memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */ |
| 47 | - memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */ | |
| 66 | + memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */ | |
| 67 | + memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */ | |
| 68 | + memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */ | |
| 48 | 69 | gzip_begin(); |
| 49 | 70 | db_multi_exec( |
| 50 | 71 | "CREATE TEMP TABLE dir(name UNIQUE);" |
| 51 | 72 | ); |
| 52 | 73 | } |
| 74 | + | |
| 75 | + | |
| 76 | +/* | |
| 77 | +** print to the scratch buffer | |
| 78 | +** | |
| 79 | +** used to build the Pax Interchange Format data, and create | |
| 80 | +** pseudo-file names for the header data. | |
| 81 | +** | |
| 82 | +** The buffer is grown automatically to accommodate the data. | |
| 83 | +*/ | |
| 84 | +static int scratch_printf( | |
| 85 | + const char *fmt, | |
| 86 | + ... | |
| 87 | +){ | |
| 88 | + for(;;){ | |
| 89 | + int newSize, minSpace, n; | |
| 90 | + /* calculate space in buffer */ | |
| 91 | + int space = tball.nScratchAlloc - tball.nScratchUsed; | |
| 92 | + /* format the string */ | |
| 93 | + va_list vl; | |
| 94 | + va_start(vl, fmt); | |
| 95 | + n = vsnprintf(&tball.pScratch[tball.nScratchUsed], space, fmt, vl); | |
| 96 | + assert(n >= 0); | |
| 97 | + va_end(vl); | |
| 98 | + /* if it fit we're done */ | |
| 99 | + if(n < space) | |
| 100 | + return n; | |
| 101 | + /* buffer too short: calculate reasonable new size */ | |
| 102 | + minSpace = tball.nScratchUsed+n+1; | |
| 103 | + newSize = 2 * tball.nScratchAlloc; | |
| 104 | + if(newSize < minSpace) | |
| 105 | + newSize = minSpace; | |
| 106 | + /* grow the buffer */ | |
| 107 | + tball.pScratch = fossil_realloc(tball.pScratch, newSize); | |
| 108 | + tball.nScratchAlloc = newSize; | |
| 109 | + /* loop to try again */ | |
| 110 | + } | |
| 111 | +} | |
| 112 | + | |
| 113 | + | |
| 114 | +/* | |
| 115 | +** verify that all characters in 'zName' are in the | |
| 116 | +** ISO646 (=ASCII) character set. | |
| 117 | +*/ | |
| 118 | +static int is_iso646_name( | |
| 119 | + const char *zName, /* file path */ | |
| 120 | + int nName /* path length */ | |
| 121 | +){ | |
| 122 | + int i; | |
| 123 | + for(i = 0; i < nName; i++){ | |
| 124 | + unsigned char c = (unsigned char)zName[i]; | |
| 125 | + if(c > 0x7e) | |
| 126 | + return 0; | |
| 127 | + } | |
| 128 | + return 1; | |
| 129 | +} | |
| 130 | + | |
| 131 | + | |
| 132 | +/* | |
| 133 | +** copy string pSrc into pDest, truncating or padding with 0 if necessary | |
| 134 | +*/ | |
| 135 | +static void padded_copy( | |
| 136 | + char *pDest, | |
| 137 | + int nDest, | |
| 138 | + const char *pSrc, | |
| 139 | + int nSrc | |
| 140 | +){ | |
| 141 | + if(nSrc >= nDest){ | |
| 142 | + memcpy(pDest, pSrc, nDest); | |
| 143 | + }else{ | |
| 144 | + memcpy(pDest, pSrc, nSrc); | |
| 145 | + memset(&pDest[nSrc], 0, nDest - nSrc); | |
| 146 | + } | |
| 147 | +} | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | +/****************************************************************************** | |
| 152 | +** | |
| 153 | +** The 'tar' format has evolved over time. Initially the name was stored | |
| 154 | +** in a 100 byte null-terminated field 'name'. File path names were | |
| 155 | +** limited to 99 bytes. | |
| 156 | +** | |
| 157 | +** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing | |
| 158 | +** for up to 255 characters to be stored. The full file path is formed by | |
| 159 | +** concatenating the field 'prefix', a slash, and the field 'name'. This | |
| 160 | +** gives some measure of compatibility with programs that only understand | |
| 161 | +** the oldest format. | |
| 162 | +** | |
| 163 | +** The latest Posix extension is called the 'pax Interchange Format'. | |
| 164 | +** It removes all the limitations of the previous two formats by allowing | |
| 165 | +** the storage of arbitrary-length attributes in a separate object that looks | |
| 166 | +** like a file to programs that do not understand this extension. So the | |
| 167 | +** contents of the 'name' and 'prefix' fields should contain values that allow | |
| 168 | +** versions of tar that do not understand this extension to still do | |
| 169 | +** something useful. | |
| 170 | +** | |
| 171 | +******************************************************************************/ | |
| 172 | + | |
| 173 | +/* | |
| 174 | +** The position we use to split a file path into the 'name' and 'prefix' | |
| 175 | +** fields needs to meet the following criteria: | |
| 176 | +** | |
| 177 | +** - not at the beginning or end of the string | |
| 178 | +** - the position must contain a slash | |
| 179 | +** - no more than 100 characters follow the slash | |
| 180 | +** - no more than 155 characters precede it | |
| 181 | +** | |
| 182 | +** The routine 'find_split_pos' finds a split position. It will meet the | |
| 183 | +** criteria listed above if such a position exists. If no such | |
| 184 | +** position exists it generates one that is useful for generating the | |
| 185 | +** values used for backward compatibility. | |
| 186 | +*/ | |
| 187 | +static int find_split_pos( | |
| 188 | + const char *zName, /* file path */ | |
| 189 | + int nName /* path length */ | |
| 190 | +){ | |
| 191 | + int i, split = 0; | |
| 192 | + /* only search if the string needs splitting */ | |
| 193 | + if(nName > USTAR_NAME_LEN){ | |
| 194 | + for(i = 1; i+1 < nName; i++) | |
| 195 | + if(zName[i] == '/'){ | |
| 196 | + split = i+1; | |
| 197 | + /* if the split position is within USTAR_NAME_LEN bytes from | |
| 198 | + * the end we can quit */ | |
| 199 | + if(nName - split <= USTAR_NAME_LEN) | |
| 200 | + break; | |
| 201 | + } | |
| 202 | + } | |
| 203 | + return split; | |
| 204 | +} | |
| 205 | + | |
| 206 | + | |
| 207 | +/* | |
| 208 | +** attempt to split the file name path to meet 'ustar' header | |
| 209 | +** criteria. | |
| 210 | +*/ | |
| 211 | +static int tar_split_path( | |
| 212 | + const char *zName, /* path */ | |
| 213 | + int nName, /* path length */ | |
| 214 | + char *pName, /* name field */ | |
| 215 | + char *pPrefix /* prefix field */ | |
| 216 | +){ | |
| 217 | + int split = find_split_pos(zName, nName); | |
| 218 | + /* check whether both pieces fit */ | |
| 219 | + if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1) | |
| 220 | + return 0; /* no */ | |
| 221 | + | |
| 222 | + /* extract name */ | |
| 223 | + padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); | |
| 224 | + | |
| 225 | + /* extract prefix */ | |
| 226 | + padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0)); | |
| 227 | + | |
| 228 | + return 1; /* success */ | |
| 229 | +} | |
| 230 | + | |
| 231 | + | |
| 232 | +/* | |
| 233 | +** When using an extension header we still need to put something | |
| 234 | +** reasonable in the name and prefix fields. This is probably as | |
| 235 | +** good as it gets. | |
| 236 | +*/ | |
| 237 | +static void approximate_split_path( | |
| 238 | + const char *zName, /* path */ | |
| 239 | + int nName, /* path length */ | |
| 240 | + char *pName, /* name field */ | |
| 241 | + char *pPrefix, /* prefix field */ | |
| 242 | + int bHeader /* is this a 'x' type tar header? */ | |
| 243 | +){ | |
| 244 | + int split; | |
| 245 | + | |
| 246 | + /* if this is a Pax Interchange header prepend "PaxHeader/" | |
| 247 | + * so we can tell files apart from metadata */ | |
| 248 | + if(bHeader){ | |
| 249 | + int n; | |
| 250 | + tball.nScratchUsed = 0; | |
| 251 | + n = scratch_printf("PaxHeader/%*.*s", nName, nName, zName); | |
| 252 | + zName = tball.pScratch; | |
| 253 | + nName = n; | |
| 254 | + } | |
| 255 | + | |
| 256 | + /* find the split position */ | |
| 257 | + split = find_split_pos(zName, nName); | |
| 258 | + | |
| 259 | + /* extract a name, truncate if needed */ | |
| 260 | + padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); | |
| 261 | + | |
| 262 | + /* extract a prefix field, truncate when needed */ | |
| 263 | + padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0)); | |
| 264 | +} | |
| 265 | + | |
| 266 | + | |
| 267 | +/* | |
| 268 | +** add a Pax Interchange header to the scratch buffer | |
| 269 | +** | |
| 270 | +** format: <length> <key>=<value>\n | |
| 271 | +** the tricky part is that each header contains its own | |
| 272 | +** size in decimal, counting that length. | |
| 273 | +*/ | |
| 274 | +static void add_pax_header( | |
| 275 | + const char *zField, | |
| 276 | + const char *zValue, | |
| 277 | + int nValue | |
| 278 | +){ | |
| 279 | + /* calculate length without length field */ | |
| 280 | + int blen = strlen(zField) + nValue + 3; | |
| 281 | + /* calculate the length of the length field */ | |
| 282 | + int next10 = 1; | |
| 283 | + int n; | |
| 284 | + for(n = blen; n > 0; ){ | |
| 285 | + blen++; next10 *= 10; | |
| 286 | + n /= 10; | |
| 287 | + } | |
| 288 | + /* adding the length extended the length field? */ | |
| 289 | + if(blen > next10) | |
| 290 | + blen++; | |
| 291 | + /* build the string */ | |
| 292 | + n = scratch_printf("%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue); | |
| 293 | + /* this _must_ be right */ | |
| 294 | + if(n != blen) | |
| 295 | + fossil_fatal("internal error: PAX tar header has bad length"); | |
| 296 | + /* add length to scratch buffer */ | |
| 297 | + tball.nScratchUsed += blen; | |
| 298 | +} | |
| 299 | + | |
| 300 | + | |
| 301 | +/* | |
| 302 | +** set the header type, calculate the checksum and output | |
| 303 | +** the header | |
| 304 | +*/ | |
| 305 | +static void cksum_and_write_header( | |
| 306 | + char cType | |
| 307 | +){ | |
| 308 | + unsigned int cksum = 0; | |
| 309 | + int i; | |
| 310 | + memset(&tball.aHdr[148], ' ', 8); | |
| 311 | + tball.aHdr[156] = cType; | |
| 312 | + for(i=0; i<512; i++) cksum += tball.aHdr[i]; | |
| 313 | + sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum); | |
| 314 | + tball.aHdr[155] = 0; | |
| 315 | + gzip_step((char*)tball.aHdr, 512); | |
| 316 | +} | |
| 317 | + | |
| 53 | 318 | |
| 54 | 319 | /* |
| 55 | 320 | ** Build a header for a file or directory and write that header |
| 56 | 321 | ** into the growing tarball. |
| 57 | 322 | */ |
| @@ -59,33 +324,47 @@ | ||
| 59 | 324 | const char *zName, /* Name of the object */ |
| 60 | 325 | int nName, /* Number of characters in zName */ |
| 61 | 326 | int iMode, /* Mode. 0644 or 0755 */ |
| 62 | 327 | unsigned int mTime, /* File modification time */ |
| 63 | 328 | int iSize, /* Size of the object in bytes */ |
| 64 | - int iType /* Type of object. 0==file. 5==directory */ | |
| 329 | + char cType /* Type of object. '0'==file. '5'==directory */ | |
| 65 | 330 | ){ |
| 66 | - unsigned int cksum = 0; | |
| 67 | - int i; | |
| 68 | - if( nName>100 ){ | |
| 69 | - memcpy(&tball.aHdr[345], zName, nName-100); | |
| 70 | - memcpy(tball.aHdr, &zName[nName-100], 100); | |
| 71 | - memset(&tball.aHdr[245+nName], 0, 267-nName); | |
| 72 | - }else{ | |
| 73 | - memcpy(tball.aHdr, zName, nName); | |
| 74 | - memset(&tball.aHdr[nName], 0, 100-nName); | |
| 75 | - memset(&tball.aHdr[345], 0, 167); | |
| 76 | - } | |
| 331 | + /* set mode and modification time */ | |
| 77 | 332 | sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode); |
| 78 | - sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); | |
| 79 | 333 | sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime); |
| 80 | - memset(&tball.aHdr[148], ' ', 8); | |
| 81 | - tball.aHdr[156] = iType + '0'; | |
| 82 | - for(i=0; i<512; i++) cksum += tball.aHdr[i]; | |
| 83 | - sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum); | |
| 84 | - tball.aHdr[154] = 0; | |
| 85 | - gzip_step((char*)tball.aHdr, 512); | |
| 334 | + | |
| 335 | + /* see if we need to output a Pax Interchange Header */ | |
| 336 | + if( !is_iso646_name(zName, nName) || | |
| 337 | + !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){ | |
| 338 | + int lastPage; | |
| 339 | + /* add a file name for interoperability with older programs */ | |
| 340 | + approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1); | |
| 341 | + | |
| 342 | + /* generate the Pax Interchange path header */ | |
| 343 | + tball.nScratchUsed = 0; | |
| 344 | + add_pax_header("path", zName, nName); | |
| 345 | + | |
| 346 | + /* set the header length, and write the header */ | |
| 347 | + sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", tball.nScratchUsed); | |
| 348 | + cksum_and_write_header('x'); | |
| 349 | + | |
| 350 | + /* write the Pax Interchange data */ | |
| 351 | + gzip_step(tball.pScratch, tball.nScratchUsed); | |
| 352 | + lastPage = tball.nScratchUsed % 512; | |
| 353 | + if( lastPage!=0 ) | |
| 354 | + gzip_step(tball.zSpaces, 512 - lastPage); | |
| 355 | + | |
| 356 | + /* generate an approximate path for the regular header */ | |
| 357 | + approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0); | |
| 358 | + } | |
| 359 | + /* set the size */ | |
| 360 | + sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); | |
| 361 | + | |
| 362 | + /* write the regular header */ | |
| 363 | + cksum_and_write_header(cType); | |
| 86 | 364 | } |
| 365 | + | |
| 87 | 366 | |
| 88 | 367 | /* |
| 89 | 368 | ** Recursively add a directory entry for the given file if those |
| 90 | 369 | ** directories have not previously been seen. |
| 91 | 370 | */ |
| @@ -95,18 +374,27 @@ | ||
| 95 | 374 | unsigned int mTime /* Modification time */ |
| 96 | 375 | ){ |
| 97 | 376 | int i; |
| 98 | 377 | for(i=nName-1; i>0 && zName[i]!='/'; i--){} |
| 99 | 378 | if( i<=0 ) return; |
| 100 | - if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return; | |
| 379 | + if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 && | |
| 380 | + memcmp(tball.zPrevDir, zName, i)==0 ) return; | |
| 101 | 381 | db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName); |
| 102 | 382 | if( sqlite3_changes(g.db)==0 ) return; |
| 103 | 383 | tar_add_directory_of(zName, i-1, mTime); |
| 104 | - tar_add_header(zName, i, 0755, mTime, 0, 5); | |
| 384 | + tar_add_header(zName, i, 0755, mTime, 0, '5'); | |
| 385 | + if( i >= tball.nPrevDirAlloc ){ | |
| 386 | + int nsize = tball.nPrevDirAlloc * 2; | |
| 387 | + if(i+1 > nsize) | |
| 388 | + nsize = i+1; | |
| 389 | + tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize); | |
| 390 | + tball.nPrevDirAlloc = nsize; | |
| 391 | + } | |
| 105 | 392 | memcpy(tball.zPrevDir, zName, i); |
| 106 | 393 | tball.zPrevDir[i] = 0; |
| 107 | 394 | } |
| 395 | + | |
| 108 | 396 | |
| 109 | 397 | /* |
| 110 | 398 | ** Add a single file to the growing tarball. |
| 111 | 399 | */ |
| 112 | 400 | static void tar_add_file( |
| @@ -117,15 +405,13 @@ | ||
| 117 | 405 | ){ |
| 118 | 406 | int nName = strlen(zName); |
| 119 | 407 | int n = blob_size(pContent); |
| 120 | 408 | int lastPage; |
| 121 | 409 | |
| 122 | - if( nName>=250 ){ | |
| 123 | - fossil_fatal("name too long for ustar format: \"%s\"", zName); | |
| 124 | - } | |
| 410 | + /* length check moved to tar_split_path */ | |
| 125 | 411 | tar_add_directory_of(zName, nName, mTime); |
| 126 | - tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0); | |
| 412 | + tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0'); | |
| 127 | 413 | if( n ){ |
| 128 | 414 | gzip_step(blob_buffer(pContent), n); |
| 129 | 415 | lastPage = n % 512; |
| 130 | 416 | if( lastPage!=0 ){ |
| 131 | 417 | gzip_step(tball.zSpaces, 512 - lastPage); |
| @@ -142,10 +428,17 @@ | ||
| 142 | 428 | gzip_step(tball.zSpaces, 512); |
| 143 | 429 | gzip_step(tball.zSpaces, 512); |
| 144 | 430 | gzip_finish(pOut); |
| 145 | 431 | fossil_free(tball.aHdr); |
| 146 | 432 | tball.aHdr = 0; |
| 433 | + fossil_free(tball.zPrevDir); | |
| 434 | + tball.zPrevDir = NULL; | |
| 435 | + tball.nPrevDirAlloc = 0; | |
| 436 | + fossil_free(tball.pScratch); | |
| 437 | + tball.pScratch = NULL; | |
| 438 | + tball.nScratchUsed = 0; | |
| 439 | + tball.nScratchAlloc = 0; | |
| 147 | 440 | } |
| 148 | 441 | |
| 149 | 442 | |
| 150 | 443 | /* |
| 151 | 444 | ** COMMAND: test-tarball |
| 152 | 445 |
| --- src/tar.c | |
| +++ src/tar.c | |
| @@ -27,31 +27,296 @@ | |
| 27 | */ |
| 28 | static struct tarball_t { |
| 29 | unsigned char *aHdr; /* Space for building headers */ |
| 30 | char *zSpaces; /* Spaces for padding */ |
| 31 | char *zPrevDir; /* Name of directory for previous entry */ |
| 32 | } tball; |
| 33 | |
| 34 | /* |
| 35 | ** Begin the process of generating a tarball. |
| 36 | ** |
| 37 | ** Initialize the GZIP compressor and the table of directory names. |
| 38 | */ |
| 39 | static void tar_begin(void){ |
| 40 | assert( tball.aHdr==0 ); |
| 41 | tball.aHdr = fossil_malloc(512+512+256); |
| 42 | memset(tball.aHdr, 0, 512+512+256); |
| 43 | tball.zSpaces = (char*)&tball.aHdr[512]; |
| 44 | tball.zPrevDir = (char*)&tball.zSpaces[512]; |
| 45 | memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */ |
| 46 | memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */ |
| 47 | memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */ |
| 48 | gzip_begin(); |
| 49 | db_multi_exec( |
| 50 | "CREATE TEMP TABLE dir(name UNIQUE);" |
| 51 | ); |
| 52 | } |
| 53 | |
| 54 | /* |
| 55 | ** Build a header for a file or directory and write that header |
| 56 | ** into the growing tarball. |
| 57 | */ |
| @@ -59,33 +324,47 @@ | |
| 59 | const char *zName, /* Name of the object */ |
| 60 | int nName, /* Number of characters in zName */ |
| 61 | int iMode, /* Mode. 0644 or 0755 */ |
| 62 | unsigned int mTime, /* File modification time */ |
| 63 | int iSize, /* Size of the object in bytes */ |
| 64 | int iType /* Type of object. 0==file. 5==directory */ |
| 65 | ){ |
| 66 | unsigned int cksum = 0; |
| 67 | int i; |
| 68 | if( nName>100 ){ |
| 69 | memcpy(&tball.aHdr[345], zName, nName-100); |
| 70 | memcpy(tball.aHdr, &zName[nName-100], 100); |
| 71 | memset(&tball.aHdr[245+nName], 0, 267-nName); |
| 72 | }else{ |
| 73 | memcpy(tball.aHdr, zName, nName); |
| 74 | memset(&tball.aHdr[nName], 0, 100-nName); |
| 75 | memset(&tball.aHdr[345], 0, 167); |
| 76 | } |
| 77 | sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode); |
| 78 | sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); |
| 79 | sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime); |
| 80 | memset(&tball.aHdr[148], ' ', 8); |
| 81 | tball.aHdr[156] = iType + '0'; |
| 82 | for(i=0; i<512; i++) cksum += tball.aHdr[i]; |
| 83 | sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum); |
| 84 | tball.aHdr[154] = 0; |
| 85 | gzip_step((char*)tball.aHdr, 512); |
| 86 | } |
| 87 | |
| 88 | /* |
| 89 | ** Recursively add a directory entry for the given file if those |
| 90 | ** directories have not previously been seen. |
| 91 | */ |
| @@ -95,18 +374,27 @@ | |
| 95 | unsigned int mTime /* Modification time */ |
| 96 | ){ |
| 97 | int i; |
| 98 | for(i=nName-1; i>0 && zName[i]!='/'; i--){} |
| 99 | if( i<=0 ) return; |
| 100 | if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return; |
| 101 | db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName); |
| 102 | if( sqlite3_changes(g.db)==0 ) return; |
| 103 | tar_add_directory_of(zName, i-1, mTime); |
| 104 | tar_add_header(zName, i, 0755, mTime, 0, 5); |
| 105 | memcpy(tball.zPrevDir, zName, i); |
| 106 | tball.zPrevDir[i] = 0; |
| 107 | } |
| 108 | |
| 109 | /* |
| 110 | ** Add a single file to the growing tarball. |
| 111 | */ |
| 112 | static void tar_add_file( |
| @@ -117,15 +405,13 @@ | |
| 117 | ){ |
| 118 | int nName = strlen(zName); |
| 119 | int n = blob_size(pContent); |
| 120 | int lastPage; |
| 121 | |
| 122 | if( nName>=250 ){ |
| 123 | fossil_fatal("name too long for ustar format: \"%s\"", zName); |
| 124 | } |
| 125 | tar_add_directory_of(zName, nName, mTime); |
| 126 | tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0); |
| 127 | if( n ){ |
| 128 | gzip_step(blob_buffer(pContent), n); |
| 129 | lastPage = n % 512; |
| 130 | if( lastPage!=0 ){ |
| 131 | gzip_step(tball.zSpaces, 512 - lastPage); |
| @@ -142,10 +428,17 @@ | |
| 142 | gzip_step(tball.zSpaces, 512); |
| 143 | gzip_step(tball.zSpaces, 512); |
| 144 | gzip_finish(pOut); |
| 145 | fossil_free(tball.aHdr); |
| 146 | tball.aHdr = 0; |
| 147 | } |
| 148 | |
| 149 | |
| 150 | /* |
| 151 | ** COMMAND: test-tarball |
| 152 |
| --- src/tar.c | |
| +++ src/tar.c | |
| @@ -27,31 +27,296 @@ | |
| 27 | */ |
| 28 | static struct tarball_t { |
| 29 | unsigned char *aHdr; /* Space for building headers */ |
| 30 | char *zSpaces; /* Spaces for padding */ |
| 31 | char *zPrevDir; /* Name of directory for previous entry */ |
| 32 | int nPrevDirAlloc; /* size of zPrevDir */ |
| 33 | char *pScratch; /* scratch buffer used to build PAX data */ |
| 34 | int nScratchUsed; /* part of buffer containing data */ |
| 35 | int nScratchAlloc; /* size of buffer */ |
| 36 | } tball; |
| 37 | |
| 38 | |
| 39 | /* |
| 40 | ** field lengths of 'ustar' name and prefix fields. |
| 41 | */ |
| 42 | #define USTAR_NAME_LEN 100 |
| 43 | #define USTAR_PREFIX_LEN 155 |
| 44 | |
| 45 | |
| 46 | /* |
| 47 | ** Begin the process of generating a tarball. |
| 48 | ** |
| 49 | ** Initialize the GZIP compressor and the table of directory names. |
| 50 | */ |
| 51 | static void tar_begin(void){ |
| 52 | assert( tball.aHdr==0 ); |
| 53 | tball.aHdr = fossil_malloc(512+512); |
| 54 | memset(tball.aHdr, 0, 512+512); |
| 55 | tball.zSpaces = (char*)&tball.aHdr[512]; |
| 56 | /* zPrevDir init */ |
| 57 | tball.zPrevDir = NULL; |
| 58 | tball.nPrevDirAlloc = 0; |
| 59 | /* scratch buffer init */ |
| 60 | tball.pScratch = NULL; |
| 61 | tball.nScratchUsed = 0; |
| 62 | tball.nScratchAlloc = 0; |
| 63 | |
| 64 | memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */ |
| 65 | memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */ |
| 66 | memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */ |
| 67 | memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */ |
| 68 | memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */ |
| 69 | gzip_begin(); |
| 70 | db_multi_exec( |
| 71 | "CREATE TEMP TABLE dir(name UNIQUE);" |
| 72 | ); |
| 73 | } |
| 74 | |
| 75 | |
| 76 | /* |
| 77 | ** print to the scratch buffer |
| 78 | ** |
| 79 | ** used to build the Pax Interchange Format data, and create |
| 80 | ** pseudo-file names for the header data. |
| 81 | ** |
| 82 | ** The buffer is grown automatically to accommodate the data. |
| 83 | */ |
| 84 | static int scratch_printf( |
| 85 | const char *fmt, |
| 86 | ... |
| 87 | ){ |
| 88 | for(;;){ |
| 89 | int newSize, minSpace, n; |
| 90 | /* calculate space in buffer */ |
| 91 | int space = tball.nScratchAlloc - tball.nScratchUsed; |
| 92 | /* format the string */ |
| 93 | va_list vl; |
| 94 | va_start(vl, fmt); |
| 95 | n = vsnprintf(&tball.pScratch[tball.nScratchUsed], space, fmt, vl); |
| 96 | assert(n >= 0); |
| 97 | va_end(vl); |
| 98 | /* if it fit we're done */ |
| 99 | if(n < space) |
| 100 | return n; |
| 101 | /* buffer too short: calculate reasonable new size */ |
| 102 | minSpace = tball.nScratchUsed+n+1; |
| 103 | newSize = 2 * tball.nScratchAlloc; |
| 104 | if(newSize < minSpace) |
| 105 | newSize = minSpace; |
| 106 | /* grow the buffer */ |
| 107 | tball.pScratch = fossil_realloc(tball.pScratch, newSize); |
| 108 | tball.nScratchAlloc = newSize; |
| 109 | /* loop to try again */ |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | |
| 114 | /* |
| 115 | ** verify that all characters in 'zName' are in the |
| 116 | ** ISO646 (=ASCII) character set. |
| 117 | */ |
| 118 | static int is_iso646_name( |
| 119 | const char *zName, /* file path */ |
| 120 | int nName /* path length */ |
| 121 | ){ |
| 122 | int i; |
| 123 | for(i = 0; i < nName; i++){ |
| 124 | unsigned char c = (unsigned char)zName[i]; |
| 125 | if(c > 0x7e) |
| 126 | return 0; |
| 127 | } |
| 128 | return 1; |
| 129 | } |
| 130 | |
| 131 | |
| 132 | /* |
| 133 | ** copy string pSrc into pDest, truncating or padding with 0 if necessary |
| 134 | */ |
| 135 | static void padded_copy( |
| 136 | char *pDest, |
| 137 | int nDest, |
| 138 | const char *pSrc, |
| 139 | int nSrc |
| 140 | ){ |
| 141 | if(nSrc >= nDest){ |
| 142 | memcpy(pDest, pSrc, nDest); |
| 143 | }else{ |
| 144 | memcpy(pDest, pSrc, nSrc); |
| 145 | memset(&pDest[nSrc], 0, nDest - nSrc); |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | |
| 150 | |
| 151 | /****************************************************************************** |
| 152 | ** |
| 153 | ** The 'tar' format has evolved over time. Initially the name was stored |
| 154 | ** in a 100 byte null-terminated field 'name'. File path names were |
| 155 | ** limited to 99 bytes. |
| 156 | ** |
| 157 | ** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing |
| 158 | ** for up to 255 characters to be stored. The full file path is formed by |
| 159 | ** concatenating the field 'prefix', a slash, and the field 'name'. This |
| 160 | ** gives some measure of compatibility with programs that only understand |
| 161 | ** the oldest format. |
| 162 | ** |
| 163 | ** The latest Posix extension is called the 'pax Interchange Format'. |
| 164 | ** It removes all the limitations of the previous two formats by allowing |
| 165 | ** the storage of arbitrary-length attributes in a separate object that looks |
| 166 | ** like a file to programs that do not understand this extension. So the |
| 167 | ** contents of the 'name' and 'prefix' fields should contain values that allow |
| 168 | ** versions of tar that do not understand this extension to still do |
| 169 | ** something useful. |
| 170 | ** |
| 171 | ******************************************************************************/ |
| 172 | |
| 173 | /* |
| 174 | ** The position we use to split a file path into the 'name' and 'prefix' |
| 175 | ** fields needs to meet the following criteria: |
| 176 | ** |
| 177 | ** - not at the beginning or end of the string |
| 178 | ** - the position must contain a slash |
| 179 | ** - no more than 100 characters follow the slash |
| 180 | ** - no more than 155 characters precede it |
| 181 | ** |
| 182 | ** The routine 'find_split_pos' finds a split position. It will meet the |
| 183 | ** criteria listed above if such a position exists. If no such |
| 184 | ** position exists it generates one that is useful for generating the |
| 185 | ** values used for backward compatibility. |
| 186 | */ |
| 187 | static int find_split_pos( |
| 188 | const char *zName, /* file path */ |
| 189 | int nName /* path length */ |
| 190 | ){ |
| 191 | int i, split = 0; |
| 192 | /* only search if the string needs splitting */ |
| 193 | if(nName > USTAR_NAME_LEN){ |
| 194 | for(i = 1; i+1 < nName; i++) |
| 195 | if(zName[i] == '/'){ |
| 196 | split = i+1; |
| 197 | /* if the split position is within USTAR_NAME_LEN bytes from |
| 198 | * the end we can quit */ |
| 199 | if(nName - split <= USTAR_NAME_LEN) |
| 200 | break; |
| 201 | } |
| 202 | } |
| 203 | return split; |
| 204 | } |
| 205 | |
| 206 | |
| 207 | /* |
| 208 | ** attempt to split the file name path to meet 'ustar' header |
| 209 | ** criteria. |
| 210 | */ |
| 211 | static int tar_split_path( |
| 212 | const char *zName, /* path */ |
| 213 | int nName, /* path length */ |
| 214 | char *pName, /* name field */ |
| 215 | char *pPrefix /* prefix field */ |
| 216 | ){ |
| 217 | int split = find_split_pos(zName, nName); |
| 218 | /* check whether both pieces fit */ |
| 219 | if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1) |
| 220 | return 0; /* no */ |
| 221 | |
| 222 | /* extract name */ |
| 223 | padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); |
| 224 | |
| 225 | /* extract prefix */ |
| 226 | padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0)); |
| 227 | |
| 228 | return 1; /* success */ |
| 229 | } |
| 230 | |
| 231 | |
| 232 | /* |
| 233 | ** When using an extension header we still need to put something |
| 234 | ** reasonable in the name and prefix fields. This is probably as |
| 235 | ** good as it gets. |
| 236 | */ |
| 237 | static void approximate_split_path( |
| 238 | const char *zName, /* path */ |
| 239 | int nName, /* path length */ |
| 240 | char *pName, /* name field */ |
| 241 | char *pPrefix, /* prefix field */ |
| 242 | int bHeader /* is this a 'x' type tar header? */ |
| 243 | ){ |
| 244 | int split; |
| 245 | |
| 246 | /* if this is a Pax Interchange header prepend "PaxHeader/" |
| 247 | * so we can tell files apart from metadata */ |
| 248 | if(bHeader){ |
| 249 | int n; |
| 250 | tball.nScratchUsed = 0; |
| 251 | n = scratch_printf("PaxHeader/%*.*s", nName, nName, zName); |
| 252 | zName = tball.pScratch; |
| 253 | nName = n; |
| 254 | } |
| 255 | |
| 256 | /* find the split position */ |
| 257 | split = find_split_pos(zName, nName); |
| 258 | |
| 259 | /* extract a name, truncate if needed */ |
| 260 | padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); |
| 261 | |
| 262 | /* extract a prefix field, truncate when needed */ |
| 263 | padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0)); |
| 264 | } |
| 265 | |
| 266 | |
| 267 | /* |
| 268 | ** add a Pax Interchange header to the scratch buffer |
| 269 | ** |
| 270 | ** format: <length> <key>=<value>\n |
| 271 | ** the tricky part is that each header contains its own |
| 272 | ** size in decimal, counting that length. |
| 273 | */ |
| 274 | static void add_pax_header( |
| 275 | const char *zField, |
| 276 | const char *zValue, |
| 277 | int nValue |
| 278 | ){ |
| 279 | /* calculate length without length field */ |
| 280 | int blen = strlen(zField) + nValue + 3; |
| 281 | /* calculate the length of the length field */ |
| 282 | int next10 = 1; |
| 283 | int n; |
| 284 | for(n = blen; n > 0; ){ |
| 285 | blen++; next10 *= 10; |
| 286 | n /= 10; |
| 287 | } |
| 288 | /* adding the length extended the length field? */ |
| 289 | if(blen > next10) |
| 290 | blen++; |
| 291 | /* build the string */ |
| 292 | n = scratch_printf("%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue); |
| 293 | /* this _must_ be right */ |
| 294 | if(n != blen) |
| 295 | fossil_fatal("internal error: PAX tar header has bad length"); |
| 296 | /* add length to scratch buffer */ |
| 297 | tball.nScratchUsed += blen; |
| 298 | } |
| 299 | |
| 300 | |
| 301 | /* |
| 302 | ** set the header type, calculate the checksum and output |
| 303 | ** the header |
| 304 | */ |
| 305 | static void cksum_and_write_header( |
| 306 | char cType |
| 307 | ){ |
| 308 | unsigned int cksum = 0; |
| 309 | int i; |
| 310 | memset(&tball.aHdr[148], ' ', 8); |
| 311 | tball.aHdr[156] = cType; |
| 312 | for(i=0; i<512; i++) cksum += tball.aHdr[i]; |
| 313 | sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum); |
| 314 | tball.aHdr[155] = 0; |
| 315 | gzip_step((char*)tball.aHdr, 512); |
| 316 | } |
| 317 | |
| 318 | |
| 319 | /* |
| 320 | ** Build a header for a file or directory and write that header |
| 321 | ** into the growing tarball. |
| 322 | */ |
| @@ -59,33 +324,47 @@ | |
| 324 | const char *zName, /* Name of the object */ |
| 325 | int nName, /* Number of characters in zName */ |
| 326 | int iMode, /* Mode. 0644 or 0755 */ |
| 327 | unsigned int mTime, /* File modification time */ |
| 328 | int iSize, /* Size of the object in bytes */ |
| 329 | char cType /* Type of object. '0'==file. '5'==directory */ |
| 330 | ){ |
| 331 | /* set mode and modification time */ |
| 332 | sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode); |
| 333 | sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime); |
| 334 | |
| 335 | /* see if we need to output a Pax Interchange Header */ |
| 336 | if( !is_iso646_name(zName, nName) || |
| 337 | !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){ |
| 338 | int lastPage; |
| 339 | /* add a file name for interoperability with older programs */ |
| 340 | approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1); |
| 341 | |
| 342 | /* generate the Pax Interchange path header */ |
| 343 | tball.nScratchUsed = 0; |
| 344 | add_pax_header("path", zName, nName); |
| 345 | |
| 346 | /* set the header length, and write the header */ |
| 347 | sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", tball.nScratchUsed); |
| 348 | cksum_and_write_header('x'); |
| 349 | |
| 350 | /* write the Pax Interchange data */ |
| 351 | gzip_step(tball.pScratch, tball.nScratchUsed); |
| 352 | lastPage = tball.nScratchUsed % 512; |
| 353 | if( lastPage!=0 ) |
| 354 | gzip_step(tball.zSpaces, 512 - lastPage); |
| 355 | |
| 356 | /* generate an approximate path for the regular header */ |
| 357 | approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0); |
| 358 | } |
| 359 | /* set the size */ |
| 360 | sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); |
| 361 | |
| 362 | /* write the regular header */ |
| 363 | cksum_and_write_header(cType); |
| 364 | } |
| 365 | |
| 366 | |
| 367 | /* |
| 368 | ** Recursively add a directory entry for the given file if those |
| 369 | ** directories have not previously been seen. |
| 370 | */ |
| @@ -95,18 +374,27 @@ | |
| 374 | unsigned int mTime /* Modification time */ |
| 375 | ){ |
| 376 | int i; |
| 377 | for(i=nName-1; i>0 && zName[i]!='/'; i--){} |
| 378 | if( i<=0 ) return; |
| 379 | if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 && |
| 380 | memcmp(tball.zPrevDir, zName, i)==0 ) return; |
| 381 | db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName); |
| 382 | if( sqlite3_changes(g.db)==0 ) return; |
| 383 | tar_add_directory_of(zName, i-1, mTime); |
| 384 | tar_add_header(zName, i, 0755, mTime, 0, '5'); |
| 385 | if( i >= tball.nPrevDirAlloc ){ |
| 386 | int nsize = tball.nPrevDirAlloc * 2; |
| 387 | if(i+1 > nsize) |
| 388 | nsize = i+1; |
| 389 | tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize); |
| 390 | tball.nPrevDirAlloc = nsize; |
| 391 | } |
| 392 | memcpy(tball.zPrevDir, zName, i); |
| 393 | tball.zPrevDir[i] = 0; |
| 394 | } |
| 395 | |
| 396 | |
| 397 | /* |
| 398 | ** Add a single file to the growing tarball. |
| 399 | */ |
| 400 | static void tar_add_file( |
| @@ -117,15 +405,13 @@ | |
| 405 | ){ |
| 406 | int nName = strlen(zName); |
| 407 | int n = blob_size(pContent); |
| 408 | int lastPage; |
| 409 | |
| 410 | /* length check moved to tar_split_path */ |
| 411 | tar_add_directory_of(zName, nName, mTime); |
| 412 | tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0'); |
| 413 | if( n ){ |
| 414 | gzip_step(blob_buffer(pContent), n); |
| 415 | lastPage = n % 512; |
| 416 | if( lastPage!=0 ){ |
| 417 | gzip_step(tball.zSpaces, 512 - lastPage); |
| @@ -142,10 +428,17 @@ | |
| 428 | gzip_step(tball.zSpaces, 512); |
| 429 | gzip_step(tball.zSpaces, 512); |
| 430 | gzip_finish(pOut); |
| 431 | fossil_free(tball.aHdr); |
| 432 | tball.aHdr = 0; |
| 433 | fossil_free(tball.zPrevDir); |
| 434 | tball.zPrevDir = NULL; |
| 435 | tball.nPrevDirAlloc = 0; |
| 436 | fossil_free(tball.pScratch); |
| 437 | tball.pScratch = NULL; |
| 438 | tball.nScratchUsed = 0; |
| 439 | tball.nScratchAlloc = 0; |
| 440 | } |
| 441 | |
| 442 | |
| 443 | /* |
| 444 | ** COMMAND: test-tarball |
| 445 |