Fossil SCM
Merge the ge-tarfix changes into trunk. This fixes tarball generation for repos that have very long filenames.
Commit
a26940c22ea353c4ea6b3e2577c1b491c27870a9
Parent
ba15af450d33b2f…
1 file changed
+278
-29
+278
-29
| --- src/tar.c | ||
| +++ src/tar.c | ||
| @@ -27,31 +27,253 @@ | ||
| 27 | 27 | */ |
| 28 | 28 | static struct tarball_t { |
| 29 | 29 | unsigned char *aHdr; /* Space for building headers */ |
| 30 | 30 | char *zSpaces; /* Spaces for padding */ |
| 31 | 31 | char *zPrevDir; /* Name of directory for previous entry */ |
| 32 | + int nPrevDirAlloc; /* size of zPrevDir */ | |
| 33 | + Blob pax; /* PAX data */ | |
| 32 | 34 | } tball; |
| 35 | + | |
| 36 | + | |
| 37 | +/* | |
| 38 | +** field lengths of 'ustar' name and prefix fields. | |
| 39 | +*/ | |
| 40 | +#define USTAR_NAME_LEN 100 | |
| 41 | +#define USTAR_PREFIX_LEN 155 | |
| 42 | + | |
| 33 | 43 | |
| 34 | 44 | /* |
| 35 | 45 | ** Begin the process of generating a tarball. |
| 36 | 46 | ** |
| 37 | 47 | ** Initialize the GZIP compressor and the table of directory names. |
| 38 | 48 | */ |
| 39 | 49 | static void tar_begin(void){ |
| 40 | 50 | assert( tball.aHdr==0 ); |
| 41 | - tball.aHdr = fossil_malloc(512+512+256); | |
| 42 | - memset(tball.aHdr, 0, 512+512+256); | |
| 51 | + tball.aHdr = fossil_malloc(512+512); | |
| 52 | + memset(tball.aHdr, 0, 512+512); | |
| 43 | 53 | tball.zSpaces = (char*)&tball.aHdr[512]; |
| 44 | - tball.zPrevDir = (char*)&tball.zSpaces[512]; | |
| 54 | + /* zPrevDir init */ | |
| 55 | + tball.zPrevDir = NULL; | |
| 56 | + tball.nPrevDirAlloc = 0; | |
| 57 | + /* scratch buffer init */ | |
| 58 | + blob_zero(&tball.pax); | |
| 59 | + | |
| 45 | 60 | memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */ |
| 46 | 61 | memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */ |
| 47 | - memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */ | |
| 62 | + memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */ | |
| 63 | + memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */ | |
| 64 | + memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */ | |
| 48 | 65 | gzip_begin(); |
| 49 | 66 | db_multi_exec( |
| 50 | 67 | "CREATE TEMP TABLE dir(name UNIQUE);" |
| 51 | 68 | ); |
| 52 | 69 | } |
| 70 | + | |
| 71 | + | |
| 72 | +/* | |
| 73 | +** verify that all characters in 'zName' are in the | |
| 74 | +** ISO646 (=ASCII) character set. | |
| 75 | +*/ | |
| 76 | +static int is_iso646_name( | |
| 77 | + const char *zName, /* file path */ | |
| 78 | + int nName /* path length */ | |
| 79 | +){ | |
| 80 | + int i; | |
| 81 | + for(i = 0; i < nName; i++){ | |
| 82 | + unsigned char c = (unsigned char)zName[i]; | |
| 83 | + if( c>0x7e ) return 0; | |
| 84 | + } | |
| 85 | + return 1; | |
| 86 | +} | |
| 87 | + | |
| 88 | + | |
| 89 | +/* | |
| 90 | +** copy string pSrc into pDest, truncating or padding with 0 if necessary | |
| 91 | +*/ | |
| 92 | +static void padded_copy( | |
| 93 | + char *pDest, | |
| 94 | + int nDest, | |
| 95 | + const char *pSrc, | |
| 96 | + int nSrc | |
| 97 | +){ | |
| 98 | + if(nSrc >= nDest){ | |
| 99 | + memcpy(pDest, pSrc, nDest); | |
| 100 | + }else{ | |
| 101 | + memcpy(pDest, pSrc, nSrc); | |
| 102 | + memset(&pDest[nSrc], 0, nDest - nSrc); | |
| 103 | + } | |
| 104 | +} | |
| 105 | + | |
| 106 | + | |
| 107 | + | |
| 108 | +/****************************************************************************** | |
| 109 | +** | |
| 110 | +** The 'tar' format has evolved over time. Initially the name was stored | |
| 111 | +** in a 100 byte null-terminated field 'name'. File path names were | |
| 112 | +** limited to 99 bytes. | |
| 113 | +** | |
| 114 | +** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing | |
| 115 | +** for up to 255 characters to be stored. The full file path is formed by | |
| 116 | +** concatenating the field 'prefix', a slash, and the field 'name'. This | |
| 117 | +** gives some measure of compatibility with programs that only understand | |
| 118 | +** the oldest format. | |
| 119 | +** | |
| 120 | +** The latest Posix extension is called the 'pax Interchange Format'. | |
| 121 | +** It removes all the limitations of the previous two formats by allowing | |
| 122 | +** the storage of arbitrary-length attributes in a separate object that looks | |
| 123 | +** like a file to programs that do not understand this extension. So the | |
| 124 | +** contents of the 'name' and 'prefix' fields should contain values that allow | |
| 125 | +** versions of tar that do not understand this extension to still do | |
| 126 | +** something useful. | |
| 127 | +** | |
| 128 | +******************************************************************************/ | |
| 129 | + | |
| 130 | +/* | |
| 131 | +** The position we use to split a file path into the 'name' and 'prefix' | |
| 132 | +** fields needs to meet the following criteria: | |
| 133 | +** | |
| 134 | +** - not at the beginning or end of the string | |
| 135 | +** - the position must contain a slash | |
| 136 | +** - no more than 100 characters follow the slash | |
| 137 | +** - no more than 155 characters precede it | |
| 138 | +** | |
| 139 | +** The routine 'find_split_pos' finds a split position. It will meet the | |
| 140 | +** criteria listed above if such a position exists. If no such | |
| 141 | +** position exists it generates one that is useful for generating the | |
| 142 | +** values used for backward compatibility. | |
| 143 | +*/ | |
| 144 | +static int find_split_pos( | |
| 145 | + const char *zName, /* file path */ | |
| 146 | + int nName /* path length */ | |
| 147 | +){ | |
| 148 | + int i, split = 0; | |
| 149 | + /* only search if the string needs splitting */ | |
| 150 | + if(nName > USTAR_NAME_LEN){ | |
| 151 | + for(i = 1; i+1 < nName; i++) | |
| 152 | + if(zName[i] == '/'){ | |
| 153 | + split = i+1; | |
| 154 | + /* if the split position is within USTAR_NAME_LEN bytes from | |
| 155 | + * the end we can quit */ | |
| 156 | + if(nName - split <= USTAR_NAME_LEN) break; | |
| 157 | + } | |
| 158 | + } | |
| 159 | + return split; | |
| 160 | +} | |
| 161 | + | |
| 162 | + | |
| 163 | +/* | |
| 164 | +** attempt to split the file name path to meet 'ustar' header | |
| 165 | +** criteria. | |
| 166 | +*/ | |
| 167 | +static int tar_split_path( | |
| 168 | + const char *zName, /* path */ | |
| 169 | + int nName, /* path length */ | |
| 170 | + char *pName, /* name field */ | |
| 171 | + char *pPrefix /* prefix field */ | |
| 172 | +){ | |
| 173 | + int split = find_split_pos(zName, nName); | |
| 174 | + /* check whether both pieces fit */ | |
| 175 | + if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){ | |
| 176 | + return 0; /* no */ | |
| 177 | + } | |
| 178 | + | |
| 179 | + /* extract name */ | |
| 180 | + padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); | |
| 181 | + | |
| 182 | + /* extract prefix */ | |
| 183 | + padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0)); | |
| 184 | + | |
| 185 | + return 1; /* success */ | |
| 186 | +} | |
| 187 | + | |
| 188 | + | |
| 189 | +/* | |
| 190 | +** When using an extension header we still need to put something | |
| 191 | +** reasonable in the name and prefix fields. This is probably as | |
| 192 | +** good as it gets. | |
| 193 | +*/ | |
| 194 | +static void approximate_split_path( | |
| 195 | + const char *zName, /* path */ | |
| 196 | + int nName, /* path length */ | |
| 197 | + char *pName, /* name field */ | |
| 198 | + char *pPrefix, /* prefix field */ | |
| 199 | + int bHeader /* is this a 'x' type tar header? */ | |
| 200 | +){ | |
| 201 | + int split; | |
| 202 | + | |
| 203 | + /* if this is a Pax Interchange header prepend "PaxHeader/" | |
| 204 | + ** so we can tell files apart from metadata */ | |
| 205 | + if( bHeader ){ | |
| 206 | + int n; | |
| 207 | + blob_reset(&tball.pax); | |
| 208 | + blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName); | |
| 209 | + zName = blob_buffer(&tball.pax); | |
| 210 | + nName = blob_size(&tball.pax); | |
| 211 | + } | |
| 212 | + | |
| 213 | + /* find the split position */ | |
| 214 | + split = find_split_pos(zName, nName); | |
| 215 | + | |
| 216 | + /* extract a name, truncate if needed */ | |
| 217 | + padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); | |
| 218 | + | |
| 219 | + /* extract a prefix field, truncate when needed */ | |
| 220 | + padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0)); | |
| 221 | +} | |
| 222 | + | |
| 223 | + | |
| 224 | +/* | |
| 225 | +** add a Pax Interchange header to the scratch buffer | |
| 226 | +** | |
| 227 | +** format: <length> <key>=<value>\n | |
| 228 | +** the tricky part is that each header contains its own | |
| 229 | +** size in decimal, counting that length. | |
| 230 | +*/ | |
| 231 | +static void add_pax_header( | |
| 232 | + const char *zField, | |
| 233 | + const char *zValue, | |
| 234 | + int nValue | |
| 235 | +){ | |
| 236 | + /* calculate length without length field */ | |
| 237 | + int blen = strlen(zField) + nValue + 3; | |
| 238 | + /* calculate the length of the length field */ | |
| 239 | + int next10 = 1; | |
| 240 | + int n; | |
| 241 | + for(n = blen; n > 0; ){ | |
| 242 | + blen++; next10 *= 10; | |
| 243 | + n /= 10; | |
| 244 | + } | |
| 245 | + /* adding the length extended the length field? */ | |
| 246 | + if(blen > next10){ | |
| 247 | + blen++; | |
| 248 | + } | |
| 249 | + /* build the string */ | |
| 250 | + blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue); | |
| 251 | + /* this _must_ be right */ | |
| 252 | + if(blob_size(&tball.pax) != blen){ | |
| 253 | + fossil_fatal("internal error: PAX tar header has bad length"); | |
| 254 | + } | |
| 255 | +} | |
| 256 | + | |
| 257 | + | |
| 258 | +/* | |
| 259 | +** set the header type, calculate the checksum and output | |
| 260 | +** the header | |
| 261 | +*/ | |
| 262 | +static void cksum_and_write_header( | |
| 263 | + char cType | |
| 264 | +){ | |
| 265 | + unsigned int cksum = 0; | |
| 266 | + int i; | |
| 267 | + memset(&tball.aHdr[148], ' ', 8); | |
| 268 | + tball.aHdr[156] = cType; | |
| 269 | + for(i=0; i<512; i++) cksum += tball.aHdr[i]; | |
| 270 | + sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum); | |
| 271 | + tball.aHdr[155] = 0; | |
| 272 | + gzip_step((char*)tball.aHdr, 512); | |
| 273 | +} | |
| 274 | + | |
| 53 | 275 | |
| 54 | 276 | /* |
| 55 | 277 | ** Build a header for a file or directory and write that header |
| 56 | 278 | ** into the growing tarball. |
| 57 | 279 | */ |
| @@ -59,33 +281,49 @@ | ||
| 59 | 281 | const char *zName, /* Name of the object */ |
| 60 | 282 | int nName, /* Number of characters in zName */ |
| 61 | 283 | int iMode, /* Mode. 0644 or 0755 */ |
| 62 | 284 | unsigned int mTime, /* File modification time */ |
| 63 | 285 | int iSize, /* Size of the object in bytes */ |
| 64 | - int iType /* Type of object. 0==file. 5==directory */ | |
| 286 | + char cType /* Type of object. '0'==file. '5'==directory */ | |
| 65 | 287 | ){ |
| 66 | - unsigned int cksum = 0; | |
| 67 | - int i; | |
| 68 | - if( nName>100 ){ | |
| 69 | - memcpy(&tball.aHdr[345], zName, nName-100); | |
| 70 | - memcpy(tball.aHdr, &zName[nName-100], 100); | |
| 71 | - memset(&tball.aHdr[245+nName], 0, 267-nName); | |
| 72 | - }else{ | |
| 73 | - memcpy(tball.aHdr, zName, nName); | |
| 74 | - memset(&tball.aHdr[nName], 0, 100-nName); | |
| 75 | - memset(&tball.aHdr[345], 0, 167); | |
| 76 | - } | |
| 288 | + /* set mode and modification time */ | |
| 77 | 289 | sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode); |
| 78 | - sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); | |
| 79 | 290 | sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime); |
| 80 | - memset(&tball.aHdr[148], ' ', 8); | |
| 81 | - tball.aHdr[156] = iType + '0'; | |
| 82 | - for(i=0; i<512; i++) cksum += tball.aHdr[i]; | |
| 83 | - sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum); | |
| 84 | - tball.aHdr[154] = 0; | |
| 85 | - gzip_step((char*)tball.aHdr, 512); | |
| 291 | + | |
| 292 | + /* see if we need to output a Pax Interchange Header */ | |
| 293 | + if( !is_iso646_name(zName, nName) || | |
| 294 | + !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){ | |
| 295 | + int lastPage; | |
| 296 | + /* add a file name for interoperability with older programs */ | |
| 297 | + approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1); | |
| 298 | + | |
| 299 | + /* generate the Pax Interchange path header */ | |
| 300 | + blob_reset(&tball.pax); | |
| 301 | + add_pax_header("path", zName, nName); | |
| 302 | + | |
| 303 | + /* set the header length, and write the header */ | |
| 304 | + sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", | |
| 305 | + blob_size(&tball.pax)); | |
| 306 | + cksum_and_write_header('x'); | |
| 307 | + | |
| 308 | + /* write the Pax Interchange data */ | |
| 309 | + gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax)); | |
| 310 | + lastPage = blob_size(&tball.pax) % 512; | |
| 311 | + if( lastPage!=0 ){ | |
| 312 | + gzip_step(tball.zSpaces, 512 - lastPage); | |
| 313 | + } | |
| 314 | + | |
| 315 | + /* generate an approximate path for the regular header */ | |
| 316 | + approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0); | |
| 317 | + } | |
| 318 | + /* set the size */ | |
| 319 | + sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); | |
| 320 | + | |
| 321 | + /* write the regular header */ | |
| 322 | + cksum_and_write_header(cType); | |
| 86 | 323 | } |
| 324 | + | |
| 87 | 325 | |
| 88 | 326 | /* |
| 89 | 327 | ** Recursively add a directory entry for the given file if those |
| 90 | 328 | ** directories have not previously been seen. |
| 91 | 329 | */ |
| @@ -95,18 +333,27 @@ | ||
| 95 | 333 | unsigned int mTime /* Modification time */ |
| 96 | 334 | ){ |
| 97 | 335 | int i; |
| 98 | 336 | for(i=nName-1; i>0 && zName[i]!='/'; i--){} |
| 99 | 337 | if( i<=0 ) return; |
| 100 | - if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return; | |
| 338 | + if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 && | |
| 339 | + memcmp(tball.zPrevDir, zName, i)==0 ) return; | |
| 101 | 340 | db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName); |
| 102 | 341 | if( sqlite3_changes(g.db)==0 ) return; |
| 103 | 342 | tar_add_directory_of(zName, i-1, mTime); |
| 104 | - tar_add_header(zName, i, 0755, mTime, 0, 5); | |
| 343 | + tar_add_header(zName, i, 0755, mTime, 0, '5'); | |
| 344 | + if( i >= tball.nPrevDirAlloc ){ | |
| 345 | + int nsize = tball.nPrevDirAlloc * 2; | |
| 346 | + if(i+1 > nsize) | |
| 347 | + nsize = i+1; | |
| 348 | + tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize); | |
| 349 | + tball.nPrevDirAlloc = nsize; | |
| 350 | + } | |
| 105 | 351 | memcpy(tball.zPrevDir, zName, i); |
| 106 | 352 | tball.zPrevDir[i] = 0; |
| 107 | 353 | } |
| 354 | + | |
| 108 | 355 | |
| 109 | 356 | /* |
| 110 | 357 | ** Add a single file to the growing tarball. |
| 111 | 358 | */ |
| 112 | 359 | static void tar_add_file( |
| @@ -117,15 +364,13 @@ | ||
| 117 | 364 | ){ |
| 118 | 365 | int nName = strlen(zName); |
| 119 | 366 | int n = blob_size(pContent); |
| 120 | 367 | int lastPage; |
| 121 | 368 | |
| 122 | - if( nName>=250 ){ | |
| 123 | - fossil_fatal("name too long for ustar format: \"%s\"", zName); | |
| 124 | - } | |
| 369 | + /* length check moved to tar_split_path */ | |
| 125 | 370 | tar_add_directory_of(zName, nName, mTime); |
| 126 | - tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0); | |
| 371 | + tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0'); | |
| 127 | 372 | if( n ){ |
| 128 | 373 | gzip_step(blob_buffer(pContent), n); |
| 129 | 374 | lastPage = n % 512; |
| 130 | 375 | if( lastPage!=0 ){ |
| 131 | 376 | gzip_step(tball.zSpaces, 512 - lastPage); |
| @@ -142,10 +387,14 @@ | ||
| 142 | 387 | gzip_step(tball.zSpaces, 512); |
| 143 | 388 | gzip_step(tball.zSpaces, 512); |
| 144 | 389 | gzip_finish(pOut); |
| 145 | 390 | fossil_free(tball.aHdr); |
| 146 | 391 | tball.aHdr = 0; |
| 392 | + fossil_free(tball.zPrevDir); | |
| 393 | + tball.zPrevDir = NULL; | |
| 394 | + tball.nPrevDirAlloc = 0; | |
| 395 | + blob_reset(&tball.pax); | |
| 147 | 396 | } |
| 148 | 397 | |
| 149 | 398 | |
| 150 | 399 | /* |
| 151 | 400 | ** COMMAND: test-tarball |
| 152 | 401 |
| --- src/tar.c | |
| +++ src/tar.c | |
| @@ -27,31 +27,253 @@ | |
| 27 | */ |
| 28 | static struct tarball_t { |
| 29 | unsigned char *aHdr; /* Space for building headers */ |
| 30 | char *zSpaces; /* Spaces for padding */ |
| 31 | char *zPrevDir; /* Name of directory for previous entry */ |
| 32 | } tball; |
| 33 | |
| 34 | /* |
| 35 | ** Begin the process of generating a tarball. |
| 36 | ** |
| 37 | ** Initialize the GZIP compressor and the table of directory names. |
| 38 | */ |
| 39 | static void tar_begin(void){ |
| 40 | assert( tball.aHdr==0 ); |
| 41 | tball.aHdr = fossil_malloc(512+512+256); |
| 42 | memset(tball.aHdr, 0, 512+512+256); |
| 43 | tball.zSpaces = (char*)&tball.aHdr[512]; |
| 44 | tball.zPrevDir = (char*)&tball.zSpaces[512]; |
| 45 | memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */ |
| 46 | memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */ |
| 47 | memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */ |
| 48 | gzip_begin(); |
| 49 | db_multi_exec( |
| 50 | "CREATE TEMP TABLE dir(name UNIQUE);" |
| 51 | ); |
| 52 | } |
| 53 | |
| 54 | /* |
| 55 | ** Build a header for a file or directory and write that header |
| 56 | ** into the growing tarball. |
| 57 | */ |
| @@ -59,33 +281,49 @@ | |
| 59 | const char *zName, /* Name of the object */ |
| 60 | int nName, /* Number of characters in zName */ |
| 61 | int iMode, /* Mode. 0644 or 0755 */ |
| 62 | unsigned int mTime, /* File modification time */ |
| 63 | int iSize, /* Size of the object in bytes */ |
| 64 | int iType /* Type of object. 0==file. 5==directory */ |
| 65 | ){ |
| 66 | unsigned int cksum = 0; |
| 67 | int i; |
| 68 | if( nName>100 ){ |
| 69 | memcpy(&tball.aHdr[345], zName, nName-100); |
| 70 | memcpy(tball.aHdr, &zName[nName-100], 100); |
| 71 | memset(&tball.aHdr[245+nName], 0, 267-nName); |
| 72 | }else{ |
| 73 | memcpy(tball.aHdr, zName, nName); |
| 74 | memset(&tball.aHdr[nName], 0, 100-nName); |
| 75 | memset(&tball.aHdr[345], 0, 167); |
| 76 | } |
| 77 | sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode); |
| 78 | sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); |
| 79 | sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime); |
| 80 | memset(&tball.aHdr[148], ' ', 8); |
| 81 | tball.aHdr[156] = iType + '0'; |
| 82 | for(i=0; i<512; i++) cksum += tball.aHdr[i]; |
| 83 | sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum); |
| 84 | tball.aHdr[154] = 0; |
| 85 | gzip_step((char*)tball.aHdr, 512); |
| 86 | } |
| 87 | |
| 88 | /* |
| 89 | ** Recursively add a directory entry for the given file if those |
| 90 | ** directories have not previously been seen. |
| 91 | */ |
| @@ -95,18 +333,27 @@ | |
| 95 | unsigned int mTime /* Modification time */ |
| 96 | ){ |
| 97 | int i; |
| 98 | for(i=nName-1; i>0 && zName[i]!='/'; i--){} |
| 99 | if( i<=0 ) return; |
| 100 | if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return; |
| 101 | db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName); |
| 102 | if( sqlite3_changes(g.db)==0 ) return; |
| 103 | tar_add_directory_of(zName, i-1, mTime); |
| 104 | tar_add_header(zName, i, 0755, mTime, 0, 5); |
| 105 | memcpy(tball.zPrevDir, zName, i); |
| 106 | tball.zPrevDir[i] = 0; |
| 107 | } |
| 108 | |
| 109 | /* |
| 110 | ** Add a single file to the growing tarball. |
| 111 | */ |
| 112 | static void tar_add_file( |
| @@ -117,15 +364,13 @@ | |
| 117 | ){ |
| 118 | int nName = strlen(zName); |
| 119 | int n = blob_size(pContent); |
| 120 | int lastPage; |
| 121 | |
| 122 | if( nName>=250 ){ |
| 123 | fossil_fatal("name too long for ustar format: \"%s\"", zName); |
| 124 | } |
| 125 | tar_add_directory_of(zName, nName, mTime); |
| 126 | tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0); |
| 127 | if( n ){ |
| 128 | gzip_step(blob_buffer(pContent), n); |
| 129 | lastPage = n % 512; |
| 130 | if( lastPage!=0 ){ |
| 131 | gzip_step(tball.zSpaces, 512 - lastPage); |
| @@ -142,10 +387,14 @@ | |
| 142 | gzip_step(tball.zSpaces, 512); |
| 143 | gzip_step(tball.zSpaces, 512); |
| 144 | gzip_finish(pOut); |
| 145 | fossil_free(tball.aHdr); |
| 146 | tball.aHdr = 0; |
| 147 | } |
| 148 | |
| 149 | |
| 150 | /* |
| 151 | ** COMMAND: test-tarball |
| 152 |
| --- src/tar.c | |
| +++ src/tar.c | |
| @@ -27,31 +27,253 @@ | |
| 27 | */ |
| 28 | static struct tarball_t { |
| 29 | unsigned char *aHdr; /* Space for building headers */ |
| 30 | char *zSpaces; /* Spaces for padding */ |
| 31 | char *zPrevDir; /* Name of directory for previous entry */ |
| 32 | int nPrevDirAlloc; /* size of zPrevDir */ |
| 33 | Blob pax; /* PAX data */ |
| 34 | } tball; |
| 35 | |
| 36 | |
| 37 | /* |
| 38 | ** field lengths of 'ustar' name and prefix fields. |
| 39 | */ |
| 40 | #define USTAR_NAME_LEN 100 |
| 41 | #define USTAR_PREFIX_LEN 155 |
| 42 | |
| 43 | |
| 44 | /* |
| 45 | ** Begin the process of generating a tarball. |
| 46 | ** |
| 47 | ** Initialize the GZIP compressor and the table of directory names. |
| 48 | */ |
| 49 | static void tar_begin(void){ |
| 50 | assert( tball.aHdr==0 ); |
| 51 | tball.aHdr = fossil_malloc(512+512); |
| 52 | memset(tball.aHdr, 0, 512+512); |
| 53 | tball.zSpaces = (char*)&tball.aHdr[512]; |
| 54 | /* zPrevDir init */ |
| 55 | tball.zPrevDir = NULL; |
| 56 | tball.nPrevDirAlloc = 0; |
| 57 | /* scratch buffer init */ |
| 58 | blob_zero(&tball.pax); |
| 59 | |
| 60 | memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */ |
| 61 | memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */ |
| 62 | memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */ |
| 63 | memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */ |
| 64 | memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */ |
| 65 | gzip_begin(); |
| 66 | db_multi_exec( |
| 67 | "CREATE TEMP TABLE dir(name UNIQUE);" |
| 68 | ); |
| 69 | } |
| 70 | |
| 71 | |
/*
** Check that all characters in 'zName' are in the ISO646 (=ASCII)
** character set.
**
** Returns 1 when every one of the first nName bytes has a value of
** 0x7e or less, and 0 as soon as a larger byte is seen.  A length of
** zero (or less) yields 1.
*/
static int is_iso646_name(
  const char *zName,     /* file path */
  int nName              /* path length */
){
  const unsigned char *z = (const unsigned char*)zName;
  int nLeft;
  for(nLeft = nName; nLeft > 0; nLeft--, z++){
    if( *z >= 0x7f ) return 0;
  }
  return 1;
}
| 87 | |
| 88 | |
/*
** Fill the nDest byte field pDest from the nSrc bytes at pSrc.
**
** A source that is at least as long as the field is truncated to nDest
** bytes (no terminator is written).  A shorter source is copied whole
** and the remainder of the field is filled with zero bytes.
*/
static void padded_copy(
  char *pDest,
  int nDest,
  const char *pSrc,
  int nSrc
){
  int nCopy = nSrc < nDest ? nSrc : nDest;
  memcpy(pDest, pSrc, nCopy);
  if( nCopy < nDest ){
    memset(pDest + nCopy, 0, nDest - nCopy);
  }
}
| 105 | |
| 106 | |
| 107 | |
| 108 | /****************************************************************************** |
| 109 | ** |
| 110 | ** The 'tar' format has evolved over time. Initially the name was stored |
| 111 | ** in a 100 byte null-terminated field 'name'. File path names were |
| 112 | ** limited to 99 bytes. |
| 113 | ** |
| 114 | ** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing |
| 115 | ** for up to 255 characters to be stored. The full file path is formed by |
| 116 | ** concatenating the field 'prefix', a slash, and the field 'name'. This |
| 117 | ** gives some measure of compatibility with programs that only understand |
| 118 | ** the oldest format. |
| 119 | ** |
| 120 | ** The latest Posix extension is called the 'pax Interchange Format'. |
| 121 | ** It removes all the limitations of the previous two formats by allowing |
| 122 | ** the storage of arbitrary-length attributes in a separate object that looks |
| 123 | ** like a file to programs that do not understand this extension. So the |
| 124 | ** contents of the 'name' and 'prefix' fields should contain values that allow |
| 125 | ** versions of tar that do not understand this extension to still do |
| 126 | ** something useful. |
| 127 | ** |
| 128 | ******************************************************************************/ |
| 129 | |
| 130 | /* |
| 131 | ** The position we use to split a file path into the 'name' and 'prefix' |
| 132 | ** fields needs to meet the following criteria: |
| 133 | ** |
| 134 | ** - not at the beginning or end of the string |
| 135 | ** - the position must contain a slash |
| 136 | ** - no more than 100 characters follow the slash |
| 137 | ** - no more than 155 characters precede it |
| 138 | ** |
| 139 | ** The routine 'find_split_pos' finds a split position. It will meet the |
| 140 | ** criteria of listed above if such a position exists. If no such |
| 141 | ** position exists it generates one that useful for generating the |
| 142 | ** values used for backward compatibility. |
| 143 | */ |
| 144 | static int find_split_pos( |
| 145 | const char *zName, /* file path */ |
| 146 | int nName /* path length */ |
| 147 | ){ |
| 148 | int i, split = 0; |
| 149 | /* only search if the string needs splitting */ |
| 150 | if(nName > USTAR_NAME_LEN){ |
| 151 | for(i = 1; i+1 < nName; i++) |
| 152 | if(zName[i] == '/'){ |
| 153 | split = i+1; |
| 154 | /* if the split position is within USTAR_NAME_LEN bytes from |
| 155 | * the end we can quit */ |
| 156 | if(nName - split <= USTAR_NAME_LEN) break; |
| 157 | } |
| 158 | } |
| 159 | return split; |
| 160 | } |
| 161 | |
| 162 | |
| 163 | /* |
| 164 | ** attempt to split the file name path to meet 'ustar' header |
| 165 | ** criteria. |
| 166 | */ |
| 167 | static int tar_split_path( |
| 168 | const char *zName, /* path */ |
| 169 | int nName, /* path length */ |
| 170 | char *pName, /* name field */ |
| 171 | char *pPrefix /* prefix field */ |
| 172 | ){ |
| 173 | int split = find_split_pos(zName, nName); |
| 174 | /* check whether both pieces fit */ |
| 175 | if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){ |
| 176 | return 0; /* no */ |
| 177 | } |
| 178 | |
| 179 | /* extract name */ |
| 180 | padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); |
| 181 | |
| 182 | /* extract prefix */ |
| 183 | padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0)); |
| 184 | |
| 185 | return 1; /* success */ |
| 186 | } |
| 187 | |
| 188 | |
| 189 | /* |
| 190 | ** When using an extension header we still need to put something |
| 191 | ** reasonable in the name and prefix fields. This is probably as |
| 192 | ** good as it gets. |
| 193 | */ |
| 194 | static void approximate_split_path( |
| 195 | const char *zName, /* path */ |
| 196 | int nName, /* path length */ |
| 197 | char *pName, /* name field */ |
| 198 | char *pPrefix, /* prefix field */ |
| 199 | int bHeader /* is this a 'x' type tar header? */ |
| 200 | ){ |
| 201 | int split; |
| 202 | |
| 203 | /* if this is a Pax Interchange header prepend "PaxHeader/" |
| 204 | ** so we can tell files apart from metadata */ |
| 205 | if( bHeader ){ |
| 206 | int n; |
| 207 | blob_reset(&tball.pax); |
| 208 | blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName); |
| 209 | zName = blob_buffer(&tball.pax); |
| 210 | nName = blob_size(&tball.pax); |
| 211 | } |
| 212 | |
| 213 | /* find the split position */ |
| 214 | split = find_split_pos(zName, nName); |
| 215 | |
| 216 | /* extract a name, truncate if needed */ |
| 217 | padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split); |
| 218 | |
| 219 | /* extract a prefix field, truncate when needed */ |
| 220 | padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0)); |
| 221 | } |
| 222 | |
| 223 | |
| 224 | /* |
| 225 | ** add a Pax Interchange header to the scratch buffer |
| 226 | ** |
| 227 | ** format: <length> <key>=<value>\n |
| 228 | ** the tricky part is that each header contains its own |
| 229 | ** size in decimal, counting that length. |
| 230 | */ |
| 231 | static void add_pax_header( |
| 232 | const char *zField, |
| 233 | const char *zValue, |
| 234 | int nValue |
| 235 | ){ |
| 236 | /* calculate length without length field */ |
| 237 | int blen = strlen(zField) + nValue + 3; |
| 238 | /* calculate the length of the length field */ |
| 239 | int next10 = 1; |
| 240 | int n; |
| 241 | for(n = blen; n > 0; ){ |
| 242 | blen++; next10 *= 10; |
| 243 | n /= 10; |
| 244 | } |
| 245 | /* adding the length extended the length field? */ |
| 246 | if(blen > next10){ |
| 247 | blen++; |
| 248 | } |
| 249 | /* build the string */ |
| 250 | blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue); |
| 251 | /* this _must_ be right */ |
| 252 | if(blob_size(&tball.pax) != blen){ |
| 253 | fossil_fatal("internal error: PAX tar header has bad length"); |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | |
| 258 | /* |
| 259 | ** set the header type, calculate the checksum and output |
| 260 | ** the header |
| 261 | */ |
| 262 | static void cksum_and_write_header( |
| 263 | char cType |
| 264 | ){ |
| 265 | unsigned int cksum = 0; |
| 266 | int i; |
| 267 | memset(&tball.aHdr[148], ' ', 8); |
| 268 | tball.aHdr[156] = cType; |
| 269 | for(i=0; i<512; i++) cksum += tball.aHdr[i]; |
| 270 | sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum); |
| 271 | tball.aHdr[155] = 0; |
| 272 | gzip_step((char*)tball.aHdr, 512); |
| 273 | } |
| 274 | |
| 275 | |
| 276 | /* |
| 277 | ** Build a header for a file or directory and write that header |
| 278 | ** into the growing tarball. |
| 279 | */ |
| @@ -59,33 +281,49 @@ | |
| 281 | const char *zName, /* Name of the object */ |
| 282 | int nName, /* Number of characters in zName */ |
| 283 | int iMode, /* Mode. 0644 or 0755 */ |
| 284 | unsigned int mTime, /* File modification time */ |
| 285 | int iSize, /* Size of the object in bytes */ |
| 286 | char cType /* Type of object. '0'==file. '5'==directory */ |
| 287 | ){ |
| 288 | /* set mode and modification time */ |
| 289 | sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode); |
| 290 | sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime); |
| 291 | |
| 292 | /* see if we need to output a Pax Interchange Header */ |
| 293 | if( !is_iso646_name(zName, nName) || |
| 294 | !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){ |
| 295 | int lastPage; |
| 296 | /* add a file name for interoperability with older programs */ |
| 297 | approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1); |
| 298 | |
| 299 | /* generate the Pax Interchange path header */ |
| 300 | blob_reset(&tball.pax); |
| 301 | add_pax_header("path", zName, nName); |
| 302 | |
| 303 | /* set the header length, and write the header */ |
| 304 | sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", |
| 305 | blob_size(&tball.pax)); |
| 306 | cksum_and_write_header('x'); |
| 307 | |
| 308 | /* write the Pax Interchange data */ |
| 309 | gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax)); |
| 310 | lastPage = blob_size(&tball.pax) % 512; |
| 311 | if( lastPage!=0 ){ |
| 312 | gzip_step(tball.zSpaces, 512 - lastPage); |
| 313 | } |
| 314 | |
| 315 | /* generate an approximate path for the regular header */ |
| 316 | approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0); |
| 317 | } |
| 318 | /* set the size */ |
| 319 | sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize); |
| 320 | |
| 321 | /* write the regular header */ |
| 322 | cksum_and_write_header(cType); |
| 323 | } |
| 324 | |
| 325 | |
| 326 | /* |
| 327 | ** Recursively add a directory entry for the given file if those |
| 328 | ** directories have not previously been seen. |
| 329 | */ |
| @@ -95,18 +333,27 @@ | |
| 333 | unsigned int mTime /* Modification time */ |
| 334 | ){ |
| 335 | int i; |
| 336 | for(i=nName-1; i>0 && zName[i]!='/'; i--){} |
| 337 | if( i<=0 ) return; |
| 338 | if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 && |
| 339 | memcmp(tball.zPrevDir, zName, i)==0 ) return; |
| 340 | db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName); |
| 341 | if( sqlite3_changes(g.db)==0 ) return; |
| 342 | tar_add_directory_of(zName, i-1, mTime); |
| 343 | tar_add_header(zName, i, 0755, mTime, 0, '5'); |
| 344 | if( i >= tball.nPrevDirAlloc ){ |
| 345 | int nsize = tball.nPrevDirAlloc * 2; |
| 346 | if(i+1 > nsize) |
| 347 | nsize = i+1; |
| 348 | tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize); |
| 349 | tball.nPrevDirAlloc = nsize; |
| 350 | } |
| 351 | memcpy(tball.zPrevDir, zName, i); |
| 352 | tball.zPrevDir[i] = 0; |
| 353 | } |
| 354 | |
| 355 | |
| 356 | /* |
| 357 | ** Add a single file to the growing tarball. |
| 358 | */ |
| 359 | static void tar_add_file( |
| @@ -117,15 +364,13 @@ | |
| 364 | ){ |
| 365 | int nName = strlen(zName); |
| 366 | int n = blob_size(pContent); |
| 367 | int lastPage; |
| 368 | |
| 369 | /* length check moved to tar_split_path */ |
| 370 | tar_add_directory_of(zName, nName, mTime); |
| 371 | tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0'); |
| 372 | if( n ){ |
| 373 | gzip_step(blob_buffer(pContent), n); |
| 374 | lastPage = n % 512; |
| 375 | if( lastPage!=0 ){ |
| 376 | gzip_step(tball.zSpaces, 512 - lastPage); |
| @@ -142,10 +387,14 @@ | |
| 387 | gzip_step(tball.zSpaces, 512); |
| 388 | gzip_step(tball.zSpaces, 512); |
| 389 | gzip_finish(pOut); |
| 390 | fossil_free(tball.aHdr); |
| 391 | tball.aHdr = 0; |
| 392 | fossil_free(tball.zPrevDir); |
| 393 | tball.zPrevDir = NULL; |
| 394 | tball.nPrevDirAlloc = 0; |
| 395 | blob_reset(&tball.pax); |
| 396 | } |
| 397 | |
| 398 | |
| 399 | /* |
| 400 | ** COMMAND: test-tarball |
| 401 |