Fossil SCM

Improvements to tar generation. Uses the format documented in Posix.1-2008 to handle long file names and UTF-8.

ge 2011-07-24 00:36 UTC trunk

Commit 2ef37b3b2a0c2dd48f4f902644fdb85175eae86a

Parent ba15af450d33b2f…

1 file changed +322 -29

M src/tar.c

+322 -29

		--- src/tar.c
		+++ src/tar.c
		@@ -27,31 +27,296 @@
27	27	*/
28	28	static struct tarball_t {
29	29	unsigned char *aHdr; /* Space for building headers */
30	30	char *zSpaces; /* Spaces for padding */
31	31	char *zPrevDir; /* Name of directory for previous entry */
	32	+ int nPrevDirAlloc; /* size of zPrevDir */
	33	+ char *pScratch; /* scratch buffer used to build PAX data */
	34	+ int nScratchUsed; /* part of buffer containing data */
	35	+ int nScratchAlloc; /* size of buffer */
32	36	} tball;
	37	+
	38	+
	39	+/*
	40	+** field lengths of 'ustar' name and prefix fields.
	41	+*/
	42	+#define USTAR_NAME_LEN 100
	43	+#define USTAR_PREFIX_LEN 155
	44	+
33	45
34	46	/*
35	47	** Begin the process of generating a tarball.
36	48	**
37	49	** Initialize the GZIP compressor and the table of directory names.
38	50	*/
39	51	static void tar_begin(void){
40	52	assert( tball.aHdr==0 );
41		- tball.aHdr = fossil_malloc(512+512+256);
42		- memset(tball.aHdr, 0, 512+512+256);
	53	+ tball.aHdr = fossil_malloc(512+512);
	54	+ memset(tball.aHdr, 0, 512+512);
43	55	tball.zSpaces = (char*)&tball.aHdr[512];
44		- tball.zPrevDir = (char*)&tball.zSpaces[512];
	56	+ /* zPrevDir init */
	57	+ tball.zPrevDir = NULL;
	58	+ tball.nPrevDirAlloc = 0;
	59	+ /* scratch buffer init */
	60	+ tball.pScratch = NULL;
	61	+ tball.nScratchUsed = 0;
	62	+ tball.nScratchAlloc = 0;
	63	+
45	64	memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
46	65	memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
47		- memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */
	66	+ memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */
	67	+ memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */
	68	+ memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */
48	69	gzip_begin();
49	70	db_multi_exec(
50	71	"CREATE TEMP TABLE dir(name UNIQUE);"
51	72	);
52	73	}
	74	+
	75	+
	76	+/*
	77	+** print to the scratch buffer
	78	+**
	79	+** used to build the Pax Interchange Format data, and create
	80	+** pseudo-file names for the header data.
	81	+**
	82	+** The buffer is grown automatically to accommodate the data.
	83	+*/
	84	+static int scratch_printf(
	85	+ const char *fmt,
	86	+ ...
	87	+){
	88	+ for(;;){
	89	+ int newSize, minSpace, n;
	90	+ /* calculate space in buffer */
	91	+ int space = tball.nScratchAlloc - tball.nScratchUsed;
	92	+ /* format the string */
	93	+ va_list vl;
	94	+ va_start(vl, fmt);
	95	+ n = vsnprintf(&tball.pScratch[tball.nScratchUsed], space, fmt, vl);
	96	+ assert(n >= 0);
	97	+ va_end(vl);
	98	+ /* if it fit we're done */
	99	+ if(n < space)
	100	+ return n;
	101	+ /* buffer too short: calculate reasonable new size */
	102	+ minSpace = tball.nScratchUsed+n+1;
	103	+ newSize = 2 * tball.nScratchAlloc;
	104	+ if(newSize < minSpace)
	105	+ newSize = minSpace;
	106	+ /* grow the buffer */
	107	+ tball.pScratch = fossil_realloc(tball.pScratch, newSize);
	108	+ tball.nScratchAlloc = newSize;
	109	+ /* loop to try again */
	110	+ }
	111	+}
	112	+
	113	+
	114	+/*
	115	+** verify that all characters in 'zName' are in the
	116	+** ISO646 (=ASCII) character set.
	117	+*/
	118	+static int is_iso646_name(
	119	+ const char *zName, /* file path */
	120	+ int nName /* path length */
	121	+){
	122	+ int i;
	123	+ for(i = 0; i < nName; i++){
	124	+ unsigned char c = (unsigned char)zName[i];
	125	+ if(c > 0x7e)
	126	+ return 0;
	127	+ }
	128	+ return 1;
	129	+}
	130	+
	131	+
	132	+/*
	133	+** copy string pSrc into pDst, truncating or padding with 0 if necessary
	134	+*/
	135	+static void padded_copy(
	136	+ char *pDest,
	137	+ int nDest,
	138	+ const char *pSrc,
	139	+ int nSrc
	140	+){
	141	+ if(nSrc >= nDest){
	142	+ memcpy(pDest, pSrc, nDest);
	143	+ }else{
	144	+ memcpy(pDest, pSrc, nSrc);
	145	+ memset(&pDest[nSrc], 0, nDest - nSrc);
	146	+ }
	147	+}
	148	+
	149	+
	150	+
	151	+/******************************************************************************
	152	+**
	153	+** The 'tar' format has evolved over time. Initially the name was stored
	154	+** in a 100 byte null-terminated field 'name'. File path names were
	155	+** limited to 99 bytes.
	156	+**
	157	+** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
	158	+** for up to 255 characters to be stored. The full file path is formed by
	159	+** concatenating the field 'prefix', a slash, and the field 'name'. This
	160	+** gives some measure of compatibility with programs that only understand
	161	+** the oldest format.
	162	+**
	163	+** The latest Posix extension is called the 'pax Interchange Format'.
	164	+** It removes all the limitations of the previous two formats by allowing
	165	+** the storage of arbitrary-length attributes in a separate object that looks
	166	+** like a file to programs that do not understand this extension. So the
	167	+** contents of the 'name' and 'prefix' fields should contain values that allow
	168	+** versions of tar that do not understand this extension to still do
	169	+** something useful.
	170	+**
	171	+******************************************************************************/
	172	+
	173	+/*
	174	+** The position we use to split a file path into the 'name' and 'prefix'
	175	+** fields needs to meet the following criteria:
	176	+**
	177	+** - not at the beginning or end of the string
	178	+** - the position must contain a slash
	179	+** - no more than 100 characters follow the slash
	180	+** - no more than 155 characters precede it
	181	+**
	182	+** The routine 'find_split_pos' finds a split position. It will meet the
	183	+** criteria listed above if such a position exists. If no such
	184	+** position exists it generates one that is useful for generating the
	185	+** values used for backward compatibility.
	186	+*/
	187	+static int find_split_pos(
	188	+ const char *zName, /* file path */
	189	+ int nName /* path length */
	190	+){
	191	+ int i, split = 0;
	192	+ /* only search if the string needs splitting */
	193	+ if(nName > USTAR_NAME_LEN){
	194	+ for(i = 1; i+1 < nName; i++)
	195	+ if(zName[i] == '/'){
	196	+ split = i+1;
	197	+ /* if the split position is within USTAR_NAME_LEN bytes from
	198	+ * the end we can quit */
	199	+ if(nName - split <= USTAR_NAME_LEN)
	200	+ break;
	201	+ }
	202	+ }
	203	+ return split;
	204	+}
	205	+
	206	+
	207	+/*
	208	+** attempt to split the file name path to meet 'ustar' header
	209	+** criteria.
	210	+*/
	211	+static int tar_split_path(
	212	+ const char *zName, /* path */
	213	+ int nName, /* path length */
	214	+ char *pName, /* name field */
	215	+ char *pPrefix /* prefix field */
	216	+){
	217	+ int split = find_split_pos(zName, nName);
	218	+ /* check whether both pieces fit */
	219	+ if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1)
	220	+ return 0; /* no */
	221	+
	222	+ /* extract name */
	223	+ padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
	224	+
	225	+ /* extract prefix */
	226	+ padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
	227	+
	228	+ return 1; /* success */
	229	+}
	230	+
	231	+
	232	+/*
	233	+** When using an extension header we still need to put something
	234	+** reasonable in the name and prefix fields. This is probably as
	235	+** good as it gets.
	236	+*/
	237	+static void approximate_split_path(
	238	+ const char *zName, /* path */
	239	+ int nName, /* path length */
	240	+ char *pName, /* name field */
	241	+ char *pPrefix, /* prefix field */
	242	+ int bHeader /* is this a 'x' type tar header? */
	243	+){
	244	+ int split;
	245	+
	246	+ /* if this is a Pax Interchange header prepend "PaxHeader/"
	247	+ * so we can tell files apart from metadata */
	248	+ if(bHeader){
	249	+ int n;
	250	+ tball.nScratchUsed = 0;
	251	+ n = scratch_printf("PaxHeader/%*.*s", nName, nName, zName);
	252	+ zName = tball.pScratch;
	253	+ nName = n;
	254	+ }
	255	+
	256	+ /* find the split position */
	257	+ split = find_split_pos(zName, nName);
	258	+
	259	+ /* extract a name, truncate if needed */
	260	+ padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
	261	+
	262	+ /* extract a prefix field, truncate when needed */
	263	+ padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
	264	+}
	265	+
	266	+
	267	+/*
	268	+** add a Pax Interchange header to the scratch buffer
	269	+**
	270	+** format: <length> <key>=<value>\n
	271	+** the tricky part is that each header contains its own
	272	+** size in decimal, counting that length.
	273	+*/
	274	+static void add_pax_header(
	275	+ const char *zField,
	276	+ const char *zValue,
	277	+ int nValue
	278	+){
	279	+ /* calculate length without length field */
	280	+ int blen = strlen(zField) + nValue + 3;
	281	+ /* calculate the length of the length field */
	282	+ int next10 = 1;
	283	+ int n;
	284	+ for(n = blen; n > 0; ){
	285	+ blen++; next10 *= 10;
	286	+ n /= 10;
	287	+ }
	288	+ /* adding the length extended the length field? */
	289	+ if(blen > next10)
	290	+ blen++;
	291	+ /* build the string */
	292	+ n = scratch_printf("%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
	293	+ /* this _must_ be right */
	294	+ if(n != blen)
	295	+ fossil_fatal("internal error: PAX tar header has bad length");
	296	+ /* add length to scratch buffer */
	297	+ tball.nScratchUsed += blen;
	298	+}
	299	+
	300	+
	301	+/*
	302	+** set the header type, calculate the checksum and output
	303	+** the header
	304	+*/
	305	+static void cksum_and_write_header(
	306	+ char cType
	307	+){
	308	+ unsigned int cksum = 0;
	309	+ int i;
	310	+ memset(&tball.aHdr[148], ' ', 8);
	311	+ tball.aHdr[156] = cType;
	312	+ for(i=0; i<512; i++) cksum += tball.aHdr[i];
	313	+ sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
	314	+ tball.aHdr[155] = 0;
	315	+ gzip_step((char*)tball.aHdr, 512);
	316	+}
	317	+
53	318
54	319	/*
55	320	** Build a header for a file or directory and write that header
56	321	** into the growing tarball.
57	322	*/
		@@ -59,33 +324,47 @@
59	324	const char *zName, /* Name of the object */
60	325	int nName, /* Number of characters in zName */
61	326	int iMode, /* Mode. 0644 or 0755 */
62	327	unsigned int mTime, /* File modification time */
63	328	int iSize, /* Size of the object in bytes */
64		- int iType /* Type of object. 0==file. 5==directory */
	329	+ char cType /* Type of object. '0'==file. '5'==directory */
65	330	){
66		- unsigned int cksum = 0;
67		- int i;
68		- if( nName>100 ){
69		- memcpy(&tball.aHdr[345], zName, nName-100);
70		- memcpy(tball.aHdr, &zName[nName-100], 100);
71		- memset(&tball.aHdr[245+nName], 0, 267-nName);
72		- }else{
73		- memcpy(tball.aHdr, zName, nName);
74		- memset(&tball.aHdr[nName], 0, 100-nName);
75		- memset(&tball.aHdr[345], 0, 167);
76		- }
	331	+ /* set mode and modification time */
77	332	sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
78		- sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
79	333	sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
80		- memset(&tball.aHdr[148], ' ', 8);
81		- tball.aHdr[156] = iType + '0';
82		- for(i=0; i<512; i++) cksum += tball.aHdr[i];
83		- sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
84		- tball.aHdr[154] = 0;
85		- gzip_step((char*)tball.aHdr, 512);
	334	+
	335	+ /* see if we need to output a Pax Interchange Header */
	336	+ if( !is_iso646_name(zName, nName) ||
	337	+ !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
	338	+ int lastPage;
	339	+ /* add a file name for interoperability with older programs */
	340	+ approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
	341	+
	342	+ /* generate the Pax Interchange path header */
	343	+ tball.nScratchUsed = 0;
	344	+ add_pax_header("path", zName, nName);
	345	+
	346	+ /* set the header length, and write the header */
	347	+ sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", tball.nScratchUsed);
	348	+ cksum_and_write_header('x');
	349	+
	350	+ /* write the Pax Interchange data */
	351	+ gzip_step(tball.pScratch, tball.nScratchUsed);
	352	+ lastPage = tball.nScratchUsed % 512;
	353	+ if( lastPage!=0 )
	354	+ gzip_step(tball.zSpaces, 512 - lastPage);
	355	+
	356	+ /* generate an approximate path for the regular header */
	357	+ approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
	358	+ }
	359	+ /* set the size */
	360	+ sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
	361	+
	362	+ /* write the regular header */
	363	+ cksum_and_write_header(cType);
86	364	}
	365	+
87	366
88	367	/*
89	368	** Recursively add a directory entry for the given file if those
90	369	** directories have not previously been seen.
91	370	*/
		@@ -95,18 +374,27 @@
95	374	unsigned int mTime /* Modification time */
96	375	){
97	376	int i;
98	377	for(i=nName-1; i>0 && zName[i]!='/'; i--){}
99	378	if( i<=0 ) return;
100		- if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;
	379	+ if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
	380	+ memcmp(tball.zPrevDir, zName, i)==0 ) return;
101	381	db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
102	382	if( sqlite3_changes(g.db)==0 ) return;
103	383	tar_add_directory_of(zName, i-1, mTime);
104		- tar_add_header(zName, i, 0755, mTime, 0, 5);
	384	+ tar_add_header(zName, i, 0755, mTime, 0, '5');
	385	+ if( i >= tball.nPrevDirAlloc ){
	386	+ int nsize = tball.nPrevDirAlloc * 2;
	387	+ if(i+1 > nsize)
	388	+ nsize = i+1;
	389	+ tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
	390	+ tball.nPrevDirAlloc = nsize;
	391	+ }
105	392	memcpy(tball.zPrevDir, zName, i);
106	393	tball.zPrevDir[i] = 0;
107	394	}
	395	+
108	396
109	397	/*
110	398	** Add a single file to the growing tarball.
111	399	*/
112	400	static void tar_add_file(
		@@ -117,15 +405,13 @@
117	405	){
118	406	int nName = strlen(zName);
119	407	int n = blob_size(pContent);
120	408	int lastPage;
121	409
122		- if( nName>=250 ){
123		- fossil_fatal("name too long for ustar format: \"%s\"", zName);
124		- }
	410	+ /* length check moved to tar_split_path */
125	411	tar_add_directory_of(zName, nName, mTime);
126		- tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
	412	+ tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
127	413	if( n ){
128	414	gzip_step(blob_buffer(pContent), n);
129	415	lastPage = n % 512;
130	416	if( lastPage!=0 ){
131	417	gzip_step(tball.zSpaces, 512 - lastPage);
		@@ -142,10 +428,17 @@
142	428	gzip_step(tball.zSpaces, 512);
143	429	gzip_step(tball.zSpaces, 512);
144	430	gzip_finish(pOut);
145	431	fossil_free(tball.aHdr);
146	432	tball.aHdr = 0;
	433	+ fossil_free(tball.zPrevDir);
	434	+ tball.zPrevDir = NULL;
	435	+ tball.nPrevDirAlloc = 0;
	436	+ fossil_free(tball.pScratch);
	437	+ tball.pScratch = NULL;
	438	+ tball.nScratchUsed = 0;
	439	+ tball.nScratchAlloc = 0;
147	440	}
148	441
149	442
150	443	/*
151	444	** COMMAND: test-tarball
152	445

	--- src/tar.c
	+++ src/tar.c
	@@ -27,31 +27,296 @@
27	*/
28	static struct tarball_t {
29	unsigned char *aHdr; /* Space for building headers */
30	char *zSpaces; /* Spaces for padding */
31	char *zPrevDir; /* Name of directory for previous entry */




32	} tball;








33
34	/*
35	** Begin the process of generating a tarball.
36	**
37	** Initialize the GZIP compressor and the table of directory names.
38	*/
39	static void tar_begin(void){
40	assert( tball.aHdr==0 );
41	tball.aHdr = fossil_malloc(512+512+256);
42	memset(tball.aHdr, 0, 512+512+256);
43	tball.zSpaces = (char*)&tball.aHdr[512];
44	tball.zPrevDir = (char*)&tball.zSpaces[512];







45	memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
46	memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
47	memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */


48	gzip_begin();
49	db_multi_exec(
50	"CREATE TEMP TABLE dir(name UNIQUE);"
51	);
52	}




















































































































































































































































53
54	/*
55	** Build a header for a file or directory and write that header
56	** into the growing tarball.
57	*/
	@@ -59,33 +324,47 @@
59	const char *zName, /* Name of the object */
60	int nName, /* Number of characters in zName */
61	int iMode, /* Mode. 0644 or 0755 */
62	unsigned int mTime, /* File modification time */
63	int iSize, /* Size of the object in bytes */
64	int iType /* Type of object. 0==file. 5==directory */
65	){
66	unsigned int cksum = 0;
67	int i;
68	if( nName>100 ){
69	memcpy(&tball.aHdr[345], zName, nName-100);
70	memcpy(tball.aHdr, &zName[nName-100], 100);
71	memset(&tball.aHdr[245+nName], 0, 267-nName);
72	}else{
73	memcpy(tball.aHdr, zName, nName);
74	memset(&tball.aHdr[nName], 0, 100-nName);
75	memset(&tball.aHdr[345], 0, 167);
76	}
77	sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
78	sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
79	sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
80	memset(&tball.aHdr[148], ' ', 8);
81	tball.aHdr[156] = iType + '0';
82	for(i=0; i<512; i++) cksum += tball.aHdr[i];
83	sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
84	tball.aHdr[154] = 0;
85	gzip_step((char*)tball.aHdr, 512);
























86	}

87
88	/*
89	** Recursively add a directory entry for the given file if those
90	** directories have not previously been seen.
91	*/
	@@ -95,18 +374,27 @@
95	unsigned int mTime /* Modification time */
96	){
97	int i;
98	for(i=nName-1; i>0 && zName[i]!='/'; i--){}
99	if( i<=0 ) return;
100	if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;

101	db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
102	if( sqlite3_changes(g.db)==0 ) return;
103	tar_add_directory_of(zName, i-1, mTime);
104	tar_add_header(zName, i, 0755, mTime, 0, 5);







105	memcpy(tball.zPrevDir, zName, i);
106	tball.zPrevDir[i] = 0;
107	}

108
109	/*
110	** Add a single file to the growing tarball.
111	*/
112	static void tar_add_file(
	@@ -117,15 +405,13 @@
117	){
118	int nName = strlen(zName);
119	int n = blob_size(pContent);
120	int lastPage;
121
122	if( nName>=250 ){
123	fossil_fatal("name too long for ustar format: \"%s\"", zName);
124	}
125	tar_add_directory_of(zName, nName, mTime);
126	tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
127	if( n ){
128	gzip_step(blob_buffer(pContent), n);
129	lastPage = n % 512;
130	if( lastPage!=0 ){
131	gzip_step(tball.zSpaces, 512 - lastPage);
	@@ -142,10 +428,17 @@
142	gzip_step(tball.zSpaces, 512);
143	gzip_step(tball.zSpaces, 512);
144	gzip_finish(pOut);
145	fossil_free(tball.aHdr);
146	tball.aHdr = 0;







147	}
148
149
150	/*
151	** COMMAND: test-tarball
152

	--- src/tar.c
	+++ src/tar.c
	@@ -27,31 +27,296 @@
27	*/
28	static struct tarball_t {
29	unsigned char *aHdr; /* Space for building headers */
30	char *zSpaces; /* Spaces for padding */
31	char *zPrevDir; /* Name of directory for previous entry */
32	int nPrevDirAlloc; /* size of zPrevDir */
33	char *pScratch; /* scratch buffer used to build PAX data */
34	int nScratchUsed; /* part of buffer containing data */
35	int nScratchAlloc; /* size of buffer */
36	} tball;
37
38
39	/*
40	** field lengths of 'ustar' name and prefix fields.
41	*/
42	#define USTAR_NAME_LEN 100
43	#define USTAR_PREFIX_LEN 155
44
45
46	/*
47	** Begin the process of generating a tarball.
48	**
49	** Initialize the GZIP compressor and the table of directory names.
50	*/
51	static void tar_begin(void){
52	assert( tball.aHdr==0 );
53	tball.aHdr = fossil_malloc(512+512);
54	memset(tball.aHdr, 0, 512+512);
55	tball.zSpaces = (char*)&tball.aHdr[512];
56	/* zPrevDir init */
57	tball.zPrevDir = NULL;
58	tball.nPrevDirAlloc = 0;
59	/* scratch buffer init */
60	tball.pScratch = NULL;
61	tball.nScratchUsed = 0;
62	tball.nScratchAlloc = 0;
63
64	memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
65	memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
66	memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */
67	memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */
68	memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */
69	gzip_begin();
70	db_multi_exec(
71	"CREATE TEMP TABLE dir(name UNIQUE);"
72	);
73	}
74
75
76	/*
77	** print to the scratch buffer
78	**
79	** used to build the Pax Interchange Format data, and create
80	** pseudo-file names for the header data.
81	**
82	** The buffer is grown automatically to accommodate the data.
83	*/
84	static int scratch_printf(
85	const char *fmt,
86	...
87	){
88	for(;;){
89	int newSize, minSpace, n;
90	/* calculate space in buffer */
91	int space = tball.nScratchAlloc - tball.nScratchUsed;
92	/* format the string */
93	va_list vl;
94	va_start(vl, fmt);
95	n = vsnprintf(&tball.pScratch[tball.nScratchUsed], space, fmt, vl);
96	assert(n >= 0);
97	va_end(vl);
98	/* if it fit we're done */
99	if(n < space)
100	return n;
101	/* buffer too short: calculate reasonable new size */
102	minSpace = tball.nScratchUsed+n+1;
103	newSize = 2 * tball.nScratchAlloc;
104	if(newSize < minSpace)
105	newSize = minSpace;
106	/* grow the buffer */
107	tball.pScratch = fossil_realloc(tball.pScratch, newSize);
108	tball.nScratchAlloc = newSize;
109	/* loop to try again */
110	}
111	}
112
113
114	/*
115	** verify that all characters in 'zName' are in the
116	** ISO646 (=ASCII) character set.
117	*/
118	static int is_iso646_name(
119	const char *zName, /* file path */
120	int nName /* path length */
121	){
122	int i;
123	for(i = 0; i < nName; i++){
124	unsigned char c = (unsigned char)zName[i];
125	if(c > 0x7e)
126	return 0;
127	}
128	return 1;
129	}
130
131
132	/*
133	** copy string pSrc into pDst, truncating or padding with 0 if necessary
134	*/
135	static void padded_copy(
136	char *pDest,
137	int nDest,
138	const char *pSrc,
139	int nSrc
140	){
141	if(nSrc >= nDest){
142	memcpy(pDest, pSrc, nDest);
143	}else{
144	memcpy(pDest, pSrc, nSrc);
145	memset(&pDest[nSrc], 0, nDest - nSrc);
146	}
147	}
148
149
150
151	/******************************************************************************
152	**
153	** The 'tar' format has evolved over time. Initially the name was stored
154	** in a 100 byte null-terminated field 'name'. File path names were
155	** limited to 99 bytes.
156	**
157	** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
158	** for up to 255 characters to be stored. The full file path is formed by
159	** concatenating the field 'prefix', a slash, and the field 'name'. This
160	** gives some measure of compatibility with programs that only understand
161	** the oldest format.
162	**
163	** The latest Posix extension is called the 'pax Interchange Format'.
164	** It removes all the limitations of the previous two formats by allowing
165	** the storage of arbitrary-length attributes in a separate object that looks
166	** like a file to programs that do not understand this extension. So the
167	** contents of the 'name' and 'prefix' fields should contain values that allow
168	** versions of tar that do not understand this extension to still do
169	** something useful.
170	**
171	******************************************************************************/
172
173	/*
174	** The position we use to split a file path into the 'name' and 'prefix'
175	** fields needs to meet the following criteria:
176	**
177	** - not at the beginning or end of the string
178	** - the position must contain a slash
179	** - no more than 100 characters follow the slash
180	** - no more than 155 characters precede it
181	**
182	** The routine 'find_split_pos' finds a split position. It will meet the
183	** criteria listed above if such a position exists. If no such
184	** position exists it generates one that is useful for generating the
185	** values used for backward compatibility.
186	*/
187	static int find_split_pos(
188	const char *zName, /* file path */
189	int nName /* path length */
190	){
191	int i, split = 0;
192	/* only search if the string needs splitting */
193	if(nName > USTAR_NAME_LEN){
194	for(i = 1; i+1 < nName; i++)
195	if(zName[i] == '/'){
196	split = i+1;
197	/* if the split position is within USTAR_NAME_LEN bytes from
198	* the end we can quit */
199	if(nName - split <= USTAR_NAME_LEN)
200	break;
201	}
202	}
203	return split;
204	}
205
206
207	/*
208	** attempt to split the file name path to meet 'ustar' header
209	** criteria.
210	*/
211	static int tar_split_path(
212	const char *zName, /* path */
213	int nName, /* path length */
214	char *pName, /* name field */
215	char *pPrefix /* prefix field */
216	){
217	int split = find_split_pos(zName, nName);
218	/* check whether both pieces fit */
219	if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1)
220	return 0; /* no */
221
222	/* extract name */
223	padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
224
225	/* extract prefix */
226	padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
227
228	return 1; /* success */
229	}
230
231
232	/*
233	** When using an extension header we still need to put something
234	** reasonable in the name and prefix fields. This is probably as
235	** good as it gets.
236	*/
237	static void approximate_split_path(
238	const char *zName, /* path */
239	int nName, /* path length */
240	char *pName, /* name field */
241	char *pPrefix, /* prefix field */
242	int bHeader /* is this a 'x' type tar header? */
243	){
244	int split;
245
246	/* if this is a Pax Interchange header prepend "PaxHeader/"
247	* so we can tell files apart from metadata */
248	if(bHeader){
249	int n;
250	tball.nScratchUsed = 0;
251	n = scratch_printf("PaxHeader/%*.*s", nName, nName, zName);
252	zName = tball.pScratch;
253	nName = n;
254	}
255
256	/* find the split position */
257	split = find_split_pos(zName, nName);
258
259	/* extract a name, truncate if needed */
260	padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
261
262	/* extract a prefix field, truncate when needed */
263	padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
264	}
265
266
267	/*
268	** add a Pax Interchange header to the scratch buffer
269	**
270	** format: <length> <key>=<value>\n
271	** the tricky part is that each header contains its own
272	** size in decimal, counting that length.
273	*/
274	static void add_pax_header(
275	const char *zField,
276	const char *zValue,
277	int nValue
278	){
279	/* calculate length without length field */
280	int blen = strlen(zField) + nValue + 3;
281	/* calculate the length of the length field */
282	int next10 = 1;
283	int n;
284	for(n = blen; n > 0; ){
285	blen++; next10 *= 10;
286	n /= 10;
287	}
288	/* adding the length extended the length field? */
289	if(blen > next10)
290	blen++;
291	/* build the string */
292	n = scratch_printf("%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
293	/* this _must_ be right */
294	if(n != blen)
295	fossil_fatal("internal error: PAX tar header has bad length");
296	/* add length to scratch buffer */
297	tball.nScratchUsed += blen;
298	}
299
300
301	/*
302	** set the header type, calculate the checksum and output
303	** the header
304	*/
305	static void cksum_and_write_header(
306	char cType
307	){
308	unsigned int cksum = 0;
309	int i;
310	memset(&tball.aHdr[148], ' ', 8);
311	tball.aHdr[156] = cType;
312	for(i=0; i<512; i++) cksum += tball.aHdr[i];
313	sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
314	tball.aHdr[155] = 0;
315	gzip_step((char*)tball.aHdr, 512);
316	}
317
318
319	/*
320	** Build a header for a file or directory and write that header
321	** into the growing tarball.
322	*/
	@@ -59,33 +324,47 @@
324	const char *zName, /* Name of the object */
325	int nName, /* Number of characters in zName */
326	int iMode, /* Mode. 0644 or 0755 */
327	unsigned int mTime, /* File modification time */
328	int iSize, /* Size of the object in bytes */
329	char cType /* Type of object. '0'==file. '5'==directory */
330	){
331	/* set mode and modification time */










332	sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);

333	sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
334
335	/* see if we need to output a Pax Interchange Header */
336	if( !is_iso646_name(zName, nName) ||
337	!tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
338	int lastPage;
339	/* add a file name for interoperability with older programs */
340	approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
341
342	/* generate the Pax Interchange path header */
343	tball.nScratchUsed = 0;
344	add_pax_header("path", zName, nName);
345
346	/* set the header length, and write the header */
347	sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", tball.nScratchUsed);
348	cksum_and_write_header('x');
349
350	/* write the Pax Interchange data */
351	gzip_step(tball.pScratch, tball.nScratchUsed);
352	lastPage = tball.nScratchUsed % 512;
353	if( lastPage!=0 )
354	gzip_step(tball.zSpaces, 512 - lastPage);
355
356	/* generate an approximate path for the regular header */
357	approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
358	}
359	/* set the size */
360	sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
361
362	/* write the regular header */
363	cksum_and_write_header(cType);
364	}
365
366
367	/*
368	** Recursively add a directory entry for the given file if those
369	** directories have not previously been seen.
370	*/
	@@ -95,18 +374,27 @@
374	unsigned int mTime /* Modification time */
375	){
376	int i;
377	for(i=nName-1; i>0 && zName[i]!='/'; i--){}
378	if( i<=0 ) return;
379	if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
380	memcmp(tball.zPrevDir, zName, i)==0 ) return;
381	db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
382	if( sqlite3_changes(g.db)==0 ) return;
383	tar_add_directory_of(zName, i-1, mTime);
384	tar_add_header(zName, i, 0755, mTime, 0, '5');
385	if( i >= tball.nPrevDirAlloc ){
386	int nsize = tball.nPrevDirAlloc * 2;
387	if(i+1 > nsize)
388	nsize = i+1;
389	tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
390	tball.nPrevDirAlloc = nsize;
391	}
392	memcpy(tball.zPrevDir, zName, i);
393	tball.zPrevDir[i] = 0;
394	}
395
396
397	/*
398	** Add a single file to the growing tarball.
399	*/
400	static void tar_add_file(
	@@ -117,15 +405,13 @@
405	){
406	int nName = strlen(zName);
407	int n = blob_size(pContent);
408	int lastPage;
409
410	/* length check moved to tar_split_path */


411	tar_add_directory_of(zName, nName, mTime);
412	tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
413	if( n ){
414	gzip_step(blob_buffer(pContent), n);
415	lastPage = n % 512;
416	if( lastPage!=0 ){
417	gzip_step(tball.zSpaces, 512 - lastPage);
	@@ -142,10 +428,17 @@
428	gzip_step(tball.zSpaces, 512);
429	gzip_step(tball.zSpaces, 512);
430	gzip_finish(pOut);
431	fossil_free(tball.aHdr);
432	tball.aHdr = 0;
433	fossil_free(tball.zPrevDir);
434	tball.zPrevDir = NULL;
435	tball.nPrevDirAlloc = 0;
436	fossil_free(tball.pScratch);
437	tball.pScratch = NULL;
438	tball.nScratchUsed = 0;
439	tball.nScratchAlloc = 0;
440	}
441
442
443	/*
444	** COMMAND: test-tarball
445

Fossil SCM

Keyboard Shortcuts