Fossil SCM

Merge the ge-tarfix changes into trunk. This fixes tarball generation for repos that have very long filenames.

drh 2011-07-25 11:21 trunk merge

Commit a26940c22ea353c4ea6b3e2577c1b491c27870a9

Parent ba15af450d33b2f…

1 file changed +278 -29

M src/tar.c

+278 -29

		--- src/tar.c
		+++ src/tar.c
		@@ -27,31 +27,253 @@
27	27	*/
28	28	static struct tarball_t {
29	29	unsigned char *aHdr; /* Space for building headers */
30	30	char *zSpaces; /* Spaces for padding */
31	31	char *zPrevDir; /* Name of directory for previous entry */
	32	+ int nPrevDirAlloc; /* size of zPrevDir */
	33	+ Blob pax; /* PAX data */
32	34	} tball;
	35	+
	36	+
	37	+/*
	38	+** field lengths of 'ustar' name and prefix fields.
	39	+*/
	40	+#define USTAR_NAME_LEN 100
	41	+#define USTAR_PREFIX_LEN 155
	42	+
33	43
34	44	/*
35	45	** Begin the process of generating a tarball.
36	46	**
37	47	** Initialize the GZIP compressor and the table of directory names.
38	48	*/
39	49	static void tar_begin(void){
40	50	assert( tball.aHdr==0 );
41		- tball.aHdr = fossil_malloc(512+512+256);
42		- memset(tball.aHdr, 0, 512+512+256);
	51	+ tball.aHdr = fossil_malloc(512+512);
	52	+ memset(tball.aHdr, 0, 512+512);
43	53	tball.zSpaces = (char*)&tball.aHdr[512];
44		- tball.zPrevDir = (char*)&tball.zSpaces[512];
	54	+ /* zPrevDir init */
	55	+ tball.zPrevDir = NULL;
	56	+ tball.nPrevDirAlloc = 0;
	57	+ /* scratch buffer init */
	58	+ blob_zero(&tball.pax);
	59	+
45	60	memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
46	61	memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
47		- memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */
	62	+ memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */
	63	+ memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */
	64	+ memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */
48	65	gzip_begin();
49	66	db_multi_exec(
50	67	"CREATE TEMP TABLE dir(name UNIQUE);"
51	68	);
52	69	}
	70	+
	71	+
	72	+/*
	73	+** verify that all characters in 'zName' are in the
	74	+** ISO646 (=ASCII) character set.
	75	+*/
	76	+static int is_iso646_name(
	77	+ const char *zName, /* file path */
	78	+ int nName /* path length */
	79	+){
	80	+ int i;
	81	+ for(i = 0; i < nName; i++){
	82	+ unsigned char c = (unsigned char)zName[i];
	83	+ if( c>0x7e ) return 0;
	84	+ }
	85	+ return 1;
	86	+}
	87	+
	88	+
	89	+/*
	90	+** copy string pSrc into pDst, truncating or padding with 0 if necessary
	91	+*/
	92	+static void padded_copy(
	93	+ char *pDest,
	94	+ int nDest,
	95	+ const char *pSrc,
	96	+ int nSrc
	97	+){
	98	+ if(nSrc >= nDest){
	99	+ memcpy(pDest, pSrc, nDest);
	100	+ }else{
	101	+ memcpy(pDest, pSrc, nSrc);
	102	+ memset(&pDest[nSrc], 0, nDest - nSrc);
	103	+ }
	104	+}
	105	+
	106	+
	107	+
	108	+/******************************************************************************
	109	+**
	110	+** The 'tar' format has evolved over time. Initially the name was stored
	111	+** in a 100 byte null-terminated field 'name'. File path names were
	112	+** limited to 99 bytes.
	113	+**
	114	+** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
	115	+** for up to 255 characters to be stored. The full file path is formed by
	116	+** concatenating the field 'prefix', a slash, and the field 'name'. This
	117	+** gives some measure of compatibility with programs that only understand
	118	+** the oldest format.
	119	+**
	120	+** The latest Posix extension is called the 'pax Interchange Format'.
	121	+** It removes all the limitations of the previous two formats by allowing
	122	+** the storage of arbitrary-length attributes in a separate object that looks
	123	+** like a file to programs that do not understand this extension. So the
	124	+** contents of the 'name' and 'prefix' fields should contain values that allow
	125	+** versions of tar that do not understand this extension to still do
	126	+** something useful.
	127	+**
	128	+******************************************************************************/
	129	+
	130	+/*
	131	+** The position we use to split a file path into the 'name' and 'prefix'
	132	+** fields needs to meet the following criteria:
	133	+**
	134	+** - not at the beginning or end of the string
	135	+** - the position must contain a slash
	136	+** - no more than 100 characters follow the slash
	137	+** - no more than 155 characters precede it
	138	+**
	139	+** The routine 'find_split_pos' finds a split position. It will meet the
	140	+** criteria listed above if such a position exists. If no such
	141	+** position exists it generates one that is useful for generating the
	142	+** values used for backward compatibility.
	143	+*/
	144	+static int find_split_pos(
	145	+ const char *zName, /* file path */
	146	+ int nName /* path length */
	147	+){
	148	+ int i, split = 0;
	149	+ /* only search if the string needs splitting */
	150	+ if(nName > USTAR_NAME_LEN){
	151	+ for(i = 1; i+1 < nName; i++)
	152	+ if(zName[i] == '/'){
	153	+ split = i+1;
	154	+ /* if the split position is within USTAR_NAME_LEN bytes from
	155	+ * the end we can quit */
	156	+ if(nName - split <= USTAR_NAME_LEN) break;
	157	+ }
	158	+ }
	159	+ return split;
	160	+}
	161	+
	162	+
	163	+/*
	164	+** attempt to split the file name path to meet 'ustar' header
	165	+** criteria.
	166	+*/
	167	+static int tar_split_path(
	168	+ const char *zName, /* path */
	169	+ int nName, /* path length */
	170	+ char *pName, /* name field */
	171	+ char *pPrefix /* prefix field */
	172	+){
	173	+ int split = find_split_pos(zName, nName);
	174	+ /* check whether both pieces fit */
	175	+ if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){
	176	+ return 0; /* no */
	177	+ }
	178	+
	179	+ /* extract name */
	180	+ padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
	181	+
	182	+ /* extract prefix */
	183	+ padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
	184	+
	185	+ return 1; /* success */
	186	+}
	187	+
	188	+
	189	+/*
	190	+** When using an extension header we still need to put something
	191	+** reasonable in the name and prefix fields. This is probably as
	192	+** good as it gets.
	193	+*/
	194	+static void approximate_split_path(
	195	+ const char *zName, /* path */
	196	+ int nName, /* path length */
	197	+ char *pName, /* name field */
	198	+ char *pPrefix, /* prefix field */
	199	+ int bHeader /* is this a 'x' type tar header? */
	200	+){
	201	+ int split;
	202	+
	203	+ /* if this is a Pax Interchange header prepend "PaxHeader/"
	204	+ ** so we can tell files apart from metadata */
	205	+ if( bHeader ){
	206	+ int n;
	207	+ blob_reset(&tball.pax);
	208	+ blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName);
	209	+ zName = blob_buffer(&tball.pax);
	210	+ nName = blob_size(&tball.pax);
	211	+ }
	212	+
	213	+ /* find the split position */
	214	+ split = find_split_pos(zName, nName);
	215	+
	216	+ /* extract a name, truncate if needed */
	217	+ padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
	218	+
	219	+ /* extract a prefix field, truncate when needed */
	220	+ padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
	221	+}
	222	+
	223	+
	224	+/*
	225	+** add a Pax Interchange header to the scratch buffer
	226	+**
	227	+** format: <length> <key>=<value>\n
	228	+** the tricky part is that each header contains its own
	229	+** size in decimal, counting that length.
	230	+*/
	231	+static void add_pax_header(
	232	+ const char *zField,
	233	+ const char *zValue,
	234	+ int nValue
	235	+){
	236	+ /* calculate length without length field */
	237	+ int blen = strlen(zField) + nValue + 3;
	238	+ /* calculate the length of the length field */
	239	+ int next10 = 1;
	240	+ int n;
	241	+ for(n = blen; n > 0; ){
	242	+ blen++; next10 *= 10;
	243	+ n /= 10;
	244	+ }
	245	+ /* adding the length extended the length field? */
	246	+ if(blen > next10){
	247	+ blen++;
	248	+ }
	249	+ /* build the string */
	250	+ blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
	251	+ /* this _must_ be right */
	252	+ if(blob_size(&tball.pax) != blen){
	253	+ fossil_fatal("internal error: PAX tar header has bad length");
	254	+ }
	255	+}
	256	+
	257	+
	258	+/*
	259	+** set the header type, calculate the checksum and output
	260	+** the header
	261	+*/
	262	+static void cksum_and_write_header(
	263	+ char cType
	264	+){
	265	+ unsigned int cksum = 0;
	266	+ int i;
	267	+ memset(&tball.aHdr[148], ' ', 8);
	268	+ tball.aHdr[156] = cType;
	269	+ for(i=0; i<512; i++) cksum += tball.aHdr[i];
	270	+ sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
	271	+ tball.aHdr[155] = 0;
	272	+ gzip_step((char*)tball.aHdr, 512);
	273	+}
	274	+
53	275
54	276	/*
55	277	** Build a header for a file or directory and write that header
56	278	** into the growing tarball.
57	279	*/
		@@ -59,33 +281,49 @@
59	281	const char *zName, /* Name of the object */
60	282	int nName, /* Number of characters in zName */
61	283	int iMode, /* Mode. 0644 or 0755 */
62	284	unsigned int mTime, /* File modification time */
63	285	int iSize, /* Size of the object in bytes */
64		- int iType /* Type of object. 0==file. 5==directory */
	286	+ char cType /* Type of object. '0'==file. '5'==directory */
65	287	){
66		- unsigned int cksum = 0;
67		- int i;
68		- if( nName>100 ){
69		- memcpy(&tball.aHdr[345], zName, nName-100);
70		- memcpy(tball.aHdr, &zName[nName-100], 100);
71		- memset(&tball.aHdr[245+nName], 0, 267-nName);
72		- }else{
73		- memcpy(tball.aHdr, zName, nName);
74		- memset(&tball.aHdr[nName], 0, 100-nName);
75		- memset(&tball.aHdr[345], 0, 167);
76		- }
	288	+ /* set mode and modification time */
77	289	sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
78		- sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
79	290	sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
80		- memset(&tball.aHdr[148], ' ', 8);
81		- tball.aHdr[156] = iType + '0';
82		- for(i=0; i<512; i++) cksum += tball.aHdr[i];
83		- sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
84		- tball.aHdr[154] = 0;
85		- gzip_step((char*)tball.aHdr, 512);
	291	+
	292	+ /* see if we need to output a Pax Interchange Header */
	293	+ if( !is_iso646_name(zName, nName) ||
	294	+ !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
	295	+ int lastPage;
	296	+ /* add a file name for interoperability with older programs */
	297	+ approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
	298	+
	299	+ /* generate the Pax Interchange path header */
	300	+ blob_reset(&tball.pax);
	301	+ add_pax_header("path", zName, nName);
	302	+
	303	+ /* set the header length, and write the header */
	304	+ sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o",
	305	+ blob_size(&tball.pax));
	306	+ cksum_and_write_header('x');
	307	+
	308	+ /* write the Pax Interchange data */
	309	+ gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax));
	310	+ lastPage = blob_size(&tball.pax) % 512;
	311	+ if( lastPage!=0 ){
	312	+ gzip_step(tball.zSpaces, 512 - lastPage);
	313	+ }
	314	+
	315	+ /* generate an approximate path for the regular header */
	316	+ approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
	317	+ }
	318	+ /* set the size */
	319	+ sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
	320	+
	321	+ /* write the regular header */
	322	+ cksum_and_write_header(cType);
86	323	}
	324	+
87	325
88	326	/*
89	327	** Recursively add a directory entry for the given file if those
90	328	** directories have not previously been seen.
91	329	*/
		@@ -95,18 +333,27 @@
95	333	unsigned int mTime /* Modification time */
96	334	){
97	335	int i;
98	336	for(i=nName-1; i>0 && zName[i]!='/'; i--){}
99	337	if( i<=0 ) return;
100		- if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;
	338	+ if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
	339	+ memcmp(tball.zPrevDir, zName, i)==0 ) return;
101	340	db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
102	341	if( sqlite3_changes(g.db)==0 ) return;
103	342	tar_add_directory_of(zName, i-1, mTime);
104		- tar_add_header(zName, i, 0755, mTime, 0, 5);
	343	+ tar_add_header(zName, i, 0755, mTime, 0, '5');
	344	+ if( i >= tball.nPrevDirAlloc ){
	345	+ int nsize = tball.nPrevDirAlloc * 2;
	346	+ if(i+1 > nsize)
	347	+ nsize = i+1;
	348	+ tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
	349	+ tball.nPrevDirAlloc = nsize;
	350	+ }
105	351	memcpy(tball.zPrevDir, zName, i);
106	352	tball.zPrevDir[i] = 0;
107	353	}
	354	+
108	355
109	356	/*
110	357	** Add a single file to the growing tarball.
111	358	*/
112	359	static void tar_add_file(
		@@ -117,15 +364,13 @@
117	364	){
118	365	int nName = strlen(zName);
119	366	int n = blob_size(pContent);
120	367	int lastPage;
121	368
122		- if( nName>=250 ){
123		- fossil_fatal("name too long for ustar format: \"%s\"", zName);
124		- }
	369	+ /* length check moved to tar_split_path */
125	370	tar_add_directory_of(zName, nName, mTime);
126		- tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
	371	+ tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
127	372	if( n ){
128	373	gzip_step(blob_buffer(pContent), n);
129	374	lastPage = n % 512;
130	375	if( lastPage!=0 ){
131	376	gzip_step(tball.zSpaces, 512 - lastPage);
		@@ -142,10 +387,14 @@
142	387	gzip_step(tball.zSpaces, 512);
143	388	gzip_step(tball.zSpaces, 512);
144	389	gzip_finish(pOut);
145	390	fossil_free(tball.aHdr);
146	391	tball.aHdr = 0;
	392	+ fossil_free(tball.zPrevDir);
	393	+ tball.zPrevDir = NULL;
	394	+ tball.nPrevDirAlloc = 0;
	395	+ blob_reset(&tball.pax);
147	396	}
148	397
149	398
150	399	/*
151	400	** COMMAND: test-tarball
152	401

	--- src/tar.c
	+++ src/tar.c
	@@ -27,31 +27,253 @@
27	*/
28	static struct tarball_t {
29	unsigned char *aHdr; /* Space for building headers */
30	char *zSpaces; /* Spaces for padding */
31	char *zPrevDir; /* Name of directory for previous entry */


32	} tball;








33
34	/*
35	** Begin the process of generating a tarball.
36	**
37	** Initialize the GZIP compressor and the table of directory names.
38	*/
39	static void tar_begin(void){
40	assert( tball.aHdr==0 );
41	tball.aHdr = fossil_malloc(512+512+256);
42	memset(tball.aHdr, 0, 512+512+256);
43	tball.zSpaces = (char*)&tball.aHdr[512];
44	tball.zPrevDir = (char*)&tball.zSpaces[512];





45	memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
46	memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
47	memcpy(&tball.aHdr[257], "ustar ", 7); /* Format */


48	gzip_begin();
49	db_multi_exec(
50	"CREATE TEMP TABLE dir(name UNIQUE);"
51	);
52	}













































































































































































































53
54	/*
55	** Build a header for a file or directory and write that header
56	** into the growing tarball.
57	*/
	@@ -59,33 +281,49 @@
59	const char *zName, /* Name of the object */
60	int nName, /* Number of characters in zName */
61	int iMode, /* Mode. 0644 or 0755 */
62	unsigned int mTime, /* File modification time */
63	int iSize, /* Size of the object in bytes */
64	int iType /* Type of object. 0==file. 5==directory */
65	){
66	unsigned int cksum = 0;
67	int i;
68	if( nName>100 ){
69	memcpy(&tball.aHdr[345], zName, nName-100);
70	memcpy(tball.aHdr, &zName[nName-100], 100);
71	memset(&tball.aHdr[245+nName], 0, 267-nName);
72	}else{
73	memcpy(tball.aHdr, zName, nName);
74	memset(&tball.aHdr[nName], 0, 100-nName);
75	memset(&tball.aHdr[345], 0, 167);
76	}
77	sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
78	sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
79	sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
80	memset(&tball.aHdr[148], ' ', 8);
81	tball.aHdr[156] = iType + '0';
82	for(i=0; i<512; i++) cksum += tball.aHdr[i];
83	sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
84	tball.aHdr[154] = 0;
85	gzip_step((char*)tball.aHdr, 512);


























86	}

87
88	/*
89	** Recursively add a directory entry for the given file if those
90	** directories have not previously been seen.
91	*/
	@@ -95,18 +333,27 @@
95	unsigned int mTime /* Modification time */
96	){
97	int i;
98	for(i=nName-1; i>0 && zName[i]!='/'; i--){}
99	if( i<=0 ) return;
100	if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;

101	db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
102	if( sqlite3_changes(g.db)==0 ) return;
103	tar_add_directory_of(zName, i-1, mTime);
104	tar_add_header(zName, i, 0755, mTime, 0, 5);







105	memcpy(tball.zPrevDir, zName, i);
106	tball.zPrevDir[i] = 0;
107	}

108
109	/*
110	** Add a single file to the growing tarball.
111	*/
112	static void tar_add_file(
	@@ -117,15 +364,13 @@
117	){
118	int nName = strlen(zName);
119	int n = blob_size(pContent);
120	int lastPage;
121
122	if( nName>=250 ){
123	fossil_fatal("name too long for ustar format: \"%s\"", zName);
124	}
125	tar_add_directory_of(zName, nName, mTime);
126	tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
127	if( n ){
128	gzip_step(blob_buffer(pContent), n);
129	lastPage = n % 512;
130	if( lastPage!=0 ){
131	gzip_step(tball.zSpaces, 512 - lastPage);
	@@ -142,10 +387,14 @@
142	gzip_step(tball.zSpaces, 512);
143	gzip_step(tball.zSpaces, 512);
144	gzip_finish(pOut);
145	fossil_free(tball.aHdr);
146	tball.aHdr = 0;




147	}
148
149
150	/*
151	** COMMAND: test-tarball
152

	--- src/tar.c
	+++ src/tar.c
	@@ -27,31 +27,253 @@
27	*/
28	static struct tarball_t {
29	unsigned char *aHdr; /* Space for building headers */
30	char *zSpaces; /* Spaces for padding */
31	char *zPrevDir; /* Name of directory for previous entry */
32	int nPrevDirAlloc; /* size of zPrevDir */
33	Blob pax; /* PAX data */
34	} tball;
35
36
37	/*
38	** field lengths of 'ustar' name and prefix fields.
39	*/
40	#define USTAR_NAME_LEN 100
41	#define USTAR_PREFIX_LEN 155
42
43
44	/*
45	** Begin the process of generating a tarball.
46	**
47	** Initialize the GZIP compressor and the table of directory names.
48	*/
49	static void tar_begin(void){
50	assert( tball.aHdr==0 );
51	tball.aHdr = fossil_malloc(512+512);
52	memset(tball.aHdr, 0, 512+512);
53	tball.zSpaces = (char*)&tball.aHdr[512];
54	/* zPrevDir init */
55	tball.zPrevDir = NULL;
56	tball.nPrevDirAlloc = 0;
57	/* scratch buffer init */
58	blob_zero(&tball.pax);
59
60	memcpy(&tball.aHdr[108], "0000000", 8); /* Owner ID */
61	memcpy(&tball.aHdr[116], "0000000", 8); /* Group ID */
62	memcpy(&tball.aHdr[257], "ustar\00000", 8); /* POSIX.1 format */
63	memcpy(&tball.aHdr[265], "nobody", 7); /* Owner name */
64	memcpy(&tball.aHdr[297], "nobody", 7); /* Group name */
65	gzip_begin();
66	db_multi_exec(
67	"CREATE TEMP TABLE dir(name UNIQUE);"
68	);
69	}
70
71
72	/*
73	** verify that all characters in 'zName' are in the
74	** ISO646 (=ASCII) character set.
75	*/
76	static int is_iso646_name(
77	const char *zName, /* file path */
78	int nName /* path length */
79	){
80	int i;
81	for(i = 0; i < nName; i++){
82	unsigned char c = (unsigned char)zName[i];
83	if( c>0x7e ) return 0;
84	}
85	return 1;
86	}
87
88
89	/*
90	** copy string pSrc into pDst, truncating or padding with 0 if necessary
91	*/
92	static void padded_copy(
93	char *pDest,
94	int nDest,
95	const char *pSrc,
96	int nSrc
97	){
98	if(nSrc >= nDest){
99	memcpy(pDest, pSrc, nDest);
100	}else{
101	memcpy(pDest, pSrc, nSrc);
102	memset(&pDest[nSrc], 0, nDest - nSrc);
103	}
104	}
105
106
107
108	/******************************************************************************
109	**
110	** The 'tar' format has evolved over time. Initially the name was stored
111	** in a 100 byte null-terminated field 'name'. File path names were
112	** limited to 99 bytes.
113	**
114	** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
115	** for up to 255 characters to be stored. The full file path is formed by
116	** concatenating the field 'prefix', a slash, and the field 'name'. This
117	** gives some measure of compatibility with programs that only understand
118	** the oldest format.
119	**
120	** The latest Posix extension is called the 'pax Interchange Format'.
121	** It removes all the limitations of the previous two formats by allowing
122	** the storage of arbitrary-length attributes in a separate object that looks
123	** like a file to programs that do not understand this extension. So the
124	** contents of the 'name' and 'prefix' fields should contain values that allow
125	** versions of tar that do not understand this extension to still do
126	** something useful.
127	**
128	******************************************************************************/
129
130	/*
131	** The position we use to split a file path into the 'name' and 'prefix'
132	** fields needs to meet the following criteria:
133	**
134	** - not at the beginning or end of the string
135	** - the position must contain a slash
136	** - no more than 100 characters follow the slash
137	** - no more than 155 characters precede it
138	**
139	** The routine 'find_split_pos' finds a split position. It will meet the
140	** criteria listed above if such a position exists. If no such
141	** position exists it generates one that is useful for generating the
142	** values used for backward compatibility.
143	*/
144	static int find_split_pos(
145	const char *zName, /* file path */
146	int nName /* path length */
147	){
148	int i, split = 0;
149	/* only search if the string needs splitting */
150	if(nName > USTAR_NAME_LEN){
151	for(i = 1; i+1 < nName; i++)
152	if(zName[i] == '/'){
153	split = i+1;
154	/* if the split position is within USTAR_NAME_LEN bytes from
155	* the end we can quit */
156	if(nName - split <= USTAR_NAME_LEN) break;
157	}
158	}
159	return split;
160	}
161
162
163	/*
164	** attempt to split the file name path to meet 'ustar' header
165	** criteria.
166	*/
167	static int tar_split_path(
168	const char *zName, /* path */
169	int nName, /* path length */
170	char *pName, /* name field */
171	char *pPrefix /* prefix field */
172	){
173	int split = find_split_pos(zName, nName);
174	/* check whether both pieces fit */
175	if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1){
176	return 0; /* no */
177	}
178
179	/* extract name */
180	padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
181
182	/* extract prefix */
183	padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));
184
185	return 1; /* success */
186	}
187
188
189	/*
190	** When using an extension header we still need to put something
191	** reasonable in the name and prefix fields. This is probably as
192	** good as it gets.
193	*/
194	static void approximate_split_path(
195	const char *zName, /* path */
196	int nName, /* path length */
197	char *pName, /* name field */
198	char *pPrefix, /* prefix field */
199	int bHeader /* is this a 'x' type tar header? */
200	){
201	int split;
202
203	/* if this is a Pax Interchange header prepend "PaxHeader/"
204	** so we can tell files apart from metadata */
205	if( bHeader ){
206	int n;
207	blob_reset(&tball.pax);
208	blob_appendf(&tball.pax, "PaxHeader/%*.*s", nName, nName, zName);
209	zName = blob_buffer(&tball.pax);
210	nName = blob_size(&tball.pax);
211	}
212
213	/* find the split position */
214	split = find_split_pos(zName, nName);
215
216	/* extract a name, truncate if needed */
217	padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);
218
219	/* extract a prefix field, truncate when needed */
220	padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
221	}
222
223
224	/*
225	** add a Pax Interchange header to the scratch buffer
226	**
227	** format: <length> <key>=<value>\n
228	** the tricky part is that each header contains its own
229	** size in decimal, counting that length.
230	*/
231	static void add_pax_header(
232	const char *zField,
233	const char *zValue,
234	int nValue
235	){
236	/* calculate length without length field */
237	int blen = strlen(zField) + nValue + 3;
238	/* calculate the length of the length field */
239	int next10 = 1;
240	int n;
241	for(n = blen; n > 0; ){
242	blen++; next10 *= 10;
243	n /= 10;
244	}
245	/* adding the length extended the length field? */
246	if(blen > next10){
247	blen++;
248	}
249	/* build the string */
250	blob_appendf(&tball.pax, "%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
251	/* this _must_ be right */
252	if(blob_size(&tball.pax) != blen){
253	fossil_fatal("internal error: PAX tar header has bad length");
254	}
255	}
256
257
258	/*
259	** set the header type, calculate the checksum and output
260	** the header
261	*/
262	static void cksum_and_write_header(
263	char cType
264	){
265	unsigned int cksum = 0;
266	int i;
267	memset(&tball.aHdr[148], ' ', 8);
268	tball.aHdr[156] = cType;
269	for(i=0; i<512; i++) cksum += tball.aHdr[i];
270	sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
271	tball.aHdr[155] = 0;
272	gzip_step((char*)tball.aHdr, 512);
273	}
274
275
276	/*
277	** Build a header for a file or directory and write that header
278	** into the growing tarball.
279	*/
	@@ -59,33 +281,49 @@
281	const char *zName, /* Name of the object */
282	int nName, /* Number of characters in zName */
283	int iMode, /* Mode. 0644 or 0755 */
284	unsigned int mTime, /* File modification time */
285	int iSize, /* Size of the object in bytes */
286	char cType /* Type of object. '0'==file. '5'==directory */
287	){
288	/* set mode and modification time */










289	sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);

290	sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
291
292	/* see if we need to output a Pax Interchange Header */
293	if( !is_iso646_name(zName, nName) ||
294	!tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
295	int lastPage;
296	/* add a file name for interoperability with older programs */
297	approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);
298
299	/* generate the Pax Interchange path header */
300	blob_reset(&tball.pax);
301	add_pax_header("path", zName, nName);
302
303	/* set the header length, and write the header */
304	sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o",
305	blob_size(&tball.pax));
306	cksum_and_write_header('x');
307
308	/* write the Pax Interchange data */
309	gzip_step(blob_buffer(&tball.pax), blob_size(&tball.pax));
310	lastPage = blob_size(&tball.pax) % 512;
311	if( lastPage!=0 ){
312	gzip_step(tball.zSpaces, 512 - lastPage);
313	}
314
315	/* generate an approximate path for the regular header */
316	approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
317	}
318	/* set the size */
319	sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);
320
321	/* write the regular header */
322	cksum_and_write_header(cType);
323	}
324
325
326	/*
327	** Recursively add a directory entry for the given file if those
328	** directories have not previously been seen.
329	*/
	@@ -95,18 +333,27 @@
333	unsigned int mTime /* Modification time */
334	){
335	int i;
336	for(i=nName-1; i>0 && zName[i]!='/'; i--){}
337	if( i<=0 ) return;
338	if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
339	memcmp(tball.zPrevDir, zName, i)==0 ) return;
340	db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
341	if( sqlite3_changes(g.db)==0 ) return;
342	tar_add_directory_of(zName, i-1, mTime);
343	tar_add_header(zName, i, 0755, mTime, 0, '5');
344	if( i >= tball.nPrevDirAlloc ){
345	int nsize = tball.nPrevDirAlloc * 2;
346	if(i+1 > nsize)
347	nsize = i+1;
348	tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
349	tball.nPrevDirAlloc = nsize;
350	}
351	memcpy(tball.zPrevDir, zName, i);
352	tball.zPrevDir[i] = 0;
353	}
354
355
356	/*
357	** Add a single file to the growing tarball.
358	*/
359	static void tar_add_file(
	@@ -117,15 +364,13 @@
364	){
365	int nName = strlen(zName);
366	int n = blob_size(pContent);
367	int lastPage;
368
369	/* length check moved to tar_split_path */


370	tar_add_directory_of(zName, nName, mTime);
371	tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
372	if( n ){
373	gzip_step(blob_buffer(pContent), n);
374	lastPage = n % 512;
375	if( lastPage!=0 ){
376	gzip_step(tball.zSpaces, 512 - lastPage);
	@@ -142,10 +387,14 @@
387	gzip_step(tball.zSpaces, 512);
388	gzip_step(tball.zSpaces, 512);
389	gzip_finish(pOut);
390	fossil_free(tball.aHdr);
391	tball.aHdr = 0;
392	fossil_free(tball.zPrevDir);
393	tball.zPrevDir = NULL;
394	tball.nPrevDirAlloc = 0;
395	blob_reset(&tball.pax);
396	}
397
398
399	/*
400	** COMMAND: test-tarball
401

Fossil SCM

Keyboard Shortcuts