Fossil SCM

merge trunk

jan.nijtmans 2013-03-07 11:00 ticket-2cfd96b2ba merge

Commit f96894a54adea250a8c5a46853c27b04ab1989ec

Parent 949976e0654099c…

4 files changed +12 -12 +74 -25 +40 -24 +118

~ src/checkin.c ~ src/diff.c ~ src/glob.c ~ test/glob.test

M src/checkin.c

+12 -12

		--- src/checkin.c
		+++ src/checkin.c
		@@ -925,11 +925,22 @@
925	925	const char *zDisable;
926	926	const char *zConvert = "c=convert/";
927	927	Blob ans;
928	928	char cReply;
929	929
930		- if( fHasCrLf && fUnicode ){
	930	+ if( eType==0 ){
	931	+ if( binOk ){
	932	+ return 0; /* We don't want binary warnings for this file. */
	933	+ }
	934	+ if( fHasLength ){
	935	+ zWarning = "long lines";
	936	+ }else{
	937	+ zWarning = "binary data";
	938	+ }
	939	+ zDisable = "\"binary-glob\" setting";
	940	+ zConvert = ""; /* We cannot convert binary files. */
	941	+ }else if( fHasCrLf && fUnicode ){
931	942	if ( crnlOk && encodingOk ){
932	943	return 0; /* We don't want CR/NL and Unicode warnings for this file. */
933	944	}
934	945	zWarning = "CR/NL line endings and Unicode";
935	946	zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
		@@ -937,21 +948,10 @@
937	948	if( crnlOk ){
938	949	return 0; /* We don't want CR/NL warnings for this file. */
939	950	}
940	951	zWarning = "CR/NL line endings";
941	952	zDisable = "\"crnl-glob\" setting";
942		- }else if( eType==0 ){
943		- if( binOk ){
944		- return 0; /* We don't want binary warnings for this file. */
945		- }
946		- if( fHasLength ){
947		- zWarning = "long lines";
948		- }else{
949		- zWarning = "binary data";
950		- }
951		- zDisable = "\"binary-glob\" setting";
952		- zConvert = ""; /* We cannot convert binary files. */
953	953	}else{
954	954	if ( encodingOk ){
955	955	return 0; /* We don't want encoding warnings for this file. */
956	956	}
957	957	zWarning = "Unicode";
958	958

	--- src/checkin.c
	+++ src/checkin.c
	@@ -925,11 +925,22 @@
925	const char *zDisable;
926	const char *zConvert = "c=convert/";
927	Blob ans;
928	char cReply;
929
930	if( fHasCrLf && fUnicode ){











931	if ( crnlOk && encodingOk ){
932	return 0; /* We don't want CR/NL and Unicode warnings for this file. */
933	}
934	zWarning = "CR/NL line endings and Unicode";
935	zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
	@@ -937,21 +948,10 @@
937	if( crnlOk ){
938	return 0; /* We don't want CR/NL warnings for this file. */
939	}
940	zWarning = "CR/NL line endings";
941	zDisable = "\"crnl-glob\" setting";
942	}else if( eType==0 ){
943	if( binOk ){
944	return 0; /* We don't want binary warnings for this file. */
945	}
946	if( fHasLength ){
947	zWarning = "long lines";
948	}else{
949	zWarning = "binary data";
950	}
951	zDisable = "\"binary-glob\" setting";
952	zConvert = ""; /* We cannot convert binary files. */
953	}else{
954	if ( encodingOk ){
955	return 0; /* We don't want encoding warnings for this file. */
956	}
957	zWarning = "Unicode";
958

	--- src/checkin.c
	+++ src/checkin.c
	@@ -925,11 +925,22 @@
925	const char *zDisable;
926	const char *zConvert = "c=convert/";
927	Blob ans;
928	char cReply;
929
930	if( eType==0 ){
931	if( binOk ){
932	return 0; /* We don't want binary warnings for this file. */
933	}
934	if( fHasLength ){
935	zWarning = "long lines";
936	}else{
937	zWarning = "binary data";
938	}
939	zDisable = "\"binary-glob\" setting";
940	zConvert = ""; /* We cannot convert binary files. */
941	}else if( fHasCrLf && fUnicode ){
942	if ( crnlOk && encodingOk ){
943	return 0; /* We don't want CR/NL and Unicode warnings for this file. */
944	}
945	zWarning = "CR/NL line endings and Unicode";
946	zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
	@@ -937,21 +948,10 @@
948	if( crnlOk ){
949	return 0; /* We don't want CR/NL warnings for this file. */
950	}
951	zWarning = "CR/NL line endings";
952	zDisable = "\"crnl-glob\" setting";











953	}else{
954	if ( encodingOk ){
955	return 0; /* We don't want encoding warnings for this file. */
956	}
957	zWarning = "Unicode";
958

M src/diff.c

+74 -25

		--- src/diff.c
		+++ src/diff.c
		@@ -69,13 +69,15 @@
69	69	** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70	70	** to convey status information about the blob content.
71	71	*/
72	72	#define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73	73	#define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74		-#define LOOK_LF ((int)0x00000002) /* One or more LF chars were found. */
75		-#define LOOK_CRLF ((int)0x00000004) /* One or more CR/LF pairs were found. */
76		-#define LOOK_LENGTH ((int)0x00000008) /* An over length line was found. */
	74	+#define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */
	75	+#define LOOK_LF ((int)0x00000004) /* One or more LF chars were found. */
	76	+#define LOOK_CRLF ((int)0x00000008) /* One or more CR/LF pairs were found. */
	77	+#define LOOK_LENGTH ((int)0x00000010) /* An over length line was found. */
	78	+#define LOOK_ODD ((int)0x00000020) /* An odd number of bytes was found. */
77	79	#endif /* INTERFACE */
78	80
79	81	/*
80	82	** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
81	83	*/
		@@ -217,31 +219,34 @@
217	219	** validate any code points. It makes no attempt to detect if any [invalid]
218	220	** switches between UTF-8 and other encodings occur.
219	221	**
220	222	** The only code points that this function cares about are the NUL character,
221	223	** carriage-return, and line-feed.
	224	+**
	225	+** Whether or not this function examines the entire contents of the blob is
	226	+** officially unspecified.
222	227	**
223	228	********************************** WARNING ********************************
224	229	*/
225	230	int looks_like_utf8(const Blob pContent, int pFlags){
226	231	const char *z = blob_buffer(pContent);
227	232	unsigned int n = blob_size(pContent);
228		- int j, c;
	233	+ int j, c, result = 1; /* Assume UTF-8 text, prove otherwise */
229	234
230	235	if( pFlags ) *pFlags = LOOK_NONE;
231		- if( n==0 ) return 1; /* Empty file -> text */
	236	+ if( n==0 ) return result; /* Empty file -> text */
232	237	c = *z;
233	238	if( c==0 ){
234	239	if( pFlags ) *pFlags \|= LOOK_NUL;
235		- return 0; /* NUL character in a file -> binary */
	240	+ result = 0; /* NUL character in a file -> binary */
236	241	}
237	242	j = (c!='\n');
238	243	while( --n>0 ){
239	244	c = *++z; ++j;
240	245	if( c==0 ){
241	246	if( pFlags ) *pFlags \|= LOOK_NUL;
242		- return 0; /* NUL character in a file -> binary */
	247	+ result = 0; /* NUL character in a file -> binary */
243	248	}
244	249	if( c=='\n' ){
245	250	int c2 = z[-1];
246	251	if( pFlags ){
247	252	*pFlags \|= LOOK_LF;
		@@ -249,20 +254,22 @@
249	254	*pFlags \|= LOOK_CRLF;
250	255	}
251	256	}
252	257	if( j>LENGTH_MASK ){
253	258	if( pFlags ) *pFlags \|= LOOK_LENGTH;
254		- return 0; /* Very long line -> binary */
	259	+ result = 0; /* Very long line -> binary */
255	260	}
256	261	j = 0;
	262	+ }else if( c=='\r' ){
	263	+ if( pFlags ) *pFlags \|= LOOK_CR;
257	264	}
258	265	}
259	266	if( j>LENGTH_MASK ){
260	267	if( pFlags ) *pFlags \|= LOOK_LENGTH;
261		- return 0; /* Very long line -> binary */
	268	+ result = 0; /* Very long line -> binary */
262	269	}
263		- return 1; /* No problems seen -> not binary */
	270	+ return result; /* No problems seen -> not binary */
264	271	}
265	272
266	273	/*
267	274	** Define the type needed to represent a Unicode (UTF-16) character.
268	275	*/
		@@ -311,32 +318,38 @@
311	318	** validate any code points. It makes no attempt to detect if any [invalid]
312	319	** switches between the UTF-16be and UTF-16le encodings occur.
313	320	**
314	321	** The only code points that this function cares about are the NUL character,
315	322	** carriage-return, and line-feed.
	323	+**
	324	+** Whether or not this function examines the entire contents of the blob is
	325	+** officially unspecified.
316	326	**
317	327	********************************** WARNING ********************************
318	328	*/
319	329	int looks_like_utf16(const Blob pContent, int pFlags){
320	330	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
321	331	unsigned int n = blob_size(pContent);
322		- int j, c;
	332	+ int j, c, result = 1; /* Assume UTF-16 text, prove otherwise */
323	333
324	334	if( pFlags ) *pFlags = LOOK_NONE;
325		- if( n==0 ) return 1; /* Empty file -> text */
326		- if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
	335	+ if( n==0 ) return result; /* Empty file -> text */
	336	+ if( n%2 ){
	337	+ if( pFlags ) *pFlags \|= LOOK_ODD;
	338	+ return 0; /* Odd number of bytes -> binary (or UTF-8) */
	339	+ }
327	340	c = *z;
328	341	if( c==0 ){
329	342	if( pFlags ) *pFlags \|= LOOK_NUL;
330		- return 0; /* NUL character in a file -> binary */
	343	+ result = 0; /* NUL character in a file -> binary */
331	344	}
332	345	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
333	346	while( (n-=2)>0 ){
334	347	c = *++z; ++j;
335	348	if( c==0 ){
336	349	if( pFlags ) *pFlags \|= LOOK_NUL;
337		- return 0; /* NUL character in a file -> binary */
	350	+ result = 0; /* NUL character in a file -> binary */
338	351	}
339	352	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
340	353	int c2 = z[-1];
341	354	if( pFlags ){
342	355	*pFlags \|= LOOK_LF;
		@@ -344,20 +357,22 @@
344	357	*pFlags \|= LOOK_CRLF;
345	358	}
346	359	}
347	360	if( j>UTF16_LENGTH_MASK ){
348	361	if( pFlags ) *pFlags \|= LOOK_LENGTH;
349		- return 0; /* Very long line -> binary */
	362	+ result = 0; /* Very long line -> binary */
350	363	}
351	364	j = 0;
	365	+ }else if( c==UTF16BE_CR \|\| c==UTF16LE_CR ){
	366	+ if( pFlags ) *pFlags \|= LOOK_CR;
352	367	}
353	368	}
354	369	if( j>UTF16_LENGTH_MASK ){
355	370	if( pFlags ) *pFlags \|= LOOK_LENGTH;
356		- return 0; /* Very long line -> binary */
	371	+ result = 0; /* Very long line -> binary */
357	372	}
358		- return 1; /* No problems seen -> not binary */
	373	+ return result; /* No problems seen -> not binary */
359	374	}
360	375
361	376	/*
362	377	** This function returns an array of bytes representing the byte-order-mark
363	378	** for UTF-8.
		@@ -395,23 +410,24 @@
395	410	const Blob pContent, / IN: Blob content to perform BOM detection on. */
396	411	int pnByte, / OUT: The number of bytes used for the BOM. */
397	412	int pbReverse / OUT: Non-zero for BOM in reverse byte-order. */
398	413	){
399	414	const unsigned short z = (unsigned short )blob_buffer(pContent);
	415	+ int bomSize = sizeof(unsigned short);
400	416	int size = blob_size(pContent);
401	417
402		- if( (size<2) \|\| (size%2)
403		- \|\| (size>=4 && z[1]==0) ) return 0;
404		- if( z[0] == 0xfffe ){
	418	+ if( size<bomSize ) return 0; /* No: cannot read BOM. */
	419	+ if( size>=(2bomSize) && z[1]==0 ) return 0; / No: possible UTF-32. */
	420	+ if( z[0]==0xfffe ){
405	421	if( pbReverse ) *pbReverse = 1;
406		- }else if( z[0] == 0xfeff ){
	422	+ }else if( z[0]==0xfeff ){
407	423	if( pbReverse ) *pbReverse = 0;
408	424	}else{
409		- return 0;
	425	+ return 0; /* No: UTF-16 byte-order-mark not found. */
410	426	}
411		- if( pnByte ) *pnByte = 2;
412		- return 1;
	427	+ if( pnByte ) *pnByte = bomSize;
	428	+ return 1; /* Yes. */
413	429	}
414	430
415	431	/*
416	432	** Return true if two DLine elements are identical.
417	433	*/
		@@ -2450,5 +2466,38 @@
2450	2466	for(i=0; i<ann.nOrig; i++){
2451	2467	fossil_print("%s: %.*s\n",
2452	2468	ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z);
2453	2469	}
2454	2470	}
	2471	+
	2472	+/*
	2473	+** COMMAND: test-looks-like-utf
	2474	+**
	2475	+** Usage: %fossil test-looks-like-utf FILENAME
	2476	+**
	2477	+** FILENAME is the name of a file to check for textual content in the UTF-8
	2478	+** and/or UTF-16 encodings.
	2479	+*/
	2480	+void looks_like_utf_test_cmd(void){
	2481	+ Blob blob; /* the contents of the specified file */
	2482	+ int eType; /* return value of looks_like_utf8/utf16() */
	2483	+ int fUtf8; /* return value of starts_with_utf8_bom() */
	2484	+ int fUtf16; /* return value of starts_with_utf16_bom() */
	2485	+ int lookFlags; /* output flags from looks_like_utf8/utf16() */
	2486	+ if( g.argc<3 ) usage("FILENAME");
	2487	+ blob_read_from_file(&blob, g.argv[2]);
	2488	+ fUtf8 = starts_with_utf8_bom(&blob, 0);
	2489	+ fUtf16 = starts_with_utf16_bom(&blob, 0, 0);
	2490	+ eType = fUtf16 ? looks_like_utf16(&blob, &lookFlags) :
	2491	+ looks_like_utf8(&blob, &lookFlags);
	2492	+ fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
	2493	+ fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
	2494	+ fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no");
	2495	+ fossil_print("Looks like UTF-%s: %s\n",fUtf16?"16":"8",eType?"yes":"no");
	2496	+ fossil_print("Has flag LOOK_NUL: %s\n",(lookFlags&LOOK_NUL)?"yes":"no");
	2497	+ fossil_print("Has flag LOOK_CR: %s\n",(lookFlags&LOOK_CR)?"yes":"no");
	2498	+ fossil_print("Has flag LOOK_LF: %s\n",(lookFlags&LOOK_LF)?"yes":"no");
	2499	+ fossil_print("Has flag LOOK_CRLF: %s\n",(lookFlags&LOOK_CRLF)?"yes":"no");
	2500	+ fossil_print("Has flag LOOK_LENGTH: %s\n",(lookFlags&LOOK_LENGTH)?"yes":"no");
	2501	+ fossil_print("Has flag LOOK_ODD: %s\n",(lookFlags&LOOK_ODD)?"yes":"no");
	2502	+ blob_reset(&blob);
	2503	+}
2455	2504

	--- src/diff.c
	+++ src/diff.c
	@@ -69,13 +69,15 @@
69	** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70	** to convey status information about the blob content.
71	*/
72	#define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73	#define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74	#define LOOK_LF ((int)0x00000002) /* One or more LF chars were found. */
75	#define LOOK_CRLF ((int)0x00000004) /* One or more CR/LF pairs were found. */
76	#define LOOK_LENGTH ((int)0x00000008) /* An over length line was found. */


77	#endif /* INTERFACE */
78
79	/*
80	** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
81	*/
	@@ -217,31 +219,34 @@
217	** validate any code points. It makes no attempt to detect if any [invalid]
218	** switches between UTF-8 and other encodings occur.
219	**
220	** The only code points that this function cares about are the NUL character,
221	** carriage-return, and line-feed.



222	**
223	********************************** WARNING ********************************
224	*/
225	int looks_like_utf8(const Blob pContent, int pFlags){
226	const char *z = blob_buffer(pContent);
227	unsigned int n = blob_size(pContent);
228	int j, c;
229
230	if( pFlags ) *pFlags = LOOK_NONE;
231	if( n==0 ) return 1; /* Empty file -> text */
232	c = *z;
233	if( c==0 ){
234	if( pFlags ) *pFlags \|= LOOK_NUL;
235	return 0; /* NUL character in a file -> binary */
236	}
237	j = (c!='\n');
238	while( --n>0 ){
239	c = *++z; ++j;
240	if( c==0 ){
241	if( pFlags ) *pFlags \|= LOOK_NUL;
242	return 0; /* NUL character in a file -> binary */
243	}
244	if( c=='\n' ){
245	int c2 = z[-1];
246	if( pFlags ){
247	*pFlags \|= LOOK_LF;
	@@ -249,20 +254,22 @@
249	*pFlags \|= LOOK_CRLF;
250	}
251	}
252	if( j>LENGTH_MASK ){
253	if( pFlags ) *pFlags \|= LOOK_LENGTH;
254	return 0; /* Very long line -> binary */
255	}
256	j = 0;


257	}
258	}
259	if( j>LENGTH_MASK ){
260	if( pFlags ) *pFlags \|= LOOK_LENGTH;
261	return 0; /* Very long line -> binary */
262	}
263	return 1; /* No problems seen -> not binary */
264	}
265
266	/*
267	** Define the type needed to represent a Unicode (UTF-16) character.
268	*/
	@@ -311,32 +318,38 @@
311	** validate any code points. It makes no attempt to detect if any [invalid]
312	** switches between the UTF-16be and UTF-16le encodings occur.
313	**
314	** The only code points that this function cares about are the NUL character,
315	** carriage-return, and line-feed.



316	**
317	********************************** WARNING ********************************
318	*/
319	int looks_like_utf16(const Blob pContent, int pFlags){
320	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
321	unsigned int n = blob_size(pContent);
322	int j, c;
323
324	if( pFlags ) *pFlags = LOOK_NONE;
325	if( n==0 ) return 1; /* Empty file -> text */
326	if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */



327	c = *z;
328	if( c==0 ){
329	if( pFlags ) *pFlags \|= LOOK_NUL;
330	return 0; /* NUL character in a file -> binary */
331	}
332	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
333	while( (n-=2)>0 ){
334	c = *++z; ++j;
335	if( c==0 ){
336	if( pFlags ) *pFlags \|= LOOK_NUL;
337	return 0; /* NUL character in a file -> binary */
338	}
339	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
340	int c2 = z[-1];
341	if( pFlags ){
342	*pFlags \|= LOOK_LF;
	@@ -344,20 +357,22 @@
344	*pFlags \|= LOOK_CRLF;
345	}
346	}
347	if( j>UTF16_LENGTH_MASK ){
348	if( pFlags ) *pFlags \|= LOOK_LENGTH;
349	return 0; /* Very long line -> binary */
350	}
351	j = 0;


352	}
353	}
354	if( j>UTF16_LENGTH_MASK ){
355	if( pFlags ) *pFlags \|= LOOK_LENGTH;
356	return 0; /* Very long line -> binary */
357	}
358	return 1; /* No problems seen -> not binary */
359	}
360
361	/*
362	** This function returns an array of bytes representing the byte-order-mark
363	** for UTF-8.
	@@ -395,23 +410,24 @@
395	const Blob pContent, / IN: Blob content to perform BOM detection on. */
396	int pnByte, / OUT: The number of bytes used for the BOM. */
397	int pbReverse / OUT: Non-zero for BOM in reverse byte-order. */
398	){
399	const unsigned short z = (unsigned short )blob_buffer(pContent);

400	int size = blob_size(pContent);
401
402	if( (size<2) \|\| (size%2)
403	\|\| (size>=4 && z[1]==0) ) return 0;
404	if( z[0] == 0xfffe ){
405	if( pbReverse ) *pbReverse = 1;
406	}else if( z[0] == 0xfeff ){
407	if( pbReverse ) *pbReverse = 0;
408	}else{
409	return 0;
410	}
411	if( pnByte ) *pnByte = 2;
412	return 1;
413	}
414
415	/*
416	** Return true if two DLine elements are identical.
417	*/
	@@ -2450,5 +2466,38 @@
2450	for(i=0; i<ann.nOrig; i++){
2451	fossil_print("%s: %.*s\n",
2452	ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z);
2453	}
2454	}

































2455

	--- src/diff.c
	+++ src/diff.c
	@@ -69,13 +69,15 @@
69	** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70	** to convey status information about the blob content.
71	*/
72	#define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73	#define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74	#define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */
75	#define LOOK_LF ((int)0x00000004) /* One or more LF chars were found. */
76	#define LOOK_CRLF ((int)0x00000008) /* One or more CR/LF pairs were found. */
77	#define LOOK_LENGTH ((int)0x00000010) /* An over length line was found. */
78	#define LOOK_ODD ((int)0x00000020) /* An odd number of bytes was found. */
79	#endif /* INTERFACE */
80
81	/*
82	** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
83	*/
	@@ -217,31 +219,34 @@
219	** validate any code points. It makes no attempt to detect if any [invalid]
220	** switches between UTF-8 and other encodings occur.
221	**
222	** The only code points that this function cares about are the NUL character,
223	** carriage-return, and line-feed.
224	**
225	** Whether or not this function examines the entire contents of the blob is
226	** officially unspecified.
227	**
228	********************************** WARNING ********************************
229	*/
230	int looks_like_utf8(const Blob pContent, int pFlags){
231	const char *z = blob_buffer(pContent);
232	unsigned int n = blob_size(pContent);
233	int j, c, result = 1; /* Assume UTF-8 text, prove otherwise */
234
235	if( pFlags ) *pFlags = LOOK_NONE;
236	if( n==0 ) return result; /* Empty file -> text */
237	c = *z;
238	if( c==0 ){
239	if( pFlags ) *pFlags \|= LOOK_NUL;
240	result = 0; /* NUL character in a file -> binary */
241	}
242	j = (c!='\n');
243	while( --n>0 ){
244	c = *++z; ++j;
245	if( c==0 ){
246	if( pFlags ) *pFlags \|= LOOK_NUL;
247	result = 0; /* NUL character in a file -> binary */
248	}
249	if( c=='\n' ){
250	int c2 = z[-1];
251	if( pFlags ){
252	*pFlags \|= LOOK_LF;
	@@ -249,20 +254,22 @@
254	*pFlags \|= LOOK_CRLF;
255	}
256	}
257	if( j>LENGTH_MASK ){
258	if( pFlags ) *pFlags \|= LOOK_LENGTH;
259	result = 0; /* Very long line -> binary */
260	}
261	j = 0;
262	}else if( c=='\r' ){
263	if( pFlags ) *pFlags \|= LOOK_CR;
264	}
265	}
266	if( j>LENGTH_MASK ){
267	if( pFlags ) *pFlags \|= LOOK_LENGTH;
268	result = 0; /* Very long line -> binary */
269	}
270	return result; /* No problems seen -> not binary */
271	}
272
273	/*
274	** Define the type needed to represent a Unicode (UTF-16) character.
275	*/
	@@ -311,32 +318,38 @@
318	** validate any code points. It makes no attempt to detect if any [invalid]
319	** switches between the UTF-16be and UTF-16le encodings occur.
320	**
321	** The only code points that this function cares about are the NUL character,
322	** carriage-return, and line-feed.
323	**
324	** Whether or not this function examines the entire contents of the blob is
325	** officially unspecified.
326	**
327	********************************** WARNING ********************************
328	*/
329	int looks_like_utf16(const Blob pContent, int pFlags){
330	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
331	unsigned int n = blob_size(pContent);
332	int j, c, result = 1; /* Assume UTF-16 text, prove otherwise */
333
334	if( pFlags ) *pFlags = LOOK_NONE;
335	if( n==0 ) return result; /* Empty file -> text */
336	if( n%2 ){
337	if( pFlags ) *pFlags \|= LOOK_ODD;
338	return 0; /* Odd number of bytes -> binary (or UTF-8) */
339	}
340	c = *z;
341	if( c==0 ){
342	if( pFlags ) *pFlags \|= LOOK_NUL;
343	result = 0; /* NUL character in a file -> binary */
344	}
345	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
346	while( (n-=2)>0 ){
347	c = *++z; ++j;
348	if( c==0 ){
349	if( pFlags ) *pFlags \|= LOOK_NUL;
350	result = 0; /* NUL character in a file -> binary */
351	}
352	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
353	int c2 = z[-1];
354	if( pFlags ){
355	*pFlags \|= LOOK_LF;
	@@ -344,20 +357,22 @@
357	*pFlags \|= LOOK_CRLF;
358	}
359	}
360	if( j>UTF16_LENGTH_MASK ){
361	if( pFlags ) *pFlags \|= LOOK_LENGTH;
362	result = 0; /* Very long line -> binary */
363	}
364	j = 0;
365	}else if( c==UTF16BE_CR \|\| c==UTF16LE_CR ){
366	if( pFlags ) *pFlags \|= LOOK_CR;
367	}
368	}
369	if( j>UTF16_LENGTH_MASK ){
370	if( pFlags ) *pFlags \|= LOOK_LENGTH;
371	result = 0; /* Very long line -> binary */
372	}
373	return result; /* No problems seen -> not binary */
374	}
375
376	/*
377	** This function returns an array of bytes representing the byte-order-mark
378	** for UTF-8.
	@@ -395,23 +410,24 @@
410	const Blob pContent, / IN: Blob content to perform BOM detection on. */
411	int pnByte, / OUT: The number of bytes used for the BOM. */
412	int pbReverse / OUT: Non-zero for BOM in reverse byte-order. */
413	){
414	const unsigned short z = (unsigned short )blob_buffer(pContent);
415	int bomSize = sizeof(unsigned short);
416	int size = blob_size(pContent);
417
418	if( size<bomSize ) return 0; /* No: cannot read BOM. */
419	if( size>=(2bomSize) && z[1]==0 ) return 0; / No: possible UTF-32. */
420	if( z[0]==0xfffe ){
421	if( pbReverse ) *pbReverse = 1;
422	}else if( z[0]==0xfeff ){
423	if( pbReverse ) *pbReverse = 0;
424	}else{
425	return 0; /* No: UTF-16 byte-order-mark not found. */
426	}
427	if( pnByte ) *pnByte = bomSize;
428	return 1; /* Yes. */
429	}
430
431	/*
432	** Return true if two DLine elements are identical.
433	*/
	@@ -2450,5 +2466,38 @@
2466	for(i=0; i<ann.nOrig; i++){
2467	fossil_print("%s: %.*s\n",
2468	ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z);
2469	}
2470	}
2471
2472	/*
2473	** COMMAND: test-looks-like-utf
2474	**
2475	** Usage: %fossil test-looks-like-utf FILENAME
2476	**
2477	** FILENAME is the name of a file to check for textual content in the UTF-8
2478	** and/or UTF-16 encodings.
2479	*/
2480	void looks_like_utf_test_cmd(void){
2481	Blob blob; /* the contents of the specified file */
2482	int eType; /* return value of looks_like_utf8/utf16() */
2483	int fUtf8; /* return value of starts_with_utf8_bom() */
2484	int fUtf16; /* return value of starts_with_utf16_bom() */
2485	int lookFlags; /* output flags from looks_like_utf8/utf16() */
2486	if( g.argc<3 ) usage("FILENAME");
2487	blob_read_from_file(&blob, g.argv[2]);
2488	fUtf8 = starts_with_utf8_bom(&blob, 0);
2489	fUtf16 = starts_with_utf16_bom(&blob, 0, 0);
2490	eType = fUtf16 ? looks_like_utf16(&blob, &lookFlags) :
2491	looks_like_utf8(&blob, &lookFlags);
2492	fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2493	fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2494	fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no");
2495	fossil_print("Looks like UTF-%s: %s\n",fUtf16?"16":"8",eType?"yes":"no");
2496	fossil_print("Has flag LOOK_NUL: %s\n",(lookFlags&LOOK_NUL)?"yes":"no");
2497	fossil_print("Has flag LOOK_CR: %s\n",(lookFlags&LOOK_CR)?"yes":"no");
2498	fossil_print("Has flag LOOK_LF: %s\n",(lookFlags&LOOK_LF)?"yes":"no");
2499	fossil_print("Has flag LOOK_CRLF: %s\n",(lookFlags&LOOK_CRLF)?"yes":"no");
2500	fossil_print("Has flag LOOK_LENGTH: %s\n",(lookFlags&LOOK_LENGTH)?"yes":"no");
2501	fossil_print("Has flag LOOK_ODD: %s\n",(lookFlags&LOOK_ODD)?"yes":"no");
2502	blob_reset(&blob);
2503	}
2504

M src/glob.c

+40 -24

		--- src/glob.c
		+++ src/glob.c
		@@ -29,16 +29,17 @@
29	29	** zVal: "x"
30	30	** zGlobList: ".o,.obj"
31	31	**
32	32	** Result: "(x GLOB '.o' OR x GLOB '.obj')"
33	33	**
34		-** Each element of the GLOB list may optionally be enclosed in either '...'
35		-** or "...". This allows commas in the expression. Whitespace at the
36		-** beginning and end of each GLOB pattern is ignored, except when enclosed
37		-** within '...' or "...".
	34	+** Commas and whitespace are considered to be element delimters. Each
	35	+** element of the GLOB list may optionally be enclosed in either '...' or
	36	+** "...". This allows commas and/or whitespace to be used in the elements
	37	+** themselves.
38	38	**
39		-** This routine makes no effort to free the memory space it uses.
	39	+** This routine makes no effort to free the memory space it uses, which
	40	+** currently consists of a blob object and its contents.
40	41	*/
41	42	char glob_expr(const char zVal, const char *zGlobList){
42	43	Blob expr;
43	44	char *zSep = "(";
44	45	int nTerm = 0;
		@@ -46,21 +47,24 @@
46	47	int cTerm;
47	48
48	49	if( zGlobList==0 \|\| zGlobList[0]==0 ) return "0";
49	50	blob_zero(&expr);
50	51	while( zGlobList[0] ){
51		- while( fossil_isspace(zGlobList[0]) \|\| zGlobList[0]==',' ) zGlobList++;
	52	+ while( fossil_isspace(zGlobList[0]) \|\| zGlobList[0]==',' ){
	53	+ zGlobList++; /* Skip leading commas, spaces, and newlines */
	54	+ }
52	55	if( zGlobList[0]==0 ) break;
53	56	if( zGlobList[0]=='\'' \|\| zGlobList[0]=='"' ){
54	57	cTerm = zGlobList[0];
55	58	zGlobList++;
56	59	}else{
57	60	cTerm = ',';
58	61	}
59		- for(i=0; zGlobList[i] && zGlobList[i]!=cTerm && zGlobList[i]!='\n'; i++){}
60		- if( cTerm==',' ){
61		- while( i>0 && fossil_isspace(zGlobList[i-1]) ){ i--; }
	62	+ /* Find the next delimter (or the end of the string). */
	63	+ for(i=0; zGlobList[i] && zGlobList[i]!=cTerm; i++){
	64	+ if( cTerm!=',' ) continue; /* If quoted, keep going. */
	65	+ if( fossil_isspace(zGlobList[i]) ) break; /* If space, stop. */
62	66	}
63	67	blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
64	68	zSep = " OR ";
65	69	if( cTerm!=',' && zGlobList[i] ) i++;
66	70	zGlobList += i;
		@@ -85,24 +89,24 @@
85	89	char *azPattern; / Array of pointers to patterns */
86	90	};
87	91	#endif /* INTERFACE */
88	92
89	93	/*
90		-** zPatternList is a comma-separate list of glob patterns. Parse up
	94	+** zPatternList is a comma-separated list of glob patterns. Parse up
91	95	** that list and use it to create a new Glob object.
92	96	**
93	97	** Elements of the glob list may be optionally enclosed in single our
94		-** double-quotes. This allows a comma to be part of a glob.
	98	+** double-quotes. This allows a comma to be part of a glob pattern.
95	99	**
96	100	** Leading and trailing spaces on unquoted glob patterns are ignored.
97	101	**
98	102	** An empty or null pattern list results in a null glob, which will
99	103	** match nothing.
100	104	*/
101	105	Glob glob_create(const char zPatternList){
102	106	int nList; /* Size of zPatternList in bytes */
103		- int i, j; /* Loop counters */
	107	+ int i; /* Loop counters */
104	108	Glob p; / The glob being created */
105	109	char z; / Copy of the pattern list */
106	110	char delimiter; /* '\'' or '\"' or 0 */
107	111
108	112	if( zPatternList==0 \|\| zPatternList[0]==0 ) return 0;
		@@ -110,27 +114,26 @@
110	114	p = fossil_malloc( sizeof(*p) + nList+1 );
111	115	memset(p, 0, sizeof(*p));
112	116	z = (char*)&p[1];
113	117	memcpy(z, zPatternList, nList+1);
114	118	while( z[0] ){
115		- while( z[0]==',' \|\| z[0]==' ' \|\| z[0]=='\n' \|\| z[0]=='\r' ){
116		- z++; /* Skip leading spaces and newlines */
	119	+ while( fossil_isspace(z[0]) \|\| z[0]==',' ){
	120	+ z++; /* Skip leading commas, spaces, and newlines */
117	121	}
	122	+ if( z[0]==0 ) break;
118	123	if( z[0]=='\'' \|\| z[0]=='"' ){
119	124	delimiter = z[0];
120	125	z++;
121	126	}else{
122	127	delimiter = ',';
123	128	}
124		- if( z[0]==0 ) break;
125	129	p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)sizeof(char) );
126	130	p->azPattern[p->nPattern++] = z;
127		- for(i=0; z[i] && z[i]!=delimiter && z[i]!='\n' && z[i]!='\r'; i++){}
128		- if( delimiter==',' ){
129		- /* Remove trailing spaces / newlines on a comma-delimited pattern */
130		- for(j=i; j>1 && (z[j-1]==' ' \|\| z[j-1]=='\n' \|\| z[j-1]=='\r'); j--){}
131		- if( j<i ) z[j] = 0;
	131	+ /* Find the next delimter (or the end of the string). */
	132	+ for(i=0; z[i] && z[i]!=delimiter; i++){
	133	+ if( delimiter!=',' ) continue; /* If quoted, keep going. */
	134	+ if( fossil_isspace(z[i]) ) break; /* If space, stop. */
132	135	}
133	136	if( z[i]==0 ) break;
134	137	z[i] = 0;
135	138	z += i+1;
136	139	}
		@@ -245,22 +248,35 @@
245	248	/*
246	249	** COMMAND: test-glob
247	250	**
248	251	** Usage: %fossil test-glob PATTERN STRING...
249	252	**
250		-** PATTERN is a comma-separated list of glob patterns. Show which of
251		-** the STRINGs that follow match the PATTERN.
	253	+** PATTERN is a comma- and whitespace-separated list of optionally
	254	+** quoted glob patterns. Show which of the STRINGs that follow match
	255	+** the PATTERN.
	256	+**
	257	+** If PATTERN begins with "@" the the rest of the pattern is understood
	258	+** to be a setting name (such as binary-glob, crln-glob, or encoding-glob)
	259	+** and the value of that setting is used as the actually glob pattern.
252	260	*/
253	261	void glob_test_cmd(void){
254	262	Glob *pGlob;
255	263	int i;
	264	+ char *zPattern;
256	265	if( g.argc<4 ) usage("PATTERN STRING ...");
257		- fossil_print("SQL expression: %s\n", glob_expr("x", g.argv[2]));
258		- pGlob = glob_create(g.argv[2]);
	266	+ zPattern = g.argv[2];
	267	+ if( zPattern[0]=='@' ){
	268	+ db_find_and_open_repository(OPEN_ANY_SCHEMA,0);
	269	+ zPattern = db_get(zPattern+1, 0);
	270	+ if( zPattern==0 ) fossil_fatal("no such setting: %s", g.argv[2]+1);
	271	+ fossil_print("GLOB pattern: %s\n", zPattern);
	272	+ }
	273	+ fossil_print("SQL expression: %s\n", glob_expr("x", zPattern));
	274	+ pGlob = glob_create(zPattern);
259	275	for(i=0; i<pGlob->nPattern; i++){
260	276	fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
261	277	}
262	278	for(i=3; i<g.argc; i++){
263	279	fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
264	280	}
265	281	glob_free(pGlob);
266	282	}
267	283
268	284	ADDED test/glob.test

	--- src/glob.c
	+++ src/glob.c
	@@ -29,16 +29,17 @@
29	** zVal: "x"
30	** zGlobList: ".o,.obj"
31	**
32	** Result: "(x GLOB '.o' OR x GLOB '.obj')"
33	**
34	** Each element of the GLOB list may optionally be enclosed in either '...'
35	** or "...". This allows commas in the expression. Whitespace at the
36	** beginning and end of each GLOB pattern is ignored, except when enclosed
37	** within '...' or "...".
38	**
39	** This routine makes no effort to free the memory space it uses.

40	*/
41	char *glob_expr(const char *zVal, const char *zGlobList){
42	Blob expr;
43	char *zSep = "(";
44	int nTerm = 0;
	@@ -46,21 +47,24 @@
46	int cTerm;
47
48	if( zGlobList==0 \|\| zGlobList[0]==0 ) return "0";
49	blob_zero(&expr);
50	while( zGlobList[0] ){
51	while( fossil_isspace(zGlobList[0]) \|\| zGlobList[0]==',' ) zGlobList++;


52	if( zGlobList[0]==0 ) break;
53	if( zGlobList[0]=='\'' \|\| zGlobList[0]=='"' ){
54	cTerm = zGlobList[0];
55	zGlobList++;
56	}else{
57	cTerm = ',';
58	}
59	for(i=0; zGlobList[i] && zGlobList[i]!=cTerm && zGlobList[i]!='\n'; i++){}
60	if( cTerm==',' ){
61	while( i>0 && fossil_isspace(zGlobList[i-1]) ){ i--; }

62	}
63	blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
64	zSep = " OR ";
65	if( cTerm!=',' && zGlobList[i] ) i++;
66	zGlobList += i;
	@@ -85,24 +89,24 @@
85	char **azPattern; /* Array of pointers to patterns */
86	};
87	#endif /* INTERFACE */
88
89	/*
90	** zPatternList is a comma-separate list of glob patterns. Parse up
91	** that list and use it to create a new Glob object.
92	**
93	** Elements of the glob list may be optionally enclosed in single or
94	** double-quotes. This allows a comma to be part of a glob.
95	**
96	** Leading and trailing spaces on unquoted glob patterns are ignored.
97	**
98	** An empty or null pattern list results in a null glob, which will
99	** match nothing.
100	*/
101	Glob *glob_create(const char *zPatternList){
102	int nList; /* Size of zPatternList in bytes */
103	int i, j; /* Loop counters */
104	Glob *p; /* The glob being created */
105	char *z; /* Copy of the pattern list */
106	char delimiter; /* '\'' or '\"' or 0 */
107
108	if( zPatternList==0 \|\| zPatternList[0]==0 ) return 0;
	@@ -110,27 +114,26 @@
110	p = fossil_malloc( sizeof(*p) + nList+1 );
111	memset(p, 0, sizeof(*p));
112	z = (char*)&p[1];
113	memcpy(z, zPatternList, nList+1);
114	while( z[0] ){
115	while( z[0]==',' \|\| z[0]==' ' \|\| z[0]=='\n' \|\| z[0]=='\r' ){
116	z++; /* Skip leading spaces and newlines */
117	}

118	if( z[0]=='\'' \|\| z[0]=='"' ){
119	delimiter = z[0];
120	z++;
121	}else{
122	delimiter = ',';
123	}
124	if( z[0]==0 ) break;
125	p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) );
126	p->azPattern[p->nPattern++] = z;
127	for(i=0; z[i] && z[i]!=delimiter && z[i]!='\n' && z[i]!='\r'; i++){}
128	if( delimiter==',' ){
129	/* Remove trailing spaces / newlines on a comma-delimited pattern */
130	for(j=i; j>1 && (z[j-1]==' ' \|\| z[j-1]=='\n' \|\| z[j-1]=='\r'); j--){}
131	if( j<i ) z[j] = 0;
132	}
133	if( z[i]==0 ) break;
134	z[i] = 0;
135	z += i+1;
136	}
	@@ -245,22 +248,35 @@
245	/*
246	** COMMAND: test-glob
247	**
248	** Usage: %fossil test-glob PATTERN STRING...
249	**
250	** PATTERN is a comma-separated list of glob patterns. Show which of
251	** the STRINGs that follow match the PATTERN.





252	*/
253	void glob_test_cmd(void){
254	Glob *pGlob;
255	int i;

256	if( g.argc<4 ) usage("PATTERN STRING ...");
257	fossil_print("SQL expression: %s\n", glob_expr("x", g.argv[2]));
258	pGlob = glob_create(g.argv[2]);







259	for(i=0; i<pGlob->nPattern; i++){
260	fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
261	}
262	for(i=3; i<g.argc; i++){
263	fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
264	}
265	glob_free(pGlob);
266	}
267
268	ADDED test/glob.test

	--- src/glob.c
	+++ src/glob.c
	@@ -29,16 +29,17 @@
29	** zVal: "x"
30	** zGlobList: ".o,.obj"
31	**
32	** Result: "(x GLOB '.o' OR x GLOB '.obj')"
33	**
34	** Commas and whitespace are considered to be element delimiters. Each
35	** element of the GLOB list may optionally be enclosed in either '...' or
36	** "...". This allows commas and/or whitespace to be used in the elements
37	** themselves.
38	**
39	** This routine makes no effort to free the memory space it uses, which
40	** currently consists of a blob object and its contents.
41	*/
42	char *glob_expr(const char *zVal, const char *zGlobList){
43	Blob expr;
44	char *zSep = "(";
45	int nTerm = 0;
	@@ -46,21 +47,24 @@
47	int cTerm;
48
49	if( zGlobList==0 \|\| zGlobList[0]==0 ) return "0";
50	blob_zero(&expr);
51	while( zGlobList[0] ){
52	while( fossil_isspace(zGlobList[0]) \|\| zGlobList[0]==',' ){
53	zGlobList++; /* Skip leading commas, spaces, and newlines */
54	}
55	if( zGlobList[0]==0 ) break;
56	if( zGlobList[0]=='\'' \|\| zGlobList[0]=='"' ){
57	cTerm = zGlobList[0];
58	zGlobList++;
59	}else{
60	cTerm = ',';
61	}
62	/* Find the next delimiter (or the end of the string). */
63	for(i=0; zGlobList[i] && zGlobList[i]!=cTerm; i++){
64	if( cTerm!=',' ) continue; /* If quoted, keep going. */
65	if( fossil_isspace(zGlobList[i]) ) break; /* If space, stop. */
66	}
67	blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
68	zSep = " OR ";
69	if( cTerm!=',' && zGlobList[i] ) i++;
70	zGlobList += i;
	@@ -85,24 +89,24 @@
89	char **azPattern; /* Array of pointers to patterns */
90	};
91	#endif /* INTERFACE */
92
93	/*
94	** zPatternList is a comma-separated list of glob patterns. Parse up
95	** that list and use it to create a new Glob object.
96	**
97	** Elements of the glob list may be optionally enclosed in single or
98	** double-quotes. This allows a comma to be part of a glob pattern.
99	**
100	** Leading and trailing spaces on unquoted glob patterns are ignored.
101	**
102	** An empty or null pattern list results in a null glob, which will
103	** match nothing.
104	*/
105	Glob *glob_create(const char *zPatternList){
106	int nList; /* Size of zPatternList in bytes */
107	int i; /* Loop counters */
108	Glob *p; /* The glob being created */
109	char *z; /* Copy of the pattern list */
110	char delimiter; /* '\'' or '\"' or 0 */
111
112	if( zPatternList==0 \|\| zPatternList[0]==0 ) return 0;
	@@ -110,27 +114,26 @@
114	p = fossil_malloc( sizeof(*p) + nList+1 );
115	memset(p, 0, sizeof(*p));
116	z = (char*)&p[1];
117	memcpy(z, zPatternList, nList+1);
118	while( z[0] ){
119	while( fossil_isspace(z[0]) \|\| z[0]==',' ){
120	z++; /* Skip leading commas, spaces, and newlines */
121	}
122	if( z[0]==0 ) break;
123	if( z[0]=='\'' \|\| z[0]=='"' ){
124	delimiter = z[0];
125	z++;
126	}else{
127	delimiter = ',';
128	}

129	p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) );
130	p->azPattern[p->nPattern++] = z;
131	/* Find the next delimiter (or the end of the string). */
132	for(i=0; z[i] && z[i]!=delimiter; i++){
133	if( delimiter!=',' ) continue; /* If quoted, keep going. */
134	if( fossil_isspace(z[i]) ) break; /* If space, stop. */

135	}
136	if( z[i]==0 ) break;
137	z[i] = 0;
138	z += i+1;
139	}
	@@ -245,22 +248,35 @@
248	/*
249	** COMMAND: test-glob
250	**
251	** Usage: %fossil test-glob PATTERN STRING...
252	**
253	** PATTERN is a comma- and whitespace-separated list of optionally
254	** quoted glob patterns. Show which of the STRINGs that follow match
255	** the PATTERN.
256	**
257	** If PATTERN begins with "@" the rest of the pattern is understood
258	** to be a setting name (such as binary-glob, crnl-glob, or encoding-glob)
259	** and the value of that setting is used as the actual glob pattern.
260	*/
261	void glob_test_cmd(void){
262	Glob *pGlob;
263	int i;
264	char *zPattern;
265	if( g.argc<4 ) usage("PATTERN STRING ...");
266	zPattern = g.argv[2];
267	if( zPattern[0]=='@' ){
268	db_find_and_open_repository(OPEN_ANY_SCHEMA,0);
269	zPattern = db_get(zPattern+1, 0);
270	if( zPattern==0 ) fossil_fatal("no such setting: %s", g.argv[2]+1);
271	fossil_print("GLOB pattern: %s\n", zPattern);
272	}
273	fossil_print("SQL expression: %s\n", glob_expr("x", zPattern));
274	pGlob = glob_create(zPattern);
275	for(i=0; i<pGlob->nPattern; i++){
276	fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
277	}
278	for(i=3; i<g.argc; i++){
279	fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
280	}
281	glob_free(pGlob);
282	}
283
284	ADDED test/glob.test

M test/glob.test

+118

		--- a/test/glob.test
		+++ b/test/glob.test
		@@ -0,0 +1,118 @@
	1	+#
	2	+# Copyright (c) 2013 D. Richard Hipp
	3	+#
	4	+# This program is free software; you can redistribute it and/or
	5	+# modify it under the terms of the Simplified BSD License (also
	6	+# known as the "2-Clause License" or "FreeBSD License".)
	7	+#
	8	+# This program is distributed in the hope that it will be useful,
	9	+# but without any warranty; without even the implied warranty of
	10	+# merchantability or fitness for a particular purpose.
	11	+#
	12	+# Author contact information:
	13	+# [email protected]
	14	+# http://www.hwaci.com/drh/
	15	+#
	16	+############################################################################
	17	+#
	18	+# Test glob pattern parsing
	19	+#
	20	+
	21	+ parsing
	22	+#
	23	+
	24	+test_setup ""
	25	+
	26	+proc glob-parse {testname args} {
	27	+ set i 1
	28	+ foreach {pattern string result} $args {
	29	+ fossil test-glob $pattern $string
	30	+ test glob-parse-$testname.$i {$::RESULT eq $result}
	31	+ incr i
	32	+ }
	33	+}
	34	+
	35	+glob-parse 100 test test [string map [list \r\n \n] \
	36	+{SQL expression: (x GLOB 'test')
	37	+pattern[0] = [test]
	38	+1 t*')
	39	+pattern[0] = [t*]
	40	+1 1 test}]
	41	+
	42	+glob-parse 101 "one two" one [string map [list \r\n \n] \
	43	+{SQL expression: (x GLOB 'one' OR x GLOB 'two')
	44	+pattern[0] = 0 two one}]
	45	+
	46	+glob-p02 t* test-parse 108 "\"o\rtwo\" \"thrt#
	47	+# Copyright (c)#
	48	+# 1 test}]
	49	+
	50	+glob-parse 103 "o*test}]
	51	+
	52	+glob-parse 101 "one two" one [string map [list \r\n \n] \
	53	+*' OR x GLOB 'two')
	54	+pattern[0] =0 two one}]
	55	+
	56	+glob-p04 {"o* two" "three four"} "one two" [string map [list \r\n \n] \
	57	+{SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
	58	+pattern[0] = [o* two]
	59	+pone two}]
	60	+
	61	+glob-parse 105 {"o* two" "three four"} "5 {"o* two" "three four"} "two one" [string map [list \r\n \n] \
	62	+{SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
	63	+pattern[0] = [o* two]
	64	+p]
	65	+0 0 two one}]
	66	+
	67	+glob-p0 0 two one}]
	68	+
	69	+glob-parse 106 "\"o*\ntwo\" \"three\nfour\"" "one\ntwo" \
	70	+[string map [list \r\n \n] \
	71	+{SQL expression: (x GLOB 'o*
	72	+two' OR x GLOB 'three
	73	+fou] = [one]
	74	+pattern[1] = 1 1 one
	75	+two}]
	76	+
	77	+glob-parse 107 "\"o*\ntwo\" \"three\nfour\"" "two\none" \
	78	+[string map [list \r\n \n] \
	79	+{SQL expression: (x GLOB 'o*
	80	+two' OR x GLOB 'three
	81	+foutwo
	82	+0 two one}]
	83	+
	84	+glob-p08 "\"o*\rtwo\" \"three\rfour\"" "one\rtwo" \
	85	+[string map [list \r\n \n] \
	86	+{SQL expression: (x GLOB 'o*
	87	+two' OR x GLOB 'three
	88	+four')
	89	+pattern[0] = [o*
	90	+two]
	91	+p] = [one]
	92	+pattern[1] =
	93	+ foreach {pattern string result} $args {
	94	+ fossil test-glob $pattern $string
	95	+ test glob-parse-$testname.$i {$::RESULT eq $result}
	96	+ incr i
	97	+ }
	98	+}
	99	+
	100	+gtwo
	101	+0 two one}]
	102	+
	103	+glob-patring map [list \r\n \n] \
	104	+{SQL expression: (x GLOB 'test')
	105	+pattern[0] = [test]
	106	+1 1 test}]
	107	+
	108	+glob-parse 101 "one two" one [string map [list \r\n \n] \
	109	+{] = [one]
	110	+pattern[1] = [two]
	111	+1 1 one}]
	112	+
	113	+glob-parse 102 t* test [string map [list \r\n \n] \
	114	+{SQL expression: (x GLOB 't*')
	115	+pattern[0] = [t*]
	116	+1 1 test}]
	117	+
	118	+glob-parse 103 "o* two" one [st

	--- a/test/glob.test
	+++ b/test/glob.test
	@@ -0,0 +1,118 @@

	--- a/test/glob.test
	+++ b/test/glob.test
	@@ -0,0 +1,118 @@
1	#
2	# Copyright (c) 2013 D. Richard Hipp
3	#
4	# This program is free software; you can redistribute it and/or
5	# modify it under the terms of the Simplified BSD License (also
6	# known as the "2-Clause License" or "FreeBSD License".)
7	#
8	# This program is distributed in the hope that it will be useful,
9	# but without any warranty; without even the implied warranty of
10	# merchantability or fitness for a particular purpose.
11	#
12	# Author contact information:
13	# [email protected]
14	# http://www.hwaci.com/drh/
15	#
16	############################################################################
17	#
18	# Test glob pattern parsing
19	#
20
21	parsing
22	#
23
24	test_setup ""
25
26	proc glob-parse {testname args} {
27	set i 1
28	foreach {pattern string result} $args {
29	fossil test-glob $pattern $string
30	test glob-parse-$testname.$i {$::RESULT eq $result}
31	incr i
32	}
33	}
34
35	glob-parse 100 test test [string map [list \r\n \n] \
36	{SQL expression: (x GLOB 'test')
37	pattern[0] = [test]
38	1 t*')
39	pattern[0] = [t*]
40	1 1 test}]
41
42	glob-parse 101 "one two" one [string map [list \r\n \n] \
43	{SQL expression: (x GLOB 'one' OR x GLOB 'two')
44	pattern[0] = 0 two one}]
45
46	glob-p02 t* test-parse 108 "\"o\rtwo\" \"thrt#
47	# Copyright (c)#
48	# 1 test}]
49
50	glob-parse 103 "o*test}]
51
52	glob-parse 101 "one two" one [string map [list \r\n \n] \
53	*' OR x GLOB 'two')
54	pattern[0] =0 two one}]
55
56	glob-p04 {"o* two" "three four"} "one two" [string map [list \r\n \n] \
57	{SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
58	pattern[0] = [o* two]
59	pone two}]
60
61	glob-parse 105 {"o* two" "three four"} "5 {"o* two" "three four"} "two one" [string map [list \r\n \n] \
62	{SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
63	pattern[0] = [o* two]
64	p]
65	0 0 two one}]
66
67	glob-p0 0 two one}]
68
69	glob-parse 106 "\"o*\ntwo\" \"three\nfour\"" "one\ntwo" \
70	[string map [list \r\n \n] \
71	{SQL expression: (x GLOB 'o*
72	two' OR x GLOB 'three
73	fou] = [one]
74	pattern[1] = 1 1 one
75	two}]
76
77	glob-parse 107 "\"o*\ntwo\" \"three\nfour\"" "two\none" \
78	[string map [list \r\n \n] \
79	{SQL expression: (x GLOB 'o*
80	two' OR x GLOB 'three
81	foutwo
82	0 two one}]
83
84	glob-p08 "\"o*\rtwo\" \"three\rfour\"" "one\rtwo" \
85	[string map [list \r\n \n] \
86	{SQL expression: (x GLOB 'o*
87	two' OR x GLOB 'three
88	four')
89	pattern[0] = [o*
90	two]
91	p] = [one]
92	pattern[1] =
93	foreach {pattern string result} $args {
94	fossil test-glob $pattern $string
95	test glob-parse-$testname.$i {$::RESULT eq $result}
96	incr i
97	}
98	}
99
100	gtwo
101	0 two one}]
102
103	glob-patring map [list \r\n \n] \
104	{SQL expression: (x GLOB 'test')
105	pattern[0] = [test]
106	1 1 test}]
107
108	glob-parse 101 "one two" one [string map [list \r\n \n] \
109	{] = [one]
110	pattern[1] = [two]
111	1 1 one}]
112
113	glob-parse 102 t* test [string map [list \r\n \n] \
114	{SQL expression: (x GLOB 't*')
115	pattern[0] = [t*]
116	1 1 test}]
117
118	glob-parse 103 "o* two" one [st

Fossil SCM

Keyboard Shortcuts