Fossil SCM

merge trunk

jan.nijtmans 2013-03-07 11:00 ticket-2cfd96b2ba merge
Commit f96894a54adea250a8c5a46853c27b04ab1989ec
4 files changed +12 -12 +74 -25 +40 -24 +118
+12 -12
--- src/checkin.c
+++ src/checkin.c
@@ -925,11 +925,22 @@
925925
const char *zDisable;
926926
const char *zConvert = "c=convert/";
927927
Blob ans;
928928
char cReply;
929929
930
- if( fHasCrLf && fUnicode ){
930
+ if( eType==0 ){
931
+ if( binOk ){
932
+ return 0; /* We don't want binary warnings for this file. */
933
+ }
934
+ if( fHasLength ){
935
+ zWarning = "long lines";
936
+ }else{
937
+ zWarning = "binary data";
938
+ }
939
+ zDisable = "\"binary-glob\" setting";
940
+ zConvert = ""; /* We cannot convert binary files. */
941
+ }else if( fHasCrLf && fUnicode ){
931942
if ( crnlOk && encodingOk ){
932943
return 0; /* We don't want CR/NL and Unicode warnings for this file. */
933944
}
934945
zWarning = "CR/NL line endings and Unicode";
935946
zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
@@ -937,21 +948,10 @@
937948
if( crnlOk ){
938949
return 0; /* We don't want CR/NL warnings for this file. */
939950
}
940951
zWarning = "CR/NL line endings";
941952
zDisable = "\"crnl-glob\" setting";
942
- }else if( eType==0 ){
943
- if( binOk ){
944
- return 0; /* We don't want binary warnings for this file. */
945
- }
946
- if( fHasLength ){
947
- zWarning = "long lines";
948
- }else{
949
- zWarning = "binary data";
950
- }
951
- zDisable = "\"binary-glob\" setting";
952
- zConvert = ""; /* We cannot convert binary files. */
953953
}else{
954954
if ( encodingOk ){
955955
return 0; /* We don't want encoding warnings for this file. */
956956
}
957957
zWarning = "Unicode";
958958
--- src/checkin.c
+++ src/checkin.c
@@ -925,11 +925,22 @@
925 const char *zDisable;
926 const char *zConvert = "c=convert/";
927 Blob ans;
928 char cReply;
929
930 if( fHasCrLf && fUnicode ){
 
 
 
 
 
 
 
 
 
 
 
931 if ( crnlOk && encodingOk ){
932 return 0; /* We don't want CR/NL and Unicode warnings for this file. */
933 }
934 zWarning = "CR/NL line endings and Unicode";
935 zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
@@ -937,21 +948,10 @@
937 if( crnlOk ){
938 return 0; /* We don't want CR/NL warnings for this file. */
939 }
940 zWarning = "CR/NL line endings";
941 zDisable = "\"crnl-glob\" setting";
942 }else if( eType==0 ){
943 if( binOk ){
944 return 0; /* We don't want binary warnings for this file. */
945 }
946 if( fHasLength ){
947 zWarning = "long lines";
948 }else{
949 zWarning = "binary data";
950 }
951 zDisable = "\"binary-glob\" setting";
952 zConvert = ""; /* We cannot convert binary files. */
953 }else{
954 if ( encodingOk ){
955 return 0; /* We don't want encoding warnings for this file. */
956 }
957 zWarning = "Unicode";
958
--- src/checkin.c
+++ src/checkin.c
@@ -925,11 +925,22 @@
925 const char *zDisable;
926 const char *zConvert = "c=convert/";
927 Blob ans;
928 char cReply;
929
930 if( eType==0 ){
931 if( binOk ){
932 return 0; /* We don't want binary warnings for this file. */
933 }
934 if( fHasLength ){
935 zWarning = "long lines";
936 }else{
937 zWarning = "binary data";
938 }
939 zDisable = "\"binary-glob\" setting";
940 zConvert = ""; /* We cannot convert binary files. */
941 }else if( fHasCrLf && fUnicode ){
942 if ( crnlOk && encodingOk ){
943 return 0; /* We don't want CR/NL and Unicode warnings for this file. */
944 }
945 zWarning = "CR/NL line endings and Unicode";
946 zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
@@ -937,21 +948,10 @@
948 if( crnlOk ){
949 return 0; /* We don't want CR/NL warnings for this file. */
950 }
951 zWarning = "CR/NL line endings";
952 zDisable = "\"crnl-glob\" setting";
 
 
 
 
 
 
 
 
 
 
 
953 }else{
954 if ( encodingOk ){
955 return 0; /* We don't want encoding warnings for this file. */
956 }
957 zWarning = "Unicode";
958
+74 -25
--- src/diff.c
+++ src/diff.c
@@ -69,13 +69,15 @@
6969
** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
7070
** to convey status information about the blob content.
7171
*/
7272
#define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
7373
#define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74
-#define LOOK_LF ((int)0x00000002) /* One or more LF chars were found. */
75
-#define LOOK_CRLF ((int)0x00000004) /* One or more CR/LF pairs were found. */
76
-#define LOOK_LENGTH ((int)0x00000008) /* An over length line was found. */
74
+#define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */
75
+#define LOOK_LF ((int)0x00000004) /* One or more LF chars were found. */
76
+#define LOOK_CRLF ((int)0x00000008) /* One or more CR/LF pairs were found. */
77
+#define LOOK_LENGTH ((int)0x00000010) /* An over length line was found. */
78
+#define LOOK_ODD ((int)0x00000020) /* An odd number of bytes was found. */
7779
#endif /* INTERFACE */
7880
7981
/*
8082
** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
8183
*/
@@ -217,31 +219,34 @@
217219
** validate any code points. It makes no attempt to detect if any [invalid]
218220
** switches between UTF-8 and other encodings occur.
219221
**
220222
** The only code points that this function cares about are the NUL character,
221223
** carriage-return, and line-feed.
224
+**
225
+** Whether or not this function examines the entire contents of the blob is
226
+** officially unspecified.
222227
**
223228
************************************ WARNING **********************************
224229
*/
225230
int looks_like_utf8(const Blob *pContent, int *pFlags){
226231
const char *z = blob_buffer(pContent);
227232
unsigned int n = blob_size(pContent);
228
- int j, c;
233
+ int j, c, result = 1; /* Assume UTF-8 text, prove otherwise */
229234
230235
if( pFlags ) *pFlags = LOOK_NONE;
231
- if( n==0 ) return 1; /* Empty file -> text */
236
+ if( n==0 ) return result; /* Empty file -> text */
232237
c = *z;
233238
if( c==0 ){
234239
if( pFlags ) *pFlags |= LOOK_NUL;
235
- return 0; /* NUL character in a file -> binary */
240
+ result = 0; /* NUL character in a file -> binary */
236241
}
237242
j = (c!='\n');
238243
while( --n>0 ){
239244
c = *++z; ++j;
240245
if( c==0 ){
241246
if( pFlags ) *pFlags |= LOOK_NUL;
242
- return 0; /* NUL character in a file -> binary */
247
+ result = 0; /* NUL character in a file -> binary */
243248
}
244249
if( c=='\n' ){
245250
int c2 = z[-1];
246251
if( pFlags ){
247252
*pFlags |= LOOK_LF;
@@ -249,20 +254,22 @@
249254
*pFlags |= LOOK_CRLF;
250255
}
251256
}
252257
if( j>LENGTH_MASK ){
253258
if( pFlags ) *pFlags |= LOOK_LENGTH;
254
- return 0; /* Very long line -> binary */
259
+ result = 0; /* Very long line -> binary */
255260
}
256261
j = 0;
262
+ }else if( c=='\r' ){
263
+ if( pFlags ) *pFlags |= LOOK_CR;
257264
}
258265
}
259266
if( j>LENGTH_MASK ){
260267
if( pFlags ) *pFlags |= LOOK_LENGTH;
261
- return 0; /* Very long line -> binary */
268
+ result = 0; /* Very long line -> binary */
262269
}
263
- return 1; /* No problems seen -> not binary */
270
+ return result; /* 1 -> no problems seen (text), 0 -> binary */
264271
}
265272
266273
/*
267274
** Define the type needed to represent a Unicode (UTF-16) character.
268275
*/
@@ -311,32 +318,38 @@
311318
** validate any code points. It makes no attempt to detect if any [invalid]
312319
** switches between the UTF-16be and UTF-16le encodings occur.
313320
**
314321
** The only code points that this function cares about are the NUL character,
315322
** carriage-return, and line-feed.
323
+**
324
+** Whether or not this function examines the entire contents of the blob is
325
+** officially unspecified.
316326
**
317327
************************************ WARNING **********************************
318328
*/
319329
int looks_like_utf16(const Blob *pContent, int *pFlags){
320330
const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
321331
unsigned int n = blob_size(pContent);
322
- int j, c;
332
+ int j, c, result = 1; /* Assume UTF-16 text, prove otherwise */
323333
324334
if( pFlags ) *pFlags = LOOK_NONE;
325
- if( n==0 ) return 1; /* Empty file -> text */
326
- if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
335
+ if( n==0 ) return result; /* Empty file -> text */
336
+ if( n%2 ){
337
+ if( pFlags ) *pFlags |= LOOK_ODD;
338
+ return 0; /* Odd number of bytes -> binary (or UTF-8) */
339
+ }
327340
c = *z;
328341
if( c==0 ){
329342
if( pFlags ) *pFlags |= LOOK_NUL;
330
- return 0; /* NUL character in a file -> binary */
343
+ result = 0; /* NUL character in a file -> binary */
331344
}
332345
j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
333346
while( (n-=2)>0 ){
334347
c = *++z; ++j;
335348
if( c==0 ){
336349
if( pFlags ) *pFlags |= LOOK_NUL;
337
- return 0; /* NUL character in a file -> binary */
350
+ result = 0; /* NUL character in a file -> binary */
338351
}
339352
if( c==UTF16BE_LF || c==UTF16LE_LF ){
340353
int c2 = z[-1];
341354
if( pFlags ){
342355
*pFlags |= LOOK_LF;
@@ -344,20 +357,22 @@
344357
*pFlags |= LOOK_CRLF;
345358
}
346359
}
347360
if( j>UTF16_LENGTH_MASK ){
348361
if( pFlags ) *pFlags |= LOOK_LENGTH;
349
- return 0; /* Very long line -> binary */
362
+ result = 0; /* Very long line -> binary */
350363
}
351364
j = 0;
365
+ }else if( c==UTF16BE_CR || c==UTF16LE_CR ){
366
+ if( pFlags ) *pFlags |= LOOK_CR;
352367
}
353368
}
354369
if( j>UTF16_LENGTH_MASK ){
355370
if( pFlags ) *pFlags |= LOOK_LENGTH;
356
- return 0; /* Very long line -> binary */
371
+ result = 0; /* Very long line -> binary */
357372
}
358
- return 1; /* No problems seen -> not binary */
373
+ return result; /* 1 -> no problems seen (text), 0 -> binary */
359374
}
360375
361376
/*
362377
** This function returns an array of bytes representing the byte-order-mark
363378
** for UTF-8.
@@ -395,23 +410,24 @@
395410
const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
396411
int *pnByte, /* OUT: The number of bytes used for the BOM. */
397412
int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
398413
){
399414
const unsigned short *z = (unsigned short *)blob_buffer(pContent);
415
+ int bomSize = sizeof(unsigned short);
400416
int size = blob_size(pContent);
401417
402
- if( (size<2) || (size%2)
403
- || (size>=4 && z[1]==0) ) return 0;
404
- if( z[0] == 0xfffe ){
418
+ if( size<bomSize ) return 0; /* No: cannot read BOM. */
419
+ if( size>=(2*bomSize) && z[1]==0 ) return 0; /* No: possible UTF-32. */
420
+ if( z[0]==0xfffe ){
405421
if( pbReverse ) *pbReverse = 1;
406
- }else if( z[0] == 0xfeff ){
422
+ }else if( z[0]==0xfeff ){
407423
if( pbReverse ) *pbReverse = 0;
408424
}else{
409
- return 0;
425
+ return 0; /* No: UTF-16 byte-order-mark not found. */
410426
}
411
- if( pnByte ) *pnByte = 2;
412
- return 1;
427
+ if( pnByte ) *pnByte = bomSize;
428
+ return 1; /* Yes. */
413429
}
414430
415431
/*
416432
** Return true if two DLine elements are identical.
417433
*/
@@ -2450,5 +2466,38 @@
24502466
for(i=0; i<ann.nOrig; i++){
24512467
fossil_print("%s: %.*s\n",
24522468
ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z);
24532469
}
24542470
}
2471
+
2472
+/*
2473
+** COMMAND: test-looks-like-utf
2474
+**
2475
+** Usage: %fossil test-looks-like-utf FILENAME
2476
+**
2477
+** FILENAME is the name of a file to check for textual content in the UTF-8
2478
+** and/or UTF-16 encodings.
2479
+*/
2480
+void looks_like_utf_test_cmd(void){
2481
+ Blob blob; /* the contents of the specified file */
2482
+ int eType; /* return value of looks_like_utf8/utf16() */
2483
+ int fUtf8; /* return value of starts_with_utf8_bom() */
2484
+ int fUtf16; /* return value of starts_with_utf16_bom() */
2485
+ int lookFlags; /* output flags from looks_like_utf8/utf16() */
2486
+ if( g.argc<3 ) usage("FILENAME");
2487
+ blob_read_from_file(&blob, g.argv[2]);
2488
+ fUtf8 = starts_with_utf8_bom(&blob, 0);
2489
+ fUtf16 = starts_with_utf16_bom(&blob, 0, 0);
2490
+ eType = fUtf16 ? looks_like_utf16(&blob, &lookFlags) :
2491
+ looks_like_utf8(&blob, &lookFlags);
2492
+ fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2493
+ fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2494
+ fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no");
2495
+ fossil_print("Looks like UTF-%s: %s\n",fUtf16?"16":"8",eType?"yes":"no");
2496
+ fossil_print("Has flag LOOK_NUL: %s\n",(lookFlags&LOOK_NUL)?"yes":"no");
2497
+ fossil_print("Has flag LOOK_CR: %s\n",(lookFlags&LOOK_CR)?"yes":"no");
2498
+ fossil_print("Has flag LOOK_LF: %s\n",(lookFlags&LOOK_LF)?"yes":"no");
2499
+ fossil_print("Has flag LOOK_CRLF: %s\n",(lookFlags&LOOK_CRLF)?"yes":"no");
2500
+ fossil_print("Has flag LOOK_LENGTH: %s\n",(lookFlags&LOOK_LENGTH)?"yes":"no");
2501
+ fossil_print("Has flag LOOK_ODD: %s\n",(lookFlags&LOOK_ODD)?"yes":"no");
2502
+ blob_reset(&blob);
2503
+}
24552504
--- src/diff.c
+++ src/diff.c
@@ -69,13 +69,15 @@
69 ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70 ** to convey status information about the blob content.
71 */
72 #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73 #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74 #define LOOK_LF ((int)0x00000002) /* One or more LF chars were found. */
75 #define LOOK_CRLF ((int)0x00000004) /* One or more CR/LF pairs were found. */
76 #define LOOK_LENGTH ((int)0x00000008) /* An over length line was found. */
 
 
77 #endif /* INTERFACE */
78
79 /*
80 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
81 */
@@ -217,31 +219,34 @@
217 ** validate any code points. It makes no attempt to detect if any [invalid]
218 ** switches between UTF-8 and other encodings occur.
219 **
220 ** The only code points that this function cares about are the NUL character,
221 ** carriage-return, and line-feed.
 
 
 
222 **
223 ************************************ WARNING **********************************
224 */
225 int looks_like_utf8(const Blob *pContent, int *pFlags){
226 const char *z = blob_buffer(pContent);
227 unsigned int n = blob_size(pContent);
228 int j, c;
229
230 if( pFlags ) *pFlags = LOOK_NONE;
231 if( n==0 ) return 1; /* Empty file -> text */
232 c = *z;
233 if( c==0 ){
234 if( pFlags ) *pFlags |= LOOK_NUL;
235 return 0; /* NUL character in a file -> binary */
236 }
237 j = (c!='\n');
238 while( --n>0 ){
239 c = *++z; ++j;
240 if( c==0 ){
241 if( pFlags ) *pFlags |= LOOK_NUL;
242 return 0; /* NUL character in a file -> binary */
243 }
244 if( c=='\n' ){
245 int c2 = z[-1];
246 if( pFlags ){
247 *pFlags |= LOOK_LF;
@@ -249,20 +254,22 @@
249 *pFlags |= LOOK_CRLF;
250 }
251 }
252 if( j>LENGTH_MASK ){
253 if( pFlags ) *pFlags |= LOOK_LENGTH;
254 return 0; /* Very long line -> binary */
255 }
256 j = 0;
 
 
257 }
258 }
259 if( j>LENGTH_MASK ){
260 if( pFlags ) *pFlags |= LOOK_LENGTH;
261 return 0; /* Very long line -> binary */
262 }
263 return 1; /* No problems seen -> not binary */
264 }
265
266 /*
267 ** Define the type needed to represent a Unicode (UTF-16) character.
268 */
@@ -311,32 +318,38 @@
311 ** validate any code points. It makes no attempt to detect if any [invalid]
312 ** switches between the UTF-16be and UTF-16le encodings occur.
313 **
314 ** The only code points that this function cares about are the NUL character,
315 ** carriage-return, and line-feed.
 
 
 
316 **
317 ************************************ WARNING **********************************
318 */
319 int looks_like_utf16(const Blob *pContent, int *pFlags){
320 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
321 unsigned int n = blob_size(pContent);
322 int j, c;
323
324 if( pFlags ) *pFlags = LOOK_NONE;
325 if( n==0 ) return 1; /* Empty file -> text */
326 if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
 
 
 
327 c = *z;
328 if( c==0 ){
329 if( pFlags ) *pFlags |= LOOK_NUL;
330 return 0; /* NUL character in a file -> binary */
331 }
332 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
333 while( (n-=2)>0 ){
334 c = *++z; ++j;
335 if( c==0 ){
336 if( pFlags ) *pFlags |= LOOK_NUL;
337 return 0; /* NUL character in a file -> binary */
338 }
339 if( c==UTF16BE_LF || c==UTF16LE_LF ){
340 int c2 = z[-1];
341 if( pFlags ){
342 *pFlags |= LOOK_LF;
@@ -344,20 +357,22 @@
344 *pFlags |= LOOK_CRLF;
345 }
346 }
347 if( j>UTF16_LENGTH_MASK ){
348 if( pFlags ) *pFlags |= LOOK_LENGTH;
349 return 0; /* Very long line -> binary */
350 }
351 j = 0;
 
 
352 }
353 }
354 if( j>UTF16_LENGTH_MASK ){
355 if( pFlags ) *pFlags |= LOOK_LENGTH;
356 return 0; /* Very long line -> binary */
357 }
358 return 1; /* No problems seen -> not binary */
359 }
360
361 /*
362 ** This function returns an array of bytes representing the byte-order-mark
363 ** for UTF-8.
@@ -395,23 +410,24 @@
395 const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
396 int *pnByte, /* OUT: The number of bytes used for the BOM. */
397 int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
398 ){
399 const unsigned short *z = (unsigned short *)blob_buffer(pContent);
 
400 int size = blob_size(pContent);
401
402 if( (size<2) || (size%2)
403 || (size>=4 && z[1]==0) ) return 0;
404 if( z[0] == 0xfffe ){
405 if( pbReverse ) *pbReverse = 1;
406 }else if( z[0] == 0xfeff ){
407 if( pbReverse ) *pbReverse = 0;
408 }else{
409 return 0;
410 }
411 if( pnByte ) *pnByte = 2;
412 return 1;
413 }
414
415 /*
416 ** Return true if two DLine elements are identical.
417 */
@@ -2450,5 +2466,38 @@
2450 for(i=0; i<ann.nOrig; i++){
2451 fossil_print("%s: %.*s\n",
2452 ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z);
2453 }
2454 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2455
--- src/diff.c
+++ src/diff.c
@@ -69,13 +69,15 @@
69 ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70 ** to convey status information about the blob content.
71 */
72 #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73 #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74 #define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */
75 #define LOOK_LF ((int)0x00000004) /* One or more LF chars were found. */
76 #define LOOK_CRLF ((int)0x00000008) /* One or more CR/LF pairs were found. */
77 #define LOOK_LENGTH ((int)0x00000010) /* An over length line was found. */
78 #define LOOK_ODD ((int)0x00000020) /* An odd number of bytes was found. */
79 #endif /* INTERFACE */
80
81 /*
82 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
83 */
@@ -217,31 +219,34 @@
219 ** validate any code points. It makes no attempt to detect if any [invalid]
220 ** switches between UTF-8 and other encodings occur.
221 **
222 ** The only code points that this function cares about are the NUL character,
223 ** carriage-return, and line-feed.
224 **
225 ** Whether or not this function examines the entire contents of the blob is
226 ** officially unspecified.
227 **
228 ************************************ WARNING **********************************
229 */
230 int looks_like_utf8(const Blob *pContent, int *pFlags){
231 const char *z = blob_buffer(pContent);
232 unsigned int n = blob_size(pContent);
233 int j, c, result = 1; /* Assume UTF-8 text, prove otherwise */
234
235 if( pFlags ) *pFlags = LOOK_NONE;
236 if( n==0 ) return result; /* Empty file -> text */
237 c = *z;
238 if( c==0 ){
239 if( pFlags ) *pFlags |= LOOK_NUL;
240 result = 0; /* NUL character in a file -> binary */
241 }
242 j = (c!='\n');
243 while( --n>0 ){
244 c = *++z; ++j;
245 if( c==0 ){
246 if( pFlags ) *pFlags |= LOOK_NUL;
247 result = 0; /* NUL character in a file -> binary */
248 }
249 if( c=='\n' ){
250 int c2 = z[-1];
251 if( pFlags ){
252 *pFlags |= LOOK_LF;
@@ -249,20 +254,22 @@
254 *pFlags |= LOOK_CRLF;
255 }
256 }
257 if( j>LENGTH_MASK ){
258 if( pFlags ) *pFlags |= LOOK_LENGTH;
259 result = 0; /* Very long line -> binary */
260 }
261 j = 0;
262 }else if( c=='\r' ){
263 if( pFlags ) *pFlags |= LOOK_CR;
264 }
265 }
266 if( j>LENGTH_MASK ){
267 if( pFlags ) *pFlags |= LOOK_LENGTH;
268 result = 0; /* Very long line -> binary */
269 }
270 return result; /* 1 -> no problems seen (text), 0 -> binary */
271 }
272
273 /*
274 ** Define the type needed to represent a Unicode (UTF-16) character.
275 */
@@ -311,32 +318,38 @@
318 ** validate any code points. It makes no attempt to detect if any [invalid]
319 ** switches between the UTF-16be and UTF-16le encodings occur.
320 **
321 ** The only code points that this function cares about are the NUL character,
322 ** carriage-return, and line-feed.
323 **
324 ** Whether or not this function examines the entire contents of the blob is
325 ** officially unspecified.
326 **
327 ************************************ WARNING **********************************
328 */
329 int looks_like_utf16(const Blob *pContent, int *pFlags){
330 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
331 unsigned int n = blob_size(pContent);
332 int j, c, result = 1; /* Assume UTF-16 text, prove otherwise */
333
334 if( pFlags ) *pFlags = LOOK_NONE;
335 if( n==0 ) return result; /* Empty file -> text */
336 if( n%2 ){
337 if( pFlags ) *pFlags |= LOOK_ODD;
338 return 0; /* Odd number of bytes -> binary (or UTF-8) */
339 }
340 c = *z;
341 if( c==0 ){
342 if( pFlags ) *pFlags |= LOOK_NUL;
343 result = 0; /* NUL character in a file -> binary */
344 }
345 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
346 while( (n-=2)>0 ){
347 c = *++z; ++j;
348 if( c==0 ){
349 if( pFlags ) *pFlags |= LOOK_NUL;
350 result = 0; /* NUL character in a file -> binary */
351 }
352 if( c==UTF16BE_LF || c==UTF16LE_LF ){
353 int c2 = z[-1];
354 if( pFlags ){
355 *pFlags |= LOOK_LF;
@@ -344,20 +357,22 @@
357 *pFlags |= LOOK_CRLF;
358 }
359 }
360 if( j>UTF16_LENGTH_MASK ){
361 if( pFlags ) *pFlags |= LOOK_LENGTH;
362 result = 0; /* Very long line -> binary */
363 }
364 j = 0;
365 }else if( c==UTF16BE_CR || c==UTF16LE_CR ){
366 if( pFlags ) *pFlags |= LOOK_CR;
367 }
368 }
369 if( j>UTF16_LENGTH_MASK ){
370 if( pFlags ) *pFlags |= LOOK_LENGTH;
371 result = 0; /* Very long line -> binary */
372 }
373 return result; /* 1 -> no problems seen (text), 0 -> binary */
374 }
375
376 /*
377 ** This function returns an array of bytes representing the byte-order-mark
378 ** for UTF-8.
@@ -395,23 +410,24 @@
410 const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
411 int *pnByte, /* OUT: The number of bytes used for the BOM. */
412 int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
413 ){
414 const unsigned short *z = (unsigned short *)blob_buffer(pContent);
415 int bomSize = sizeof(unsigned short);
416 int size = blob_size(pContent);
417
418 if( size<bomSize ) return 0; /* No: cannot read BOM. */
419 if( size>=(2*bomSize) && z[1]==0 ) return 0; /* No: possible UTF-32. */
420 if( z[0]==0xfffe ){
421 if( pbReverse ) *pbReverse = 1;
422 }else if( z[0]==0xfeff ){
423 if( pbReverse ) *pbReverse = 0;
424 }else{
425 return 0; /* No: UTF-16 byte-order-mark not found. */
426 }
427 if( pnByte ) *pnByte = bomSize;
428 return 1; /* Yes. */
429 }
430
431 /*
432 ** Return true if two DLine elements are identical.
433 */
@@ -2450,5 +2466,38 @@
2466 for(i=0; i<ann.nOrig; i++){
2467 fossil_print("%s: %.*s\n",
2468 ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z);
2469 }
2470 }
2471
2472 /*
2473 ** COMMAND: test-looks-like-utf
2474 **
2475 ** Usage: %fossil test-looks-like-utf FILENAME
2476 **
2477 ** FILENAME is the name of a file to check for textual content in the UTF-8
2478 ** and/or UTF-16 encodings.
2479 */
2480 void looks_like_utf_test_cmd(void){
2481 Blob blob; /* the contents of the specified file */
2482 int eType; /* return value of looks_like_utf8/utf16() */
2483 int fUtf8; /* return value of starts_with_utf8_bom() */
2484 int fUtf16; /* return value of starts_with_utf16_bom() */
2485 int lookFlags; /* output flags from looks_like_utf8/utf16() */
2486 if( g.argc<3 ) usage("FILENAME");
2487 blob_read_from_file(&blob, g.argv[2]);
2488 fUtf8 = starts_with_utf8_bom(&blob, 0);
2489 fUtf16 = starts_with_utf16_bom(&blob, 0, 0);
2490 eType = fUtf16 ? looks_like_utf16(&blob, &lookFlags) :
2491 looks_like_utf8(&blob, &lookFlags);
2492 fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2493 fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2494 fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no");
2495 fossil_print("Looks like UTF-%s: %s\n",fUtf16?"16":"8",eType?"yes":"no");
2496 fossil_print("Has flag LOOK_NUL: %s\n",(lookFlags&LOOK_NUL)?"yes":"no");
2497 fossil_print("Has flag LOOK_CR: %s\n",(lookFlags&LOOK_CR)?"yes":"no");
2498 fossil_print("Has flag LOOK_LF: %s\n",(lookFlags&LOOK_LF)?"yes":"no");
2499 fossil_print("Has flag LOOK_CRLF: %s\n",(lookFlags&LOOK_CRLF)?"yes":"no");
2500 fossil_print("Has flag LOOK_LENGTH: %s\n",(lookFlags&LOOK_LENGTH)?"yes":"no");
2501 fossil_print("Has flag LOOK_ODD: %s\n",(lookFlags&LOOK_ODD)?"yes":"no");
2502 blob_reset(&blob);
2503 }
2504
+40 -24
--- src/glob.c
+++ src/glob.c
@@ -29,16 +29,17 @@
2929
** zVal: "x"
3030
** zGlobList: "*.o,*.obj"
3131
**
3232
** Result: "(x GLOB '*.o' OR x GLOB '*.obj')"
3333
**
34
-** Each element of the GLOB list may optionally be enclosed in either '...'
35
-** or "...". This allows commas in the expression. Whitespace at the
36
-** beginning and end of each GLOB pattern is ignored, except when enclosed
37
-** within '...' or "...".
34
+** Commas and whitespace are considered to be element delimiters. Each
35
+** element of the GLOB list may optionally be enclosed in either '...' or
36
+** "...". This allows commas and/or whitespace to be used in the elements
37
+** themselves.
3838
**
39
-** This routine makes no effort to free the memory space it uses.
39
+** This routine makes no effort to free the memory space it uses, which
40
+** currently consists of a blob object and its contents.
4041
*/
4142
char *glob_expr(const char *zVal, const char *zGlobList){
4243
Blob expr;
4344
char *zSep = "(";
4445
int nTerm = 0;
@@ -46,21 +47,24 @@
4647
int cTerm;
4748
4849
if( zGlobList==0 || zGlobList[0]==0 ) return "0";
4950
blob_zero(&expr);
5051
while( zGlobList[0] ){
51
- while( fossil_isspace(zGlobList[0]) || zGlobList[0]==',' ) zGlobList++;
52
+ while( fossil_isspace(zGlobList[0]) || zGlobList[0]==',' ){
53
+ zGlobList++; /* Skip leading commas, spaces, and newlines */
54
+ }
5255
if( zGlobList[0]==0 ) break;
5356
if( zGlobList[0]=='\'' || zGlobList[0]=='"' ){
5457
cTerm = zGlobList[0];
5558
zGlobList++;
5659
}else{
5760
cTerm = ',';
5861
}
59
- for(i=0; zGlobList[i] && zGlobList[i]!=cTerm && zGlobList[i]!='\n'; i++){}
60
- if( cTerm==',' ){
61
- while( i>0 && fossil_isspace(zGlobList[i-1]) ){ i--; }
62
+ /* Find the next delimiter (or the end of the string). */
63
+ for(i=0; zGlobList[i] && zGlobList[i]!=cTerm; i++){
64
+ if( cTerm!=',' ) continue; /* If quoted, keep going. */
65
+ if( fossil_isspace(zGlobList[i]) ) break; /* If space, stop. */
6266
}
6367
blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
6468
zSep = " OR ";
6569
if( cTerm!=',' && zGlobList[i] ) i++;
6670
zGlobList += i;
@@ -85,24 +89,24 @@
8589
char **azPattern; /* Array of pointers to patterns */
8690
};
8791
#endif /* INTERFACE */
8892
8993
/*
90
-** zPatternList is a comma-separate list of glob patterns. Parse up
94
+** zPatternList is a comma-separated list of glob patterns. Parse up
9195
** that list and use it to create a new Glob object.
9296
**
9397
** Elements of the glob list may be optionally enclosed in single or
94
-** double-quotes. This allows a comma to be part of a glob.
98
+** double-quotes. This allows a comma to be part of a glob pattern.
9599
**
96100
** Leading and trailing spaces on unquoted glob patterns are ignored.
97101
**
98102
** An empty or null pattern list results in a null glob, which will
99103
** match nothing.
100104
*/
101105
Glob *glob_create(const char *zPatternList){
102106
int nList; /* Size of zPatternList in bytes */
103
- int i, j; /* Loop counters */
107
+ int i; /* Loop counters */
104108
Glob *p; /* The glob being created */
105109
char *z; /* Copy of the pattern list */
106110
char delimiter; /* '\'' or '\"' or 0 */
107111
108112
if( zPatternList==0 || zPatternList[0]==0 ) return 0;
@@ -110,27 +114,26 @@
110114
p = fossil_malloc( sizeof(*p) + nList+1 );
111115
memset(p, 0, sizeof(*p));
112116
z = (char*)&p[1];
113117
memcpy(z, zPatternList, nList+1);
114118
while( z[0] ){
115
- while( z[0]==',' || z[0]==' ' || z[0]=='\n' || z[0]=='\r' ){
116
- z++; /* Skip leading spaces and newlines */
119
+ while( fossil_isspace(z[0]) || z[0]==',' ){
120
+ z++; /* Skip leading commas, spaces, and newlines */
117121
}
122
+ if( z[0]==0 ) break;
118123
if( z[0]=='\'' || z[0]=='"' ){
119124
delimiter = z[0];
120125
z++;
121126
}else{
122127
delimiter = ',';
123128
}
124
- if( z[0]==0 ) break;
125129
p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) );
126130
p->azPattern[p->nPattern++] = z;
127
- for(i=0; z[i] && z[i]!=delimiter && z[i]!='\n' && z[i]!='\r'; i++){}
128
- if( delimiter==',' ){
129
- /* Remove trailing spaces / newlines on a comma-delimited pattern */
130
- for(j=i; j>1 && (z[j-1]==' ' || z[j-1]=='\n' || z[j-1]=='\r'); j--){}
131
- if( j<i ) z[j] = 0;
131
+ /* Find the next delimiter (or the end of the string). */
132
+ for(i=0; z[i] && z[i]!=delimiter; i++){
133
+ if( delimiter!=',' ) continue; /* If quoted, keep going. */
134
+ if( fossil_isspace(z[i]) ) break; /* If space, stop. */
132135
}
133136
if( z[i]==0 ) break;
134137
z[i] = 0;
135138
z += i+1;
136139
}
@@ -245,22 +248,35 @@
245248
/*
246249
** COMMAND: test-glob
247250
**
248251
** Usage: %fossil test-glob PATTERN STRING...
249252
**
250
-** PATTERN is a comma-separated list of glob patterns. Show which of
251
-** the STRINGs that follow match the PATTERN.
253
+** PATTERN is a comma- and whitespace-separated list of optionally
254
+** quoted glob patterns. Show which of the STRINGs that follow match
255
+** the PATTERN.
256
+**
257
+** If PATTERN begins with "@" then the rest of the pattern is understood
258
+** to be a setting name (such as binary-glob, crnl-glob, or encoding-glob)
259
+** and the value of that setting is used as the actual glob pattern.
252260
*/
253261
void glob_test_cmd(void){
254262
Glob *pGlob;
255263
int i;
264
+ char *zPattern;
256265
if( g.argc<4 ) usage("PATTERN STRING ...");
257
- fossil_print("SQL expression: %s\n", glob_expr("x", g.argv[2]));
258
- pGlob = glob_create(g.argv[2]);
266
+ zPattern = g.argv[2];
267
+ if( zPattern[0]=='@' ){
268
+ db_find_and_open_repository(OPEN_ANY_SCHEMA,0);
269
+ zPattern = db_get(zPattern+1, 0);
270
+ if( zPattern==0 ) fossil_fatal("no such setting: %s", g.argv[2]+1);
271
+ fossil_print("GLOB pattern: %s\n", zPattern);
272
+ }
273
+ fossil_print("SQL expression: %s\n", glob_expr("x", zPattern));
274
+ pGlob = glob_create(zPattern);
259275
for(i=0; i<pGlob->nPattern; i++){
260276
fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
261277
}
262278
for(i=3; i<g.argc; i++){
263279
fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
264280
}
265281
glob_free(pGlob);
266282
}
267283
268284
ADDED test/glob.test
--- src/glob.c
+++ src/glob.c
@@ -29,16 +29,17 @@
29 ** zVal: "x"
30 ** zGlobList: "*.o,*.obj"
31 **
32 ** Result: "(x GLOB '*.o' OR x GLOB '*.obj')"
33 **
34 ** Each element of the GLOB list may optionally be enclosed in either '...'
35 ** or "...". This allows commas in the expression. Whitespace at the
36 ** beginning and end of each GLOB pattern is ignored, except when enclosed
37 ** within '...' or "...".
38 **
39 ** This routine makes no effort to free the memory space it uses.
 
40 */
41 char *glob_expr(const char *zVal, const char *zGlobList){
42 Blob expr;
43 char *zSep = "(";
44 int nTerm = 0;
@@ -46,21 +47,24 @@
46 int cTerm;
47
48 if( zGlobList==0 || zGlobList[0]==0 ) return "0";
49 blob_zero(&expr);
50 while( zGlobList[0] ){
51 while( fossil_isspace(zGlobList[0]) || zGlobList[0]==',' ) zGlobList++;
 
 
52 if( zGlobList[0]==0 ) break;
53 if( zGlobList[0]=='\'' || zGlobList[0]=='"' ){
54 cTerm = zGlobList[0];
55 zGlobList++;
56 }else{
57 cTerm = ',';
58 }
59 for(i=0; zGlobList[i] && zGlobList[i]!=cTerm && zGlobList[i]!='\n'; i++){}
60 if( cTerm==',' ){
61 while( i>0 && fossil_isspace(zGlobList[i-1]) ){ i--; }
 
62 }
63 blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
64 zSep = " OR ";
65 if( cTerm!=',' && zGlobList[i] ) i++;
66 zGlobList += i;
@@ -85,24 +89,24 @@
85 char **azPattern; /* Array of pointers to patterns */
86 };
87 #endif /* INTERFACE */
88
89 /*
90 ** zPatternList is a comma-separate list of glob patterns. Parse up
91 ** that list and use it to create a new Glob object.
92 **
93 ** Elements of the glob list may be optionally enclosed in single or
94 ** double-quotes. This allows a comma to be part of a glob.
95 **
96 ** Leading and trailing spaces on unquoted glob patterns are ignored.
97 **
98 ** An empty or null pattern list results in a null glob, which will
99 ** match nothing.
100 */
101 Glob *glob_create(const char *zPatternList){
102 int nList; /* Size of zPatternList in bytes */
103 int i, j; /* Loop counters */
104 Glob *p; /* The glob being created */
105 char *z; /* Copy of the pattern list */
106 char delimiter; /* '\'' or '\"' or 0 */
107
108 if( zPatternList==0 || zPatternList[0]==0 ) return 0;
@@ -110,27 +114,26 @@
110 p = fossil_malloc( sizeof(*p) + nList+1 );
111 memset(p, 0, sizeof(*p));
112 z = (char*)&p[1];
113 memcpy(z, zPatternList, nList+1);
114 while( z[0] ){
115 while( z[0]==',' || z[0]==' ' || z[0]=='\n' || z[0]=='\r' ){
116 z++; /* Skip leading spaces and newlines */
117 }
 
118 if( z[0]=='\'' || z[0]=='"' ){
119 delimiter = z[0];
120 z++;
121 }else{
122 delimiter = ',';
123 }
124 if( z[0]==0 ) break;
125 p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) );
126 p->azPattern[p->nPattern++] = z;
127 for(i=0; z[i] && z[i]!=delimiter && z[i]!='\n' && z[i]!='\r'; i++){}
128 if( delimiter==',' ){
129 /* Remove trailing spaces / newlines on a comma-delimited pattern */
130 for(j=i; j>1 && (z[j-1]==' ' || z[j-1]=='\n' || z[j-1]=='\r'); j--){}
131 if( j<i ) z[j] = 0;
132 }
133 if( z[i]==0 ) break;
134 z[i] = 0;
135 z += i+1;
136 }
@@ -245,22 +248,35 @@
245 /*
246 ** COMMAND: test-glob
247 **
248 ** Usage: %fossil test-glob PATTERN STRING...
249 **
250 ** PATTERN is a comma-separated list of glob patterns. Show which of
251 ** the STRINGs that follow match the PATTERN.
 
 
 
 
 
252 */
253 void glob_test_cmd(void){
254 Glob *pGlob;
255 int i;
 
256 if( g.argc<4 ) usage("PATTERN STRING ...");
257 fossil_print("SQL expression: %s\n", glob_expr("x", g.argv[2]));
258 pGlob = glob_create(g.argv[2]);
 
 
 
 
 
 
 
259 for(i=0; i<pGlob->nPattern; i++){
260 fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
261 }
262 for(i=3; i<g.argc; i++){
263 fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
264 }
265 glob_free(pGlob);
266 }
267
268 ADDED test/glob.test
--- src/glob.c
+++ src/glob.c
@@ -29,16 +29,17 @@
29 ** zVal: "x"
30 ** zGlobList: "*.o,*.obj"
31 **
32 ** Result: "(x GLOB '*.o' OR x GLOB '*.obj')"
33 **
34 ** Commas and whitespace are considered to be element delimiters. Each
35 ** element of the GLOB list may optionally be enclosed in either '...' or
36 ** "...". This allows commas and/or whitespace to be used in the elements
37 ** themselves.
38 **
39 ** This routine makes no effort to free the memory space it uses, which
40 ** currently consists of a blob object and its contents.
41 */
42 char *glob_expr(const char *zVal, const char *zGlobList){
43 Blob expr;
44 char *zSep = "(";
45 int nTerm = 0;
@@ -46,21 +47,24 @@
47 int cTerm;
48
49 if( zGlobList==0 || zGlobList[0]==0 ) return "0";
50 blob_zero(&expr);
51 while( zGlobList[0] ){
52 while( fossil_isspace(zGlobList[0]) || zGlobList[0]==',' ){
53 zGlobList++; /* Skip leading commas, spaces, and newlines */
54 }
55 if( zGlobList[0]==0 ) break;
56 if( zGlobList[0]=='\'' || zGlobList[0]=='"' ){
57 cTerm = zGlobList[0];
58 zGlobList++;
59 }else{
60 cTerm = ',';
61 }
62 /* Find the next delimiter (or the end of the string). */
63 for(i=0; zGlobList[i] && zGlobList[i]!=cTerm; i++){
64 if( cTerm!=',' ) continue; /* If quoted, keep going. */
65 if( fossil_isspace(zGlobList[i]) ) break; /* If space, stop. */
66 }
67 blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
68 zSep = " OR ";
69 if( cTerm!=',' && zGlobList[i] ) i++;
70 zGlobList += i;
@@ -85,24 +89,24 @@
89 char **azPattern; /* Array of pointers to patterns */
90 };
91 #endif /* INTERFACE */
92
93 /*
94 ** zPatternList is a comma-separated list of glob patterns. Parse up
95 ** that list and use it to create a new Glob object.
96 **
97 ** Elements of the glob list may be optionally enclosed in single or
98 ** double-quotes. This allows a comma to be part of a glob pattern.
99 **
100 ** Leading and trailing spaces on unquoted glob patterns are ignored.
101 **
102 ** An empty or null pattern list results in a null glob, which will
103 ** match nothing.
104 */
105 Glob *glob_create(const char *zPatternList){
106 int nList; /* Size of zPatternList in bytes */
107 int i; /* Loop counters */
108 Glob *p; /* The glob being created */
109 char *z; /* Copy of the pattern list */
110 char delimiter; /* '\'' or '\"' or 0 */
111
112 if( zPatternList==0 || zPatternList[0]==0 ) return 0;
@@ -110,27 +114,26 @@
114 p = fossil_malloc( sizeof(*p) + nList+1 );
115 memset(p, 0, sizeof(*p));
116 z = (char*)&p[1];
117 memcpy(z, zPatternList, nList+1);
118 while( z[0] ){
119 while( fossil_isspace(z[0]) || z[0]==',' ){
120 z++; /* Skip leading commas, spaces, and newlines */
121 }
122 if( z[0]==0 ) break;
123 if( z[0]=='\'' || z[0]=='"' ){
124 delimiter = z[0];
125 z++;
126 }else{
127 delimiter = ',';
128 }
 
129 p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) );
130 p->azPattern[p->nPattern++] = z;
131 /* Find the next delimiter (or the end of the string). */
132 for(i=0; z[i] && z[i]!=delimiter; i++){
133 if( delimiter!=',' ) continue; /* If quoted, keep going. */
134 if( fossil_isspace(z[i]) ) break; /* If space, stop. */
 
135 }
136 if( z[i]==0 ) break;
137 z[i] = 0;
138 z += i+1;
139 }
@@ -245,22 +248,35 @@
248 /*
249 ** COMMAND: test-glob
250 **
251 ** Usage: %fossil test-glob PATTERN STRING...
252 **
253 ** PATTERN is a comma- and whitespace-separated list of optionally
254 ** quoted glob patterns. Show which of the STRINGs that follow match
255 ** the PATTERN.
256 **
257 ** If PATTERN begins with "@" then the rest of the pattern is understood
258 ** to be a setting name (such as binary-glob, crnl-glob, or encoding-glob)
259 ** and the value of that setting is used as the actual glob pattern.
260 */
261 void glob_test_cmd(void){
262 Glob *pGlob;
263 int i;
264 char *zPattern;
265 if( g.argc<4 ) usage("PATTERN STRING ...");
266 zPattern = g.argv[2];
267 if( zPattern[0]=='@' ){
268 db_find_and_open_repository(OPEN_ANY_SCHEMA,0);
269 zPattern = db_get(zPattern+1, 0);
270 if( zPattern==0 ) fossil_fatal("no such setting: %s", g.argv[2]+1);
271 fossil_print("GLOB pattern: %s\n", zPattern);
272 }
273 fossil_print("SQL expression: %s\n", glob_expr("x", zPattern));
274 pGlob = glob_create(zPattern);
275 for(i=0; i<pGlob->nPattern; i++){
276 fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
277 }
278 for(i=3; i<g.argc; i++){
279 fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
280 }
281 glob_free(pGlob);
282 }
283
284 ADDED test/glob.test
--- a/test/glob.test
+++ b/test/glob.test
@@ -0,0 +1,118 @@
1
+#
2
+# Copyright (c) 2013 D. Richard Hipp
3
+#
4
+# This program is free software; you can redistribute it and/or
5
+# modify it under the terms of the Simplified BSD License (also
6
+# known as the "2-Clause License" or "FreeBSD License".)
7
+#
8
+# This program is distributed in the hope that it will be useful,
9
+# but without any warranty; without even the implied warranty of
10
+# merchantability or fitness for a particular purpose.
11
+#
12
+# Author contact information:
13
+# [email protected]
14
+# http://www.hwaci.com/drh/
15
+#
16
+############################################################################
17
+#
18
+# Test glob pattern parsing
19
+#
20
+
21
+ parsing
22
+#
23
+
24
+test_setup ""
25
+
26
+proc glob-parse {testname args} {
27
+ set i 1
28
+ foreach {pattern string result} $args {
29
+ fossil test-glob $pattern $string
30
+ test glob-parse-$testname.$i {$::RESULT eq $result}
31
+ incr i
32
+ }
33
+}
34
+
35
+glob-parse 100 test test [string map [list \r\n \n] \
36
+{SQL expression: (x GLOB 'test')
37
+pattern[0] = [test]
38
+1 t*')
39
+pattern[0] = [t*]
40
+1 1 test}]
41
+
42
+glob-parse 101 "one two" one [string map [list \r\n \n] \
43
+{SQL expression: (x GLOB 'one' OR x GLOB 'two')
44
+pattern[0] = 0 two one}]
45
+
46
+glob-p02 t* test-parse 108 "\"o*\rtwo\" \"thrt*#
47
+# Copyright (c)#
48
+# 1 test}]
49
+
50
+glob-parse 103 "o*test}]
51
+
52
+glob-parse 101 "one two" one [string map [list \r\n \n] \
53
+*' OR x GLOB 'two')
54
+pattern[0] =0 two one}]
55
+
56
+glob-p04 {"o* two" "three four"} "one two" [string map [list \r\n \n] \
57
+{SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
58
+pattern[0] = [o* two]
59
+pone two}]
60
+
61
+glob-parse 105 {"o* two" "three four"} "5 {"o* two" "three four"} "two one" [string map [list \r\n \n] \
62
+{SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
63
+pattern[0] = [o* two]
64
+p]
65
+0 0 two one}]
66
+
67
+glob-p0 0 two one}]
68
+
69
+glob-parse 106 "\"o*\ntwo\" \"three\nfour\"" "one\ntwo" \
70
+[string map [list \r\n \n] \
71
+{SQL expression: (x GLOB 'o*
72
+two' OR x GLOB 'three
73
+fou] = [one]
74
+pattern[1] = 1 1 one
75
+two}]
76
+
77
+glob-parse 107 "\"o*\ntwo\" \"three\nfour\"" "two\none" \
78
+[string map [list \r\n \n] \
79
+{SQL expression: (x GLOB 'o*
80
+two' OR x GLOB 'three
81
+foutwo
82
+0 two one}]
83
+
84
+glob-p08 "\"o*\rtwo\" \"three\rfour\"" "one\rtwo" \
85
+[string map [list \r\n \n] \
86
+{SQL expression: (x GLOB 'o*
87
+two' OR x GLOB 'three
88
+four')
89
+pattern[0] = [o*
90
+two]
91
+p] = [one]
92
+pattern[1] =
93
+ foreach {pattern string result} $args {
94
+ fossil test-glob $pattern $string
95
+ test glob-parse-$testname.$i {$::RESULT eq $result}
96
+ incr i
97
+ }
98
+}
99
+
100
+gtwo
101
+0 two one}]
102
+
103
+glob-patring map [list \r\n \n] \
104
+{SQL expression: (x GLOB 'test')
105
+pattern[0] = [test]
106
+1 1 test}]
107
+
108
+glob-parse 101 "one two" one [string map [list \r\n \n] \
109
+{] = [one]
110
+pattern[1] = [two]
111
+1 1 one}]
112
+
113
+glob-parse 102 t* test [string map [list \r\n \n] \
114
+{SQL expression: (x GLOB 't*')
115
+pattern[0] = [t*]
116
+1 1 test}]
117
+
118
+glob-parse 103 "o* two" one [st
--- a/test/glob.test
+++ b/test/glob.test
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/test/glob.test
+++ b/test/glob.test
@@ -0,0 +1,118 @@
1 #
2 # Copyright (c) 2013 D. Richard Hipp
3 #
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the Simplified BSD License (also
6 # known as the "2-Clause License" or "FreeBSD License".)
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but without any warranty; without even the implied warranty of
10 # merchantability or fitness for a particular purpose.
11 #
12 # Author contact information:
13 # [email protected]
14 # http://www.hwaci.com/drh/
15 #
16 ############################################################################
17 #
18 # Test glob pattern parsing
19 #
20
21 parsing
22 #
23
24 test_setup ""
25
26 proc glob-parse {testname args} {
27 set i 1
28 foreach {pattern string result} $args {
29 fossil test-glob $pattern $string
30 test glob-parse-$testname.$i {$::RESULT eq $result}
31 incr i
32 }
33 }
34
35 glob-parse 100 test test [string map [list \r\n \n] \
36 {SQL expression: (x GLOB 'test')
37 pattern[0] = [test]
38 1 t*')
39 pattern[0] = [t*]
40 1 1 test}]
41
42 glob-parse 101 "one two" one [string map [list \r\n \n] \
43 {SQL expression: (x GLOB 'one' OR x GLOB 'two')
44 pattern[0] = 0 two one}]
45
46 glob-p02 t* test-parse 108 "\"o*\rtwo\" \"thrt*#
47 # Copyright (c)#
48 # 1 test}]
49
50 glob-parse 103 "o*test}]
51
52 glob-parse 101 "one two" one [string map [list \r\n \n] \
53 *' OR x GLOB 'two')
54 pattern[0] =0 two one}]
55
56 glob-p04 {"o* two" "three four"} "one two" [string map [list \r\n \n] \
57 {SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
58 pattern[0] = [o* two]
59 pone two}]
60
61 glob-parse 105 {"o* two" "three four"} "5 {"o* two" "three four"} "two one" [string map [list \r\n \n] \
62 {SQL expression: (x GLOB 'o* two' OR x GLOB 'three four')
63 pattern[0] = [o* two]
64 p]
65 0 0 two one}]
66
67 glob-p0 0 two one}]
68
69 glob-parse 106 "\"o*\ntwo\" \"three\nfour\"" "one\ntwo" \
70 [string map [list \r\n \n] \
71 {SQL expression: (x GLOB 'o*
72 two' OR x GLOB 'three
73 fou] = [one]
74 pattern[1] = 1 1 one
75 two}]
76
77 glob-parse 107 "\"o*\ntwo\" \"three\nfour\"" "two\none" \
78 [string map [list \r\n \n] \
79 {SQL expression: (x GLOB 'o*
80 two' OR x GLOB 'three
81 foutwo
82 0 two one}]
83
84 glob-p08 "\"o*\rtwo\" \"three\rfour\"" "one\rtwo" \
85 [string map [list \r\n \n] \
86 {SQL expression: (x GLOB 'o*
87 two' OR x GLOB 'three
88 four')
89 pattern[0] = [o*
90 two]
91 p] = [one]
92 pattern[1] =
93 foreach {pattern string result} $args {
94 fossil test-glob $pattern $string
95 test glob-parse-$testname.$i {$::RESULT eq $result}
96 incr i
97 }
98 }
99
100 gtwo
101 0 two one}]
102
103 glob-patring map [list \r\n \n] \
104 {SQL expression: (x GLOB 'test')
105 pattern[0] = [test]
106 1 1 test}]
107
108 glob-parse 101 "one two" one [string map [list \r\n \n] \
109 {] = [one]
110 pattern[1] = [two]
111 1 1 one}]
112
113 glob-parse 102 t* test [string map [list \r\n \n] \
114 {SQL expression: (x GLOB 't*')
115 pattern[0] = [t*]
116 1 1 test}]
117
118 glob-parse 103 "o* two" one [st

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button