Fossil SCM

New warning when a file is considered binary only because of long lines.

jan.nijtmans 2013-03-03 15:32 trunk
Commit 3a74f9fe52c3a6b1e97a03cf8a0e5171d0e88f7a
2 files changed +25 +27 -18
--- src/checkin.c
+++ src/checkin.c
@@ -909,10 +909,35 @@
909909
static int allOk = 0; /* Set to true to disable this routine */
910910
911911
if( allOk ) return 0;
912912
fUnicode = starts_with_utf16_bom(p, 0, 0);
913913
eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
914
+ if( eType==-4){
915
+ const char *zWarning;
916
+ const char *zDisable;
917
+ Blob ans;
918
+ char cReply;
919
+
920
+ if (!binOk) {
921
+ zWarning = "long lines";
922
+ zDisable = "\"binary-glob\" setting";
923
+ blob_zero(&ans);
924
+ file_relative_name(zFilename, &fname, 0);
925
+ zMsg = mprintf(
926
+ "%s appears to be text, but contains %s. Use --no-warnings or the"
927
+ " %s to disable this warning.\nCommit anyhow (a=all/y/N)? ",
928
+ blob_str(&fname), zWarning, zDisable);
929
+ prompt_user(zMsg, &ans);
930
+ fossil_free(zMsg);
931
+ cReply = blob_str(&ans)[0];
932
+ if( cReply!='y' && cReply!='Y' ){
933
+ fossil_fatal("Abandoning commit due to %s in %s",
934
+ zWarning, blob_str(&fname));
935
+ }
936
+ blob_reset(&ans);
937
+ }
938
+ }
914939
if( eType==0 || eType==-1 || fUnicode ){
915940
const char *zWarning;
916941
const char *zDisable;
917942
const char *zConvert = "c=convert/";
918943
Blob ans;
919944
--- src/checkin.c
+++ src/checkin.c
@@ -909,10 +909,35 @@
909 static int allOk = 0; /* Set to true to disable this routine */
910
911 if( allOk ) return 0;
912 fUnicode = starts_with_utf16_bom(p, 0, 0);
913 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
914 if( eType==0 || eType==-1 || fUnicode ){
915 const char *zWarning;
916 const char *zDisable;
917 const char *zConvert = "c=convert/";
918 Blob ans;
919
--- src/checkin.c
+++ src/checkin.c
@@ -909,10 +909,35 @@
909 static int allOk = 0; /* Set to true to disable this routine */
910
911 if( allOk ) return 0;
912 fUnicode = starts_with_utf16_bom(p, 0, 0);
913 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
914 if( eType==-4){
915 const char *zWarning;
916 const char *zDisable;
917 Blob ans;
918 char cReply;
919
920 if (!binOk) {
921 zWarning = "long lines";
922 zDisable = "\"binary-glob\" setting";
923 blob_zero(&ans);
924 file_relative_name(zFilename, &fname, 0);
925 zMsg = mprintf(
926 "%s appears to be text, but contains %s. Use --no-warnings or the"
927 " %s to disable this warning.\nCommit anyhow (a=all/y/N)? ",
928 blob_str(&fname), zWarning, zDisable);
929 prompt_user(zMsg, &ans);
930 fossil_free(zMsg);
931 cReply = blob_str(&ans)[0];
932 if( cReply!='y' && cReply!='Y' ){
933 fossil_fatal("Abandoning commit due to %s in %s",
934 zWarning, blob_str(&fname));
935 }
936 blob_reset(&ans);
937 }
938 }
939 if( eType==0 || eType==-1 || fUnicode ){
940 const char *zWarning;
941 const char *zDisable;
942 const char *zConvert = "c=convert/";
943 Blob ans;
944
+27 -18
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
5757
"more than 10,000 changes\n"
5858
5959
#define DIFF_TOO_MANY_CHANGES_HTML \
6060
"<p class='generalError'>More than 10,000 changes</p>\n"
6161
62
-#define looks_like_binary(blob) (looks_like_utf8((blob)) == 0)
62
+#define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0)
6363
#endif /* INTERFACE */
6464
6565
/*
6666
** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
6767
*/
@@ -198,10 +198,14 @@
198198
** to be binary.
199199
**
200200
** (-1) -- The content appears to consist entirely of text, with lines
201201
** delimited by carriage-return, line-feed pairs; however, the
202202
** encoding may not be UTF-8.
203
+**
204
+** (-4) -- The same as 0, but the determination is based on the fact that
205
+** the blob might be text (any encoding) but it has a line length
206
+** bigger than the diff logic in fossil can handle.
203207
**
204208
************************************ WARNING **********************************
205209
**
206210
** This function does not validate that the blob content is properly formed
207211
** UTF-8. It assumes that all code points are the same size. It does not
@@ -215,36 +219,37 @@
215219
*/
216220
int looks_like_utf8(const Blob *pContent){
217221
const char *z = blob_buffer(pContent);
218222
unsigned int n = blob_size(pContent);
219223
int j, c;
220
- int result = 1; /* Assume UTF-8 text with no CR/NL */
224
+ int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
221225
222226
/* Check individual lines.
223227
*/
224
- if( n==0 ) return result; /* Empty file -> text */
228
+ if( n==0 ) return 1; /* Empty file -> text */
225229
c = *z;
226
- if( c==0 ) return 0; /* Zero byte in a file -> binary */
227230
j = (c!='\n');
231
+ if( c==0 ){
232
+ return 0; /* Zero byte in a file -> binary */
233
+ }
228234
while( --n>0 ){
229235
c = *++z; ++j;
230236
if( c==0 ) return 0; /* Zero byte in a file -> binary */
231237
if( c=='\n' ){
232
- int c2 = z[-1];
233
- if( c2=='\r' ){
234
- result = -1; /* Contains CR/NL, continue */
238
+ if( z[-1]=='\r' ){
239
+ flags |= 2; /* Contains CR/NL, continue */
235240
}
236241
if( j>LENGTH_MASK ){
237
- return 0; /* Very long line -> binary */
242
+ flags |= 1; /* Very long line, continue */
238243
}
239244
j = 0;
240245
}
241246
}
242
- if( j>LENGTH_MASK ){
243
- return 0; /* Very long line -> binary */
247
+ if( (flags&1) || (j>LENGTH_MASK) ){
248
+ return -4; /* Very long line -> binary */
244249
}
245
- return result; /* No problems seen -> not binary */
250
+ return 1-flags; /* No problems seen -> not binary */
246251
}
247252
248253
/*
249254
** Define the type needed to represent a Unicode (UTF-16) character.
250255
*/
@@ -288,10 +293,14 @@
288293
** to be binary.
289294
**
290295
** (-1) -- The content appears to consist entirely of text, with lines
291296
** delimited by carriage-return, line-feed pairs; however, the
292297
** encoding may not be UTF-16.
298
+**
299
+** (-4) -- The same as 0, but the determination is based on the fact that
300
+** the blob might be text (any encoding) but it has a line length
301
+** bigger than the diff logic in fossil can handle.
293302
**
294303
************************************ WARNING **********************************
295304
**
296305
** This function does not validate that the blob content is properly formed
297306
** UTF-16. It assumes that all code points are the same size. It does not
@@ -305,15 +314,15 @@
305314
*/
306315
int looks_like_utf16(const Blob *pContent){
307316
const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
308317
unsigned int n = blob_size(pContent);
309318
int j, c;
310
- int result = 1; /* Assume UTF-16 text with no CR/NL */
319
+ int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
311320
312321
/* Check individual lines.
313322
*/
314
- if( n==0 ) return result; /* Empty file -> text */
323
+ if( n==0 ) return 1; /* Empty file -> text */
315324
if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
316325
c = *z;
317326
if( c==0 ) return 0; /* NUL character in a file -> binary */
318327
j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
319328
while( (n-=2)>0 ){
@@ -320,22 +329,22 @@
320329
c = *++z; ++j;
321330
if( c==0 ) return 0; /* NUL character in a file -> binary */
322331
if( c==UTF16BE_LF || c==UTF16LE_LF ){
323332
int c2 = z[-1];
324333
if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
325
- result = -1; /* Contains CR/NL, continue */
334
+ flags |= 2; /* Contains CR/NL, continue */
326335
}
327336
if( j>UTF16_LENGTH_MASK ){
328
- return 0; /* Very long line -> binary */
337
+ flags |= 1; /* Very long line, continue */
329338
}
330339
j = 0;
331340
}
332341
}
333
- if( j>UTF16_LENGTH_MASK ){
334
- return 0; /* Very long line -> binary */
342
+ if( (flags&1) || (j>LENGTH_MASK) ){
343
+ return -4; /* Very long line -> binary */
335344
}
336
- return result; /* No problems seen -> not binary */
345
+ return 1-flags; /* No problems seen -> not binary */
337346
}
338347
339348
/*
340349
** This function returns an array of bytes representing the byte-order-mark
341350
** for UTF-8.
342351
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
57 "more than 10,000 changes\n"
58
59 #define DIFF_TOO_MANY_CHANGES_HTML \
60 "<p class='generalError'>More than 10,000 changes</p>\n"
61
62 #define looks_like_binary(blob) (looks_like_utf8((blob)) == 0)
63 #endif /* INTERFACE */
64
65 /*
66 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67 */
@@ -198,10 +198,14 @@
198 ** to be binary.
199 **
200 ** (-1) -- The content appears to consist entirely of text, with lines
201 ** delimited by carriage-return, line-feed pairs; however, the
202 ** encoding may not be UTF-8.
 
 
 
 
203 **
204 ************************************ WARNING **********************************
205 **
206 ** This function does not validate that the blob content is properly formed
207 ** UTF-8. It assumes that all code points are the same size. It does not
@@ -215,36 +219,37 @@
215 */
216 int looks_like_utf8(const Blob *pContent){
217 const char *z = blob_buffer(pContent);
218 unsigned int n = blob_size(pContent);
219 int j, c;
220 int result = 1; /* Assume UTF-8 text with no CR/NL */
221
222 /* Check individual lines.
223 */
224 if( n==0 ) return result; /* Empty file -> text */
225 c = *z;
226 if( c==0 ) return 0; /* Zero byte in a file -> binary */
227 j = (c!='\n');
 
 
 
228 while( --n>0 ){
229 c = *++z; ++j;
230 if( c==0 ) return 0; /* Zero byte in a file -> binary */
231 if( c=='\n' ){
232 int c2 = z[-1];
233 if( c2=='\r' ){
234 result = -1; /* Contains CR/NL, continue */
235 }
236 if( j>LENGTH_MASK ){
237 return 0; /* Very long line -> binary */
238 }
239 j = 0;
240 }
241 }
242 if( j>LENGTH_MASK ){
243 return 0; /* Very long line -> binary */
244 }
245 return result; /* No problems seen -> not binary */
246 }
247
248 /*
249 ** Define the type needed to represent a Unicode (UTF-16) character.
250 */
@@ -288,10 +293,14 @@
288 ** to be binary.
289 **
290 ** (-1) -- The content appears to consist entirely of text, with lines
291 ** delimited by carriage-return, line-feed pairs; however, the
292 ** encoding may not be UTF-16.
 
 
 
 
293 **
294 ************************************ WARNING **********************************
295 **
296 ** This function does not validate that the blob content is properly formed
297 ** UTF-16. It assumes that all code points are the same size. It does not
@@ -305,15 +314,15 @@
305 */
306 int looks_like_utf16(const Blob *pContent){
307 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
308 unsigned int n = blob_size(pContent);
309 int j, c;
310 int result = 1; /* Assume UTF-16 text with no CR/NL */
311
312 /* Check individual lines.
313 */
314 if( n==0 ) return result; /* Empty file -> text */
315 if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
316 c = *z;
317 if( c==0 ) return 0; /* NUL character in a file -> binary */
318 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
319 while( (n-=2)>0 ){
@@ -320,22 +329,22 @@
320 c = *++z; ++j;
321 if( c==0 ) return 0; /* NUL character in a file -> binary */
322 if( c==UTF16BE_LF || c==UTF16LE_LF ){
323 int c2 = z[-1];
324 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
325 result = -1; /* Contains CR/NL, continue */
326 }
327 if( j>UTF16_LENGTH_MASK ){
328 return 0; /* Very long line -> binary */
329 }
330 j = 0;
331 }
332 }
333 if( j>UTF16_LENGTH_MASK ){
334 return 0; /* Very long line -> binary */
335 }
336 return result; /* No problems seen -> not binary */
337 }
338
339 /*
340 ** This function returns an array of bytes representing the byte-order-mark
341 ** for UTF-8.
342
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
57 "more than 10,000 changes\n"
58
59 #define DIFF_TOO_MANY_CHANGES_HTML \
60 "<p class='generalError'>More than 10,000 changes</p>\n"
61
62 #define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0)
63 #endif /* INTERFACE */
64
65 /*
66 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67 */
@@ -198,10 +198,14 @@
198 ** to be binary.
199 **
200 ** (-1) -- The content appears to consist entirely of text, with lines
201 ** delimited by carriage-return, line-feed pairs; however, the
202 ** encoding may not be UTF-8.
203 **
204 ** (-4) -- The same as 0, but the determination is based on the fact that
205 ** the blob might be text (any encoding) but it has a line length
206 ** bigger than the diff logic in fossil can handle.
207 **
208 ************************************ WARNING **********************************
209 **
210 ** This function does not validate that the blob content is properly formed
211 ** UTF-8. It assumes that all code points are the same size. It does not
@@ -215,36 +219,37 @@
219 */
220 int looks_like_utf8(const Blob *pContent){
221 const char *z = blob_buffer(pContent);
222 unsigned int n = blob_size(pContent);
223 int j, c;
224 int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
225
226 /* Check individual lines.
227 */
228 if( n==0 ) return 1; /* Empty file -> text */
229 c = *z;
 
230 j = (c!='\n');
231 if( c==0 ){
232 return 0; /* Zero byte in a file -> binary */
233 }
234 while( --n>0 ){
235 c = *++z; ++j;
236 if( c==0 ) return 0; /* Zero byte in a file -> binary */
237 if( c=='\n' ){
238 if( z[-1]=='\r' ){
239 flags |= 2; /* Contains CR/NL, continue */
 
240 }
241 if( j>LENGTH_MASK ){
242 flags |= 1; /* Very long line, continue */
243 }
244 j = 0;
245 }
246 }
247 if( (flags&1) || (j>LENGTH_MASK) ){
248 return -4; /* Very long line -> binary */
249 }
250 return 1-flags; /* No problems seen -> not binary */
251 }
252
253 /*
254 ** Define the type needed to represent a Unicode (UTF-16) character.
255 */
@@ -288,10 +293,14 @@
293 ** to be binary.
294 **
295 ** (-1) -- The content appears to consist entirely of text, with lines
296 ** delimited by carriage-return, line-feed pairs; however, the
297 ** encoding may not be UTF-16.
298 **
299 ** (-4) -- The same as 0, but the determination is based on the fact that
300 ** the blob might be text (any encoding) but it has a line length
301 ** bigger than the diff logic in fossil can handle.
302 **
303 ************************************ WARNING **********************************
304 **
305 ** This function does not validate that the blob content is properly formed
306 ** UTF-16. It assumes that all code points are the same size. It does not
@@ -305,15 +314,15 @@
314 */
315 int looks_like_utf16(const Blob *pContent){
316 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
317 unsigned int n = blob_size(pContent);
318 int j, c;
319 int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
320
321 /* Check individual lines.
322 */
323 if( n==0 ) return 1; /* Empty file -> text */
324 if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
325 c = *z;
326 if( c==0 ) return 0; /* NUL character in a file -> binary */
327 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
328 while( (n-=2)>0 ){
@@ -320,22 +329,22 @@
329 c = *++z; ++j;
330 if( c==0 ) return 0; /* NUL character in a file -> binary */
331 if( c==UTF16BE_LF || c==UTF16LE_LF ){
332 int c2 = z[-1];
333 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
334 flags |= 2; /* Contains CR/NL, continue */
335 }
336 if( j>UTF16_LENGTH_MASK ){
337 flags |= 1; /* Very long line, continue */
338 }
339 j = 0;
340 }
341 }
342 if( (flags&1) || (j>LENGTH_MASK) ){
343 return -4; /* Very long line -> binary */
344 }
345 return 1-flags; /* No problems seen -> not binary */
346 }
347
348 /*
349 ** This function returns an array of bytes representing the byte-order-mark
350 ** for UTF-8.
351

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button