Fossil SCM

Add commit warning message when text files have a long line length

jan.nijtmans 2012-11-05 11:23 improve_commit_warning
Commit 879012769b2dfc2f26c3f7d177ba9264065c500e
2 files changed +10 -4 +13 -11
+10 -4
--- src/checkin.c
+++ src/checkin.c
@@ -907,24 +907,30 @@
907907
908908
if( allOk ) return;
909909
fUnicode = starts_with_utf16_bom(p);
910910
eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
911911
if( eType<-2){
912
+ const char *zWarning;
912913
Blob ans;
913914
char cReply;
914915
916
+ if(eType==-4){
917
+ zWarning = "long lines";
918
+ }else{
919
+ zWarning = "invalid UTF-8";
920
+ }
915921
blob_zero(&ans);
916922
file_relative_name(zFilename, &fname, 0);
917923
zMsg = mprintf(
918
- "%s appears to be text, but not UTF-8 or ASCII. commit anyhow (y/N)? ",
919
- blob_str(&fname));
924
+ "%s appears to be text, but contains %s. commit anyhow (y/N)? ",
925
+ blob_str(&fname), zWarning);
920926
prompt_user(zMsg, &ans);
921927
fossil_free(zMsg);
922928
cReply = blob_str(&ans)[0];
923929
if( cReply!='y' && cReply!='Y' ){
924
- fossil_fatal("Abandoning commit due to non-UTF-8 in %s",
925
- blob_str(&fname));
930
+ fossil_fatal("Abandoning commit due to %s in %s",
931
+ blob_str(&fname), zWarning);
926932
}
927933
blob_reset(&ans);
928934
eType +=4 ;
929935
}
930936
if( eType==0 || eType==-1 || fUnicode ){
931937
--- src/checkin.c
+++ src/checkin.c
@@ -907,24 +907,30 @@
907
908 if( allOk ) return;
909 fUnicode = starts_with_utf16_bom(p);
910 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
911 if( eType<-2){
 
912 Blob ans;
913 char cReply;
914
 
 
 
 
 
915 blob_zero(&ans);
916 file_relative_name(zFilename, &fname, 0);
917 zMsg = mprintf(
918 "%s appears to be text, but not UTF-8 or ASCII. commit anyhow (y/N)? ",
919 blob_str(&fname));
920 prompt_user(zMsg, &ans);
921 fossil_free(zMsg);
922 cReply = blob_str(&ans)[0];
923 if( cReply!='y' && cReply!='Y' ){
924 fossil_fatal("Abandoning commit due to non-UTF-8 in %s",
925 blob_str(&fname));
926 }
927 blob_reset(&ans);
928 eType +=4 ;
929 }
930 if( eType==0 || eType==-1 || fUnicode ){
931
--- src/checkin.c
+++ src/checkin.c
@@ -907,24 +907,30 @@
907
908 if( allOk ) return;
909 fUnicode = starts_with_utf16_bom(p);
910 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
911 if( eType<-2){
912 const char *zWarning;
913 Blob ans;
914 char cReply;
915
916 if(eType==-4){
917 zWarning = "long lines";
918 }else{
919 zWarning = "invalid UTF-8";
920 }
921 blob_zero(&ans);
922 file_relative_name(zFilename, &fname, 0);
923 zMsg = mprintf(
924 "%s appears to be text, but contains %s. commit anyhow (y/N)? ",
925 blob_str(&fname), zWarning);
926 prompt_user(zMsg, &ans);
927 fossil_free(zMsg);
928 cReply = blob_str(&ans)[0];
929 if( cReply!='y' && cReply!='Y' ){
930 fossil_fatal("Abandoning commit due to %s in %s",
931 blob_str(&fname), zWarning);
932 }
933 blob_reset(&ans);
934 eType +=4 ;
935 }
936 if( eType==0 || eType==-1 || fUnicode ){
937
+13 -11
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
4848
"cannot compute difference between binary files\n"
4949
5050
#define DIFF_CANNOT_COMPUTE_SYMLINK \
5151
"cannot compute difference between symlink and regular file\n"
5252
53
-#define looks_like_binary(blob) (looks_like_utf8((blob)) == 0)
53
+#define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0)
5454
#endif /* INTERFACE */
5555
5656
/*
5757
** Maximum length of a line in a text file, in bytes. (8192)
5858
*/
@@ -221,17 +221,15 @@
221221
** to be binary.
222222
**
223223
** (-1) -- The content appears to consist entirely of text, with lines
224224
** delimited by carriage-return, line-feed pairs.
225225
**
226
-** (-3) -- The content appears to consist entirely of text, with lines
227
-** delimited by line-feed characters; however, the encoding is
228
-** not UTF-8 or ASCII.
226
+** (-3, -5) The same as (1, -1); however, the encoding is not UTF-8 or ASCII.
229227
**
230
-** (-5) -- The content appears to consist entirely of text, with lines
231
-** delimited by carriage-return, line-feed pairs; however, the
232
-** encoding is not UTF-8 or ASCII.
228
+** (-4) -- The same as 0, but the determination is based on the fact that
229
+** the blob might be text (any encoding) but it has a line length
230
+** bigger than the diff logic in fossil can handle.
233231
**
234232
************************************ WARNING **********************************
235233
**
236234
** This function does not validate any code points.
237235
**
@@ -267,17 +265,17 @@
267265
} else if( c=='\n' ){
268266
if( z[-1]=='\r' ){
269267
result |= 2; /* Contains CR/NL, continue */
270268
}
271269
if( j>LENGTH_MASK ){
272
- return 0; /* Very long line -> binary */
270
+ return -4; /* Very long line -> binary */
273271
}
274272
j = 0;
275273
}
276274
}
277275
if( j>LENGTH_MASK ){
278
- return 0; /* Very long line -> binary */
276
+ return -4; /* Very long line -> binary */
279277
}
280278
return 1-result; /* No problems seen -> not binary */
281279
}
282280
283281
/*
@@ -323,10 +321,14 @@
323321
** to be binary.
324322
**
325323
** (-1) -- The content appears to consist entirely of text, with lines
326324
** delimited by carriage-return, line-feed pairs; however, the
327325
** encoding may not be UTF-16.
326
+**
327
+** (-4) -- The same as 0, but the determination is based on the fact that
328
+** the blob might be text (any encoding) but it has a line length
329
+** bigger than the diff logic in fossil can handle.
328330
**
329331
************************************ WARNING **********************************
330332
**
331333
** This function does not validate that the blob content is properly formed
332334
** UTF-16. It assumes that all code points are the same size. It does not
@@ -358,17 +360,17 @@
358360
int c2 = z[-1];
359361
if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
360362
result = -1; /* Contains CR/NL, continue */
361363
}
362364
if( j>UTF16_LENGTH_MASK ){
363
- return 0; /* Very long line -> binary */
365
+ return -4; /* Very long line -> binary */
364366
}
365367
j = 0;
366368
}
367369
}
368370
if( j>UTF16_LENGTH_MASK ){
369
- return 0; /* Very long line -> binary */
371
+ return -4; /* Very long line -> binary */
370372
}
371373
return result; /* No problems seen -> not binary */
372374
}
373375
374376
/*
375377
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) (looks_like_utf8((blob)) == 0)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file, in bytes. (8192)
58 */
@@ -221,17 +221,15 @@
221 ** to be binary.
222 **
223 ** (-1) -- The content appears to consist entirely of text, with lines
224 ** delimited by carriage-return, line-feed pairs.
225 **
226 ** (-3) -- The content appears to consist entirely of text, with lines
227 ** delimited by line-feed characters; however, the encoding is
228 ** not UTF-8 or ASCII.
229 **
230 ** (-5) -- The content appears to consist entirely of text, with lines
231 ** delimited by carriage-return, line-feed pairs; however, the
232 ** encoding is not UTF-8 or ASCII.
233 **
234 ************************************ WARNING **********************************
235 **
236 ** This function does not validate any code points.
237 **
@@ -267,17 +265,17 @@
267 } else if( c=='\n' ){
268 if( z[-1]=='\r' ){
269 result |= 2; /* Contains CR/NL, continue */
270 }
271 if( j>LENGTH_MASK ){
272 return 0; /* Very long line -> binary */
273 }
274 j = 0;
275 }
276 }
277 if( j>LENGTH_MASK ){
278 return 0; /* Very long line -> binary */
279 }
280 return 1-result; /* No problems seen -> not binary */
281 }
282
283 /*
@@ -323,10 +321,14 @@
323 ** to be binary.
324 **
325 ** (-1) -- The content appears to consist entirely of text, with lines
326 ** delimited by carriage-return, line-feed pairs; however, the
327 ** encoding may not be UTF-16.
 
 
 
 
328 **
329 ************************************ WARNING **********************************
330 **
331 ** This function does not validate that the blob content is properly formed
332 ** UTF-16. It assumes that all code points are the same size. It does not
@@ -358,17 +360,17 @@
358 int c2 = z[-1];
359 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
360 result = -1; /* Contains CR/NL, continue */
361 }
362 if( j>UTF16_LENGTH_MASK ){
363 return 0; /* Very long line -> binary */
364 }
365 j = 0;
366 }
367 }
368 if( j>UTF16_LENGTH_MASK ){
369 return 0; /* Very long line -> binary */
370 }
371 return result; /* No problems seen -> not binary */
372 }
373
374 /*
375
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file, in bytes. (8192)
58 */
@@ -221,17 +221,15 @@
221 ** to be binary.
222 **
223 ** (-1) -- The content appears to consist entirely of text, with lines
224 ** delimited by carriage-return, line-feed pairs.
225 **
226 ** (-3, -5) The same as (1, -1); however, the encoding is not UTF-8 or ASCII.
 
 
227 **
228 ** (-4) -- The same as 0, but the determination is based on the fact that
229 ** the blob might be text (any encoding) but it has a line length
230 ** bigger than the diff logic in fossil can handle.
231 **
232 ************************************ WARNING **********************************
233 **
234 ** This function does not validate any code points.
235 **
@@ -267,17 +265,17 @@
265 } else if( c=='\n' ){
266 if( z[-1]=='\r' ){
267 result |= 2; /* Contains CR/NL, continue */
268 }
269 if( j>LENGTH_MASK ){
270 return -4; /* Very long line -> binary */
271 }
272 j = 0;
273 }
274 }
275 if( j>LENGTH_MASK ){
276 return -4; /* Very long line -> binary */
277 }
278 return 1-result; /* No problems seen -> not binary */
279 }
280
281 /*
@@ -323,10 +321,14 @@
321 ** to be binary.
322 **
323 ** (-1) -- The content appears to consist entirely of text, with lines
324 ** delimited by carriage-return, line-feed pairs; however, the
325 ** encoding may not be UTF-16.
326 **
327 ** (-4) -- The same as 0, but the determination is based on the fact that
328 ** the blob might be text (any encoding) but it has a line length
329 ** bigger than the diff logic in fossil can handle.
330 **
331 ************************************ WARNING **********************************
332 **
333 ** This function does not validate that the blob content is properly formed
334 ** UTF-16. It assumes that all code points are the same size. It does not
@@ -358,17 +360,17 @@
360 int c2 = z[-1];
361 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
362 result = -1; /* Contains CR/NL, continue */
363 }
364 if( j>UTF16_LENGTH_MASK ){
365 return -4; /* Very long line -> binary */
366 }
367 j = 0;
368 }
369 }
370 if( j>UTF16_LENGTH_MASK ){
371 return -4; /* Very long line -> binary */
372 }
373 return result; /* No problems seen -> not binary */
374 }
375
376 /*
377

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button