Fossil SCM
Add commit warning message when text files have a long line length
Commit
879012769b2dfc2f26c3f7d177ba9264065c500e
Parent
a7fff6fea61901c…
2 files changed
+10
-4
+13
-11
+10
-4
| --- src/checkin.c | ||
| +++ src/checkin.c | ||
| @@ -907,24 +907,30 @@ | ||
| 907 | 907 | |
| 908 | 908 | if( allOk ) return; |
| 909 | 909 | fUnicode = starts_with_utf16_bom(p); |
| 910 | 910 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 911 | 911 | if( eType<-2){ |
| 912 | + const char *zWarning; | |
| 912 | 913 | Blob ans; |
| 913 | 914 | char cReply; |
| 914 | 915 | |
| 916 | + if(eType==-4){ | |
| 917 | + zWarning = "long lines"; | |
| 918 | + }else{ | |
| 919 | + zWarning = "invalid UTF-8"; | |
| 920 | + } | |
| 915 | 921 | blob_zero(&ans); |
| 916 | 922 | file_relative_name(zFilename, &fname, 0); |
| 917 | 923 | zMsg = mprintf( |
| 918 | - "%s appears to be text, but not UTF-8 or ASCII. commit anyhow (y/N)? ", | |
| 919 | - blob_str(&fname)); | |
| 924 | + "%s appears to be text, but contains %s. commit anyhow (y/N)? ", | |
| 925 | + blob_str(&fname), zWarning); | |
| 920 | 926 | prompt_user(zMsg, &ans); |
| 921 | 927 | fossil_free(zMsg); |
| 922 | 928 | cReply = blob_str(&ans)[0]; |
| 923 | 929 | if( cReply!='y' && cReply!='Y' ){ |
| 924 | - fossil_fatal("Abandoning commit due to non-UTF-8 in %s", | |
| 925 | - blob_str(&fname)); | |
| 930 | + fossil_fatal("Abandoning commit due to %s in %s", | |
| 931 | + blob_str(&fname), zWarning); | |
| 926 | 932 | } |
| 927 | 933 | blob_reset(&ans); |
| 928 | 934 | eType +=4 ; |
| 929 | 935 | } |
| 930 | 936 | if( eType==0 || eType==-1 || fUnicode ){ |
| 931 | 937 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -907,24 +907,30 @@ | |
| 907 | |
| 908 | if( allOk ) return; |
| 909 | fUnicode = starts_with_utf16_bom(p); |
| 910 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 911 | if( eType<-2){ |
| 912 | Blob ans; |
| 913 | char cReply; |
| 914 | |
| 915 | blob_zero(&ans); |
| 916 | file_relative_name(zFilename, &fname, 0); |
| 917 | zMsg = mprintf( |
| 918 | "%s appears to be text, but not UTF-8 or ASCII. commit anyhow (y/N)? ", |
| 919 | blob_str(&fname)); |
| 920 | prompt_user(zMsg, &ans); |
| 921 | fossil_free(zMsg); |
| 922 | cReply = blob_str(&ans)[0]; |
| 923 | if( cReply!='y' && cReply!='Y' ){ |
| 924 | fossil_fatal("Abandoning commit due to non-UTF-8 in %s", |
| 925 | blob_str(&fname)); |
| 926 | } |
| 927 | blob_reset(&ans); |
| 928 | eType +=4 ; |
| 929 | } |
| 930 | if( eType==0 || eType==-1 || fUnicode ){ |
| 931 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -907,24 +907,30 @@ | |
| 907 | |
| 908 | if( allOk ) return; |
| 909 | fUnicode = starts_with_utf16_bom(p); |
| 910 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 911 | if( eType<-2){ |
| 912 | const char *zWarning; |
| 913 | Blob ans; |
| 914 | char cReply; |
| 915 | |
| 916 | if(eType==-4){ |
| 917 | zWarning = "long lines"; |
| 918 | }else{ |
| 919 | zWarning = "invalid UTF-8"; |
| 920 | } |
| 921 | blob_zero(&ans); |
| 922 | file_relative_name(zFilename, &fname, 0); |
| 923 | zMsg = mprintf( |
| 924 | "%s appears to be text, but contains %s. commit anyhow (y/N)? ", |
| 925 | blob_str(&fname), zWarning); |
| 926 | prompt_user(zMsg, &ans); |
| 927 | fossil_free(zMsg); |
| 928 | cReply = blob_str(&ans)[0]; |
| 929 | if( cReply!='y' && cReply!='Y' ){ |
| 930 | fossil_fatal("Abandoning commit due to %s in %s", |
| 931 | blob_str(&fname), zWarning); |
| 932 | } |
| 933 | blob_reset(&ans); |
| 934 | eType +=4 ; |
| 935 | } |
| 936 | if( eType==0 || eType==-1 || fUnicode ){ |
| 937 |
+13
-11
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -48,11 +48,11 @@ | ||
| 48 | 48 | "cannot compute difference between binary files\n" |
| 49 | 49 | |
| 50 | 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | 52 | |
| 53 | -#define looks_like_binary(blob) (looks_like_utf8((blob)) == 0) | |
| 53 | +#define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0) | |
| 54 | 54 | #endif /* INTERFACE */ |
| 55 | 55 | |
| 56 | 56 | /* |
| 57 | 57 | ** Maximum length of a line in a text file, in bytes. (8192) |
| 58 | 58 | */ |
| @@ -221,17 +221,15 @@ | ||
| 221 | 221 | ** to be binary. |
| 222 | 222 | ** |
| 223 | 223 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 224 | 224 | ** delimited by carriage-return, line-feed pairs. |
| 225 | 225 | ** |
| 226 | -** (-3) -- The content appears to consist entirely of text, with lines | |
| 227 | -** delimited by line-feed characters; however, the encoding is | |
| 228 | -** not UTF-8 or ASCII. | |
| 226 | +** (-3, -5) The same as (1, -3); however, the encoding is not UTF-8 or ASCII. | |
| 229 | 227 | ** |
| 230 | -** (-5) -- The content appears to consist entirely of text, with lines | |
| 231 | -** delimited by carriage-return, line-feed pairs; however, the | |
| 232 | -** encoding is not UTF-8 or ASCII. | |
| 228 | +** (-4) -- The same as 0, but the determination is based on the fact that | |
| 229 | +** the blob might be text (any encoding) but it has a line length | |
| 230 | +** bigger than the diff logic in fossil can handle. | |
| 233 | 231 | ** |
| 234 | 232 | ************************************ WARNING ********************************** |
| 235 | 233 | ** |
| 236 | 234 | ** This function does not validate any code points. |
| 237 | 235 | ** |
| @@ -267,17 +265,17 @@ | ||
| 267 | 265 | } else if( c=='\n' ){ |
| 268 | 266 | if( z[-1]=='\r' ){ |
| 269 | 267 | result |= 2; /* Contains CR/NL, continue */ |
| 270 | 268 | } |
| 271 | 269 | if( j>LENGTH_MASK ){ |
| 272 | - return 0; /* Very long line -> binary */ | |
| 270 | + return -4; /* Very long line -> binary */ | |
| 273 | 271 | } |
| 274 | 272 | j = 0; |
| 275 | 273 | } |
| 276 | 274 | } |
| 277 | 275 | if( j>LENGTH_MASK ){ |
| 278 | - return 0; /* Very long line -> binary */ | |
| 276 | + return -4; /* Very long line -> binary */ | |
| 279 | 277 | } |
| 280 | 278 | return 1-result; /* No problems seen -> not binary */ |
| 281 | 279 | } |
| 282 | 280 | |
| 283 | 281 | /* |
| @@ -323,10 +321,14 @@ | ||
| 323 | 321 | ** to be binary. |
| 324 | 322 | ** |
| 325 | 323 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 326 | 324 | ** delimited by carriage-return, line-feed pairs; however, the |
| 327 | 325 | ** encoding may not be UTF-16. |
| 326 | +** | |
| 327 | +** (-4) -- The same as 0, but the determination is based on the fact that | |
| 328 | +** the blob might be text (any encoding) but it has a line length | |
| 329 | +** bigger than the diff logic in fossil can handle. | |
| 328 | 330 | ** |
| 329 | 331 | ************************************ WARNING ********************************** |
| 330 | 332 | ** |
| 331 | 333 | ** This function does not validate that the blob content is properly formed |
| 332 | 334 | ** UTF-16. It assumes that all code points are the same size. It does not |
| @@ -358,17 +360,17 @@ | ||
| 358 | 360 | int c2 = z[-1]; |
| 359 | 361 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 360 | 362 | result = -1; /* Contains CR/NL, continue */ |
| 361 | 363 | } |
| 362 | 364 | if( j>UTF16_LENGTH_MASK ){ |
| 363 | - return 0; /* Very long line -> binary */ | |
| 365 | + return -4; /* Very long line -> binary */ | |
| 364 | 366 | } |
| 365 | 367 | j = 0; |
| 366 | 368 | } |
| 367 | 369 | } |
| 368 | 370 | if( j>UTF16_LENGTH_MASK ){ |
| 369 | - return 0; /* Very long line -> binary */ | |
| 371 | + return -4; /* Very long line -> binary */ | |
| 370 | 372 | } |
| 371 | 373 | return result; /* No problems seen -> not binary */ |
| 372 | 374 | } |
| 373 | 375 | |
| 374 | 376 | /* |
| 375 | 377 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) (looks_like_utf8((blob)) == 0) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file, in bytes. (8192) |
| 58 | */ |
| @@ -221,17 +221,15 @@ | |
| 221 | ** to be binary. |
| 222 | ** |
| 223 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 224 | ** delimited by carriage-return, line-feed pairs. |
| 225 | ** |
| 226 | ** (-3) -- The content appears to consist entirely of text, with lines |
| 227 | ** delimited by line-feed characters; however, the encoding is |
| 228 | ** not UTF-8 or ASCII. |
| 229 | ** |
| 230 | ** (-5) -- The content appears to consist entirely of text, with lines |
| 231 | ** delimited by carriage-return, line-feed pairs; however, the |
| 232 | ** encoding is not UTF-8 or ASCII. |
| 233 | ** |
| 234 | ************************************ WARNING ********************************** |
| 235 | ** |
| 236 | ** This function does not validate any code points. |
| 237 | ** |
| @@ -267,17 +265,17 @@ | |
| 267 | } else if( c=='\n' ){ |
| 268 | if( z[-1]=='\r' ){ |
| 269 | result |= 2; /* Contains CR/NL, continue */ |
| 270 | } |
| 271 | if( j>LENGTH_MASK ){ |
| 272 | return 0; /* Very long line -> binary */ |
| 273 | } |
| 274 | j = 0; |
| 275 | } |
| 276 | } |
| 277 | if( j>LENGTH_MASK ){ |
| 278 | return 0; /* Very long line -> binary */ |
| 279 | } |
| 280 | return 1-result; /* No problems seen -> not binary */ |
| 281 | } |
| 282 | |
| 283 | /* |
| @@ -323,10 +321,14 @@ | |
| 323 | ** to be binary. |
| 324 | ** |
| 325 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 326 | ** delimited by carriage-return, line-feed pairs; however, the |
| 327 | ** encoding may not be UTF-16. |
| 328 | ** |
| 329 | ************************************ WARNING ********************************** |
| 330 | ** |
| 331 | ** This function does not validate that the blob content is properly formed |
| 332 | ** UTF-16. It assumes that all code points are the same size. It does not |
| @@ -358,17 +360,17 @@ | |
| 358 | int c2 = z[-1]; |
| 359 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 360 | result = -1; /* Contains CR/NL, continue */ |
| 361 | } |
| 362 | if( j>UTF16_LENGTH_MASK ){ |
| 363 | return 0; /* Very long line -> binary */ |
| 364 | } |
| 365 | j = 0; |
| 366 | } |
| 367 | } |
| 368 | if( j>UTF16_LENGTH_MASK ){ |
| 369 | return 0; /* Very long line -> binary */ |
| 370 | } |
| 371 | return result; /* No problems seen -> not binary */ |
| 372 | } |
| 373 | |
| 374 | /* |
| 375 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file, in bytes. (8192) |
| 58 | */ |
| @@ -221,17 +221,15 @@ | |
| 221 | ** to be binary. |
| 222 | ** |
| 223 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 224 | ** delimited by carriage-return, line-feed pairs. |
| 225 | ** |
| 226 | ** (-3, -5) The same as (1, -3); however, the encoding is not UTF-8 or ASCII. |
| 227 | ** |
| 228 | ** (-4) -- The same as 0, but the determination is based on the fact that |
| 229 | ** the blob might be text (any encoding) but it has a line length |
| 230 | ** bigger than the diff logic in fossil can handle. |
| 231 | ** |
| 232 | ************************************ WARNING ********************************** |
| 233 | ** |
| 234 | ** This function does not validate any code points. |
| 235 | ** |
| @@ -267,17 +265,17 @@ | |
| 265 | } else if( c=='\n' ){ |
| 266 | if( z[-1]=='\r' ){ |
| 267 | result |= 2; /* Contains CR/NL, continue */ |
| 268 | } |
| 269 | if( j>LENGTH_MASK ){ |
| 270 | return -4; /* Very long line -> binary */ |
| 271 | } |
| 272 | j = 0; |
| 273 | } |
| 274 | } |
| 275 | if( j>LENGTH_MASK ){ |
| 276 | return -4; /* Very long line -> binary */ |
| 277 | } |
| 278 | return 1-result; /* No problems seen -> not binary */ |
| 279 | } |
| 280 | |
| 281 | /* |
| @@ -323,10 +321,14 @@ | |
| 321 | ** to be binary. |
| 322 | ** |
| 323 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 324 | ** delimited by carriage-return, line-feed pairs; however, the |
| 325 | ** encoding may not be UTF-16. |
| 326 | ** |
| 327 | ** (-4) -- The same as 0, but the determination is based on the fact that |
| 328 | ** the blob might be text (any encoding) but it has a line length |
| 329 | ** bigger than the diff logic in fossil can handle. |
| 330 | ** |
| 331 | ************************************ WARNING ********************************** |
| 332 | ** |
| 333 | ** This function does not validate that the blob content is properly formed |
| 334 | ** UTF-16. It assumes that all code points are the same size. It does not |
| @@ -358,17 +360,17 @@ | |
| 360 | int c2 = z[-1]; |
| 361 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 362 | result = -1; /* Contains CR/NL, continue */ |
| 363 | } |
| 364 | if( j>UTF16_LENGTH_MASK ){ |
| 365 | return -4; /* Very long line -> binary */ |
| 366 | } |
| 367 | j = 0; |
| 368 | } |
| 369 | } |
| 370 | if( j>UTF16_LENGTH_MASK ){ |
| 371 | return -4; /* Very long line -> binary */ |
| 372 | } |
| 373 | return result; /* No problems seen -> not binary */ |
| 374 | } |
| 375 | |
| 376 | /* |
| 377 |