Fossil SCM
Add a new warning when a file is considered binary solely because it contains overly long lines.
Commit
3a74f9fe52c3a6b1e97a03cf8a0e5171d0e88f7a
Parent
69fe4237cee86c0…
2 files changed: src/checkin.c (+25), src/diff.c (+27, -18)
+25
| --- src/checkin.c | ||
| +++ src/checkin.c | ||
| @@ -909,10 +909,35 @@ | ||
| 909 | 909 | static int allOk = 0; /* Set to true to disable this routine */ |
| 910 | 910 | |
| 911 | 911 | if( allOk ) return 0; |
| 912 | 912 | fUnicode = starts_with_utf16_bom(p, 0, 0); |
| 913 | 913 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 914 | + if( eType==-4){ | |
| 915 | + const char *zWarning; | |
| 916 | + const char *zDisable; | |
| 917 | + Blob ans; | |
| 918 | + char cReply; | |
| 919 | + | |
| 920 | + if (!binOk) { | |
| 921 | + zWarning = "long lines"; | |
| 922 | + zDisable = "\"binary-glob\" setting"; | |
| 923 | + blob_zero(&ans); | |
| 924 | + file_relative_name(zFilename, &fname, 0); | |
| 925 | + zMsg = mprintf( | |
| 926 | + "%s appears to be text, but contains %s. Use --no-warnings or the" | |
| 927 | + " %s to disable this warning.\nCommit anyhow (a=all/y/N)? ", | |
| 928 | + blob_str(&fname), zWarning, zDisable); | |
| 929 | + prompt_user(zMsg, &ans); | |
| 930 | + fossil_free(zMsg); | |
| 931 | + cReply = blob_str(&ans)[0]; | |
| 932 | + if( cReply!='y' && cReply!='Y' ){ | |
| 933 | + fossil_fatal("Abandoning commit due to %s in %s", | |
| 934 | + zWarning, blob_str(&fname)); | |
| 935 | + } | |
| 936 | + blob_reset(&ans); | |
| 937 | + } | |
| 938 | + } | |
| 914 | 939 | if( eType==0 || eType==-1 || fUnicode ){ |
| 915 | 940 | const char *zWarning; |
| 916 | 941 | const char *zDisable; |
| 917 | 942 | const char *zConvert = "c=convert/"; |
| 918 | 943 | Blob ans; |
| 919 | 944 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -909,10 +909,35 @@ | |
| 909 | static int allOk = 0; /* Set to true to disable this routine */ |
| 910 | |
| 911 | if( allOk ) return 0; |
| 912 | fUnicode = starts_with_utf16_bom(p, 0, 0); |
| 913 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 914 | if( eType==0 || eType==-1 || fUnicode ){ |
| 915 | const char *zWarning; |
| 916 | const char *zDisable; |
| 917 | const char *zConvert = "c=convert/"; |
| 918 | Blob ans; |
| 919 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -909,10 +909,35 @@ | |
| 909 | static int allOk = 0; /* Set to true to disable this routine */ |
| 910 | |
| 911 | if( allOk ) return 0; |
| 912 | fUnicode = starts_with_utf16_bom(p, 0, 0); |
| 913 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 914 | if( eType==-4){ |
| 915 | const char *zWarning; |
| 916 | const char *zDisable; |
| 917 | Blob ans; |
| 918 | char cReply; |
| 919 | |
| 920 | if (!binOk) { |
| 921 | zWarning = "long lines"; |
| 922 | zDisable = "\"binary-glob\" setting"; |
| 923 | blob_zero(&ans); |
| 924 | file_relative_name(zFilename, &fname, 0); |
| 925 | zMsg = mprintf( |
| 926 | "%s appears to be text, but contains %s. Use --no-warnings or the" |
| 927 | " %s to disable this warning.\nCommit anyhow (a=all/y/N)? ", |
| 928 | blob_str(&fname), zWarning, zDisable); |
| 929 | prompt_user(zMsg, &ans); |
| 930 | fossil_free(zMsg); |
| 931 | cReply = blob_str(&ans)[0]; |
| 932 | if( cReply!='y' && cReply!='Y' ){ |
| 933 | fossil_fatal("Abandoning commit due to %s in %s", |
| 934 | zWarning, blob_str(&fname)); |
| 935 | } |
| 936 | blob_reset(&ans); |
| 937 | } |
| 938 | } |
| 939 | if( eType==0 || eType==-1 || fUnicode ){ |
| 940 | const char *zWarning; |
| 941 | const char *zDisable; |
| 942 | const char *zConvert = "c=convert/"; |
| 943 | Blob ans; |
| 944 |
+27
-18
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -57,11 +57,11 @@ | ||
| 57 | 57 | "more than 10,000 changes\n" |
| 58 | 58 | |
| 59 | 59 | #define DIFF_TOO_MANY_CHANGES_HTML \ |
| 60 | 60 | "<p class='generalError'>More than 10,000 changes</p>\n" |
| 61 | 61 | |
| 62 | -#define looks_like_binary(blob) (looks_like_utf8((blob)) == 0) | |
| 62 | +#define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0) | |
| 63 | 63 | #endif /* INTERFACE */ |
| 64 | 64 | |
| 65 | 65 | /* |
| 66 | 66 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 67 | 67 | */ |
| @@ -198,10 +198,14 @@ | ||
| 198 | 198 | ** to be binary. |
| 199 | 199 | ** |
| 200 | 200 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 201 | 201 | ** delimited by carriage-return, line-feed pairs; however, the |
| 202 | 202 | ** encoding may not be UTF-8. |
| 203 | +** | |
| 204 | +** (-4) -- The same as 0, but the determination is based on the fact that | |
| 205 | +** the blob might be text (any encoding) but it has a line length | |
| 206 | +** bigger than the diff logic in fossil can handle. | |
| 203 | 207 | ** |
| 204 | 208 | ************************************ WARNING ********************************** |
| 205 | 209 | ** |
| 206 | 210 | ** This function does not validate that the blob content is properly formed |
| 207 | 211 | ** UTF-8. It assumes that all code points are the same size. It does not |
| @@ -215,36 +219,37 @@ | ||
| 215 | 219 | */ |
| 216 | 220 | int looks_like_utf8(const Blob *pContent){ |
| 217 | 221 | const char *z = blob_buffer(pContent); |
| 218 | 222 | unsigned int n = blob_size(pContent); |
| 219 | 223 | int j, c; |
| 220 | - int result = 1; /* Assume UTF-8 text with no CR/NL */ | |
| 224 | + int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */ | |
| 221 | 225 | |
| 222 | 226 | /* Check individual lines. |
| 223 | 227 | */ |
| 224 | - if( n==0 ) return result; /* Empty file -> text */ | |
| 228 | + if( n==0 ) return 1; /* Empty file -> text */ | |
| 225 | 229 | c = *z; |
| 226 | - if( c==0 ) return 0; /* Zero byte in a file -> binary */ | |
| 227 | 230 | j = (c!='\n'); |
| 231 | + if( c==0 ){ | |
| 232 | + return 0; /* Zero byte in a file -> binary */ | |
| 233 | + } | |
| 228 | 234 | while( --n>0 ){ |
| 229 | 235 | c = *++z; ++j; |
| 230 | 236 | if( c==0 ) return 0; /* Zero byte in a file -> binary */ |
| 231 | 237 | if( c=='\n' ){ |
| 232 | - int c2 = z[-1]; | |
| 233 | - if( c2=='\r' ){ | |
| 234 | - result = -1; /* Contains CR/NL, continue */ | |
| 238 | + if( z[-1]=='\r' ){ | |
| 239 | + flags |= 2; /* Contains CR/NL, continue */ | |
| 235 | 240 | } |
| 236 | 241 | if( j>LENGTH_MASK ){ |
| 237 | - return 0; /* Very long line -> binary */ | |
| 242 | + flags |= 1; /* Very long line, continue */ | |
| 238 | 243 | } |
| 239 | 244 | j = 0; |
| 240 | 245 | } |
| 241 | 246 | } |
| 242 | - if( j>LENGTH_MASK ){ | |
| 243 | - return 0; /* Very long line -> binary */ | |
| 247 | + if( (flags&1) || (j>LENGTH_MASK) ){ | |
| 248 | + return -4; /* Very long line -> binary */ | |
| 244 | 249 | } |
| 245 | - return result; /* No problems seen -> not binary */ | |
| 250 | + return 1-flags; /* No problems seen -> not binary */ | |
| 246 | 251 | } |
| 247 | 252 | |
| 248 | 253 | /* |
| 249 | 254 | ** Define the type needed to represent a Unicode (UTF-16) character. |
| 250 | 255 | */ |
| @@ -288,10 +293,14 @@ | ||
| 288 | 293 | ** to be binary. |
| 289 | 294 | ** |
| 290 | 295 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 291 | 296 | ** delimited by carriage-return, line-feed pairs; however, the |
| 292 | 297 | ** encoding may not be UTF-16. |
| 298 | +** | |
| 299 | +** (-4) -- The same as 0, but the determination is based on the fact that | |
| 300 | +** the blob might be text (any encoding) but it has a line length | |
| 301 | +** bigger than the diff logic in fossil can handle. | |
| 293 | 302 | ** |
| 294 | 303 | ************************************ WARNING ********************************** |
| 295 | 304 | ** |
| 296 | 305 | ** This function does not validate that the blob content is properly formed |
| 297 | 306 | ** UTF-16. It assumes that all code points are the same size. It does not |
| @@ -305,15 +314,15 @@ | ||
| 305 | 314 | */ |
| 306 | 315 | int looks_like_utf16(const Blob *pContent){ |
| 307 | 316 | const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent); |
| 308 | 317 | unsigned int n = blob_size(pContent); |
| 309 | 318 | int j, c; |
| 310 | - int result = 1; /* Assume UTF-16 text with no CR/NL */ | |
| 319 | + int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */ | |
| 311 | 320 | |
| 312 | 321 | /* Check individual lines. |
| 313 | 322 | */ |
| 314 | - if( n==0 ) return result; /* Empty file -> text */ | |
| 323 | + if( n==0 ) return 1; /* Empty file -> text */ | |
| 315 | 324 | if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */ |
| 316 | 325 | c = *z; |
| 317 | 326 | if( c==0 ) return 0; /* NUL character in a file -> binary */ |
| 318 | 327 | j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF)); |
| 319 | 328 | while( (n-=2)>0 ){ |
| @@ -320,22 +329,22 @@ | ||
| 320 | 329 | c = *++z; ++j; |
| 321 | 330 | if( c==0 ) return 0; /* NUL character in a file -> binary */ |
| 322 | 331 | if( c==UTF16BE_LF || c==UTF16LE_LF ){ |
| 323 | 332 | int c2 = z[-1]; |
| 324 | 333 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 325 | - result = -1; /* Contains CR/NL, continue */ | |
| 334 | + flags |= 2; /* Contains CR/NL, continue */ | |
| 326 | 335 | } |
| 327 | 336 | if( j>UTF16_LENGTH_MASK ){ |
| 328 | - return 0; /* Very long line -> binary */ | |
| 337 | + flags |= 1; /* Very long line, continue */ | |
| 329 | 338 | } |
| 330 | 339 | j = 0; |
| 331 | 340 | } |
| 332 | 341 | } |
| 333 | - if( j>UTF16_LENGTH_MASK ){ | |
| 334 | - return 0; /* Very long line -> binary */ | |
| 342 | + if( (flags&1) || (j>LENGTH_MASK) ){ | |
| 343 | + return -4; /* Very long line -> binary */ | |
| 335 | 344 | } |
| 336 | - return result; /* No problems seen -> not binary */ | |
| 345 | + return 1-flags; /* No problems seen -> not binary */ | |
| 337 | 346 | } |
| 338 | 347 | |
| 339 | 348 | /* |
| 340 | 349 | ** This function returns an array of bytes representing the byte-order-mark |
| 341 | 350 | ** for UTF-8. |
| 342 | 351 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -57,11 +57,11 @@ | |
| 57 | "more than 10,000 changes\n" |
| 58 | |
| 59 | #define DIFF_TOO_MANY_CHANGES_HTML \ |
| 60 | "<p class='generalError'>More than 10,000 changes</p>\n" |
| 61 | |
| 62 | #define looks_like_binary(blob) (looks_like_utf8((blob)) == 0) |
| 63 | #endif /* INTERFACE */ |
| 64 | |
| 65 | /* |
| 66 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 67 | */ |
| @@ -198,10 +198,14 @@ | |
| 198 | ** to be binary. |
| 199 | ** |
| 200 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 201 | ** delimited by carriage-return, line-feed pairs; however, the |
| 202 | ** encoding may not be UTF-8. |
| 203 | ** |
| 204 | ************************************ WARNING ********************************** |
| 205 | ** |
| 206 | ** This function does not validate that the blob content is properly formed |
| 207 | ** UTF-8. It assumes that all code points are the same size. It does not |
| @@ -215,36 +219,37 @@ | |
| 215 | */ |
| 216 | int looks_like_utf8(const Blob *pContent){ |
| 217 | const char *z = blob_buffer(pContent); |
| 218 | unsigned int n = blob_size(pContent); |
| 219 | int j, c; |
| 220 | int result = 1; /* Assume UTF-8 text with no CR/NL */ |
| 221 | |
| 222 | /* Check individual lines. |
| 223 | */ |
| 224 | if( n==0 ) return result; /* Empty file -> text */ |
| 225 | c = *z; |
| 226 | if( c==0 ) return 0; /* Zero byte in a file -> binary */ |
| 227 | j = (c!='\n'); |
| 228 | while( --n>0 ){ |
| 229 | c = *++z; ++j; |
| 230 | if( c==0 ) return 0; /* Zero byte in a file -> binary */ |
| 231 | if( c=='\n' ){ |
| 232 | int c2 = z[-1]; |
| 233 | if( c2=='\r' ){ |
| 234 | result = -1; /* Contains CR/NL, continue */ |
| 235 | } |
| 236 | if( j>LENGTH_MASK ){ |
| 237 | return 0; /* Very long line -> binary */ |
| 238 | } |
| 239 | j = 0; |
| 240 | } |
| 241 | } |
| 242 | if( j>LENGTH_MASK ){ |
| 243 | return 0; /* Very long line -> binary */ |
| 244 | } |
| 245 | return result; /* No problems seen -> not binary */ |
| 246 | } |
| 247 | |
| 248 | /* |
| 249 | ** Define the type needed to represent a Unicode (UTF-16) character. |
| 250 | */ |
| @@ -288,10 +293,14 @@ | |
| 288 | ** to be binary. |
| 289 | ** |
| 290 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 291 | ** delimited by carriage-return, line-feed pairs; however, the |
| 292 | ** encoding may not be UTF-16. |
| 293 | ** |
| 294 | ************************************ WARNING ********************************** |
| 295 | ** |
| 296 | ** This function does not validate that the blob content is properly formed |
| 297 | ** UTF-16. It assumes that all code points are the same size. It does not |
| @@ -305,15 +314,15 @@ | |
| 305 | */ |
| 306 | int looks_like_utf16(const Blob *pContent){ |
| 307 | const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent); |
| 308 | unsigned int n = blob_size(pContent); |
| 309 | int j, c; |
| 310 | int result = 1; /* Assume UTF-16 text with no CR/NL */ |
| 311 | |
| 312 | /* Check individual lines. |
| 313 | */ |
| 314 | if( n==0 ) return result; /* Empty file -> text */ |
| 315 | if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */ |
| 316 | c = *z; |
| 317 | if( c==0 ) return 0; /* NUL character in a file -> binary */ |
| 318 | j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF)); |
| 319 | while( (n-=2)>0 ){ |
| @@ -320,22 +329,22 @@ | |
| 320 | c = *++z; ++j; |
| 321 | if( c==0 ) return 0; /* NUL character in a file -> binary */ |
| 322 | if( c==UTF16BE_LF || c==UTF16LE_LF ){ |
| 323 | int c2 = z[-1]; |
| 324 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 325 | result = -1; /* Contains CR/NL, continue */ |
| 326 | } |
| 327 | if( j>UTF16_LENGTH_MASK ){ |
| 328 | return 0; /* Very long line -> binary */ |
| 329 | } |
| 330 | j = 0; |
| 331 | } |
| 332 | } |
| 333 | if( j>UTF16_LENGTH_MASK ){ |
| 334 | return 0; /* Very long line -> binary */ |
| 335 | } |
| 336 | return result; /* No problems seen -> not binary */ |
| 337 | } |
| 338 | |
| 339 | /* |
| 340 | ** This function returns an array of bytes representing the byte-order-mark |
| 341 | ** for UTF-8. |
| 342 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -57,11 +57,11 @@ | |
| 57 | "more than 10,000 changes\n" |
| 58 | |
| 59 | #define DIFF_TOO_MANY_CHANGES_HTML \ |
| 60 | "<p class='generalError'>More than 10,000 changes</p>\n" |
| 61 | |
| 62 | #define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0) |
| 63 | #endif /* INTERFACE */ |
| 64 | |
| 65 | /* |
| 66 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 67 | */ |
| @@ -198,10 +198,14 @@ | |
| 198 | ** to be binary. |
| 199 | ** |
| 200 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 201 | ** delimited by carriage-return, line-feed pairs; however, the |
| 202 | ** encoding may not be UTF-8. |
| 203 | ** |
| 204 | ** (-4) -- The same as 0, but the determination is based on the fact that |
| 205 | ** the blob might be text (any encoding) but it has a line length |
| 206 | ** bigger than the diff logic in fossil can handle. |
| 207 | ** |
| 208 | ************************************ WARNING ********************************** |
| 209 | ** |
| 210 | ** This function does not validate that the blob content is properly formed |
| 211 | ** UTF-8. It assumes that all code points are the same size. It does not |
| @@ -215,36 +219,37 @@ | |
| 219 | */ |
| 220 | int looks_like_utf8(const Blob *pContent){ |
| 221 | const char *z = blob_buffer(pContent); |
| 222 | unsigned int n = blob_size(pContent); |
| 223 | int j, c; |
| 224 | int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */ |
| 225 | |
| 226 | /* Check individual lines. |
| 227 | */ |
| 228 | if( n==0 ) return 1; /* Empty file -> text */ |
| 229 | c = *z; |
| 230 | j = (c!='\n'); |
| 231 | if( c==0 ){ |
| 232 | return 0; /* Zero byte in a file -> binary */ |
| 233 | } |
| 234 | while( --n>0 ){ |
| 235 | c = *++z; ++j; |
| 236 | if( c==0 ) return 0; /* Zero byte in a file -> binary */ |
| 237 | if( c=='\n' ){ |
| 238 | if( z[-1]=='\r' ){ |
| 239 | flags |= 2; /* Contains CR/NL, continue */ |
| 240 | } |
| 241 | if( j>LENGTH_MASK ){ |
| 242 | flags |= 1; /* Very long line, continue */ |
| 243 | } |
| 244 | j = 0; |
| 245 | } |
| 246 | } |
| 247 | if( (flags&1) || (j>LENGTH_MASK) ){ |
| 248 | return -4; /* Very long line -> binary */ |
| 249 | } |
| 250 | return 1-flags; /* No problems seen -> not binary */ |
| 251 | } |
| 252 | |
| 253 | /* |
| 254 | ** Define the type needed to represent a Unicode (UTF-16) character. |
| 255 | */ |
| @@ -288,10 +293,14 @@ | |
| 293 | ** to be binary. |
| 294 | ** |
| 295 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 296 | ** delimited by carriage-return, line-feed pairs; however, the |
| 297 | ** encoding may not be UTF-16. |
| 298 | ** |
| 299 | ** (-4) -- The same as 0, but the determination is based on the fact that |
| 300 | ** the blob might be text (any encoding) but it has a line length |
| 301 | ** bigger than the diff logic in fossil can handle. |
| 302 | ** |
| 303 | ************************************ WARNING ********************************** |
| 304 | ** |
| 305 | ** This function does not validate that the blob content is properly formed |
| 306 | ** UTF-16. It assumes that all code points are the same size. It does not |
| @@ -305,15 +314,15 @@ | |
| 314 | */ |
| 315 | int looks_like_utf16(const Blob *pContent){ |
| 316 | const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent); |
| 317 | unsigned int n = blob_size(pContent); |
| 318 | int j, c; |
| 319 | int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */ |
| 320 | |
| 321 | /* Check individual lines. |
| 322 | */ |
| 323 | if( n==0 ) return 1; /* Empty file -> text */ |
| 324 | if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */ |
| 325 | c = *z; |
| 326 | if( c==0 ) return 0; /* NUL character in a file -> binary */ |
| 327 | j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF)); |
| 328 | while( (n-=2)>0 ){ |
| @@ -320,22 +329,22 @@ | |
| 329 | c = *++z; ++j; |
| 330 | if( c==0 ) return 0; /* NUL character in a file -> binary */ |
| 331 | if( c==UTF16BE_LF || c==UTF16LE_LF ){ |
| 332 | int c2 = z[-1]; |
| 333 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 334 | flags |= 2; /* Contains CR/NL, continue */ |
| 335 | } |
| 336 | if( j>UTF16_LENGTH_MASK ){ |
| 337 | flags |= 1; /* Very long line, continue */ |
| 338 | } |
| 339 | j = 0; |
| 340 | } |
| 341 | } |
| 342 | if( (flags&1) || (j>LENGTH_MASK) ){ |
| 343 | return -4; /* Very long line -> binary */ |
| 344 | } |
| 345 | return 1-flags; /* No problems seen -> not binary */ |
| 346 | } |
| 347 | |
| 348 | /* |
| 349 | ** This function returns an array of bytes representing the byte-order-mark |
| 350 | ** for UTF-8. |
| 351 |