Fossil SCM
Merge commit warning and looks_like_text() enhancements to trunk. Further changes based on these will occur on a branch.
Commit
618258421767778c41b643302f73e82954946b89
Parent
fc0bffd995d8ee0…
2 files changed
+10
-6
+22
-11
+10
-6
| --- src/checkin.c | ||
| +++ src/checkin.c | ||
| @@ -887,22 +887,26 @@ | ||
| 887 | 887 | ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending |
| 888 | 888 | ** is seen in a text file. |
| 889 | 889 | */ |
| 890 | 890 | static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ |
| 891 | 891 | int eType; /* return value of looks_like_text() */ |
| 892 | + int fUnicode; /* return value of starts_with_utf16_bom() */ | |
| 892 | 893 | char *zMsg; /* Warning message */ |
| 893 | 894 | Blob fname; /* Relative pathname of the file */ |
| 894 | 895 | static int allOk = 0; /* Set to true to disable this routine */ |
| 895 | 896 | |
| 896 | 897 | if( allOk ) return; |
| 897 | 898 | eType = looks_like_text(p); |
| 898 | - if( eType<0 ){ | |
| 899 | - const char *zWarning ; | |
| 899 | + fUnicode = starts_with_utf16_bom(p); | |
| 900 | + if( eType==-1 || fUnicode ){ | |
| 901 | + const char *zWarning; | |
| 900 | 902 | Blob ans; |
| 901 | 903 | char cReply; |
| 902 | 904 | |
| 903 | - if( eType&1 ){ | |
| 905 | + if( eType==-1 && fUnicode ){ | |
| 906 | + zWarning = "Unicode and CR/NL line endings"; | |
| 907 | + }else if( eType==-1 ){ | |
| 904 | 908 | if( crnlOk ){ |
| 905 | 909 | return; /* We don't want CR/NL warnings for this file. */ |
| 906 | 910 | } |
| 907 | 911 | zWarning = "CR/NL line endings"; |
| 908 | 912 | }else{ |
| @@ -909,20 +913,20 @@ | ||
| 909 | 913 | zWarning = "Unicode"; |
| 910 | 914 | } |
| 911 | 915 | file_relative_name(zFilename, &fname, 0); |
| 912 | 916 | blob_zero(&ans); |
| 913 | 917 | zMsg = mprintf( |
| 914 | - "%s contains %s. commit anyhow (a=all/y/N)? ", | |
| 915 | - blob_str(&fname), zWarning ); | |
| 918 | + "%s contains %s; commit anyhow (a=all/y/N)?", | |
| 919 | + blob_str(&fname), zWarning); | |
| 916 | 920 | prompt_user(zMsg, &ans); |
| 917 | 921 | fossil_free(zMsg); |
| 918 | 922 | cReply = blob_str(&ans)[0]; |
| 919 | 923 | if( cReply=='a' || cReply=='A' ){ |
| 920 | 924 | allOk = 1; |
| 921 | 925 | }else if( cReply!='y' && cReply!='Y' ){ |
| 922 | 926 | fossil_fatal("Abandoning commit due to %s in %s", |
| 923 | - zWarning , blob_str(&fname)); | |
| 927 | + zWarning, blob_str(&fname)); | |
| 924 | 928 | } |
| 925 | 929 | blob_reset(&ans); |
| 926 | 930 | blob_reset(&fname); |
| 927 | 931 | } |
| 928 | 932 | } |
| 929 | 933 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -887,22 +887,26 @@ | |
| 887 | ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending |
| 888 | ** is seen in a text file. |
| 889 | */ |
| 890 | static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ |
| 891 | int eType; /* return value of looks_like_text() */ |
| 892 | char *zMsg; /* Warning message */ |
| 893 | Blob fname; /* Relative pathname of the file */ |
| 894 | static int allOk = 0; /* Set to true to disable this routine */ |
| 895 | |
| 896 | if( allOk ) return; |
| 897 | eType = looks_like_text(p); |
| 898 | if( eType<0 ){ |
| 899 | const char *zWarning ; |
| 900 | Blob ans; |
| 901 | char cReply; |
| 902 | |
| 903 | if( eType&1 ){ |
| 904 | if( crnlOk ){ |
| 905 | return; /* We don't want CR/NL warnings for this file. */ |
| 906 | } |
| 907 | zWarning = "CR/NL line endings"; |
| 908 | }else{ |
| @@ -909,20 +913,20 @@ | |
| 909 | zWarning = "Unicode"; |
| 910 | } |
| 911 | file_relative_name(zFilename, &fname, 0); |
| 912 | blob_zero(&ans); |
| 913 | zMsg = mprintf( |
| 914 | "%s contains %s. commit anyhow (a=all/y/N)? ", |
| 915 | blob_str(&fname), zWarning ); |
| 916 | prompt_user(zMsg, &ans); |
| 917 | fossil_free(zMsg); |
| 918 | cReply = blob_str(&ans)[0]; |
| 919 | if( cReply=='a' || cReply=='A' ){ |
| 920 | allOk = 1; |
| 921 | }else if( cReply!='y' && cReply!='Y' ){ |
| 922 | fossil_fatal("Abandoning commit due to %s in %s", |
| 923 | zWarning , blob_str(&fname)); |
| 924 | } |
| 925 | blob_reset(&ans); |
| 926 | blob_reset(&fname); |
| 927 | } |
| 928 | } |
| 929 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -887,22 +887,26 @@ | |
| 887 | ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending |
| 888 | ** is seen in a text file. |
| 889 | */ |
| 890 | static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ |
| 891 | int eType; /* return value of looks_like_text() */ |
| 892 | int fUnicode; /* return value of starts_with_utf16_bom() */ |
| 893 | char *zMsg; /* Warning message */ |
| 894 | Blob fname; /* Relative pathname of the file */ |
| 895 | static int allOk = 0; /* Set to true to disable this routine */ |
| 896 | |
| 897 | if( allOk ) return; |
| 898 | eType = looks_like_text(p); |
| 899 | fUnicode = starts_with_utf16_bom(p); |
| 900 | if( eType==-1 || fUnicode ){ |
| 901 | const char *zWarning; |
| 902 | Blob ans; |
| 903 | char cReply; |
| 904 | |
| 905 | if( eType==-1 && fUnicode ){ |
| 906 | zWarning = "Unicode and CR/NL line endings"; |
| 907 | }else if( eType==-1 ){ |
| 908 | if( crnlOk ){ |
| 909 | return; /* We don't want CR/NL warnings for this file. */ |
| 910 | } |
| 911 | zWarning = "CR/NL line endings"; |
| 912 | }else{ |
| @@ -909,20 +913,20 @@ | |
| 913 | zWarning = "Unicode"; |
| 914 | } |
| 915 | file_relative_name(zFilename, &fname, 0); |
| 916 | blob_zero(&ans); |
| 917 | zMsg = mprintf( |
| 918 | "%s contains %s; commit anyhow (a=all/y/N)?", |
| 919 | blob_str(&fname), zWarning); |
| 920 | prompt_user(zMsg, &ans); |
| 921 | fossil_free(zMsg); |
| 922 | cReply = blob_str(&ans)[0]; |
| 923 | if( cReply=='a' || cReply=='A' ){ |
| 924 | allOk = 1; |
| 925 | }else if( cReply!='y' && cReply!='Y' ){ |
| 926 | fossil_fatal("Abandoning commit due to %s in %s", |
| 927 | zWarning, blob_str(&fname)); |
| 928 | } |
| 929 | blob_reset(&ans); |
| 930 | blob_reset(&fname); |
| 931 | } |
| 932 | } |
| 933 |
+22
-11
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -48,11 +48,11 @@ | ||
| 48 | 48 | "cannot compute difference between binary files\n" |
| 49 | 49 | |
| 50 | 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | 52 | |
| 53 | -#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) | |
| 53 | +#define looks_like_binary(blob) (looks_like_text((blob)) == 0) | |
| 54 | 54 | #endif /* INTERFACE */ |
| 55 | 55 | |
| 56 | 56 | /* |
| 57 | 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | 58 | */ |
| @@ -179,18 +179,18 @@ | ||
| 179 | 179 | ** (1) -- The content appears to consist entirely of text, with lines |
| 180 | 180 | ** delimited by line-feed characters; however, the encoding may |
| 181 | 181 | ** not be UTF-8. |
| 182 | 182 | ** |
| 183 | 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | -** NUL (\000) characters or an extremely long line. | |
| 184 | +** NUL (\000) characters or an extremely long line. Since this | |
| 185 | +** function does not understand UTF-16, it may falsely consider | |
| 186 | +** UTF-16 text to be binary. | |
| 185 | 187 | ** |
| 186 | 188 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 187 | 189 | ** delimited by carriage-return, line-feed pairs; however, the |
| 188 | 190 | ** encoding may not be UTF-8. |
| 189 | 191 | ** |
| 190 | -** (-2) -- The content appears to consist entirely of text, in the | |
| 191 | -** UTF-16 (BE or LE) encoding. | |
| 192 | 192 | */ |
| 193 | 193 | int looks_like_text(const Blob *pContent){ |
| 194 | 194 | const char *z = blob_buffer(pContent); |
| 195 | 195 | unsigned int n = blob_size(pContent); |
| 196 | 196 | int j, c; |
| @@ -199,17 +199,10 @@ | ||
| 199 | 199 | /* Check individual lines. |
| 200 | 200 | */ |
| 201 | 201 | if( n==0 ) return result; /* Empty file -> text */ |
| 202 | 202 | c = *z; |
| 203 | 203 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 204 | - if ( n > 1 ){ | |
| 205 | - if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){ | |
| 206 | - return -2; | |
| 207 | - } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){ | |
| 208 | - return -2; | |
| 209 | - } | |
| 210 | - } | |
| 211 | 204 | j = (c!='\n'); |
| 212 | 205 | while( --n>0 ){ |
| 213 | 206 | c = *++z; ++j; |
| 214 | 207 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 215 | 208 | if( c=='\n' ){ |
| @@ -225,10 +218,28 @@ | ||
| 225 | 218 | if( j>LENGTH_MASK ){ |
| 226 | 219 | return 0; /* Very long line -> binary */ |
| 227 | 220 | } |
| 228 | 221 | return result; /* No problems seen -> not binary */ |
| 229 | 222 | } |
| 223 | + | |
| 224 | +/* | |
| 225 | +** This function returns non-zero if the blob starts with a UTF-16le or | |
| 226 | +** UTF-16be byte-order-mark (BOM). | |
| 227 | +*/ | |
| 228 | +int starts_with_utf16_bom(const Blob *pContent){ | |
| 229 | + const char *z = blob_buffer(pContent); | |
| 230 | + int c1, c2; | |
| 231 | + | |
| 232 | + if( blob_size(pContent)<2 ) return 0; | |
| 233 | + c1 = z[0]; c2 = z[1]; | |
| 234 | + if( (c1==(char)0xff) && (c2==(char)0xfe) ){ | |
| 235 | + return 1; | |
| 236 | + }else if( (c1==(char)0xff) && (c2==(char)0xfe) ){ | |
| 237 | + return 1; | |
| 238 | + } | |
| 239 | + return 0; | |
| 240 | +} | |
| 230 | 241 | |
| 231 | 242 | /* |
| 232 | 243 | ** Return true if two DLine elements are identical. |
| 233 | 244 | */ |
| 234 | 245 | static int same_dline(DLine *pA, DLine *pB){ |
| 235 | 246 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | */ |
| @@ -179,18 +179,18 @@ | |
| 179 | ** (1) -- The content appears to consist entirely of text, with lines |
| 180 | ** delimited by line-feed characters; however, the encoding may |
| 181 | ** not be UTF-8. |
| 182 | ** |
| 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | ** NUL (\000) characters or an extremely long line. |
| 185 | ** |
| 186 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 187 | ** delimited by carriage-return, line-feed pairs; however, the |
| 188 | ** encoding may not be UTF-8. |
| 189 | ** |
| 190 | ** (-2) -- The content appears to consist entirely of text, in the |
| 191 | ** UTF-16 (BE or LE) encoding. |
| 192 | */ |
| 193 | int looks_like_text(const Blob *pContent){ |
| 194 | const char *z = blob_buffer(pContent); |
| 195 | unsigned int n = blob_size(pContent); |
| 196 | int j, c; |
| @@ -199,17 +199,10 @@ | |
| 199 | /* Check individual lines. |
| 200 | */ |
| 201 | if( n==0 ) return result; /* Empty file -> text */ |
| 202 | c = *z; |
| 203 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 204 | if ( n > 1 ){ |
| 205 | if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){ |
| 206 | return -2; |
| 207 | } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){ |
| 208 | return -2; |
| 209 | } |
| 210 | } |
| 211 | j = (c!='\n'); |
| 212 | while( --n>0 ){ |
| 213 | c = *++z; ++j; |
| 214 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 215 | if( c=='\n' ){ |
| @@ -225,10 +218,28 @@ | |
| 225 | if( j>LENGTH_MASK ){ |
| 226 | return 0; /* Very long line -> binary */ |
| 227 | } |
| 228 | return result; /* No problems seen -> not binary */ |
| 229 | } |
| 230 | |
| 231 | /* |
| 232 | ** Return true if two DLine elements are identical. |
| 233 | */ |
| 234 | static int same_dline(DLine *pA, DLine *pB){ |
| 235 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
/* True when looks_like_text() classifies the blob as binary (returns 0). */
#define looks_like_binary(blob) (looks_like_text(blob) == 0)
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | */ |
| @@ -179,18 +179,18 @@ | |
| 179 | ** (1) -- The content appears to consist entirely of text, with lines |
| 180 | ** delimited by line-feed characters; however, the encoding may |
| 181 | ** not be UTF-8. |
| 182 | ** |
| 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | ** NUL (\000) characters or an extremely long line. Since this |
| 185 | ** function does not understand UTF-16, it may falsely consider |
| 186 | ** UTF-16 text to be binary. |
| 187 | ** |
| 188 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 189 | ** delimited by carriage-return, line-feed pairs; however, the |
| 190 | ** encoding may not be UTF-8. |
| 191 | ** |
| 192 | */ |
| 193 | int looks_like_text(const Blob *pContent){ |
| 194 | const char *z = blob_buffer(pContent); |
| 195 | unsigned int n = blob_size(pContent); |
| 196 | int j, c; |
| @@ -199,17 +199,10 @@ | |
| 199 | /* Check individual lines. |
| 200 | */ |
| 201 | if( n==0 ) return result; /* Empty file -> text */ |
| 202 | c = *z; |
| 203 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 204 | j = (c!='\n'); |
| 205 | while( --n>0 ){ |
| 206 | c = *++z; ++j; |
| 207 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 208 | if( c=='\n' ){ |
| @@ -225,10 +218,28 @@ | |
| 218 | if( j>LENGTH_MASK ){ |
| 219 | return 0; /* Very long line -> binary */ |
| 220 | } |
| 221 | return result; /* No problems seen -> not binary */ |
| 222 | } |
| 223 | |
| 224 | /* |
| 225 | ** This function returns non-zero if the blob starts with a UTF-16le or |
| 226 | ** UTF-16be byte-order-mark (BOM). |
| 227 | */ |
| 228 | int starts_with_utf16_bom(const Blob *pContent){ |
| 229 | const char *z = blob_buffer(pContent); |
| 230 | int c1, c2; |
| 231 | |
| 232 | if( blob_size(pContent)<2 ) return 0; |
| 233 | c1 = z[0]; c2 = z[1]; |
| 234 | if( (c1==(char)0xff) && (c2==(char)0xfe) ){ |
| 235 | return 1; |
| 236 | }else if( (c1==(char)0xff) && (c2==(char)0xfe) ){ |
| 237 | return 1; |
| 238 | } |
| 239 | return 0; |
| 240 | } |
| 241 | |
| 242 | /* |
| 243 | ** Return true if two DLine elements are identical. |
| 244 | */ |
| 245 | static int same_dline(DLine *pA, DLine *pB){ |
| 246 |