Fossil SCM
Refactor commit warning functionality. Break out UTF-16 BOM detection into a new function. Style and comment fixes.
Commit
d57f0a9361c0da4ba2ae729f18cbfe856516804d
Parent
70b4f105ebaab44…
2 files changed
+18
-13
+38
-15
+18
-13
| --- src/checkin.c | ||
| +++ src/checkin.c | ||
| @@ -882,46 +882,51 @@ | ||
| 882 | 882 | if( pnFBcard ) *pnFBcard = nFBcard; |
| 883 | 883 | } |
| 884 | 884 | |
| 885 | 885 | /* |
| 886 | 886 | ** Issue a warning and give the user an opportunity to abandon out |
| 887 | -** if unicode or a \r\n line ending is seen in a text file. | |
| 887 | +** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending | |
| 888 | +** is seen in a text file. | |
| 888 | 889 | */ |
| 889 | -static void encoding_warning(const Blob *p, int crnlOk, const char *zFilename){ | |
| 890 | - int looksLike; /* return value of looks_like_text() */ | |
| 890 | +static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ | |
| 891 | + int eType; /* return value of looks_like_text() */ | |
| 892 | + int fUnicode; /* return value of starts_with_utf16_bom() */ | |
| 891 | 893 | char *zMsg; /* Warning message */ |
| 892 | 894 | Blob fname; /* Relative pathname of the file */ |
| 893 | 895 | static int allOk = 0; /* Set to true to disable this routine */ |
| 894 | 896 | |
| 895 | 897 | if( allOk ) return; |
| 896 | - looksLike = looks_like_text(p); | |
| 897 | - if( looksLike<0 ){ | |
| 898 | - const char *type; | |
| 898 | + eType = looks_like_text(p); | |
| 899 | + fUnicode = starts_with_utf16_bom(p); | |
| 900 | + if( eType==-1 || fUnicode ){ | |
| 901 | + const char *zWarning; | |
| 899 | 902 | Blob ans; |
| 900 | 903 | char cReply; |
| 901 | 904 | |
| 902 | - if( looksLike&1 ){ | |
| 905 | + if( eType==-1 && fUnicode ){ | |
| 906 | + zWarning = "Unicode and CR/NL line endings"; | |
| 907 | + }else if( eType==-1 ){ | |
| 903 | 908 | if( crnlOk ){ |
| 904 | - return; /* We don't want CrLf warnings for this file. */ | |
| 909 | + return; /* We don't want CR/NL warnings for this file. */ | |
| 905 | 910 | } |
| 906 | - type = "CR/NL line endings"; | |
| 911 | + zWarning = "CR/NL line endings"; | |
| 907 | 912 | }else{ |
| 908 | - type = "unicode"; | |
| 913 | + zWarning = "Unicode"; | |
| 909 | 914 | } |
| 910 | 915 | file_relative_name(zFilename, &fname, 0); |
| 911 | 916 | blob_zero(&ans); |
| 912 | 917 | zMsg = mprintf( |
| 913 | 918 | "%s contains %s; commit anyhow (a=all/y/N)?", |
| 914 | - blob_str(&fname), type); | |
| 919 | + blob_str(&fname), zWarning); | |
| 915 | 920 | prompt_user(zMsg, &ans); |
| 916 | 921 | fossil_free(zMsg); |
| 917 | 922 | cReply = blob_str(&ans)[0]; |
| 918 | 923 | if( cReply=='a' || cReply=='A' ){ |
| 919 | 924 | allOk = 1; |
| 920 | 925 | }else if( cReply!='y' && cReply!='Y' ){ |
| 921 | 926 | fossil_fatal("Abandoning commit due to %s in %s", |
| 922 | - type, blob_str(&fname)); | |
| 927 | + zWarning, blob_str(&fname)); | |
| 923 | 928 | } |
| 924 | 929 | blob_reset(&ans); |
| 925 | 930 | blob_reset(&fname); |
| 926 | 931 | } |
| 927 | 932 | } |
| @@ -1232,11 +1237,11 @@ | ||
| 1232 | 1237 | /* Instead of file content, put link destination path */ |
| 1233 | 1238 | blob_read_link(&content, zFullname); |
| 1234 | 1239 | }else{ |
| 1235 | 1240 | blob_read_from_file(&content, zFullname); |
| 1236 | 1241 | } |
| 1237 | - encoding_warning(&content, crnlOk, zFullname); | |
| 1242 | + commit_warning(&content, crnlOk, zFullname); | |
| 1238 | 1243 | if( chnged==1 && contains_merge_marker(&content) ){ |
| 1239 | 1244 | Blob fname; /* Relative pathname of the file */ |
| 1240 | 1245 | |
| 1241 | 1246 | nConflict++; |
| 1242 | 1247 | file_relative_name(zFullname, &fname, 0); |
| 1243 | 1248 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -882,46 +882,51 @@ | |
| 882 | if( pnFBcard ) *pnFBcard = nFBcard; |
| 883 | } |
| 884 | |
| 885 | /* |
| 886 | ** Issue a warning and give the user an opportunity to abandon out |
| 887 | ** if unicode or a \r\n line ending is seen in a text file. |
| 888 | */ |
| 889 | static void encoding_warning(const Blob *p, int crnlOk, const char *zFilename){ |
| 890 | int looksLike; /* return value of looks_like_text() */ |
| 891 | char *zMsg; /* Warning message */ |
| 892 | Blob fname; /* Relative pathname of the file */ |
| 893 | static int allOk = 0; /* Set to true to disable this routine */ |
| 894 | |
| 895 | if( allOk ) return; |
| 896 | looksLike = looks_like_text(p); |
| 897 | if( looksLike<0 ){ |
| 898 | const char *type; |
| 899 | Blob ans; |
| 900 | char cReply; |
| 901 | |
| 902 | if( looksLike&1 ){ |
| 903 | if( crnlOk ){ |
| 904 | return; /* We don't want CrLf warnings for this file. */ |
| 905 | } |
| 906 | type = "CR/NL line endings"; |
| 907 | }else{ |
| 908 | type = "unicode"; |
| 909 | } |
| 910 | file_relative_name(zFilename, &fname, 0); |
| 911 | blob_zero(&ans); |
| 912 | zMsg = mprintf( |
| 913 | "%s contains %s; commit anyhow (a=all/y/N)?", |
| 914 | blob_str(&fname), type); |
| 915 | prompt_user(zMsg, &ans); |
| 916 | fossil_free(zMsg); |
| 917 | cReply = blob_str(&ans)[0]; |
| 918 | if( cReply=='a' || cReply=='A' ){ |
| 919 | allOk = 1; |
| 920 | }else if( cReply!='y' && cReply!='Y' ){ |
| 921 | fossil_fatal("Abandoning commit due to %s in %s", |
| 922 | type, blob_str(&fname)); |
| 923 | } |
| 924 | blob_reset(&ans); |
| 925 | blob_reset(&fname); |
| 926 | } |
| 927 | } |
| @@ -1232,11 +1237,11 @@ | |
| 1232 | /* Instead of file content, put link destination path */ |
| 1233 | blob_read_link(&content, zFullname); |
| 1234 | }else{ |
| 1235 | blob_read_from_file(&content, zFullname); |
| 1236 | } |
| 1237 | encoding_warning(&content, crnlOk, zFullname); |
| 1238 | if( chnged==1 && contains_merge_marker(&content) ){ |
| 1239 | Blob fname; /* Relative pathname of the file */ |
| 1240 | |
| 1241 | nConflict++; |
| 1242 | file_relative_name(zFullname, &fname, 0); |
| 1243 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -882,46 +882,51 @@ | |
| 882 | if( pnFBcard ) *pnFBcard = nFBcard; |
| 883 | } |
| 884 | |
| 885 | /* |
| 886 | ** Issue a warning and give the user an opportunity to abandon out |
| 887 | ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending |
| 888 | ** is seen in a text file. |
| 889 | */ |
| 890 | static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ |
| 891 | int eType; /* return value of looks_like_text() */ |
| 892 | int fUnicode; /* return value of starts_with_utf16_bom() */ |
| 893 | char *zMsg; /* Warning message */ |
| 894 | Blob fname; /* Relative pathname of the file */ |
| 895 | static int allOk = 0; /* Set to true to disable this routine */ |
| 896 | |
| 897 | if( allOk ) return; |
| 898 | eType = looks_like_text(p); |
| 899 | fUnicode = starts_with_utf16_bom(p); |
| 900 | if( eType==-1 || fUnicode ){ |
| 901 | const char *zWarning; |
| 902 | Blob ans; |
| 903 | char cReply; |
| 904 | |
| 905 | if( eType==-1 && fUnicode ){ |
| 906 | zWarning = "Unicode and CR/NL line endings"; |
| 907 | }else if( eType==-1 ){ |
| 908 | if( crnlOk ){ |
| 909 | return; /* We don't want CR/NL warnings for this file. */ |
| 910 | } |
| 911 | zWarning = "CR/NL line endings"; |
| 912 | }else{ |
| 913 | zWarning = "Unicode"; |
| 914 | } |
| 915 | file_relative_name(zFilename, &fname, 0); |
| 916 | blob_zero(&ans); |
| 917 | zMsg = mprintf( |
| 918 | "%s contains %s; commit anyhow (a=all/y/N)?", |
| 919 | blob_str(&fname), zWarning); |
| 920 | prompt_user(zMsg, &ans); |
| 921 | fossil_free(zMsg); |
| 922 | cReply = blob_str(&ans)[0]; |
| 923 | if( cReply=='a' || cReply=='A' ){ |
| 924 | allOk = 1; |
| 925 | }else if( cReply!='y' && cReply!='Y' ){ |
| 926 | fossil_fatal("Abandoning commit due to %s in %s", |
| 927 | zWarning, blob_str(&fname)); |
| 928 | } |
| 929 | blob_reset(&ans); |
| 930 | blob_reset(&fname); |
| 931 | } |
| 932 | } |
| @@ -1232,11 +1237,11 @@ | |
| 1237 | /* Instead of file content, put link destination path */ |
| 1238 | blob_read_link(&content, zFullname); |
| 1239 | }else{ |
| 1240 | blob_read_from_file(&content, zFullname); |
| 1241 | } |
| 1242 | commit_warning(&content, crnlOk, zFullname); |
| 1243 | if( chnged==1 && contains_merge_marker(&content) ){ |
| 1244 | Blob fname; /* Relative pathname of the file */ |
| 1245 | |
| 1246 | nConflict++; |
| 1247 | file_relative_name(zFullname, &fname, 0); |
| 1248 |
+38
-15
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -48,11 +48,11 @@ | ||
| 48 | 48 | "cannot compute difference between binary files\n" |
| 49 | 49 | |
| 50 | 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | 52 | |
| 53 | -#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) | |
| 53 | +#define looks_like_binary(blob) (looks_like_text((blob)) == 0) | |
| 54 | 54 | #endif /* INTERFACE */ |
| 55 | 55 | |
| 56 | 56 | /* |
| 57 | 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | 58 | */ |
| @@ -170,41 +170,46 @@ | ||
| 170 | 170 | *pnLine = nLine; |
| 171 | 171 | return a; |
| 172 | 172 | } |
| 173 | 173 | |
| 174 | 174 | /* |
| 175 | -** Returns 1, if everything OK | |
| 176 | -** Returns 0 if the specified content appears to be binary or | |
| 177 | -** contains a line that is too long | |
| 178 | -** Returns -1, if the file appears text, but it contains CrLf | |
| 179 | -** Returns -2, if the file starts with an UTF-16 BOM (le or be) | |
| 175 | +** This function attempts to scan each logical line within the blob to | |
| 176 | +** determine the type of content it appears to contain. Possible return | |
| 177 | +** values are: | |
| 178 | +** | |
| 179 | +** (1) -- The content appears to consist entirely of text, with lines | |
| 180 | +** delimited by line-feed characters; however, the encoding may | |
| 181 | +** not be UTF-8. | |
| 182 | +** | |
| 183 | +** (0) -- The content appears to be binary because it contains embedded | |
| 184 | +** NUL (\000) characters or an extremely long line. Since this | |
| 185 | +** function does not understand UTF-16, it may falsely consider | |
| 186 | +** UTF-16 text to be binary. | |
| 187 | +** | |
| 188 | +** (-1) -- The content appears to consist entirely of text, with lines | |
| 189 | +** delimited by carriage-return, line-feed pairs; however, the | |
| 190 | +** encoding may not be UTF-8. | |
| 191 | +** | |
| 180 | 192 | */ |
| 181 | 193 | int looks_like_text(const Blob *pContent){ |
| 182 | 194 | const char *z = blob_buffer(pContent); |
| 183 | 195 | unsigned int n = blob_size(pContent); |
| 184 | 196 | int j, c; |
| 185 | - int result = 1; /* Assume text with no CrLf */ | |
| 197 | + int result = 1; /* Assume text with no CR/NL */ | |
| 186 | 198 | |
| 187 | 199 | /* Check individual lines. |
| 188 | 200 | */ |
| 189 | 201 | if( n==0 ) return result; /* Empty file -> text */ |
| 190 | 202 | c = *z; |
| 191 | 203 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 192 | - if ( n > 1 ){ | |
| 193 | - if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){ | |
| 194 | - return -2; | |
| 195 | - } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){ | |
| 196 | - return -2; | |
| 197 | - } | |
| 198 | - } | |
| 199 | 204 | j = (c!='\n'); |
| 200 | 205 | while( --n>0 ){ |
| 201 | 206 | c = *++z; ++j; |
| 202 | 207 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 203 | 208 | if( c=='\n' ){ |
| 204 | 209 | if( z[-1]=='\r' ){ |
| 205 | - result = -1; /* Contains CrLf, continue */ | |
| 210 | + result = -1; /* Contains CR/NL, continue */ | |
| 206 | 211 | } |
| 207 | 212 | if( j>LENGTH_MASK ){ |
| 208 | 213 | return 0; /* Very long line -> binary */ |
| 209 | 214 | } |
| 210 | 215 | j = 0; |
| @@ -213,10 +218,28 @@ | ||
| 213 | 218 | if( j>LENGTH_MASK ){ |
| 214 | 219 | return 0; /* Very long line -> binary */ |
| 215 | 220 | } |
| 216 | 221 | return result; /* No problems seen -> not binary */ |
| 217 | 222 | } |
| 223 | + | |
| 224 | +/* | |
| 225 | +** This function returns non-zero if the blob starts with a UTF-16le or | |
| 226 | +** UTF-16be byte-order-mark (BOM). | |
| 227 | +*/ | |
| 228 | +int starts_with_utf16_bom(const Blob *pContent){ | |
| 229 | + const char *z = blob_buffer(pContent); | |
| 230 | + int c1, c2; | |
| 231 | + | |
| 232 | + if( blob_size(pContent)<2 ) return 0; | |
| 233 | + c1 = z[0]; c2 = z[1]; | |
| 234 | + if( (c1==(char)0xff) && (c2==(char)0xfe) ){ | |
| 235 | + return 1; | |
| 236 | + }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){ | |
| 237 | + return 1; | |
| 238 | + } | |
| 239 | + return 0; | |
| 240 | +} | |
| 218 | 241 | |
| 219 | 242 | /* |
| 220 | 243 | ** Return true if two DLine elements are identical. |
| 221 | 244 | */ |
| 222 | 245 | static int same_dline(DLine *pA, DLine *pB){ |
| 223 | 246 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | */ |
| @@ -170,41 +170,46 @@ | |
| 170 | *pnLine = nLine; |
| 171 | return a; |
| 172 | } |
| 173 | |
| 174 | /* |
| 175 | ** Returns 1, if everything OK |
| 176 | ** Returns 0 if the specified content appears to be binary or |
| 177 | ** contains a line that is too long |
| 178 | ** Returns -1, if the file appears text, but it contains CrLf |
| 179 | ** Returns -2, if the file starts with an UTF-16 BOM (le or be) |
| 180 | */ |
| 181 | int looks_like_text(const Blob *pContent){ |
| 182 | const char *z = blob_buffer(pContent); |
| 183 | unsigned int n = blob_size(pContent); |
| 184 | int j, c; |
| 185 | int result = 1; /* Assume text with no CrLf */ |
| 186 | |
| 187 | /* Check individual lines. |
| 188 | */ |
| 189 | if( n==0 ) return result; /* Empty file -> text */ |
| 190 | c = *z; |
| 191 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 192 | if ( n > 1 ){ |
| 193 | if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){ |
| 194 | return -2; |
| 195 | } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){ |
| 196 | return -2; |
| 197 | } |
| 198 | } |
| 199 | j = (c!='\n'); |
| 200 | while( --n>0 ){ |
| 201 | c = *++z; ++j; |
| 202 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 203 | if( c=='\n' ){ |
| 204 | if( z[-1]=='\r' ){ |
| 205 | result = -1; /* Contains CrLf, continue */ |
| 206 | } |
| 207 | if( j>LENGTH_MASK ){ |
| 208 | return 0; /* Very long line -> binary */ |
| 209 | } |
| 210 | j = 0; |
| @@ -213,10 +218,28 @@ | |
| 213 | if( j>LENGTH_MASK ){ |
| 214 | return 0; /* Very long line -> binary */ |
| 215 | } |
| 216 | return result; /* No problems seen -> not binary */ |
| 217 | } |
| 218 | |
| 219 | /* |
| 220 | ** Return true if two DLine elements are identical. |
| 221 | */ |
| 222 | static int same_dline(DLine *pA, DLine *pB){ |
| 223 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) (looks_like_text((blob)) == 0) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | */ |
| @@ -170,41 +170,46 @@ | |
| 170 | *pnLine = nLine; |
| 171 | return a; |
| 172 | } |
| 173 | |
| 174 | /* |
| 175 | ** This function attempts to scan each logical line within the blob to |
| 176 | ** determine the type of content it appears to contain. Possible return |
| 177 | ** values are: |
| 178 | ** |
| 179 | ** (1) -- The content appears to consist entirely of text, with lines |
| 180 | ** delimited by line-feed characters; however, the encoding may |
| 181 | ** not be UTF-8. |
| 182 | ** |
| 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | ** NUL (\000) characters or an extremely long line. Since this |
| 185 | ** function does not understand UTF-16, it may falsely consider |
| 186 | ** UTF-16 text to be binary. |
| 187 | ** |
| 188 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 189 | ** delimited by carriage-return, line-feed pairs; however, the |
| 190 | ** encoding may not be UTF-8. |
| 191 | ** |
| 192 | */ |
| 193 | int looks_like_text(const Blob *pContent){ |
| 194 | const char *z = blob_buffer(pContent); |
| 195 | unsigned int n = blob_size(pContent); |
| 196 | int j, c; |
| 197 | int result = 1; /* Assume text with no CR/NL */ |
| 198 | |
| 199 | /* Check individual lines. |
| 200 | */ |
| 201 | if( n==0 ) return result; /* Empty file -> text */ |
| 202 | c = *z; |
| 203 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 204 | j = (c!='\n'); |
| 205 | while( --n>0 ){ |
| 206 | c = *++z; ++j; |
| 207 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 208 | if( c=='\n' ){ |
| 209 | if( z[-1]=='\r' ){ |
| 210 | result = -1; /* Contains CR/NL, continue */ |
| 211 | } |
| 212 | if( j>LENGTH_MASK ){ |
| 213 | return 0; /* Very long line -> binary */ |
| 214 | } |
| 215 | j = 0; |
| @@ -213,10 +218,28 @@ | |
| 218 | if( j>LENGTH_MASK ){ |
| 219 | return 0; /* Very long line -> binary */ |
| 220 | } |
| 221 | return result; /* No problems seen -> not binary */ |
| 222 | } |
| 223 | |
| 224 | /* |
| 225 | ** This function returns non-zero if the blob starts with a UTF-16le or |
| 226 | ** UTF-16be byte-order-mark (BOM). |
| 227 | */ |
| 228 | int starts_with_utf16_bom(const Blob *pContent){ |
| 229 | const char *z = blob_buffer(pContent); |
| 230 | int c1, c2; |
| 231 | |
| 232 | if( blob_size(pContent)<2 ) return 0; |
| 233 | c1 = z[0]; c2 = z[1]; |
| 234 | if( (c1==(char)0xff) && (c2==(char)0xfe) ){ |
| 235 | return 1; |
| 236 | }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){ |
| 237 | return 1; |
| 238 | } |
| 239 | return 0; |
| 240 | } |
| 241 | |
| 242 | /* |
| 243 | ** Return true if two DLine elements are identical. |
| 244 | */ |
| 245 | static int same_dline(DLine *pA, DLine *pB){ |
| 246 |