Fossil SCM

Refactor commit warning functionality. Break out UTF-16 BOM detection into a new function. Style and comment fixes.

mistachkin 2012-10-30 02:17 UTC trunk
Commit d57f0a9361c0da4ba2ae729f18cbfe856516804d
2 files changed +18 -13 +38 -15
+18 -13
--- src/checkin.c
+++ src/checkin.c
@@ -882,46 +882,51 @@
882882
if( pnFBcard ) *pnFBcard = nFBcard;
883883
}
884884
885885
/*
886886
** Issue a warning and give the user an opportunity to abandon out
887
-** if unicode or a \r\n line ending is seen in a text file.
887
+** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888
+** is seen in a text file.
888889
*/
889
-static void encoding_warning(const Blob *p, int crnlOk, const char *zFilename){
890
- int looksLike; /* return value of looks_like_text() */
890
+static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
891
+ int eType; /* return value of looks_like_text() */
892
+ int fUnicode; /* return value of starts_with_utf16_bom() */
891893
char *zMsg; /* Warning message */
892894
Blob fname; /* Relative pathname of the file */
893895
static int allOk = 0; /* Set to true to disable this routine */
894896
895897
if( allOk ) return;
896
- looksLike = looks_like_text(p);
897
- if( looksLike<0 ){
898
- const char *type;
898
+ eType = looks_like_text(p);
899
+ fUnicode = starts_with_utf16_bom(p);
900
+ if( eType==-1 || fUnicode ){
901
+ const char *zWarning;
899902
Blob ans;
900903
char cReply;
901904
902
- if( looksLike&1 ){
905
+ if( eType==-1 && fUnicode ){
906
+ zWarning = "Unicode and CR/NL line endings";
907
+ }else if( eType==-1 ){
903908
if( crnlOk ){
904
- return; /* We don't want CrLf warnings for this file. */
909
+ return; /* We don't want CR/NL warnings for this file. */
905910
}
906
- type = "CR/NL line endings";
911
+ zWarning = "CR/NL line endings";
907912
}else{
908
- type = "unicode";
913
+ zWarning = "Unicode";
909914
}
910915
file_relative_name(zFilename, &fname, 0);
911916
blob_zero(&ans);
912917
zMsg = mprintf(
913918
"%s contains %s; commit anyhow (a=all/y/N)?",
914
- blob_str(&fname), type);
919
+ blob_str(&fname), zWarning);
915920
prompt_user(zMsg, &ans);
916921
fossil_free(zMsg);
917922
cReply = blob_str(&ans)[0];
918923
if( cReply=='a' || cReply=='A' ){
919924
allOk = 1;
920925
}else if( cReply!='y' && cReply!='Y' ){
921926
fossil_fatal("Abandoning commit due to %s in %s",
922
- type, blob_str(&fname));
927
+ zWarning, blob_str(&fname));
923928
}
924929
blob_reset(&ans);
925930
blob_reset(&fname);
926931
}
927932
}
@@ -1232,11 +1237,11 @@
12321237
/* Instead of file content, put link destination path */
12331238
blob_read_link(&content, zFullname);
12341239
}else{
12351240
blob_read_from_file(&content, zFullname);
12361241
}
1237
- encoding_warning(&content, crnlOk, zFullname);
1242
+ commit_warning(&content, crnlOk, zFullname);
12381243
if( chnged==1 && contains_merge_marker(&content) ){
12391244
Blob fname; /* Relative pathname of the file */
12401245
12411246
nConflict++;
12421247
file_relative_name(zFullname, &fname, 0);
12431248
--- src/checkin.c
+++ src/checkin.c
@@ -882,46 +882,51 @@
882 if( pnFBcard ) *pnFBcard = nFBcard;
883 }
884
885 /*
886 ** Issue a warning and give the user an opportunity to abandon out
887 ** if unicode or a \r\n line ending is seen in a text file.
 
888 */
889 static void encoding_warning(const Blob *p, int crnlOk, const char *zFilename){
890 int looksLike; /* return value of looks_like_text() */
 
891 char *zMsg; /* Warning message */
892 Blob fname; /* Relative pathname of the file */
893 static int allOk = 0; /* Set to true to disable this routine */
894
895 if( allOk ) return;
896 looksLike = looks_like_text(p);
897 if( looksLike<0 ){
898 const char *type;
 
899 Blob ans;
900 char cReply;
901
902 if( looksLike&1 ){
 
 
903 if( crnlOk ){
904 return; /* We don't want CrLf warnings for this file. */
905 }
906 type = "CR/NL line endings";
907 }else{
908 type = "unicode";
909 }
910 file_relative_name(zFilename, &fname, 0);
911 blob_zero(&ans);
912 zMsg = mprintf(
913 "%s contains %s; commit anyhow (a=all/y/N)?",
914 blob_str(&fname), type);
915 prompt_user(zMsg, &ans);
916 fossil_free(zMsg);
917 cReply = blob_str(&ans)[0];
918 if( cReply=='a' || cReply=='A' ){
919 allOk = 1;
920 }else if( cReply!='y' && cReply!='Y' ){
921 fossil_fatal("Abandoning commit due to %s in %s",
922 type, blob_str(&fname));
923 }
924 blob_reset(&ans);
925 blob_reset(&fname);
926 }
927 }
@@ -1232,11 +1237,11 @@
1232 /* Instead of file content, put link destination path */
1233 blob_read_link(&content, zFullname);
1234 }else{
1235 blob_read_from_file(&content, zFullname);
1236 }
1237 encoding_warning(&content, crnlOk, zFullname);
1238 if( chnged==1 && contains_merge_marker(&content) ){
1239 Blob fname; /* Relative pathname of the file */
1240
1241 nConflict++;
1242 file_relative_name(zFullname, &fname, 0);
1243
--- src/checkin.c
+++ src/checkin.c
@@ -882,46 +882,51 @@
882 if( pnFBcard ) *pnFBcard = nFBcard;
883 }
884
885 /*
886 ** Issue a warning and give the user an opportunity to abandon the commit
887 ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending is
888 ** seen in a text file.  A CR/NL-only warning is skipped when crnlOk is set.
889 */
890 static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
891 int eType; /* return value of looks_like_text() */
892 int fUnicode; /* return value of starts_with_utf16_bom() */
893 char *zMsg; /* Warning message */
894 Blob fname; /* Relative pathname of the file */
895 static int allOk = 0; /* Set to true to disable this routine */
896
897 if( allOk ) return; /* User previously answered "all" */
898 eType = looks_like_text(p); /* -1 means text with CR/NL line endings */
899 fUnicode = starts_with_utf16_bom(p); /* non-zero if a UTF-16 BOM leads */
900 if( eType==-1 || fUnicode ){
901 const char *zWarning; /* Description of what was found in the file */
902 Blob ans; /* Raw reply typed by the user */
903 char cReply; /* First character of the reply */
904
905 if( eType==-1 && fUnicode ){
906 zWarning = "Unicode and CR/NL line endings";
907 }else if( eType==-1 ){
908 if( crnlOk ){
909 return; /* We don't want CR/NL warnings for this file. */
910 }
911 zWarning = "CR/NL line endings";
912 }else{
913 zWarning = "Unicode";
914 }
915 file_relative_name(zFilename, &fname, 0);
916 blob_zero(&ans);
917 zMsg = mprintf(
918 "%s contains %s; commit anyhow (a=all/y/N)?",
919 blob_str(&fname), zWarning);
920 prompt_user(zMsg, &ans);
921 fossil_free(zMsg);
922 cReply = blob_str(&ans)[0];
923 if( cReply=='a' || cReply=='A' ){
924 allOk = 1; /* Skip this check on every later call */
925 }else if( cReply!='y' && cReply!='Y' ){
926 fossil_fatal("Abandoning commit due to %s in %s",
927 zWarning, blob_str(&fname)); /* Any reply other than a/A/y/Y */
928 }
929 blob_reset(&ans);
930 blob_reset(&fname);
931 }
932 }
@@ -1232,11 +1237,11 @@
1237 /* Instead of file content, put link destination path */
1238 blob_read_link(&content, zFullname);
1239 }else{
1240 blob_read_from_file(&content, zFullname);
1241 }
1242 commit_warning(&content, crnlOk, zFullname);
1243 if( chnged==1 && contains_merge_marker(&content) ){
1244 Blob fname; /* Relative pathname of the file */
1245
1246 nConflict++;
1247 file_relative_name(zFullname, &fname, 0);
1248
+38 -15
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
4848
"cannot compute difference between binary files\n"
4949
5050
#define DIFF_CANNOT_COMPUTE_SYMLINK \
5151
"cannot compute difference between symlink and regular file\n"
5252
53
-#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
53
+#define looks_like_binary(blob) (looks_like_text((blob)) == 0)
5454
#endif /* INTERFACE */
5555
5656
/*
5757
** Maximum length of a line in a text file. (8192)
5858
*/
@@ -170,41 +170,46 @@
170170
*pnLine = nLine;
171171
return a;
172172
}
173173
174174
/*
175
-** Returns 1, if everything OK
176
-** Returns 0 if the specified content appears to be binary or
177
-** contains a line that is too long
178
-** Returns -1, if the file appears text, but it contains CrLf
179
-** Returns -2, if the file starts with an UTF-16 BOM (le or be)
175
+** This function attempts to scan each logical line within the blob to
176
+** determine the type of content it appears to contain. Possible return
177
+** values are:
178
+**
179
+** (1) -- The content appears to consist entirely of text, with lines
180
+** delimited by line-feed characters; however, the encoding may
181
+** not be UTF-8.
182
+**
183
+** (0) -- The content appears to be binary because it contains embedded
184
+** NUL (\000) characters or an extremely long line. Since this
185
+** function does not understand UTF-16, it may falsely consider
186
+** UTF-16 text to be binary.
187
+**
188
+** (-1) -- The content appears to consist entirely of text, with lines
189
+** delimited by carriage-return, line-feed pairs; however, the
190
+** encoding may not be UTF-8.
191
+**
180192
*/
181193
int looks_like_text(const Blob *pContent){
182194
const char *z = blob_buffer(pContent);
183195
unsigned int n = blob_size(pContent);
184196
int j, c;
185
- int result = 1; /* Assume text with no CrLf */
197
+ int result = 1; /* Assume text with no CR/NL */
186198
187199
/* Check individual lines.
188200
*/
189201
if( n==0 ) return result; /* Empty file -> text */
190202
c = *z;
191203
if( c==0 ) return 0; /* \000 byte in a file -> binary */
192
- if ( n > 1 ){
193
- if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
194
- return -2;
195
- } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
196
- return -2;
197
- }
198
- }
199204
j = (c!='\n');
200205
while( --n>0 ){
201206
c = *++z; ++j;
202207
if( c==0 ) return 0; /* \000 byte in a file -> binary */
203208
if( c=='\n' ){
204209
if( z[-1]=='\r' ){
205
- result = -1; /* Contains CrLf, continue */
210
+ result = -1; /* Contains CR/NL, continue */
206211
}
207212
if( j>LENGTH_MASK ){
208213
return 0; /* Very long line -> binary */
209214
}
210215
j = 0;
@@ -213,10 +218,28 @@
213218
if( j>LENGTH_MASK ){
214219
return 0; /* Very long line -> binary */
215220
}
216221
return result; /* No problems seen -> not binary */
217222
}
223
+
224
+/*
225
+** This function returns non-zero if the blob starts with a UTF-16le or
226
+** UTF-16be byte-order-mark (BOM).
227
+*/
228
+int starts_with_utf16_bom(const Blob *pContent){
229
+ const char *z = blob_buffer(pContent);
230
+ int c1, c2;
231
+
232
+ if( blob_size(pContent)<2 ) return 0;
233
+ c1 = z[0]; c2 = z[1];
234
+ if( (c1==(char)0xff) && (c2==(char)0xfe) ){
235
+ return 1;
236
+ }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
237
+ return 1;
238
+ }
239
+ return 0;
240
+}
218241
219242
/*
220243
** Return true if two DLine elements are identical.
221244
*/
222245
static int same_dline(DLine *pA, DLine *pB){
223246
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file. (8192)
58 */
@@ -170,41 +170,46 @@
170 *pnLine = nLine;
171 return a;
172 }
173
174 /*
175 ** Returns 1, if everything OK
176 ** Returns 0 if the specified content appears to be binary or
177 ** contains a line that is too long
178 ** Returns -1, if the file appears text, but it contains CrLf
179 ** Returns -2, if the file starts with an UTF-16 BOM (le or be)
 
 
 
 
 
 
 
 
 
 
 
 
180 */
181 int looks_like_text(const Blob *pContent){
182 const char *z = blob_buffer(pContent);
183 unsigned int n = blob_size(pContent);
184 int j, c;
185 int result = 1; /* Assume text with no CrLf */
186
187 /* Check individual lines.
188 */
189 if( n==0 ) return result; /* Empty file -> text */
190 c = *z;
191 if( c==0 ) return 0; /* \000 byte in a file -> binary */
192 if ( n > 1 ){
193 if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
194 return -2;
195 } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
196 return -2;
197 }
198 }
199 j = (c!='\n');
200 while( --n>0 ){
201 c = *++z; ++j;
202 if( c==0 ) return 0; /* \000 byte in a file -> binary */
203 if( c=='\n' ){
204 if( z[-1]=='\r' ){
205 result = -1; /* Contains CrLf, continue */
206 }
207 if( j>LENGTH_MASK ){
208 return 0; /* Very long line -> binary */
209 }
210 j = 0;
@@ -213,10 +218,28 @@
213 if( j>LENGTH_MASK ){
214 return 0; /* Very long line -> binary */
215 }
216 return result; /* No problems seen -> not binary */
217 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
219 /*
220 ** Return true if two DLine elements are identical.
221 */
222 static int same_dline(DLine *pA, DLine *pB){
223
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) (looks_like_text((blob)) == 0)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file. (8192)
58 */
@@ -170,41 +170,46 @@
170 *pnLine = nLine;
171 return a;
172 }
173
174 /*
175 ** This function attempts to scan each logical line within the blob to
176 ** determine the type of content it appears to contain. Possible return
177 ** values are:
178 **
179 ** (1) -- The content appears to consist entirely of text, with lines
180 ** delimited by line-feed characters; however, the encoding may
181 ** not be UTF-8.
182 **
183 ** (0) -- The content appears to be binary because it contains embedded
184 ** NUL (\000) characters or an extremely long line. Since this
185 ** function does not understand UTF-16, it may falsely consider
186 ** UTF-16 text to be binary.
187 **
188 ** (-1) -- The content appears to consist entirely of text, with lines
189 ** delimited by carriage-return, line-feed pairs; however, the
190 ** encoding may not be UTF-8.
191 **
192 */
193 int looks_like_text(const Blob *pContent){
194 const char *z = blob_buffer(pContent);
195 unsigned int n = blob_size(pContent);
196 int j, c;
197 int result = 1; /* Assume text with no CR/NL */
198
199 /* Check individual lines.
200 */
201 if( n==0 ) return result; /* Empty file -> text */
202 c = *z;
203 if( c==0 ) return 0; /* \000 byte in a file -> binary */
 
 
 
 
 
 
 
204 j = (c!='\n');
205 while( --n>0 ){
206 c = *++z; ++j;
207 if( c==0 ) return 0; /* \000 byte in a file -> binary */
208 if( c=='\n' ){
209 if( z[-1]=='\r' ){
210 result = -1; /* Contains CR/NL, continue */
211 }
212 if( j>LENGTH_MASK ){
213 return 0; /* Very long line -> binary */
214 }
215 j = 0;
@@ -213,10 +218,28 @@
218 if( j>LENGTH_MASK ){
219 return 0; /* Very long line -> binary */
220 }
221 return result; /* No problems seen -> not binary */
222 }
223
224 /*
225 ** This function returns non-zero if the blob starts with a UTF-16le
226 ** (0xff 0xfe) or UTF-16be (0xfe 0xff) byte-order-mark (BOM).
227 */
228 int starts_with_utf16_bom(const Blob *pContent){
229 const char *z = blob_buffer(pContent);
230 int c1, c2;
231
232 if( blob_size(pContent)<2 ) return 0; /* Too short to hold a BOM */
233 c1 = z[0]; c2 = z[1]; /* Compared against (char) casts: char may be signed */
234 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
235 return 1; /* UTF-16le BOM */
236 }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
237 return 1; /* UTF-16be BOM */
238 }
239 return 0;
240 }
241
242 /*
243 ** Return true if two DLine elements are identical.
244 */
245 static int same_dline(DLine *pA, DLine *pB){
246

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button