Fossil SCM

Merge commit warning and looks_like_text() enhancements to trunk. Further changes based on these will occur on a branch.

mistachkin 2012-11-01 03:44 trunk merge
Commit 618258421767778c41b643302f73e82954946b89
2 files changed +10 -6 +22 -11
+10 -6
--- src/checkin.c
+++ src/checkin.c
@@ -887,22 +887,26 @@
887887
** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888888
** is seen in a text file.
889889
*/
890890
static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
891891
int eType; /* return value of looks_like_text() */
892
+ int fUnicode; /* return value of starts_with_utf16_bom() */
892893
char *zMsg; /* Warning message */
893894
Blob fname; /* Relative pathname of the file */
894895
static int allOk = 0; /* Set to true to disable this routine */
895896
896897
if( allOk ) return;
897898
eType = looks_like_text(p);
898
- if( eType<0 ){
899
- const char *zWarning ;
899
+ fUnicode = starts_with_utf16_bom(p);
900
+ if( eType==-1 || fUnicode ){
901
+ const char *zWarning;
900902
Blob ans;
901903
char cReply;
902904
903
- if( eType&1 ){
905
+ if( eType==-1 && fUnicode ){
906
+ zWarning = "Unicode and CR/NL line endings";
907
+ }else if( eType==-1 ){
904908
if( crnlOk ){
905909
return; /* We don't want CR/NL warnings for this file. */
906910
}
907911
zWarning = "CR/NL line endings";
908912
}else{
@@ -909,20 +913,20 @@
909913
zWarning = "Unicode";
910914
}
911915
file_relative_name(zFilename, &fname, 0);
912916
blob_zero(&ans);
913917
zMsg = mprintf(
914
- "%s contains %s. commit anyhow (a=all/y/N)? ",
915
- blob_str(&fname), zWarning );
918
+ "%s contains %s; commit anyhow (a=all/y/N)?",
919
+ blob_str(&fname), zWarning);
916920
prompt_user(zMsg, &ans);
917921
fossil_free(zMsg);
918922
cReply = blob_str(&ans)[0];
919923
if( cReply=='a' || cReply=='A' ){
920924
allOk = 1;
921925
}else if( cReply!='y' && cReply!='Y' ){
922926
fossil_fatal("Abandoning commit due to %s in %s",
923
- zWarning , blob_str(&fname));
927
+ zWarning, blob_str(&fname));
924928
}
925929
blob_reset(&ans);
926930
blob_reset(&fname);
927931
}
928932
}
929933
--- src/checkin.c
+++ src/checkin.c
@@ -887,22 +887,26 @@
887 ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888 ** is seen in a text file.
889 */
890 static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
891 int eType; /* return value of looks_like_text() */
 
892 char *zMsg; /* Warning message */
893 Blob fname; /* Relative pathname of the file */
894 static int allOk = 0; /* Set to true to disable this routine */
895
896 if( allOk ) return;
897 eType = looks_like_text(p);
898 if( eType<0 ){
899 const char *zWarning ;
 
900 Blob ans;
901 char cReply;
902
903 if( eType&1 ){
 
 
904 if( crnlOk ){
905 return; /* We don't want CR/NL warnings for this file. */
906 }
907 zWarning = "CR/NL line endings";
908 }else{
@@ -909,20 +913,20 @@
909 zWarning = "Unicode";
910 }
911 file_relative_name(zFilename, &fname, 0);
912 blob_zero(&ans);
913 zMsg = mprintf(
914 "%s contains %s. commit anyhow (a=all/y/N)? ",
915 blob_str(&fname), zWarning );
916 prompt_user(zMsg, &ans);
917 fossil_free(zMsg);
918 cReply = blob_str(&ans)[0];
919 if( cReply=='a' || cReply=='A' ){
920 allOk = 1;
921 }else if( cReply!='y' && cReply!='Y' ){
922 fossil_fatal("Abandoning commit due to %s in %s",
923 zWarning , blob_str(&fname));
924 }
925 blob_reset(&ans);
926 blob_reset(&fname);
927 }
928 }
929
--- src/checkin.c
+++ src/checkin.c
@@ -887,22 +887,26 @@
887 ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888 ** is seen in a text file.
889 */
890 static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
891 int eType; /* return value of looks_like_text() */
892 int fUnicode; /* return value of starts_with_utf16_bom() */
893 char *zMsg; /* Warning message */
894 Blob fname; /* Relative pathname of the file */
895 static int allOk = 0; /* Set to true to disable this routine */
896
897 if( allOk ) return;
898 eType = looks_like_text(p);
899 fUnicode = starts_with_utf16_bom(p);
900 if( eType==-1 || fUnicode ){
901 const char *zWarning;
902 Blob ans;
903 char cReply;
904
905 if( eType==-1 && fUnicode ){
906 zWarning = "Unicode and CR/NL line endings";
907 }else if( eType==-1 ){
908 if( crnlOk ){
909 return; /* We don't want CR/NL warnings for this file. */
910 }
911 zWarning = "CR/NL line endings";
912 }else{
@@ -909,20 +913,20 @@
913 zWarning = "Unicode";
914 }
915 file_relative_name(zFilename, &fname, 0);
916 blob_zero(&ans);
917 zMsg = mprintf(
918 "%s contains %s; commit anyhow (a=all/y/N)?",
919 blob_str(&fname), zWarning);
920 prompt_user(zMsg, &ans);
921 fossil_free(zMsg);
922 cReply = blob_str(&ans)[0];
923 if( cReply=='a' || cReply=='A' ){
924 allOk = 1;
925 }else if( cReply!='y' && cReply!='Y' ){
926 fossil_fatal("Abandoning commit due to %s in %s",
927 zWarning, blob_str(&fname));
928 }
929 blob_reset(&ans);
930 blob_reset(&fname);
931 }
932 }
933
+22 -11
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
4848
"cannot compute difference between binary files\n"
4949
5050
#define DIFF_CANNOT_COMPUTE_SYMLINK \
5151
"cannot compute difference between symlink and regular file\n"
5252
53
-#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
53
+#define looks_like_binary(blob) (looks_like_text((blob)) == 0)
5454
#endif /* INTERFACE */
5555
5656
/*
5757
** Maximum length of a line in a text file. (8192)
5858
*/
@@ -179,18 +179,18 @@
179179
** (1) -- The content appears to consist entirely of text, with lines
180180
** delimited by line-feed characters; however, the encoding may
181181
** not be UTF-8.
182182
**
183183
** (0) -- The content appears to be binary because it contains embedded
184
-** NUL (\000) characters or an extremely long line.
184
+** NUL (\000) characters or an extremely long line. Since this
185
+** function does not understand UTF-16, it may falsely consider
186
+** UTF-16 text to be binary.
185187
**
186188
** (-1) -- The content appears to consist entirely of text, with lines
187189
** delimited by carriage-return, line-feed pairs; however, the
188190
** encoding may not be UTF-8.
189191
**
190
-** (-2) -- The content appears to consist entirely of text, in the
191
-** UTF-16 (BE or LE) encoding.
192192
*/
193193
int looks_like_text(const Blob *pContent){
194194
const char *z = blob_buffer(pContent);
195195
unsigned int n = blob_size(pContent);
196196
int j, c;
@@ -199,17 +199,10 @@
199199
/* Check individual lines.
200200
*/
201201
if( n==0 ) return result; /* Empty file -> text */
202202
c = *z;
203203
if( c==0 ) return 0; /* \000 byte in a file -> binary */
204
- if ( n > 1 ){
205
- if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
206
- return -2;
207
- } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
208
- return -2;
209
- }
210
- }
211204
j = (c!='\n');
212205
while( --n>0 ){
213206
c = *++z; ++j;
214207
if( c==0 ) return 0; /* \000 byte in a file -> binary */
215208
if( c=='\n' ){
@@ -225,10 +218,28 @@
225218
if( j>LENGTH_MASK ){
226219
return 0; /* Very long line -> binary */
227220
}
228221
return result; /* No problems seen -> not binary */
229222
}
223
+
224
+/*
225
+** This function returns non-zero if the blob starts with a UTF-16le or
226
+** UTF-16be byte-order-mark (BOM).
227
+*/
228
+int starts_with_utf16_bom(const Blob *pContent){
229
+ const char *z = blob_buffer(pContent);
230
+ int c1, c2;
231
+
232
+ if( blob_size(pContent)<2 ) return 0;
233
+ c1 = z[0]; c2 = z[1];
234
+ if( (c1==(char)0xff) && (c2==(char)0xfe) ){
235
+ return 1;
236
+ }else if( (c1==(char)0xff) && (c2==(char)0xfe) ){
237
+ return 1;
238
+ }
239
+ return 0;
240
+}
230241
231242
/*
232243
** Return true if two DLine elements are identical.
233244
*/
234245
static int same_dline(DLine *pA, DLine *pB){
235246
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file. (8192)
58 */
@@ -179,18 +179,18 @@
179 ** (1) -- The content appears to consist entirely of text, with lines
180 ** delimited by line-feed characters; however, the encoding may
181 ** not be UTF-8.
182 **
183 ** (0) -- The content appears to be binary because it contains embedded
184 ** NUL (\000) characters or an extremely long line.
 
 
185 **
186 ** (-1) -- The content appears to consist entirely of text, with lines
187 ** delimited by carriage-return, line-feed pairs; however, the
188 ** encoding may not be UTF-8.
189 **
190 ** (-2) -- The content appears to consist entirely of text, in the
191 ** UTF-16 (BE or LE) encoding.
192 */
193 int looks_like_text(const Blob *pContent){
194 const char *z = blob_buffer(pContent);
195 unsigned int n = blob_size(pContent);
196 int j, c;
@@ -199,17 +199,10 @@
199 /* Check individual lines.
200 */
201 if( n==0 ) return result; /* Empty file -> text */
202 c = *z;
203 if( c==0 ) return 0; /* \000 byte in a file -> binary */
204 if ( n > 1 ){
205 if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
206 return -2;
207 } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
208 return -2;
209 }
210 }
211 j = (c!='\n');
212 while( --n>0 ){
213 c = *++z; ++j;
214 if( c==0 ) return 0; /* \000 byte in a file -> binary */
215 if( c=='\n' ){
@@ -225,10 +218,28 @@
225 if( j>LENGTH_MASK ){
226 return 0; /* Very long line -> binary */
227 }
228 return result; /* No problems seen -> not binary */
229 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
231 /*
232 ** Return true if two DLine elements are identical.
233 */
234 static int same_dline(DLine *pA, DLine *pB){
235
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) (looks_like_text((blob)) == 0)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file. (8192)
58 */
@@ -179,18 +179,18 @@
179 ** (1) -- The content appears to consist entirely of text, with lines
180 ** delimited by line-feed characters; however, the encoding may
181 ** not be UTF-8.
182 **
183 ** (0) -- The content appears to be binary because it contains embedded
184 ** NUL (\000) characters or an extremely long line. Since this
185 ** function does not understand UTF-16, it may falsely consider
186 ** UTF-16 text to be binary.
187 **
188 ** (-1) -- The content appears to consist entirely of text, with lines
189 ** delimited by carriage-return, line-feed pairs; however, the
190 ** encoding may not be UTF-8.
191 **
 
 
192 */
193 int looks_like_text(const Blob *pContent){
194 const char *z = blob_buffer(pContent);
195 unsigned int n = blob_size(pContent);
196 int j, c;
@@ -199,17 +199,10 @@
199 /* Check individual lines.
200 */
201 if( n==0 ) return result; /* Empty file -> text */
202 c = *z;
203 if( c==0 ) return 0; /* \000 byte in a file -> binary */
 
 
 
 
 
 
 
204 j = (c!='\n');
205 while( --n>0 ){
206 c = *++z; ++j;
207 if( c==0 ) return 0; /* \000 byte in a file -> binary */
208 if( c=='\n' ){
@@ -225,10 +218,28 @@
218 if( j>LENGTH_MASK ){
219 return 0; /* Very long line -> binary */
220 }
221 return result; /* No problems seen -> not binary */
222 }
223
224 /*
225 ** This function returns non-zero if the blob starts with a UTF-16le or
226 ** UTF-16be byte-order-mark (BOM).
227 */
228 int starts_with_utf16_bom(const Blob *pContent){
229 const char *z = blob_buffer(pContent);
230 int c1, c2;
231
232 if( blob_size(pContent)<2 ) return 0;
233 c1 = z[0]; c2 = z[1];
234 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
235 return 1;
236 }else if( (c1==(char)0xff) && (c2==(char)0xfe) ){
237 return 1;
238 }
239 return 0;
240 }
241
242 /*
243 ** Return true if two DLine elements are identical.
244 */
245 static int same_dline(DLine *pA, DLine *pB){
246

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button