Fossil SCM

Revert check-in [3a74f9fe52], which added a special warning for overly long lines being detected as binary files.

mistachkin 2013-03-05 01:27 trunk merge
Commit ccdd1abba7c752a5d99abe34edc69cb31cf758e9
2 files changed: src/checkin.c (+1 -26), src/diff.c (+17 -24)
+1 -26
--- src/checkin.c
+++ src/checkin.c
@@ -909,35 +909,10 @@
909909
static int allOk = 0; /* Set to true to disable this routine */
910910
911911
if( allOk ) return 0;
912912
fUnicode = starts_with_utf16_bom(p, 0, 0);
913913
eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
914
- if( eType==-4){
915
- const char *zWarning;
916
- const char *zDisable;
917
- Blob ans;
918
- char cReply;
919
-
920
- if (!binOk) {
921
- zWarning = "long lines";
922
- zDisable = "\"binary-glob\" setting";
923
- blob_zero(&ans);
924
- file_relative_name(zFilename, &fname, 0);
925
- zMsg = mprintf(
926
- "%s appears to be text, but contains %s. Use --no-warnings or the"
927
- " %s to disable this warning.\nCommit anyhow (a=all/y/N)? ",
928
- blob_str(&fname), zWarning, zDisable);
929
- prompt_user(zMsg, &ans);
930
- fossil_free(zMsg);
931
- cReply = blob_str(&ans)[0];
932
- if( cReply!='y' && cReply!='Y' ){
933
- fossil_fatal("Abandoning commit due to %s in %s",
934
- zWarning, blob_str(&fname));
935
- }
936
- blob_reset(&ans);
937
- }
938
- }
939914
if( eType==0 || eType==-1 || fUnicode ){
940915
const char *zWarning;
941916
const char *zDisable;
942917
const char *zConvert = "c=convert/";
943918
Blob ans;
@@ -974,11 +949,11 @@
974949
}
975950
file_relative_name(zFilename, &fname, 0);
976951
blob_zero(&ans);
977952
zMsg = mprintf(
978953
"%s contains %s. Use --no-warnings or the %s to disable this warning.\n"
979
- "Commit anyhow (a=all/%sy/N)? ",
954
+ "Commit anyhow (a=all/%sy/N)? ",
980955
blob_str(&fname), zWarning, zDisable, zConvert);
981956
prompt_user(zMsg, &ans);
982957
fossil_free(zMsg);
983958
cReply = blob_str(&ans)[0];
984959
if( cReply=='a' || cReply=='A' ){
985960
--- src/checkin.c
+++ src/checkin.c
@@ -909,35 +909,10 @@
909 static int allOk = 0; /* Set to true to disable this routine */
910
911 if( allOk ) return 0;
912 fUnicode = starts_with_utf16_bom(p, 0, 0);
913 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
914 if( eType==-4){
915 const char *zWarning;
916 const char *zDisable;
917 Blob ans;
918 char cReply;
919
920 if (!binOk) {
921 zWarning = "long lines";
922 zDisable = "\"binary-glob\" setting";
923 blob_zero(&ans);
924 file_relative_name(zFilename, &fname, 0);
925 zMsg = mprintf(
926 "%s appears to be text, but contains %s. Use --no-warnings or the"
927 " %s to disable this warning.\nCommit anyhow (a=all/y/N)? ",
928 blob_str(&fname), zWarning, zDisable);
929 prompt_user(zMsg, &ans);
930 fossil_free(zMsg);
931 cReply = blob_str(&ans)[0];
932 if( cReply!='y' && cReply!='Y' ){
933 fossil_fatal("Abandoning commit due to %s in %s",
934 zWarning, blob_str(&fname));
935 }
936 blob_reset(&ans);
937 }
938 }
939 if( eType==0 || eType==-1 || fUnicode ){
940 const char *zWarning;
941 const char *zDisable;
942 const char *zConvert = "c=convert/";
943 Blob ans;
@@ -974,11 +949,11 @@
974 }
975 file_relative_name(zFilename, &fname, 0);
976 blob_zero(&ans);
977 zMsg = mprintf(
978 "%s contains %s. Use --no-warnings or the %s to disable this warning.\n"
979 "Commit anyhow (a=all/%sy/N)? ",
980 blob_str(&fname), zWarning, zDisable, zConvert);
981 prompt_user(zMsg, &ans);
982 fossil_free(zMsg);
983 cReply = blob_str(&ans)[0];
984 if( cReply=='a' || cReply=='A' ){
985
--- src/checkin.c
+++ src/checkin.c
@@ -909,35 +909,10 @@
909 static int allOk = 0; /* Set to true to disable this routine */
910
911 if( allOk ) return 0;
912 fUnicode = starts_with_utf16_bom(p, 0, 0);
913 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
914 if( eType==0 || eType==-1 || fUnicode ){
915 const char *zWarning;
916 const char *zDisable;
917 const char *zConvert = "c=convert/";
918 Blob ans;
@@ -974,11 +949,11 @@
949 }
950 file_relative_name(zFilename, &fname, 0);
951 blob_zero(&ans);
952 zMsg = mprintf(
953 "%s contains %s. Use --no-warnings or the %s to disable this warning.\n"
954 "Commit anyhow (a=all/%sy/N)? ",
955 blob_str(&fname), zWarning, zDisable, zConvert);
956 prompt_user(zMsg, &ans);
957 fossil_free(zMsg);
958 cReply = blob_str(&ans)[0];
959 if( cReply=='a' || cReply=='A' ){
960
+17 -24
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
5757
"more than 10,000 changes\n"
5858
5959
#define DIFF_TOO_MANY_CHANGES_HTML \
6060
"<p class='generalError'>More than 10,000 changes</p>\n"
6161
62
-#define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0)
62
+#define looks_like_binary(blob) (looks_like_utf8((blob)) == 0)
6363
#endif /* INTERFACE */
6464
6565
/*
6666
** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
6767
*/
@@ -199,14 +199,10 @@
199199
**
200200
** (-1) -- The content appears to consist entirely of text, with lines
201201
** delimited by carriage-return, line-feed pairs; however, the
202202
** encoding may not be UTF-8.
203203
**
204
-** (-4) -- The same as 0, but the determination is based on the fact that
205
-** the blob might be text (any encoding) but it has a line length
206
-** bigger than the diff logic in fossil can handle.
207
-**
208204
************************************ WARNING **********************************
209205
**
210206
** This function does not validate that the blob content is properly formed
211207
** UTF-8. It assumes that all code points are the same size. It does not
212208
** validate any code points. It makes no attempt to detect if any [invalid]
@@ -219,35 +215,36 @@
219215
*/
220216
int looks_like_utf8(const Blob *pContent){
221217
const char *z = blob_buffer(pContent);
222218
unsigned int n = blob_size(pContent);
223219
int j, c;
224
- int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
220
+ int result = 1; /* Assume UTF-8 text with no CR/NL */
225221
226222
/* Check individual lines.
227223
*/
228
- if( n==0 ) return 1; /* Empty file -> text */
224
+ if( n==0 ) return result; /* Empty file -> text */
229225
c = *z;
230226
if( c==0 ) return 0; /* Zero byte in a file -> binary */
231227
j = (c!='\n');
232228
while( --n>0 ){
233229
c = *++z; ++j;
234230
if( c==0 ) return 0; /* Zero byte in a file -> binary */
235231
if( c=='\n' ){
236
- if( z[-1]=='\r' ){
237
- flags |= 2; /* Contains CR/NL, continue */
232
+ int c2 = z[-1];
233
+ if( c2=='\r' ){
234
+ result = -1; /* Contains CR/NL, continue */
238235
}
239236
if( j>LENGTH_MASK ){
240
- flags |= 1; /* Very long line, continue */
237
+ return 0; /* Very long line -> binary */
241238
}
242239
j = 0;
243240
}
244241
}
245
- if( (flags&1) || (j>LENGTH_MASK) ){
246
- return -4; /* Very long line -> binary */
242
+ if( j>LENGTH_MASK ){
243
+ return 0; /* Very long line -> binary */
247244
}
248
- return 1-flags; /* No problems seen -> not binary */
245
+ return result; /* No problems seen -> not binary */
249246
}
250247
251248
/*
252249
** Define the type needed to represent a Unicode (UTF-16) character.
253250
*/
@@ -292,14 +289,10 @@
292289
**
293290
** (-1) -- The content appears to consist entirely of text, with lines
294291
** delimited by carriage-return, line-feed pairs; however, the
295292
** encoding may not be UTF-16.
296293
**
297
-** (-4) -- The same as 0, but the determination is based on the fact that
298
-** the blob might be text (any encoding) but it has a line length
299
-** bigger than the diff logic in fossil can handle.
300
-**
301294
************************************ WARNING **********************************
302295
**
303296
** This function does not validate that the blob content is properly formed
304297
** UTF-16. It assumes that all code points are the same size. It does not
305298
** validate any code points. It makes no attempt to detect if any [invalid]
@@ -312,15 +305,15 @@
312305
*/
313306
int looks_like_utf16(const Blob *pContent){
314307
const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
315308
unsigned int n = blob_size(pContent);
316309
int j, c;
317
- int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
310
+ int result = 1; /* Assume UTF-16 text with no CR/NL */
318311
319312
/* Check individual lines.
320313
*/
321
- if( n==0 ) return 1; /* Empty file -> text */
314
+ if( n==0 ) return result; /* Empty file -> text */
322315
if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
323316
c = *z;
324317
if( c==0 ) return 0; /* NUL character in a file -> binary */
325318
j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
326319
while( (n-=2)>0 ){
@@ -327,22 +320,22 @@
327320
c = *++z; ++j;
328321
if( c==0 ) return 0; /* NUL character in a file -> binary */
329322
if( c==UTF16BE_LF || c==UTF16LE_LF ){
330323
int c2 = z[-1];
331324
if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
332
- flags |= 2; /* Contains CR/NL, continue */
325
+ result = -1; /* Contains CR/NL, continue */
333326
}
334327
if( j>UTF16_LENGTH_MASK ){
335
- flags |= 1; /* Very long line, continue */
328
+ return 0; /* Very long line -> binary */
336329
}
337330
j = 0;
338331
}
339332
}
340
- if( (flags&1) || (j>UTF16_LENGTH_MASK) ){
341
- return -4; /* Very long line -> binary */
333
+ if( j>UTF16_LENGTH_MASK ){
334
+ return 0; /* Very long line -> binary */
342335
}
343
- return 1-flags; /* No problems seen -> not binary */
336
+ return result; /* No problems seen -> not binary */
344337
}
345338
346339
/*
347340
** This function returns an array of bytes representing the byte-order-mark
348341
** for UTF-8.
349342
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
57 "more than 10,000 changes\n"
58
59 #define DIFF_TOO_MANY_CHANGES_HTML \
60 "<p class='generalError'>More than 10,000 changes</p>\n"
61
62 #define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0)
63 #endif /* INTERFACE */
64
65 /*
66 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67 */
@@ -199,14 +199,10 @@
199 **
200 ** (-1) -- The content appears to consist entirely of text, with lines
201 ** delimited by carriage-return, line-feed pairs; however, the
202 ** encoding may not be UTF-8.
203 **
204 ** (-4) -- The same as 0, but the determination is based on the fact that
205 ** the blob might be text (any encoding) but it has a line length
206 ** bigger than the diff logic in fossil can handle.
207 **
208 ************************************ WARNING **********************************
209 **
210 ** This function does not validate that the blob content is properly formed
211 ** UTF-8. It assumes that all code points are the same size. It does not
212 ** validate any code points. It makes no attempt to detect if any [invalid]
@@ -219,35 +215,36 @@
219 */
220 int looks_like_utf8(const Blob *pContent){
221 const char *z = blob_buffer(pContent);
222 unsigned int n = blob_size(pContent);
223 int j, c;
224 int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
225
226 /* Check individual lines.
227 */
228 if( n==0 ) return 1; /* Empty file -> text */
229 c = *z;
230 if( c==0 ) return 0; /* Zero byte in a file -> binary */
231 j = (c!='\n');
232 while( --n>0 ){
233 c = *++z; ++j;
234 if( c==0 ) return 0; /* Zero byte in a file -> binary */
235 if( c=='\n' ){
236 if( z[-1]=='\r' ){
237 flags |= 2; /* Contains CR/NL, continue */
 
238 }
239 if( j>LENGTH_MASK ){
240 flags |= 1; /* Very long line, continue */
241 }
242 j = 0;
243 }
244 }
245 if( (flags&1) || (j>LENGTH_MASK) ){
246 return -4; /* Very long line -> binary */
247 }
248 return 1-flags; /* No problems seen -> not binary */
249 }
250
251 /*
252 ** Define the type needed to represent a Unicode (UTF-16) character.
253 */
@@ -292,14 +289,10 @@
292 **
293 ** (-1) -- The content appears to consist entirely of text, with lines
294 ** delimited by carriage-return, line-feed pairs; however, the
295 ** encoding may not be UTF-16.
296 **
297 ** (-4) -- The same as 0, but the determination is based on the fact that
298 ** the blob might be text (any encoding) but it has a line length
299 ** bigger than the diff logic in fossil can handle.
300 **
301 ************************************ WARNING **********************************
302 **
303 ** This function does not validate that the blob content is properly formed
304 ** UTF-16. It assumes that all code points are the same size. It does not
305 ** validate any code points. It makes no attempt to detect if any [invalid]
@@ -312,15 +305,15 @@
312 */
313 int looks_like_utf16(const Blob *pContent){
314 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
315 unsigned int n = blob_size(pContent);
316 int j, c;
317 int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
318
319 /* Check individual lines.
320 */
321 if( n==0 ) return 1; /* Empty file -> text */
322 if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
323 c = *z;
324 if( c==0 ) return 0; /* NUL character in a file -> binary */
325 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
326 while( (n-=2)>0 ){
@@ -327,22 +320,22 @@
327 c = *++z; ++j;
328 if( c==0 ) return 0; /* NUL character in a file -> binary */
329 if( c==UTF16BE_LF || c==UTF16LE_LF ){
330 int c2 = z[-1];
331 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
332 flags |= 2; /* Contains CR/NL, continue */
333 }
334 if( j>UTF16_LENGTH_MASK ){
335 flags |= 1; /* Very long line, continue */
336 }
337 j = 0;
338 }
339 }
340 if( (flags&1) || (j>UTF16_LENGTH_MASK) ){
341 return -4; /* Very long line -> binary */
342 }
343 return 1-flags; /* No problems seen -> not binary */
344 }
345
346 /*
347 ** This function returns an array of bytes representing the byte-order-mark
348 ** for UTF-8.
349
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
57 "more than 10,000 changes\n"
58
59 #define DIFF_TOO_MANY_CHANGES_HTML \
60 "<p class='generalError'>More than 10,000 changes</p>\n"
61
62 #define looks_like_binary(blob) (looks_like_utf8((blob)) == 0)
63 #endif /* INTERFACE */
64
65 /*
66 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67 */
@@ -199,14 +199,10 @@
199 **
200 ** (-1) -- The content appears to consist entirely of text, with lines
201 ** delimited by carriage-return, line-feed pairs; however, the
202 ** encoding may not be UTF-8.
203 **
 
 
 
 
204 ************************************ WARNING **********************************
205 **
206 ** This function does not validate that the blob content is properly formed
207 ** UTF-8. It assumes that all code points are the same size. It does not
208 ** validate any code points. It makes no attempt to detect if any [invalid]
@@ -219,35 +215,36 @@
215 */
216 int looks_like_utf8(const Blob *pContent){
217 const char *z = blob_buffer(pContent);
218 unsigned int n = blob_size(pContent);
219 int j, c;
220 int result = 1; /* Assume UTF-8 text with no CR/NL */
221
222 /* Check individual lines.
223 */
224 if( n==0 ) return result; /* Empty file -> text */
225 c = *z;
226 if( c==0 ) return 0; /* Zero byte in a file -> binary */
227 j = (c!='\n');
228 while( --n>0 ){
229 c = *++z; ++j;
230 if( c==0 ) return 0; /* Zero byte in a file -> binary */
231 if( c=='\n' ){
232 int c2 = z[-1];
233 if( c2=='\r' ){
234 result = -1; /* Contains CR/NL, continue */
235 }
236 if( j>LENGTH_MASK ){
237 return 0; /* Very long line -> binary */
238 }
239 j = 0;
240 }
241 }
242 if( j>LENGTH_MASK ){
243 return 0; /* Very long line -> binary */
244 }
245 return result; /* No problems seen -> not binary */
246 }
247
248 /*
249 ** Define the type needed to represent a Unicode (UTF-16) character.
250 */
@@ -292,14 +289,10 @@
289 **
290 ** (-1) -- The content appears to consist entirely of text, with lines
291 ** delimited by carriage-return, line-feed pairs; however, the
292 ** encoding may not be UTF-16.
293 **
 
 
 
 
294 ************************************ WARNING **********************************
295 **
296 ** This function does not validate that the blob content is properly formed
297 ** UTF-16. It assumes that all code points are the same size. It does not
298 ** validate any code points. It makes no attempt to detect if any [invalid]
@@ -312,15 +305,15 @@
305 */
306 int looks_like_utf16(const Blob *pContent){
307 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
308 unsigned int n = blob_size(pContent);
309 int j, c;
310 int result = 1; /* Assume UTF-16 text with no CR/NL */
311
312 /* Check individual lines.
313 */
314 if( n==0 ) return result; /* Empty file -> text */
315 if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
316 c = *z;
317 if( c==0 ) return 0; /* NUL character in a file -> binary */
318 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
319 while( (n-=2)>0 ){
@@ -327,22 +320,22 @@
320 c = *++z; ++j;
321 if( c==0 ) return 0; /* NUL character in a file -> binary */
322 if( c==UTF16BE_LF || c==UTF16LE_LF ){
323 int c2 = z[-1];
324 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
325 result = -1; /* Contains CR/NL, continue */
326 }
327 if( j>UTF16_LENGTH_MASK ){
328 return 0; /* Very long line -> binary */
329 }
330 j = 0;
331 }
332 }
333 if( j>UTF16_LENGTH_MASK ){
334 return 0; /* Very long line -> binary */
335 }
336 return result; /* No problems seen -> not binary */
337 }
338
339 /*
340 ** This function returns an array of bytes representing the byte-order-mark
341 ** for UTF-8.
342

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button