Fossil SCM

Don't trigger the long-lines warning if the long line is followed by a NUL byte: in that case the file is simply an ordinary binary file. Rewrite looks_like_utf8/16 to handle CR/LF the same way as long lines (thanks, Joe, for the long-lines rewrite!).

jan.nijtmans 2013-03-05 09:24 trunk
Commit ab2920c2b9478b41563d2747e960a02b53c62f59
2 files changed +7 -6 +33 -33
+7 -6
--- src/checkin.c
+++ src/checkin.c
@@ -902,33 +902,34 @@
902902
int encodingOk, /* Non-zero if encoding warnings should be disabled. */
903903
const char *zFilename /* The full name of the file being committed. */
904904
){
905905
int eType; /* return value of looks_like_utf8/utf16() */
906906
int fUnicode; /* return value of starts_with_utf16_bom() */
907
- int longLine; /* non-zero if blob has "long lines" */
907
+ int longLine = 0; /* non-zero if blob has "long lines" */
908
+ int crlf = 0; /* non-zero if blob has "crlf" */
908909
char *zMsg; /* Warning message */
909910
Blob fname; /* Relative pathname of the file */
910911
static int allOk = 0; /* Set to true to disable this routine */
911912
912913
if( allOk ) return 0;
913914
fUnicode = starts_with_utf16_bom(p, 0, 0);
914
- eType = fUnicode ? looks_like_utf16(p, &longLine) :
915
- looks_like_utf8(p, &longLine);
916
- if( eType==0 || eType==-1 || fUnicode ){
915
+ eType = fUnicode ? looks_like_utf16(p, &longLine, &crlf) :
916
+ looks_like_utf8(p, &longLine, &crlf);
917
+ if( eType==0 || crlf || fUnicode ){
917918
const char *zWarning;
918919
const char *zDisable;
919920
const char *zConvert = "c=convert/";
920921
Blob ans;
921922
char cReply;
922923
923
- if( eType==-1 && fUnicode ){
924
+ if( crlf && fUnicode ){
924925
if ( crnlOk && encodingOk ){
925926
return 0; /* We don't want CR/NL and Unicode warnings for this file. */
926927
}
927928
zWarning = "CR/NL line endings and Unicode";
928929
zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
929
- }else if( eType==-1 ){
930
+ }else if( crlf ){
930931
if( crnlOk ){
931932
return 0; /* We don't want CR/NL warnings for this file. */
932933
}
933934
zWarning = "CR/NL line endings";
934935
zDisable = "\"crnl-glob\" setting";
935936
--- src/checkin.c
+++ src/checkin.c
@@ -902,33 +902,34 @@
902 int encodingOk, /* Non-zero if encoding warnings should be disabled. */
903 const char *zFilename /* The full name of the file being committed. */
904 ){
905 int eType; /* return value of looks_like_utf8/utf16() */
906 int fUnicode; /* return value of starts_with_utf16_bom() */
907 int longLine; /* non-zero if blob has "long lines" */
 
908 char *zMsg; /* Warning message */
909 Blob fname; /* Relative pathname of the file */
910 static int allOk = 0; /* Set to true to disable this routine */
911
912 if( allOk ) return 0;
913 fUnicode = starts_with_utf16_bom(p, 0, 0);
914 eType = fUnicode ? looks_like_utf16(p, &longLine) :
915 looks_like_utf8(p, &longLine);
916 if( eType==0 || eType==-1 || fUnicode ){
917 const char *zWarning;
918 const char *zDisable;
919 const char *zConvert = "c=convert/";
920 Blob ans;
921 char cReply;
922
923 if( eType==-1 && fUnicode ){
924 if ( crnlOk && encodingOk ){
925 return 0; /* We don't want CR/NL and Unicode warnings for this file. */
926 }
927 zWarning = "CR/NL line endings and Unicode";
928 zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
929 }else if( eType==-1 ){
930 if( crnlOk ){
931 return 0; /* We don't want CR/NL warnings for this file. */
932 }
933 zWarning = "CR/NL line endings";
934 zDisable = "\"crnl-glob\" setting";
935
--- src/checkin.c
+++ src/checkin.c
@@ -902,33 +902,34 @@
902 int encodingOk, /* Non-zero if encoding warnings should be disabled. */
903 const char *zFilename /* The full name of the file being committed. */
904 ){
905 int eType; /* return value of looks_like_utf8/utf16() */
906 int fUnicode; /* return value of starts_with_utf16_bom() */
907 int longLine = 0; /* non-zero if blob has "long lines" */
908 int crlf = 0; /* non-zero if blob has "crlf" */
909 char *zMsg; /* Warning message */
910 Blob fname; /* Relative pathname of the file */
911 static int allOk = 0; /* Set to true to disable this routine */
912
913 if( allOk ) return 0;
914 fUnicode = starts_with_utf16_bom(p, 0, 0);
915 eType = fUnicode ? looks_like_utf16(p, &longLine, &crlf) :
916 looks_like_utf8(p, &longLine, &crlf);
917 if( eType==0 || crlf || fUnicode ){
918 const char *zWarning;
919 const char *zDisable;
920 const char *zConvert = "c=convert/";
921 Blob ans;
922 char cReply;
923
924 if( crlf && fUnicode ){
925 if ( crnlOk && encodingOk ){
926 return 0; /* We don't want CR/NL and Unicode warnings for this file. */
927 }
928 zWarning = "CR/NL line endings and Unicode";
929 zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
930 }else if( crlf ){
931 if( crnlOk ){
932 return 0; /* We don't want CR/NL warnings for this file. */
933 }
934 zWarning = "CR/NL line endings";
935 zDisable = "\"crnl-glob\" setting";
936
+33 -33
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
5757
"more than 10,000 changes\n"
5858
5959
#define DIFF_TOO_MANY_CHANGES_HTML \
6060
"<p class='generalError'>More than 10,000 changes</p>\n"
6161
62
-#define looks_like_binary(blob) (looks_like_utf8((blob), 0) == 0)
62
+#define looks_like_binary(blob) (looks_like_utf8((blob), 0, 0) != 1)
6363
#endif /* INTERFACE */
6464
6565
/*
6666
** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
6767
*/
@@ -186,68 +186,68 @@
186186
/*
187187
** This function attempts to scan each logical line within the blob to
188188
** determine the type of content it appears to contain. Possible return
189189
** values are:
190190
**
191
-** (1) -- The content appears to consist entirely of text, with lines
192
-** delimited by line-feed characters; however, the encoding may
193
-** not be UTF-8.
191
+** (1) -- The content appears to consist entirely of text;
192
+** however, the encoding may not be UTF-8.
194193
**
195194
** (0) -- The content appears to be binary because it contains embedded
196195
** NUL characters or an extremely long line. Since this function
197196
** does not understand UTF-16, it may falsely consider UTF-16 text
198197
** to be binary.
199198
**
200
-** (-1) -- The content appears to consist entirely of text, with lines
201
-** delimited by carriage-return, line-feed pairs; however, the
202
-** encoding may not be UTF-8.
203
-**
204199
************************************ WARNING **********************************
205200
**
206201
** This function does not validate that the blob content is properly formed
207202
** UTF-8. It assumes that all code points are the same size. It does not
208203
** validate any code points. It makes no attempt to detect if any [invalid]
209204
** switches between UTF-8 and other encodings occur.
210205
**
211206
** The only code points that this function cares about are the NUL character,
212207
** carriage-return, and line-feed.
208
+**
209
+** If pbLongLine is not NULL and the blob is detected as being binary only because
210
+** of long lines, the integer pointed to is set to 1. Otherwise, it is left as is.
211
+** If pbCrlf is not NULL and the blob contains crlf, the integer pointed
212
+** to is set to 1. Otherwise, it is left as is.
213213
**
214214
************************************ WARNING **********************************
215215
*/
216
-int looks_like_utf8(const Blob *pContent, int *pbLongLine){
216
+int looks_like_utf8(const Blob *pContent, int *pbLongLine, int *pbCrlf){
217217
const char *z = blob_buffer(pContent);
218218
unsigned int n = blob_size(pContent);
219219
int j, c;
220
- int result = 1; /* Assume UTF-8 text with no CR/NL */
220
+ int crlf = 0;
221
+ int longline = 0;
221222
222223
/* Check individual lines.
223224
*/
224
- if( pbLongLine ) *pbLongLine = 0;
225
- if( n==0 ) return result; /* Empty file -> text */
225
+ if( n==0 ) return 1; /* Empty file -> text */
226226
c = *z;
227227
if( c==0 ) return 0; /* Zero byte in a file -> binary */
228228
j = (c!='\n');
229229
while( --n>0 ){
230230
c = *++z; ++j;
231231
if( c==0 ) return 0; /* Zero byte in a file -> binary */
232232
if( c=='\n' ){
233233
int c2 = z[-1];
234234
if( c2=='\r' ){
235
- result = -1; /* Contains CR/NL, continue */
235
+ crlf = 1; /* Contains CR/NL, continue */
236236
}
237237
if( j>LENGTH_MASK ){
238
- if( pbLongLine ) *pbLongLine = 1;
239
- return 0; /* Very long line -> binary */
238
+ longline = 1; /* Contains long line, continue */
240239
}
241240
j = 0;
242241
}
243242
}
244
- if( j>LENGTH_MASK ){
243
+ if( longline || (j>LENGTH_MASK) ){
245244
if( pbLongLine ) *pbLongLine = 1;
246245
return 0; /* Very long line -> binary */
247246
}
248
- return result; /* No problems seen -> not binary */
247
+ if( pbCrlf && crlf) *pbCrlf = 1;
248
+ return 1; /* No problems seen -> not binary */
249249
}
250250
251251
/*
252252
** Define the type needed to represent a Unicode (UTF-16) character.
253253
*/
@@ -279,45 +279,45 @@
279279
/*
280280
** This function attempts to scan each logical line within the blob to
281281
** determine the type of content it appears to contain. Possible return
282282
** values are:
283283
**
284
-** (1) -- The content appears to consist entirely of text, with lines
285
-** delimited by line-feed characters; however, the encoding may
286
-** not be UTF-16.
284
+** (1) -- The content appears to consist entirely of text;
285
+** however, the encoding may not be UTF-16.
287286
**
288287
** (0) -- The content appears to be binary because it contains embedded
289288
** NUL characters or an extremely long line. Since this function
290289
** does not understand UTF-8, it may falsely consider UTF-8 text
291290
** to be binary.
292291
**
293
-** (-1) -- The content appears to consist entirely of text, with lines
294
-** delimited by carriage-return, line-feed pairs; however, the
295
-** encoding may not be UTF-16.
296
-**
297292
************************************ WARNING **********************************
298293
**
299294
** This function does not validate that the blob content is properly formed
300295
** UTF-16. It assumes that all code points are the same size. It does not
301296
** validate any code points. It makes no attempt to detect if any [invalid]
302297
** switches between the UTF-16be and UTF-16le encodings occur.
303298
**
304299
** The only code points that this function cares about are the NUL character,
305300
** carriage-return, and line-feed.
301
+**
302
+** If pbLongLine is not NULL and the blob is detected as being binary only because
303
+** of long lines, the integer pointed to is set to 1. Otherwise, it is left as is.
304
+** If pbCrlf is not NULL and the blob contains crlf, the integer pointed
305
+** to is set to 1. Otherwise, it is left as is.
306306
**
307307
************************************ WARNING **********************************
308308
*/
309
-int looks_like_utf16(const Blob *pContent, int *pbLongLine){
309
+int looks_like_utf16(const Blob *pContent, int *pbLongLine, int *pbCrlf){
310310
const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
311311
unsigned int n = blob_size(pContent);
312312
int j, c;
313
- int result = 1; /* Assume UTF-16 text with no CR/NL */
313
+ int crlf = 0;
314
+ int longline = 0;
314315
315316
/* Check individual lines.
316317
*/
317
- if( pbLongLine ) *pbLongLine = 0;
318
- if( n==0 ) return result; /* Empty file -> text */
318
+ if( n==0 ) return 1; /* Empty file -> text */
319319
if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
320320
c = *z;
321321
if( c==0 ) return 0; /* NUL character in a file -> binary */
322322
j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
323323
while( (n-=2)>0 ){
@@ -324,24 +324,24 @@
324324
c = *++z; ++j;
325325
if( c==0 ) return 0; /* NUL character in a file -> binary */
326326
if( c==UTF16BE_LF || c==UTF16LE_LF ){
327327
int c2 = z[-1];
328328
if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
329
- result = -1; /* Contains CR/NL, continue */
329
+ crlf = 1; /* Contains CR/NL, continue */
330330
}
331331
if( j>UTF16_LENGTH_MASK ){
332
- if( pbLongLine ) *pbLongLine = 1;
333
- return 0; /* Very long line -> binary */
332
+ longline = 1; /* Contains long line, continue */
334333
}
335334
j = 0;
336335
}
337336
}
338
- if( j>UTF16_LENGTH_MASK ){
337
+ if( longline || j>UTF16_LENGTH_MASK ){
339338
if( pbLongLine ) *pbLongLine = 1;
340339
return 0; /* Very long line -> binary */
341340
}
342
- return result; /* No problems seen -> not binary */
341
+ if( pbCrlf ) *pbCrlf = crlf;
342
+ return 1; /* No problems seen -> not binary */
343343
}
344344
345345
/*
346346
** This function returns an array of bytes representing the byte-order-mark
347347
** for UTF-8.
348348
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
57 "more than 10,000 changes\n"
58
59 #define DIFF_TOO_MANY_CHANGES_HTML \
60 "<p class='generalError'>More than 10,000 changes</p>\n"
61
62 #define looks_like_binary(blob) (looks_like_utf8((blob), 0) == 0)
63 #endif /* INTERFACE */
64
65 /*
66 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67 */
@@ -186,68 +186,68 @@
186 /*
187 ** This function attempts to scan each logical line within the blob to
188 ** determine the type of content it appears to contain. Possible return
189 ** values are:
190 **
191 ** (1) -- The content appears to consist entirely of text, with lines
192 ** delimited by line-feed characters; however, the encoding may
193 ** not be UTF-8.
194 **
195 ** (0) -- The content appears to be binary because it contains embedded
196 ** NUL characters or an extremely long line. Since this function
197 ** does not understand UTF-16, it may falsely consider UTF-16 text
198 ** to be binary.
199 **
200 ** (-1) -- The content appears to consist entirely of text, with lines
201 ** delimited by carriage-return, line-feed pairs; however, the
202 ** encoding may not be UTF-8.
203 **
204 ************************************ WARNING **********************************
205 **
206 ** This function does not validate that the blob content is properly formed
207 ** UTF-8. It assumes that all code points are the same size. It does not
208 ** validate any code points. It makes no attempt to detect if any [invalid]
209 ** switches between UTF-8 and other encodings occur.
210 **
211 ** The only code points that this function cares about are the NUL character,
212 ** carriage-return, and line-feed.
 
 
 
 
 
213 **
214 ************************************ WARNING **********************************
215 */
216 int looks_like_utf8(const Blob *pContent, int *pbLongLine){
217 const char *z = blob_buffer(pContent);
218 unsigned int n = blob_size(pContent);
219 int j, c;
220 int result = 1; /* Assume UTF-8 text with no CR/NL */
 
221
222 /* Check individual lines.
223 */
224 if( pbLongLine ) *pbLongLine = 0;
225 if( n==0 ) return result; /* Empty file -> text */
226 c = *z;
227 if( c==0 ) return 0; /* Zero byte in a file -> binary */
228 j = (c!='\n');
229 while( --n>0 ){
230 c = *++z; ++j;
231 if( c==0 ) return 0; /* Zero byte in a file -> binary */
232 if( c=='\n' ){
233 int c2 = z[-1];
234 if( c2=='\r' ){
235 result = -1; /* Contains CR/NL, continue */
236 }
237 if( j>LENGTH_MASK ){
238 if( pbLongLine ) *pbLongLine = 1;
239 return 0; /* Very long line -> binary */
240 }
241 j = 0;
242 }
243 }
244 if( j>LENGTH_MASK ){
245 if( pbLongLine ) *pbLongLine = 1;
246 return 0; /* Very long line -> binary */
247 }
248 return result; /* No problems seen -> not binary */
 
249 }
250
251 /*
252 ** Define the type needed to represent a Unicode (UTF-16) character.
253 */
@@ -279,45 +279,45 @@
279 /*
280 ** This function attempts to scan each logical line within the blob to
281 ** determine the type of content it appears to contain. Possible return
282 ** values are:
283 **
284 ** (1) -- The content appears to consist entirely of text, with lines
285 ** delimited by line-feed characters; however, the encoding may
286 ** not be UTF-16.
287 **
288 ** (0) -- The content appears to be binary because it contains embedded
289 ** NUL characters or an extremely long line. Since this function
290 ** does not understand UTF-8, it may falsely consider UTF-8 text
291 ** to be binary.
292 **
293 ** (-1) -- The content appears to consist entirely of text, with lines
294 ** delimited by carriage-return, line-feed pairs; however, the
295 ** encoding may not be UTF-16.
296 **
297 ************************************ WARNING **********************************
298 **
299 ** This function does not validate that the blob content is properly formed
300 ** UTF-16. It assumes that all code points are the same size. It does not
301 ** validate any code points. It makes no attempt to detect if any [invalid]
302 ** switches between the UTF-16be and UTF-16le encodings occur.
303 **
304 ** The only code points that this function cares about are the NUL character,
305 ** carriage-return, and line-feed.
 
 
 
 
 
306 **
307 ************************************ WARNING **********************************
308 */
309 int looks_like_utf16(const Blob *pContent, int *pbLongLine){
310 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
311 unsigned int n = blob_size(pContent);
312 int j, c;
313 int result = 1; /* Assume UTF-16 text with no CR/NL */
 
314
315 /* Check individual lines.
316 */
317 if( pbLongLine ) *pbLongLine = 0;
318 if( n==0 ) return result; /* Empty file -> text */
319 if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
320 c = *z;
321 if( c==0 ) return 0; /* NUL character in a file -> binary */
322 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
323 while( (n-=2)>0 ){
@@ -324,24 +324,24 @@
324 c = *++z; ++j;
325 if( c==0 ) return 0; /* NUL character in a file -> binary */
326 if( c==UTF16BE_LF || c==UTF16LE_LF ){
327 int c2 = z[-1];
328 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
329 result = -1; /* Contains CR/NL, continue */
330 }
331 if( j>UTF16_LENGTH_MASK ){
332 if( pbLongLine ) *pbLongLine = 1;
333 return 0; /* Very long line -> binary */
334 }
335 j = 0;
336 }
337 }
338 if( j>UTF16_LENGTH_MASK ){
339 if( pbLongLine ) *pbLongLine = 1;
340 return 0; /* Very long line -> binary */
341 }
342 return result; /* No problems seen -> not binary */
 
343 }
344
345 /*
346 ** This function returns an array of bytes representing the byte-order-mark
347 ** for UTF-8.
348
--- src/diff.c
+++ src/diff.c
@@ -57,11 +57,11 @@
57 "more than 10,000 changes\n"
58
59 #define DIFF_TOO_MANY_CHANGES_HTML \
60 "<p class='generalError'>More than 10,000 changes</p>\n"
61
62 #define looks_like_binary(blob) (looks_like_utf8((blob), 0, 0) != 1)
63 #endif /* INTERFACE */
64
65 /*
66 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67 */
@@ -186,68 +186,68 @@
186 /*
187 ** This function attempts to scan each logical line within the blob to
188 ** determine the type of content it appears to contain. Possible return
189 ** values are:
190 **
191 ** (1) -- The content appears to consist entirely of text;
192 ** however, the encoding may not be UTF-8.
 
193 **
194 ** (0) -- The content appears to be binary because it contains embedded
195 ** NUL characters or an extremely long line. Since this function
196 ** does not understand UTF-16, it may falsely consider UTF-16 text
197 ** to be binary.
198 **
 
 
 
 
199 ************************************ WARNING **********************************
200 **
201 ** This function does not validate that the blob content is properly formed
202 ** UTF-8. It assumes that all code points are the same size. It does not
203 ** validate any code points. It makes no attempt to detect if any [invalid]
204 ** switches between UTF-8 and other encodings occur.
205 **
206 ** The only code points that this function cares about are the NUL character,
207 ** carriage-return, and line-feed.
208 **
209 ** If pbLongLine is not NULL and the blob is detected as being binary only because
210 ** of long lines, the integer pointed to is set to 1. Otherwise, it is left as is.
211 ** If pbCrlf is not NULL and the blob contains crlf, the integer pointed
212 ** to is set to 1. Otherwise, it is left as is.
213 **
214 ************************************ WARNING **********************************
215 */
216 int looks_like_utf8(const Blob *pContent, int *pbLongLine, int *pbCrlf){
217 const char *z = blob_buffer(pContent);
218 unsigned int n = blob_size(pContent);
219 int j, c;
220 int crlf = 0;
221 int longline = 0;
222
223 /* Check individual lines.
224 */
225 if( n==0 ) return 1; /* Empty file -> text */
 
226 c = *z;
227 if( c==0 ) return 0; /* Zero byte in a file -> binary */
228 j = (c!='\n');
229 while( --n>0 ){
230 c = *++z; ++j;
231 if( c==0 ) return 0; /* Zero byte in a file -> binary */
232 if( c=='\n' ){
233 int c2 = z[-1];
234 if( c2=='\r' ){
235 crlf = 1; /* Contains CR/NL, continue */
236 }
237 if( j>LENGTH_MASK ){
238 longline = 1; /* Contains long line, continue */
 
239 }
240 j = 0;
241 }
242 }
243 if( longline || (j>LENGTH_MASK) ){
244 if( pbLongLine ) *pbLongLine = 1;
245 return 0; /* Very long line -> binary */
246 }
247 if( pbCrlf && crlf) *pbCrlf = 1;
248 return 1; /* No problems seen -> not binary */
249 }
250
251 /*
252 ** Define the type needed to represent a Unicode (UTF-16) character.
253 */
@@ -279,45 +279,45 @@
279 /*
280 ** This function attempts to scan each logical line within the blob to
281 ** determine the type of content it appears to contain. Possible return
282 ** values are:
283 **
284 ** (1) -- The content appears to consist entirely of text;
285 ** however, the encoding may not be UTF-16.
 
286 **
287 ** (0) -- The content appears to be binary because it contains embedded
288 ** NUL characters or an extremely long line. Since this function
289 ** does not understand UTF-8, it may falsely consider UTF-8 text
290 ** to be binary.
291 **
 
 
 
 
292 ************************************ WARNING **********************************
293 **
294 ** This function does not validate that the blob content is properly formed
295 ** UTF-16. It assumes that all code points are the same size. It does not
296 ** validate any code points. It makes no attempt to detect if any [invalid]
297 ** switches between the UTF-16be and UTF-16le encodings occur.
298 **
299 ** The only code points that this function cares about are the NUL character,
300 ** carriage-return, and line-feed.
301 **
302 ** If pbLongLine is not NULL and the blob is detected as being binary only because
303 ** of long lines, the integer pointed to is set to 1. Otherwise, it is left as is.
304 ** If pbCrlf is not NULL and the blob contains crlf, the integer pointed
305 ** to is set to 1. Otherwise, it is left as is.
306 **
307 ************************************ WARNING **********************************
308 */
309 int looks_like_utf16(const Blob *pContent, int *pbLongLine, int *pbCrlf){
310 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
311 unsigned int n = blob_size(pContent);
312 int j, c;
313 int crlf = 0;
314 int longline = 0;
315
316 /* Check individual lines.
317 */
318 if( n==0 ) return 1; /* Empty file -> text */
 
319 if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
320 c = *z;
321 if( c==0 ) return 0; /* NUL character in a file -> binary */
322 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
323 while( (n-=2)>0 ){
@@ -324,24 +324,24 @@
324 c = *++z; ++j;
325 if( c==0 ) return 0; /* NUL character in a file -> binary */
326 if( c==UTF16BE_LF || c==UTF16LE_LF ){
327 int c2 = z[-1];
328 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
329 crlf = 1; /* Contains CR/NL, continue */
330 }
331 if( j>UTF16_LENGTH_MASK ){
332 longline = 1; /* Contains long line, continue */
 
333 }
334 j = 0;
335 }
336 }
337 if( longline || j>UTF16_LENGTH_MASK ){
338 if( pbLongLine ) *pbLongLine = 1;
339 return 0; /* Very long line -> binary */
340 }
341 if( pbCrlf ) *pbCrlf = crlf;
342 return 1; /* No problems seen -> not binary */
343 }
344
345 /*
346 ** This function returns an array of bytes representing the byte-order-mark
347 ** for UTF-8.
348

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button