Fossil SCM

Don't trigger the long-lines warning if the long line is followed by a null byte: in that case it is a normal binary file. Rewrite looks_like_utf8/16 to handle CR/LF the same way as long lines (thanks, Joe, for the long-lines rewrite!)

jan.nijtmans 2013-03-05 09:24 trunk

Commit ab2920c2b9478b41563d2747e960a02b53c62f59

Parent 10fbcda270363ce…

2 files changed +7 -6 +33 -33

~ src/checkin.c ~ src/diff.c

M src/checkin.c

+7 -6

		--- src/checkin.c
		+++ src/checkin.c
		@@ -902,33 +902,34 @@
902	902	int encodingOk, /* Non-zero if encoding warnings should be disabled. */
903	903	const char zFilename / The full name of the file being committed. */
904	904	){
905	905	int eType; /* return value of looks_like_utf8/utf16() */
906	906	int fUnicode; /* return value of starts_with_utf16_bom() */
907		- int longLine; /* non-zero if blob has "long lines" */
	907	+ int longLine = 0; /* non-zero if blob has "long lines" */
	908	+ int crlf = 0; /* non-zero if blob has "crlf" */
908	909	char zMsg; / Warning message */
909	910	Blob fname; /* Relative pathname of the file */
910	911	static int allOk = 0; /* Set to true to disable this routine */
911	912
912	913	if( allOk ) return 0;
913	914	fUnicode = starts_with_utf16_bom(p, 0, 0);
914		- eType = fUnicode ? looks_like_utf16(p, &longLine) :
915		- looks_like_utf8(p, &longLine);
916		- if( eType==0 \|\| eType==-1 \|\| fUnicode ){
	915	+ eType = fUnicode ? looks_like_utf16(p, &longLine, &crlf) :
	916	+ looks_like_utf8(p, &longLine, &crlf);
	917	+ if( eType==0 \|\| crlf \|\| fUnicode ){
917	918	const char *zWarning;
918	919	const char *zDisable;
919	920	const char *zConvert = "c=convert/";
920	921	Blob ans;
921	922	char cReply;
922	923
923		- if( eType==-1 && fUnicode ){
	924	+ if( crlf && fUnicode ){
924	925	if ( crnlOk && encodingOk ){
925	926	return 0; /* We don't want CR/NL and Unicode warnings for this file. */
926	927	}
927	928	zWarning = "CR/NL line endings and Unicode";
928	929	zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
929		- }else if( eType==-1 ){
	930	+ }else if( crlf ){
930	931	if( crnlOk ){
931	932	return 0; /* We don't want CR/NL warnings for this file. */
932	933	}
933	934	zWarning = "CR/NL line endings";
934	935	zDisable = "\"crnl-glob\" setting";
935	936

	--- src/checkin.c
	+++ src/checkin.c
	@@ -902,33 +902,34 @@
902	int encodingOk, /* Non-zero if encoding warnings should be disabled. */
903	const char zFilename / The full name of the file being committed. */
904	){
905	int eType; /* return value of looks_like_utf8/utf16() */
906	int fUnicode; /* return value of starts_with_utf16_bom() */
907	int longLine; /* non-zero if blob has "long lines" */

908	char zMsg; / Warning message */
909	Blob fname; /* Relative pathname of the file */
910	static int allOk = 0; /* Set to true to disable this routine */
911
912	if( allOk ) return 0;
913	fUnicode = starts_with_utf16_bom(p, 0, 0);
914	eType = fUnicode ? looks_like_utf16(p, &longLine) :
915	looks_like_utf8(p, &longLine);
916	if( eType==0 \|\| eType==-1 \|\| fUnicode ){
917	const char *zWarning;
918	const char *zDisable;
919	const char *zConvert = "c=convert/";
920	Blob ans;
921	char cReply;
922
923	if( eType==-1 && fUnicode ){
924	if ( crnlOk && encodingOk ){
925	return 0; /* We don't want CR/NL and Unicode warnings for this file. */
926	}
927	zWarning = "CR/NL line endings and Unicode";
928	zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
929	}else if( eType==-1 ){
930	if( crnlOk ){
931	return 0; /* We don't want CR/NL warnings for this file. */
932	}
933	zWarning = "CR/NL line endings";
934	zDisable = "\"crnl-glob\" setting";
935

	--- src/checkin.c
	+++ src/checkin.c
	@@ -902,33 +902,34 @@
902	int encodingOk, /* Non-zero if encoding warnings should be disabled. */
903	const char zFilename / The full name of the file being committed. */
904	){
905	int eType; /* return value of looks_like_utf8/utf16() */
906	int fUnicode; /* return value of starts_with_utf16_bom() */
907	int longLine = 0; /* non-zero if blob has "long lines" */
908	int crlf = 0; /* non-zero if blob has "crlf" */
909	char zMsg; / Warning message */
910	Blob fname; /* Relative pathname of the file */
911	static int allOk = 0; /* Set to true to disable this routine */
912
913	if( allOk ) return 0;
914	fUnicode = starts_with_utf16_bom(p, 0, 0);
915	eType = fUnicode ? looks_like_utf16(p, &longLine, &crlf) :
916	looks_like_utf8(p, &longLine, &crlf);
917	if( eType==0 \|\| crlf \|\| fUnicode ){
918	const char *zWarning;
919	const char *zDisable;
920	const char *zConvert = "c=convert/";
921	Blob ans;
922	char cReply;
923
924	if( crlf && fUnicode ){
925	if ( crnlOk && encodingOk ){
926	return 0; /* We don't want CR/NL and Unicode warnings for this file. */
927	}
928	zWarning = "CR/NL line endings and Unicode";
929	zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
930	}else if( crlf ){
931	if( crnlOk ){
932	return 0; /* We don't want CR/NL warnings for this file. */
933	}
934	zWarning = "CR/NL line endings";
935	zDisable = "\"crnl-glob\" setting";
936

M src/diff.c

+33 -33

		--- src/diff.c
		+++ src/diff.c
		@@ -57,11 +57,11 @@
57	57	"more than 10,000 changes\n"
58	58
59	59	#define DIFF_TOO_MANY_CHANGES_HTML \
60	60	"<p class='generalError'>More than 10,000 changes</p>\n"
61	61
62		-#define looks_like_binary(blob) (looks_like_utf8((blob), 0) == 0)
	62	+#define looks_like_binary(blob) (looks_like_utf8((blob), 0, 0) != 1)
63	63	#endif /* INTERFACE */
64	64
65	65	/*
66	66	Maximum length of a line in a text file, in bytes. (2^13 = 8192 bytes)
67	67	*/
		@@ -186,68 +186,68 @@
186	186	/*
187	187	** This function attempts to scan each logical line within the blob to
188	188	** determine the type of content it appears to contain. Possible return
189	189	** values are:
190	190	**
191		-** (1) -- The content appears to consist entirely of text, with lines
192		-** delimited by line-feed characters; however, the encoding may
193		-** not be UTF-8.
	191	+** (1) -- The content appears to consist entirely of text;
	192	+** however, the encoding may not be UTF-8.
194	193	**
195	194	** (0) -- The content appears to be binary because it contains embedded
196	195	** NUL characters or an extremely long line. Since this function
197	196	** does not understand UTF-16, it may falsely consider UTF-16 text
198	197	** to be binary.
199	198	**
200		-** (-1) -- The content appears to consist entirely of text, with lines
201		-** delimited by carriage-return, line-feed pairs; however, the
202		-** encoding may not be UTF-8.
203		-**
204	199	********************************** WARNING ********************************
205	200	**
206	201	** This function does not validate that the blob content is properly formed
207	202	** UTF-8. It assumes that all code points are the same size. It does not
208	203	** validate any code points. It makes no attempt to detect if any [invalid]
209	204	** switches between UTF-8 and other encodings occur.
210	205	**
211	206	** The only code points that this function cares about are the NUL character,
212	207	** carriage-return, and line-feed.
	208	+**
	209	+** If pbLongLine is not NULL and the blob is detected as being binary only because
	210	+** of long lines, the integer pointed to is set to 1. Otherwise, it is left as is.
	211	+** If pbCrlf is not NULL and the blob contains crlf, the integer pointed
	212	+** to is set to 1. Otherwise, it is left as is.
213	213	**
214	214	********************************** WARNING ********************************
215	215	*/
216		-int looks_like_utf8(const Blob pContent, int pbLongLine){
	216	+int looks_like_utf8(const Blob pContent, int pbLongLine, int *pbCrlf){
217	217	const char *z = blob_buffer(pContent);
218	218	unsigned int n = blob_size(pContent);
219	219	int j, c;
220		- int result = 1; /* Assume UTF-8 text with no CR/NL */
	220	+ int crlf = 0;
	221	+ int longline = 0;
221	222
222	223	/* Check individual lines.
223	224	*/
224		- if( pbLongLine ) *pbLongLine = 0;
225		- if( n==0 ) return result; /* Empty file -> text */
	225	+ if( n==0 ) return 1; /* Empty file -> text */
226	226	c = *z;
227	227	if( c==0 ) return 0; /* Zero byte in a file -> binary */
228	228	j = (c!='\n');
229	229	while( --n>0 ){
230	230	c = *++z; ++j;
231	231	if( c==0 ) return 0; /* Zero byte in a file -> binary */
232	232	if( c=='\n' ){
233	233	int c2 = z[-1];
234	234	if( c2=='\r' ){
235		- result = -1; /* Contains CR/NL, continue */
	235	+ crlf = 1; /* Contains CR/NL, continue */
236	236	}
237	237	if( j>LENGTH_MASK ){
238		- if( pbLongLine ) *pbLongLine = 1;
239		- return 0; /* Very long line -> binary */
	238	+ longline = 1; /* Contains long line, continue */
240	239	}
241	240	j = 0;
242	241	}
243	242	}
244		- if( j>LENGTH_MASK ){
	243	+ if( longline \|\| (j>LENGTH_MASK) ){
245	244	if( pbLongLine ) *pbLongLine = 1;
246	245	return 0; /* Very long line -> binary */
247	246	}
248		- return result; /* No problems seen -> not binary */
	247	+ if( pbCrlf && crlf) *pbCrlf = 1;
	248	+ return 1; /* No problems seen -> not binary */
249	249	}
250	250
251	251	/*
252	252	** Define the type needed to represent a Unicode (UTF-16) character.
253	253	*/
		@@ -279,45 +279,45 @@
279	279	/*
280	280	** This function attempts to scan each logical line within the blob to
281	281	** determine the type of content it appears to contain. Possible return
282	282	** values are:
283	283	**
284		-** (1) -- The content appears to consist entirely of text, with lines
285		-** delimited by line-feed characters; however, the encoding may
286		-** not be UTF-16.
	284	+** (1) -- The content appears to consist entirely of text;
	285	+** however, the encoding may not be UTF-16.
287	286	**
288	287	** (0) -- The content appears to be binary because it contains embedded
289	288	** NUL characters or an extremely long line. Since this function
290	289	** does not understand UTF-8, it may falsely consider UTF-8 text
291	290	** to be binary.
292	291	**
293		-** (-1) -- The content appears to consist entirely of text, with lines
294		-** delimited by carriage-return, line-feed pairs; however, the
295		-** encoding may not be UTF-16.
296		-**
297	292	********************************** WARNING ********************************
298	293	**
299	294	** This function does not validate that the blob content is properly formed
300	295	** UTF-16. It assumes that all code points are the same size. It does not
301	296	** validate any code points. It makes no attempt to detect if any [invalid]
302	297	** switches between the UTF-16be and UTF-16le encodings occur.
303	298	**
304	299	** The only code points that this function cares about are the NUL character,
305	300	** carriage-return, and line-feed.
	301	+**
	302	+** If pbLongLine is not NULL and the blob is detected as being binary only because
	303	+** of long lines, the integer pointed to is set to 1. Otherwise, it is left as is.
	304	+** If pbCrlf is not NULL and the blob contains crlf, the integer pointed
	305	+** to is set to 1. Otherwise, it is left as is.
306	306	**
307	307	********************************** WARNING ********************************
308	308	*/
309		-int looks_like_utf16(const Blob pContent, int pbLongLine){
	309	+int looks_like_utf16(const Blob pContent, int pbLongLine, int *pbCrlf){
310	310	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
311	311	unsigned int n = blob_size(pContent);
312	312	int j, c;
313		- int result = 1; /* Assume UTF-16 text with no CR/NL */
	313	+ int crlf = 0;
	314	+ int longline = 0;
314	315
315	316	/* Check individual lines.
316	317	*/
317		- if( pbLongLine ) *pbLongLine = 0;
318		- if( n==0 ) return result; /* Empty file -> text */
	318	+ if( n==0 ) return 1; /* Empty file -> text */
319	319	if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
320	320	c = *z;
321	321	if( c==0 ) return 0; /* NUL character in a file -> binary */
322	322	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
323	323	while( (n-=2)>0 ){
		@@ -324,24 +324,24 @@
324	324	c = *++z; ++j;
325	325	if( c==0 ) return 0; /* NUL character in a file -> binary */
326	326	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
327	327	int c2 = z[-1];
328	328	if( c2==UTF16BE_CR \|\| c2==UTF16LE_CR ){
329		- result = -1; /* Contains CR/NL, continue */
	329	+ crlf = 1; /* Contains CR/NL, continue */
330	330	}
331	331	if( j>UTF16_LENGTH_MASK ){
332		- if( pbLongLine ) *pbLongLine = 1;
333		- return 0; /* Very long line -> binary */
	332	+ longline = 1; /* Contains long line, continue */
334	333	}
335	334	j = 0;
336	335	}
337	336	}
338		- if( j>UTF16_LENGTH_MASK ){
	337	+ if( longline \|\| j>UTF16_LENGTH_MASK ){
339	338	if( pbLongLine ) *pbLongLine = 1;
340	339	return 0; /* Very long line -> binary */
341	340	}
342		- return result; /* No problems seen -> not binary */
	341	+ if( pbCrlf ) *pbCrlf = crlf;
	342	+ return 1; /* No problems seen -> not binary */
343	343	}
344	344
345	345	/*
346	346	** This function returns an array of bytes representing the byte-order-mark
347	347	** for UTF-8.
348	348

	--- src/diff.c
	+++ src/diff.c
	@@ -57,11 +57,11 @@
57	"more than 10,000 changes\n"
58
59	#define DIFF_TOO_MANY_CHANGES_HTML \
60	"<p class='generalError'>More than 10,000 changes</p>\n"
61
62	#define looks_like_binary(blob) (looks_like_utf8((blob), 0) == 0)
63	#endif /* INTERFACE */
64
65	/*
66	Maximum length of a line in a text file, in bytes. (2^13 = 8192 bytes)
67	*/
	@@ -186,68 +186,68 @@
186	/*
187	** This function attempts to scan each logical line within the blob to
188	** determine the type of content it appears to contain. Possible return
189	** values are:
190	**
191	** (1) -- The content appears to consist entirely of text, with lines
192	** delimited by line-feed characters; however, the encoding may
193	** not be UTF-8.
194	**
195	** (0) -- The content appears to be binary because it contains embedded
196	** NUL characters or an extremely long line. Since this function
197	** does not understand UTF-16, it may falsely consider UTF-16 text
198	** to be binary.
199	**
200	** (-1) -- The content appears to consist entirely of text, with lines
201	** delimited by carriage-return, line-feed pairs; however, the
202	** encoding may not be UTF-8.
203	**
204	********************************** WARNING ********************************
205	**
206	** This function does not validate that the blob content is properly formed
207	** UTF-8. It assumes that all code points are the same size. It does not
208	** validate any code points. It makes no attempt to detect if any [invalid]
209	** switches between UTF-8 and other encodings occur.
210	**
211	** The only code points that this function cares about are the NUL character,
212	** carriage-return, and line-feed.





213	**
214	********************************** WARNING ********************************
215	*/
216	int looks_like_utf8(const Blob pContent, int pbLongLine){
217	const char *z = blob_buffer(pContent);
218	unsigned int n = blob_size(pContent);
219	int j, c;
220	int result = 1; /* Assume UTF-8 text with no CR/NL */

221
222	/* Check individual lines.
223	*/
224	if( pbLongLine ) *pbLongLine = 0;
225	if( n==0 ) return result; /* Empty file -> text */
226	c = *z;
227	if( c==0 ) return 0; /* Zero byte in a file -> binary */
228	j = (c!='\n');
229	while( --n>0 ){
230	c = *++z; ++j;
231	if( c==0 ) return 0; /* Zero byte in a file -> binary */
232	if( c=='\n' ){
233	int c2 = z[-1];
234	if( c2=='\r' ){
235	result = -1; /* Contains CR/NL, continue */
236	}
237	if( j>LENGTH_MASK ){
238	if( pbLongLine ) *pbLongLine = 1;
239	return 0; /* Very long line -> binary */
240	}
241	j = 0;
242	}
243	}
244	if( j>LENGTH_MASK ){
245	if( pbLongLine ) *pbLongLine = 1;
246	return 0; /* Very long line -> binary */
247	}
248	return result; /* No problems seen -> not binary */

249	}
250
251	/*
252	** Define the type needed to represent a Unicode (UTF-16) character.
253	*/
	@@ -279,45 +279,45 @@
279	/*
280	** This function attempts to scan each logical line within the blob to
281	** determine the type of content it appears to contain. Possible return
282	** values are:
283	**
284	** (1) -- The content appears to consist entirely of text, with lines
285	** delimited by line-feed characters; however, the encoding may
286	** not be UTF-16.
287	**
288	** (0) -- The content appears to be binary because it contains embedded
289	** NUL characters or an extremely long line. Since this function
290	** does not understand UTF-8, it may falsely consider UTF-8 text
291	** to be binary.
292	**
293	** (-1) -- The content appears to consist entirely of text, with lines
294	** delimited by carriage-return, line-feed pairs; however, the
295	** encoding may not be UTF-16.
296	**
297	********************************** WARNING ********************************
298	**
299	** This function does not validate that the blob content is properly formed
300	** UTF-16. It assumes that all code points are the same size. It does not
301	** validate any code points. It makes no attempt to detect if any [invalid]
302	** switches between the UTF-16be and UTF-16le encodings occur.
303	**
304	** The only code points that this function cares about are the NUL character,
305	** carriage-return, and line-feed.





306	**
307	********************************** WARNING ********************************
308	*/
309	int looks_like_utf16(const Blob pContent, int pbLongLine){
310	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
311	unsigned int n = blob_size(pContent);
312	int j, c;
313	int result = 1; /* Assume UTF-16 text with no CR/NL */

314
315	/* Check individual lines.
316	*/
317	if( pbLongLine ) *pbLongLine = 0;
318	if( n==0 ) return result; /* Empty file -> text */
319	if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
320	c = *z;
321	if( c==0 ) return 0; /* NUL character in a file -> binary */
322	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
323	while( (n-=2)>0 ){
	@@ -324,24 +324,24 @@
324	c = *++z; ++j;
325	if( c==0 ) return 0; /* NUL character in a file -> binary */
326	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
327	int c2 = z[-1];
328	if( c2==UTF16BE_CR \|\| c2==UTF16LE_CR ){
329	result = -1; /* Contains CR/NL, continue */
330	}
331	if( j>UTF16_LENGTH_MASK ){
332	if( pbLongLine ) *pbLongLine = 1;
333	return 0; /* Very long line -> binary */
334	}
335	j = 0;
336	}
337	}
338	if( j>UTF16_LENGTH_MASK ){
339	if( pbLongLine ) *pbLongLine = 1;
340	return 0; /* Very long line -> binary */
341	}
342	return result; /* No problems seen -> not binary */

343	}
344
345	/*
346	** This function returns an array of bytes representing the byte-order-mark
347	** for UTF-8.
348

	--- src/diff.c
	+++ src/diff.c
	@@ -57,11 +57,11 @@
57	"more than 10,000 changes\n"
58
59	#define DIFF_TOO_MANY_CHANGES_HTML \
60	"<p class='generalError'>More than 10,000 changes</p>\n"
61
62	#define looks_like_binary(blob) (looks_like_utf8((blob), 0, 0) != 1)
63	#endif /* INTERFACE */
64
65	/*
66	Maximum length of a line in a text file, in bytes. (2^13 = 8192 bytes)
67	*/
	@@ -186,68 +186,68 @@
186	/*
187	** This function attempts to scan each logical line within the blob to
188	** determine the type of content it appears to contain. Possible return
189	** values are:
190	**
191	** (1) -- The content appears to consist entirely of text;
192	** however, the encoding may not be UTF-8.

193	**
194	** (0) -- The content appears to be binary because it contains embedded
195	** NUL characters or an extremely long line. Since this function
196	** does not understand UTF-16, it may falsely consider UTF-16 text
197	** to be binary.
198	**




199	********************************** WARNING ********************************
200	**
201	** This function does not validate that the blob content is properly formed
202	** UTF-8. It assumes that all code points are the same size. It does not
203	** validate any code points. It makes no attempt to detect if any [invalid]
204	** switches between UTF-8 and other encodings occur.
205	**
206	** The only code points that this function cares about are the NUL character,
207	** carriage-return, and line-feed.
208	**
209	** If pbLongLine is not NULL and the blob is detected as being binary only because
210	** of long lines, the integer pointed to is set to 1. Otherwise, it is left as is.
211	** If pbCrlf is not NULL and the blob contains crlf, the integer pointed
212	** to is set to 1. Otherwise, it is left as is.
213	**
214	********************************** WARNING ********************************
215	*/
216	int looks_like_utf8(const Blob pContent, int pbLongLine, int *pbCrlf){
217	const char *z = blob_buffer(pContent);
218	unsigned int n = blob_size(pContent);
219	int j, c;
220	int crlf = 0;
221	int longline = 0;
222
223	/* Check individual lines.
224	*/
225	if( n==0 ) return 1; /* Empty file -> text */

226	c = *z;
227	if( c==0 ) return 0; /* Zero byte in a file -> binary */
228	j = (c!='\n');
229	while( --n>0 ){
230	c = *++z; ++j;
231	if( c==0 ) return 0; /* Zero byte in a file -> binary */
232	if( c=='\n' ){
233	int c2 = z[-1];
234	if( c2=='\r' ){
235	crlf = 1; /* Contains CR/NL, continue */
236	}
237	if( j>LENGTH_MASK ){
238	longline = 1; /* Contains long line, continue */

239	}
240	j = 0;
241	}
242	}
243	if( longline \|\| (j>LENGTH_MASK) ){
244	if( pbLongLine ) *pbLongLine = 1;
245	return 0; /* Very long line -> binary */
246	}
247	if( pbCrlf && crlf) *pbCrlf = 1;
248	return 1; /* No problems seen -> not binary */
249	}
250
251	/*
252	** Define the type needed to represent a Unicode (UTF-16) character.
253	*/
	@@ -279,45 +279,45 @@
279	/*
280	** This function attempts to scan each logical line within the blob to
281	** determine the type of content it appears to contain. Possible return
282	** values are:
283	**
284	** (1) -- The content appears to consist entirely of text;
285	** however, the encoding may not be UTF-16.

286	**
287	** (0) -- The content appears to be binary because it contains embedded
288	** NUL characters or an extremely long line. Since this function
289	** does not understand UTF-8, it may falsely consider UTF-8 text
290	** to be binary.
291	**




292	********************************** WARNING ********************************
293	**
294	** This function does not validate that the blob content is properly formed
295	** UTF-16. It assumes that all code points are the same size. It does not
296	** validate any code points. It makes no attempt to detect if any [invalid]
297	** switches between the UTF-16be and UTF-16le encodings occur.
298	**
299	** The only code points that this function cares about are the NUL character,
300	** carriage-return, and line-feed.
301	**
302	** If pbLongLine is not NULL and the blob is detected as being binary only because
303	** of long lines, the integer pointed to is set to 1. Otherwise, it is left as is.
304	** If pbCrlf is not NULL and the blob contains crlf, the integer pointed
305	** to is set to 1. Otherwise, it is left as is.
306	**
307	********************************** WARNING ********************************
308	*/
309	int looks_like_utf16(const Blob pContent, int pbLongLine, int *pbCrlf){
310	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
311	unsigned int n = blob_size(pContent);
312	int j, c;
313	int crlf = 0;
314	int longline = 0;
315
316	/* Check individual lines.
317	*/
318	if( n==0 ) return 1; /* Empty file -> text */

319	if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
320	c = *z;
321	if( c==0 ) return 0; /* NUL character in a file -> binary */
322	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
323	while( (n-=2)>0 ){
	@@ -324,24 +324,24 @@
324	c = *++z; ++j;
325	if( c==0 ) return 0; /* NUL character in a file -> binary */
326	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
327	int c2 = z[-1];
328	if( c2==UTF16BE_CR \|\| c2==UTF16LE_CR ){
329	crlf = 1; /* Contains CR/NL, continue */
330	}
331	if( j>UTF16_LENGTH_MASK ){
332	longline = 1; /* Contains long line, continue */

333	}
334	j = 0;
335	}
336	}
337	if( longline \|\| j>UTF16_LENGTH_MASK ){
338	if( pbLongLine ) *pbLongLine = 1;
339	return 0; /* Very long line -> binary */
340	}
341	if( pbCrlf ) *pbCrlf = crlf;
342	return 1; /* No problems seen -> not binary */
343	}
344
345	/*
346	** This function returns an array of bytes representing the byte-order-mark
347	** for UTF-8.
348

Fossil SCM

Keyboard Shortcuts