Fossil SCM

Style and clarity revisions to the looks_like_utf*() functions. Correct off-by-one fix for the looks_like_utf16() function. Add -utf8 and -utf16 options to the 'test-looks-like-utf' command.

mistachkin 2013-05-13 22:52 trunk

Commit 4ffaf2ee08a4bc54cd1f1ff2c5dd48aef2d7b491

Parent bb4776e2e0fdfb6…

1 file changed +31 -22

~ src/diff.c

M src/diff.c

+31 -22

		--- src/diff.c
		+++ src/diff.c
		@@ -243,25 +243,26 @@
243	243	unsigned int n = blob_size(pContent);
244	244	int j, c, flags = LOOK_NONE; /* Assume UTF-8 text, prove otherwise */
245	245
246	246	if( n==0 ) return flags; /* Empty file -> text */
247	247	c = *z;
248		- j = (c!='\n');
249		- if( !j ){
250		- flags \|= (LOOK_LF \| LOOK_LONE_LF); /* Found LF as first char */
251		- }else if( c==0 ){
	248	+ if( c==0 ){
252	249	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
253	250	}else if( c=='\r' ){
254	251	flags \|= LOOK_CR;
255	252	if( n<=1 \|\| z[1]!='\n' ){
256	253	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
257	254	}
258	255	}
	256	+ j = (c!='\n');
	257	+ if( !j ) flags \|= (LOOK_LF \| LOOK_LONE_LF); /* Found LF as first char */
259	258	while( !(flags&stopFlags) && --n>0 ){
260	259	int c2 = c;
261	260	c = *++z; ++j;
262		- if( c=='\n' ){
	261	+ if( c==0 ){
	262	+ flags \|= LOOK_NUL; /* NUL character in a file -> binary */
	263	+ }else if( c=='\n' ){
263	264	flags \|= LOOK_LF;
264	265	if( c2=='\r' ){
265	266	flags \|= (LOOK_CR \| LOOK_CRLF); /* Found LF preceded by CR */
266	267	}else{
267	268	flags \|= LOOK_LONE_LF;
		@@ -268,21 +269,19 @@
268	269	}
269	270	if( j>LENGTH_MASK ){
270	271	flags \|= LOOK_LONG; /* Very long line -> binary */
271	272	}
272	273	j = 0;
273		- }else if( c==0 ){
274		- flags \|= LOOK_NUL; /* NUL character in a file -> binary */
275	274	}else if( c=='\r' ){
276	275	flags \|= LOOK_CR;
277	276	if( n<=1 \|\| z[1]!='\n' ){
278	277	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
279	278	}
280	279	}
281	280	}
282	281	if( n ){
283		- flags \|= LOOK_SHORT; /* Not the whole blob is examined */
	282	+ flags \|= LOOK_SHORT; /* The whole blob was not examined */
284	283	}
285	284	if( j>LENGTH_MASK ){
286	285	flags \|= LOOK_LONG; /* Very long line -> binary */
287	286	}
288	287	return flags;
		@@ -358,31 +357,33 @@
358	357	}
359	358	c = *z;
360	359	if( bReverse ){
361	360	c = UTF16_SWAP(c);
362	361	}
363		- j = (c!='\n');
364		- if( !j ){
365		- flags \|= (LOOK_LF \| LOOK_LONE_LF); /* Found LF as first char */
366		- }else if( c==0 ){
	362	+ if( c==0 ){
367	363	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
368	364	}else if( c=='\r' ){
369	365	flags \|= LOOK_CR;
370		- if( n<2*sizeof(WCHAR_T) \|\| UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
	366	+ if( n<=sizeof(WCHAR_T) \|\| UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
371	367	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
372	368	}
373	369	}
	370	+ j = (c!='\n');
	371	+ if( !j ) flags \|= (LOOK_LF \| LOOK_LONE_LF); /* Found LF as first char */
374	372	while( 1 ){
375	373	int c2 = c;
	374	+ if( flags&stopFlags ) break;
376	375	n -= sizeof(WCHAR_T);
377		- if( (flags&stopFlags) \|\| n<sizeof(WCHAR_T) ) break;
	376	+ if( n<sizeof(WCHAR_T) ) break;
378	377	c = *++z;
379	378	if( bReverse ){
380	379	c = UTF16_SWAP(c);
381	380	}
382	381	++j;
383		- if( c=='\n' ){
	382	+ if( c==0 ){
	383	+ flags \|= LOOK_NUL; /* NUL character in a file -> binary */
	384	+ }else if( c=='\n' ){
384	385	flags \|= LOOK_LF;
385	386	if( c2=='\r' ){
386	387	flags \|= (LOOK_CR \| LOOK_CRLF); /* Found LF preceded by CR */
387	388	}else{
388	389	flags \|= LOOK_LONE_LF;
		@@ -389,21 +390,19 @@
389	390	}
390	391	if( j>UTF16_LENGTH_MASK ){
391	392	flags \|= LOOK_LONG; /* Very long line -> binary */
392	393	}
393	394	j = 0;
394		- }else if( c==0 ){
395		- flags \|= LOOK_NUL; /* NUL character in a file -> binary */
396	395	}else if( c=='\r' ){
397	396	flags \|= LOOK_CR;
398		- if( n<2*sizeof(WCHAR_T) \|\| UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
	397	+ if( n<(2*sizeof(WCHAR_T)) \|\| UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
399	398	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
400	399	}
401	400	}
402	401	}
403	402	if( n ){
404		- flags \|= LOOK_SHORT; /* Not the whole blob is examined */
	403	+ flags \|= LOOK_SHORT; /* The whole blob was not examined */
405	404	}
406	405	if( j>UTF16_LENGTH_MASK ){
407	406	flags \|= LOOK_LONG; /* Very long line -> binary */
408	407	}
409	408	return flags;
		@@ -440,11 +439,11 @@
440	439	** byte-order-mark (BOM), either in the endianness of the machine
441	440	** or in reversed byte order. The UTF-32 BOM is ruled out by checking
442	441	** if the UTF-16 BOM is not immediately followed by (utf16) 0.
443	442	** pnByte is only set when the function returns 1.
444	443	**
445		-** pbReverse is always set, even when no BOM is found. Without BOM,
	444	+** pbReverse is always set, even when no BOM is found. Without a BOM,
446	445	** it is set to 1 on little-endian and 0 on big-endian platforms. See
447	446	** clause D98 of conformance (section 3.10) of the Unicode standard.
448	447	*/
449	448	int starts_with_utf16_bom(
450	449	const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
		@@ -452,19 +451,19 @@
452	451	int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
453	452	){
454	453	const unsigned short *z = (unsigned short *)blob_buffer(pContent);
455	454	int bomSize = sizeof(unsigned short);
456	455	int size = blob_size(pContent);
457		- static const int one = 1;
458	456
459	457	if( size<bomSize ) goto noBom; /* No: cannot read BOM. */
460	458	if( size>=(2*bomSize) && z[1]==0 ) goto noBom; /* No: possible UTF-32. */
461	459	if( z[0]==0xfeff ){
462	460	if( pbReverse ) *pbReverse = 0;
463	461	}else if( z[0]==0xfffe ){
464	462	if( pbReverse ) *pbReverse = 1;
465	463	}else{
	464	+ static const int one = 1;
466	465	noBom:
467	466	if( pbReverse ) *pbReverse = *(char *) &one;
468	467	return 0; /* No: UTF-16 byte-order-mark not found. */
469	468	}
470	469	if( pnByte ) *pnByte = bomSize;
		@@ -2586,10 +2585,14 @@
2586	2585	/*
2587	2586	** COMMAND: test-looks-like-utf
2588	2587	**
2589	2588	** Usage: %fossil test-looks-like-utf FILENAME
2590	2589	**
	2590	+** Options:
	2591	+** --utf8 Ignoring BOM and file size, force UTF-8 checking
	2592	+** --utf16 Ignoring BOM and file size, force UTF-16 checking
	2593	+**
2591	2594	** FILENAME is the name of a file to check for textual content in the UTF-8
2592	2595	** and/or UTF-16 encodings.
2593	2596	*/
2594	2597	void looks_like_utf_test_cmd(void){
2595	2598	Blob blob; /* the contents of the specified file */
		@@ -2597,15 +2600,21 @@
2597	2600	int fUtf16; /* return value of starts_with_utf16_bom() */
2598	2601	int fUnicode; /* return value of could_be_utf16() */
2599	2602	int lookFlags; /* output flags from looks_like_utf8/utf16() */
2600	2603	int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */
2601	2604	int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */
	2605	+ int fForceUtf8 = find_option("utf8",0,0)!=0;
	2606	+ int fForceUtf16 = find_option("utf16",0,0)!=0;
2602	2607	if( g.argc!=3 ) usage("FILENAME");
2603	2608	blob_read_from_file(&blob, g.argv[2]);
2604	2609	fUtf8 = starts_with_utf8_bom(&blob, 0);
2605	2610	fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16);
2606		- fUnicode = could_be_utf16(&blob, &bRevUnicode);
	2611	+ if( fForceUtf8 ){
	2612	+ fUnicode = 0;
	2613	+ }else{
	2614	+ fUnicode = fForceUtf16 \|\| could_be_utf16(&blob, &bRevUnicode);
	2615	+ }
2607	2616	lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) :
2608	2617	looks_like_utf8(&blob, 0);
2609	2618	fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2610	2619	fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2611	2620	fossil_print("Starts with UTF-16 BOM: %s\n",
2612	2621

	--- src/diff.c
	+++ src/diff.c
	@@ -243,25 +243,26 @@
243	unsigned int n = blob_size(pContent);
244	int j, c, flags = LOOK_NONE; /* Assume UTF-8 text, prove otherwise */
245
246	if( n==0 ) return flags; /* Empty file -> text */
247	c = *z;
248	j = (c!='\n');
249	if( !j ){
250	flags \|= (LOOK_LF \| LOOK_LONE_LF); /* Found LF as first char */
251	}else if( c==0 ){
252	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
253	}else if( c=='\r' ){
254	flags \|= LOOK_CR;
255	if( n<=1 \|\| z[1]!='\n' ){
256	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
257	}
258	}


259	while( !(flags&stopFlags) && --n>0 ){
260	int c2 = c;
261	c = *++z; ++j;
262	if( c=='\n' ){


263	flags \|= LOOK_LF;
264	if( c2=='\r' ){
265	flags \|= (LOOK_CR \| LOOK_CRLF); /* Found LF preceded by CR */
266	}else{
267	flags \|= LOOK_LONE_LF;
	@@ -268,21 +269,19 @@
268	}
269	if( j>LENGTH_MASK ){
270	flags \|= LOOK_LONG; /* Very long line -> binary */
271	}
272	j = 0;
273	}else if( c==0 ){
274	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
275	}else if( c=='\r' ){
276	flags \|= LOOK_CR;
277	if( n<=1 \|\| z[1]!='\n' ){
278	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
279	}
280	}
281	}
282	if( n ){
283	flags \|= LOOK_SHORT; /* Not the whole blob is examined */
284	}
285	if( j>LENGTH_MASK ){
286	flags \|= LOOK_LONG; /* Very long line -> binary */
287	}
288	return flags;
	@@ -358,31 +357,33 @@
358	}
359	c = *z;
360	if( bReverse ){
361	c = UTF16_SWAP(c);
362	}
363	j = (c!='\n');
364	if( !j ){
365	flags \|= (LOOK_LF \| LOOK_LONE_LF); /* Found LF as first char */
366	}else if( c==0 ){
367	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
368	}else if( c=='\r' ){
369	flags \|= LOOK_CR;
370	if( n<2*sizeof(WCHAR_T) \|\| UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
371	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
372	}
373	}


374	while( 1 ){
375	int c2 = c;

376	n -= sizeof(WCHAR_T);
377	if( (flags&stopFlags) \|\| n<sizeof(WCHAR_T) ) break;
378	c = *++z;
379	if( bReverse ){
380	c = UTF16_SWAP(c);
381	}
382	++j;
383	if( c=='\n' ){


384	flags \|= LOOK_LF;
385	if( c2=='\r' ){
386	flags \|= (LOOK_CR \| LOOK_CRLF); /* Found LF preceded by CR */
387	}else{
388	flags \|= LOOK_LONE_LF;
	@@ -389,21 +390,19 @@
389	}
390	if( j>UTF16_LENGTH_MASK ){
391	flags \|= LOOK_LONG; /* Very long line -> binary */
392	}
393	j = 0;
394	}else if( c==0 ){
395	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
396	}else if( c=='\r' ){
397	flags \|= LOOK_CR;
398	if( n<2*sizeof(WCHAR_T) \|\| UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
399	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
400	}
401	}
402	}
403	if( n ){
404	flags \|= LOOK_SHORT; /* Not the whole blob is examined */
405	}
406	if( j>UTF16_LENGTH_MASK ){
407	flags \|= LOOK_LONG; /* Very long line -> binary */
408	}
409	return flags;
	@@ -440,11 +439,11 @@
440	** byte-order-mark (BOM), either in the endianness of the machine
441	** or in reversed byte order. The UTF-32 BOM is ruled out by checking
442	** if the UTF-16 BOM is not immediately followed by (utf16) 0.
443	** pnByte is only set when the function returns 1.
444	**
445	** pbReverse is always set, even when no BOM is found. Without BOM,
446	** it is set to 1 on little-endian and 0 on big-endian platforms. See
447	** clause D98 of conformance (section 3.10) of the Unicode standard.
448	*/
449	int starts_with_utf16_bom(
450	const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
	@@ -452,19 +451,19 @@
452	int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
453	){
454	const unsigned short *z = (unsigned short *)blob_buffer(pContent);
455	int bomSize = sizeof(unsigned short);
456	int size = blob_size(pContent);
457	static const int one = 1;
458
459	if( size<bomSize ) goto noBom; /* No: cannot read BOM. */
460	if( size>=(2*bomSize) && z[1]==0 ) goto noBom; /* No: possible UTF-32. */
461	if( z[0]==0xfeff ){
462	if( pbReverse ) *pbReverse = 0;
463	}else if( z[0]==0xfffe ){
464	if( pbReverse ) *pbReverse = 1;
465	}else{

466	noBom:
467	if( pbReverse ) *pbReverse = *(char *) &one;
468	return 0; /* No: UTF-16 byte-order-mark not found. */
469	}
470	if( pnByte ) *pnByte = bomSize;
	@@ -2586,10 +2585,14 @@
2586	/*
2587	** COMMAND: test-looks-like-utf
2588	**
2589	** Usage: %fossil test-looks-like-utf FILENAME
2590	**




2591	** FILENAME is the name of a file to check for textual content in the UTF-8
2592	** and/or UTF-16 encodings.
2593	*/
2594	void looks_like_utf_test_cmd(void){
2595	Blob blob; /* the contents of the specified file */
	@@ -2597,15 +2600,21 @@
2597	int fUtf16; /* return value of starts_with_utf16_bom() */
2598	int fUnicode; /* return value of could_be_utf16() */
2599	int lookFlags; /* output flags from looks_like_utf8/utf16() */
2600	int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */
2601	int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */


2602	if( g.argc!=3 ) usage("FILENAME");
2603	blob_read_from_file(&blob, g.argv[2]);
2604	fUtf8 = starts_with_utf8_bom(&blob, 0);
2605	fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16);
2606	fUnicode = could_be_utf16(&blob, &bRevUnicode);




2607	lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) :
2608	looks_like_utf8(&blob, 0);
2609	fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2610	fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2611	fossil_print("Starts with UTF-16 BOM: %s\n",
2612

	--- src/diff.c
	+++ src/diff.c
	@@ -243,25 +243,26 @@
243	unsigned int n = blob_size(pContent);
244	int j, c, flags = LOOK_NONE; /* Assume UTF-8 text, prove otherwise */
245
246	if( n==0 ) return flags; /* Empty file -> text */
247	c = *z;
248	if( c==0 ){



249	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
250	}else if( c=='\r' ){
251	flags \|= LOOK_CR;
252	if( n<=1 \|\| z[1]!='\n' ){
253	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
254	}
255	}
256	j = (c!='\n');
257	if( !j ) flags \|= (LOOK_LF \| LOOK_LONE_LF); /* Found LF as first char */
258	while( !(flags&stopFlags) && --n>0 ){
259	int c2 = c;
260	c = *++z; ++j;
261	if( c==0 ){
262	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
263	}else if( c=='\n' ){
264	flags \|= LOOK_LF;
265	if( c2=='\r' ){
266	flags \|= (LOOK_CR \| LOOK_CRLF); /* Found LF preceded by CR */
267	}else{
268	flags \|= LOOK_LONE_LF;
	@@ -268,21 +269,19 @@
269	}
270	if( j>LENGTH_MASK ){
271	flags \|= LOOK_LONG; /* Very long line -> binary */
272	}
273	j = 0;


274	}else if( c=='\r' ){
275	flags \|= LOOK_CR;
276	if( n<=1 \|\| z[1]!='\n' ){
277	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
278	}
279	}
280	}
281	if( n ){
282	flags \|= LOOK_SHORT; /* The whole blob was not examined */
283	}
284	if( j>LENGTH_MASK ){
285	flags \|= LOOK_LONG; /* Very long line -> binary */
286	}
287	return flags;
	@@ -358,31 +357,33 @@
357	}
358	c = *z;
359	if( bReverse ){
360	c = UTF16_SWAP(c);
361	}
362	if( c==0 ){



363	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
364	}else if( c=='\r' ){
365	flags \|= LOOK_CR;
366	if( n<=sizeof(WCHAR_T) \|\| UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
367	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
368	}
369	}
370	j = (c!='\n');
371	if( !j ) flags \|= (LOOK_LF \| LOOK_LONE_LF); /* Found LF as first char */
372	while( 1 ){
373	int c2 = c;
374	if( flags&stopFlags ) break;
375	n -= sizeof(WCHAR_T);
376	if( n<sizeof(WCHAR_T) ) break;
377	c = *++z;
378	if( bReverse ){
379	c = UTF16_SWAP(c);
380	}
381	++j;
382	if( c==0 ){
383	flags \|= LOOK_NUL; /* NUL character in a file -> binary */
384	}else if( c=='\n' ){
385	flags \|= LOOK_LF;
386	if( c2=='\r' ){
387	flags \|= (LOOK_CR \| LOOK_CRLF); /* Found LF preceded by CR */
388	}else{
389	flags \|= LOOK_LONE_LF;
	@@ -389,21 +390,19 @@
390	}
391	if( j>UTF16_LENGTH_MASK ){
392	flags \|= LOOK_LONG; /* Very long line -> binary */
393	}
394	j = 0;


395	}else if( c=='\r' ){
396	flags \|= LOOK_CR;
397	if( n<(2*sizeof(WCHAR_T)) \|\| UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
398	flags \|= LOOK_LONE_CR; /* More chars, next char is not LF */
399	}
400	}
401	}
402	if( n ){
403	flags \|= LOOK_SHORT; /* The whole blob was not examined */
404	}
405	if( j>UTF16_LENGTH_MASK ){
406	flags \|= LOOK_LONG; /* Very long line -> binary */
407	}
408	return flags;
	@@ -440,11 +439,11 @@
439	** byte-order-mark (BOM), either in the endianness of the machine
440	** or in reversed byte order. The UTF-32 BOM is ruled out by checking
441	** if the UTF-16 BOM is not immediately followed by (utf16) 0.
442	** pnByte is only set when the function returns 1.
443	**
444	** pbReverse is always set, even when no BOM is found. Without a BOM,
445	** it is set to 1 on little-endian and 0 on big-endian platforms. See
446	** clause D98 of conformance (section 3.10) of the Unicode standard.
447	*/
448	int starts_with_utf16_bom(
449	const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
	@@ -452,19 +451,19 @@
451	int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
452	){
453	const unsigned short *z = (unsigned short *)blob_buffer(pContent);
454	int bomSize = sizeof(unsigned short);
455	int size = blob_size(pContent);

456
457	if( size<bomSize ) goto noBom; /* No: cannot read BOM. */
458	if( size>=(2*bomSize) && z[1]==0 ) goto noBom; /* No: possible UTF-32. */
459	if( z[0]==0xfeff ){
460	if( pbReverse ) *pbReverse = 0;
461	}else if( z[0]==0xfffe ){
462	if( pbReverse ) *pbReverse = 1;
463	}else{
464	static const int one = 1;
465	noBom:
466	if( pbReverse ) *pbReverse = *(char *) &one;
467	return 0; /* No: UTF-16 byte-order-mark not found. */
468	}
469	if( pnByte ) *pnByte = bomSize;
	@@ -2586,10 +2585,14 @@
2585	/*
2586	** COMMAND: test-looks-like-utf
2587	**
2588	** Usage: %fossil test-looks-like-utf FILENAME
2589	**
2590	** Options:
2591	** --utf8 Ignoring BOM and file size, force UTF-8 checking
2592	** --utf16 Ignoring BOM and file size, force UTF-16 checking
2593	**
2594	** FILENAME is the name of a file to check for textual content in the UTF-8
2595	** and/or UTF-16 encodings.
2596	*/
2597	void looks_like_utf_test_cmd(void){
2598	Blob blob; /* the contents of the specified file */
	@@ -2597,15 +2600,21 @@
2600	int fUtf16; /* return value of starts_with_utf16_bom() */
2601	int fUnicode; /* return value of could_be_utf16() */
2602	int lookFlags; /* output flags from looks_like_utf8/utf16() */
2603	int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */
2604	int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */
2605	int fForceUtf8 = find_option("utf8",0,0)!=0;
2606	int fForceUtf16 = find_option("utf16",0,0)!=0;
2607	if( g.argc!=3 ) usage("FILENAME");
2608	blob_read_from_file(&blob, g.argv[2]);
2609	fUtf8 = starts_with_utf8_bom(&blob, 0);
2610	fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16);
2611	if( fForceUtf8 ){
2612	fUnicode = 0;
2613	}else{
2614	fUnicode = fForceUtf16 \|\| could_be_utf16(&blob, &bRevUnicode);
2615	}
2616	lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) :
2617	looks_like_utf8(&blob, 0);
2618	fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2619	fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2620	fossil_print("Starts with UTF-16 BOM: %s\n",
2621

Fossil SCM

Keyboard Shortcuts