3d988df6706… — Fossil SCM

M src/blob.c

+4 -3

		--- src/blob.c
		+++ src/blob.c
		@@ -1096,24 +1096,25 @@
1096	1096	** to be UTF-8 already, so no conversion is done.
1097	1097	*/
1098	1098	void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099	1099	char *zUtf8;
1100	1100	int bomSize = 0;
	1101	+ int bomReverse = 0;
1101	1102	if( starts_with_utf8_bom(pBlob, &bomSize) ){
1102	1103	struct Blob temp;
1103	1104	zUtf8 = blob_str(pBlob) + bomSize;
1104	1105	blob_zero(&temp);
1105	1106	blob_append(&temp, zUtf8, -1);
1106	1107	blob_swap(pBlob, &temp);
1107	1108	blob_reset(&temp);
1108	1109	#ifdef _WIN32
1109		- }else if( starts_with_utf16_bom(pBlob, &bomSize) ){
	1110	+ }else if( starts_with_utf16_bom(pBlob, &bomSize, &bomReverse) ){
1110	1111	zUtf8 = blob_buffer(pBlob);
1111		- if (*((unsigned short *)zUtf8) == 0xfffe) {
	1112	+ if( bomReverse ){
1112	1113	/* Found BOM, but with reversed bytes */
1113	1114	unsigned int i = blob_size(pBlob);
1114		- while( i > 0 ){
	1115	+ while( i>0 ){
1115	1116	/* swap bytes of unicode representation */
1116	1117	char zTemp = zUtf8[--i];
1117	1118	zUtf8[i] = zUtf8[i-1];
1118	1119	zUtf8[--i] = zTemp;
1119	1120	}
1120	1121

	--- src/blob.c
	+++ src/blob.c
	@@ -1096,24 +1096,25 @@
1096	** to be UTF-8 already, so no conversion is done.
1097	*/
1098	void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099	char *zUtf8;
1100	int bomSize = 0;

1101	if( starts_with_utf8_bom(pBlob, &bomSize) ){
1102	struct Blob temp;
1103	zUtf8 = blob_str(pBlob) + bomSize;
1104	blob_zero(&temp);
1105	blob_append(&temp, zUtf8, -1);
1106	blob_swap(pBlob, &temp);
1107	blob_reset(&temp);
1108	#ifdef _WIN32
1109	}else if( starts_with_utf16_bom(pBlob, &bomSize) ){
1110	zUtf8 = blob_buffer(pBlob);
1111	if (*((unsigned short *)zUtf8) == 0xfffe) {
1112	/* Found BOM, but with reversed bytes */
1113	unsigned int i = blob_size(pBlob);
1114	while( i > 0 ){
1115	/* swap bytes of unicode representation */
1116	char zTemp = zUtf8[--i];
1117	zUtf8[i] = zUtf8[i-1];
1118	zUtf8[--i] = zTemp;
1119	}
1120

	--- src/blob.c
	+++ src/blob.c
	@@ -1096,24 +1096,25 @@
1096	** to be UTF-8 already, so no conversion is done.
1097	*/
1098	void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099	char *zUtf8;
1100	int bomSize = 0;
1101	int bomReverse = 0;
1102	if( starts_with_utf8_bom(pBlob, &bomSize) ){
1103	struct Blob temp;
1104	zUtf8 = blob_str(pBlob) + bomSize;
1105	blob_zero(&temp);
1106	blob_append(&temp, zUtf8, -1);
1107	blob_swap(pBlob, &temp);
1108	blob_reset(&temp);
1109	#ifdef _WIN32
1110	}else if( starts_with_utf16_bom(pBlob, &bomSize, &bomReverse) ){
1111	zUtf8 = blob_buffer(pBlob);
1112	if( bomReverse ){
1113	/* Found BOM, but with reversed bytes */
1114	unsigned int i = blob_size(pBlob);
1115	while( i>0 ){
1116	/* swap bytes of unicode representation */
1117	char zTemp = zUtf8[--i];
1118	zUtf8[i] = zUtf8[i-1];
1119	zUtf8[--i] = zTemp;
1120	}
1121

M src/checkin.c

+1 -1

		--- src/checkin.c
		+++ src/checkin.c
		@@ -906,11 +906,11 @@
906	906	char *zMsg; /* Warning message */
907	907	Blob fname; /* Relative pathname of the file */
908	908	static int allOk = 0; /* Set to true to disable this routine */
909	909
910	910	if( allOk ) return 0;
911		- fUnicode = starts_with_utf16_bom(p, 0);
	911	+ fUnicode = starts_with_utf16_bom(p, 0, 0);
912	912	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
913	913	if( eType==0 || eType==-1 || fUnicode ){
914	914	const char *zWarning;
915	915	const char *zDisable;
916	916	const char *zConvert = "c=convert/";
917	917

	--- src/checkin.c
	+++ src/checkin.c
	@@ -906,11 +906,11 @@
906	char *zMsg; /* Warning message */
907	Blob fname; /* Relative pathname of the file */
908	static int allOk = 0; /* Set to true to disable this routine */
909
910	if( allOk ) return 0;
911	fUnicode = starts_with_utf16_bom(p, 0);
912	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
913	if( eType==0 || eType==-1 || fUnicode ){
914	const char *zWarning;
915	const char *zDisable;
916	const char *zConvert = "c=convert/";
917

	--- src/checkin.c
	+++ src/checkin.c
	@@ -906,11 +906,11 @@
906	char *zMsg; /* Warning message */
907	Blob fname; /* Relative pathname of the file */
908	static int allOk = 0; /* Set to true to disable this routine */
909
910	if( allOk ) return 0;
911	fUnicode = starts_with_utf16_bom(p, 0, 0);
912	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
913	if( eType==0 || eType==-1 || fUnicode ){
914	const char *zWarning;
915	const char *zDisable;
916	const char *zConvert = "c=convert/";
917

M src/checkin.c

+1 -1

		--- src/checkin.c
		+++ src/checkin.c
		@@ -906,11 +906,11 @@
906	906	char *zMsg; /* Warning message */
907	907	Blob fname; /* Relative pathname of the file */
908	908	static int allOk = 0; /* Set to true to disable this routine */
909	909
910	910	if( allOk ) return 0;
911		- fUnicode = starts_with_utf16_bom(p, 0);
	911	+ fUnicode = starts_with_utf16_bom(p, 0, 0);
912	912	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
913	913	if( eType==0 || eType==-1 || fUnicode ){
914	914	const char *zWarning;
915	915	const char *zDisable;
916	916	const char *zConvert = "c=convert/";
917	917

	--- src/checkin.c
	+++ src/checkin.c
	@@ -906,11 +906,11 @@
906	char *zMsg; /* Warning message */
907	Blob fname; /* Relative pathname of the file */
908	static int allOk = 0; /* Set to true to disable this routine */
909
910	if( allOk ) return 0;
911	fUnicode = starts_with_utf16_bom(p, 0);
912	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
913	if( eType==0 || eType==-1 || fUnicode ){
914	const char *zWarning;
915	const char *zDisable;
916	const char *zConvert = "c=convert/";
917

	--- src/checkin.c
	+++ src/checkin.c
	@@ -906,11 +906,11 @@
906	char *zMsg; /* Warning message */
907	Blob fname; /* Relative pathname of the file */
908	static int allOk = 0; /* Set to true to disable this routine */
909
910	if( allOk ) return 0;
911	fUnicode = starts_with_utf16_bom(p, 0, 0);
912	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
913	if( eType==0 || eType==-1 || fUnicode ){
914	const char *zWarning;
915	const char *zDisable;
916	const char *zConvert = "c=convert/";
917

M src/diff.c

+26 -12

		--- src/diff.c
		+++ src/diff.c
		@@ -354,24 +354,38 @@
354	354	if( blob_size(pContent)<bomSize ) return 0;
355	355	return memcmp(z, bom, bomSize)==0;
356	356	}
357	357
358	358	/*
359		-** This function returns non-zero if the blob starts with a UTF-16le or
360		-** UTF-16be byte-order-mark (BOM).
	359	+** This function returns non-zero if the blob starts with a UTF-16
	360	+** byte-order-mark (BOM), either in the endianness of the machine
	361	+** or in reversed byte order.
361	362	*/
362		-int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
	363	+int starts_with_utf16_bom(
	364	+ const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
	365	+ int *pnByte, /* OUT: The number of bytes used for the BOM. */
	366	+ int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
	367	+){
363	368	const char *z = blob_buffer(pContent);
364		- int c1;
365		-
366		- if( pnByte ) *pnByte = 2;
367		- if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368		- c1 = ((unsigned short *)z)[0];
369		- if( (c1==0xfeff) \|\| (c1==0xfffe) ){
370		- if( blob_size(pContent) < 4 ) return 1;
371		- c1 = ((unsigned short *)z)[1];
372		- if( c1 != 0 ) return 1;
	369	+ int bomSize = 2;
	370	+ static const unsigned short bom = 0xfeff;
	371	+ static const unsigned short bom_reversed = 0xfffe;
	372	+ static const unsigned short null = 0;
	373	+ int size;
	374	+
	375	+ if( pnByte ) *pnByte = bomSize;
	376	+ if( pbReverse ) *pbReverse = -1; /* Unknown. */
	377	+ size = blob_size(pContent);
	378	+ if( (size<bomSize) || (size%2) ) return 0;
	379	+ if( memcmp(z, &bom_reversed, bomSize)==0 ){
	380	+ if( pbReverse ) *pbReverse = 1;
	381	+ if( size<(2*bomSize) ) return 1;
	382	+ if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
	383	+ }else if( memcmp(z, &bom, bomSize)==0 ){
	384	+ if( pbReverse ) *pbReverse = 0;
	385	+ if( size<(2*bomSize) ) return 1;
	386	+ if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
373	387	}
374	388	return 0;
375	389	}
376	390
377	391	/*
378	392

	--- src/diff.c
	+++ src/diff.c
	@@ -354,24 +354,38 @@
354	if( blob_size(pContent)<bomSize ) return 0;
355	return memcmp(z, bom, bomSize)==0;
356	}
357
358	/*
359	** This function returns non-zero if the blob starts with a UTF-16le or
360	** UTF-16be byte-order-mark (BOM).

361	*/
362	int starts_with_utf16_bom(const Blob *pContent, int *pnByte){




363	const char *z = blob_buffer(pContent);
364	int c1;
365
366	if( pnByte ) *pnByte = 2;
367	if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368	c1 = ((unsigned short *)z)[0];
369	if( (c1==0xfeff) \|\| (c1==0xfffe) ){
370	if( blob_size(pContent) < 4 ) return 1;
371	c1 = ((unsigned short *)z)[1];
372	if( c1 != 0 ) return 1;









373	}
374	return 0;
375	}
376
377	/*
378

	--- src/diff.c
	+++ src/diff.c
	@@ -354,24 +354,38 @@
354	if( blob_size(pContent)<bomSize ) return 0;
355	return memcmp(z, bom, bomSize)==0;
356	}
357
358	/*
359	** This function returns non-zero if the blob starts with a UTF-16
360	** byte-order-mark (BOM), either in the endianness of the machine
361	** or in reversed byte order.
362	*/
363	int starts_with_utf16_bom(
364	const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
365	int *pnByte, /* OUT: The number of bytes used for the BOM. */
366	int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
367	){
368	const char *z = blob_buffer(pContent);
369	int bomSize = 2;
370	static const unsigned short bom = 0xfeff;
371	static const unsigned short bom_reversed = 0xfffe;
372	static const unsigned short null = 0;
373	int size;
374
375	if( pnByte ) *pnByte = bomSize;
376	if( pbReverse ) *pbReverse = -1; /* Unknown. */
377	size = blob_size(pContent);
378	if( (size<bomSize) || (size%2) ) return 0;
379	if( memcmp(z, &bom_reversed, bomSize)==0 ){
380	if( pbReverse ) *pbReverse = 1;
381	if( size<(2*bomSize) ) return 1;
382	if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
383	}else if( memcmp(z, &bom, bomSize)==0 ){
384	if( pbReverse ) *pbReverse = 0;
385	if( size<(2*bomSize) ) return 1;
386	if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
387	}
388	return 0;
389	}
390
391	/*
392

M src/diff.c

+26 -12

		--- src/diff.c
		+++ src/diff.c
		@@ -354,24 +354,38 @@
354	354	if( blob_size(pContent)<bomSize ) return 0;
355	355	return memcmp(z, bom, bomSize)==0;
356	356	}
357	357
358	358	/*
359		-** This function returns non-zero if the blob starts with a UTF-16le or
360		-** UTF-16be byte-order-mark (BOM).
	359	+** This function returns non-zero if the blob starts with a UTF-16
	360	+** byte-order-mark (BOM), either in the endianness of the machine
	361	+** or in reversed byte order.
361	362	*/
362		-int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
	363	+int starts_with_utf16_bom(
	364	+ const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
	365	+ int *pnByte, /* OUT: The number of bytes used for the BOM. */
	366	+ int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
	367	+){
363	368	const char *z = blob_buffer(pContent);
364		- int c1;
365		-
366		- if( pnByte ) *pnByte = 2;
367		- if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368		- c1 = ((unsigned short *)z)[0];
369		- if( (c1==0xfeff) \|\| (c1==0xfffe) ){
370		- if( blob_size(pContent) < 4 ) return 1;
371		- c1 = ((unsigned short *)z)[1];
372		- if( c1 != 0 ) return 1;
	369	+ int bomSize = 2;
	370	+ static const unsigned short bom = 0xfeff;
	371	+ static const unsigned short bom_reversed = 0xfffe;
	372	+ static const unsigned short null = 0;
	373	+ int size;
	374	+
	375	+ if( pnByte ) *pnByte = bomSize;
	376	+ if( pbReverse ) *pbReverse = -1; /* Unknown. */
	377	+ size = blob_size(pContent);
	378	+ if( (size<bomSize) || (size%2) ) return 0;
	379	+ if( memcmp(z, &bom_reversed, bomSize)==0 ){
	380	+ if( pbReverse ) *pbReverse = 1;
	381	+ if( size<(2*bomSize) ) return 1;
	382	+ if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
	383	+ }else if( memcmp(z, &bom, bomSize)==0 ){
	384	+ if( pbReverse ) *pbReverse = 0;
	385	+ if( size<(2*bomSize) ) return 1;
	386	+ if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
373	387	}
374	388	return 0;
375	389	}
376	390
377	391	/*
378	392

	--- src/diff.c
	+++ src/diff.c
	@@ -354,24 +354,38 @@
354	if( blob_size(pContent)<bomSize ) return 0;
355	return memcmp(z, bom, bomSize)==0;
356	}
357
358	/*
359	** This function returns non-zero if the blob starts with a UTF-16le or
360	** UTF-16be byte-order-mark (BOM).

361	*/
362	int starts_with_utf16_bom(const Blob *pContent, int *pnByte){




363	const char *z = blob_buffer(pContent);
364	int c1;
365
366	if( pnByte ) *pnByte = 2;
367	if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368	c1 = ((unsigned short *)z)[0];
369	if( (c1==0xfeff) \|\| (c1==0xfffe) ){
370	if( blob_size(pContent) < 4 ) return 1;
371	c1 = ((unsigned short *)z)[1];
372	if( c1 != 0 ) return 1;









373	}
374	return 0;
375	}
376
377	/*
378

	--- src/diff.c
	+++ src/diff.c
	@@ -354,24 +354,38 @@
354	if( blob_size(pContent)<bomSize ) return 0;
355	return memcmp(z, bom, bomSize)==0;
356	}
357
358	/*
359	** This function returns non-zero if the blob starts with a UTF-16
360	** byte-order-mark (BOM), either in the endianness of the machine
361	** or in reversed byte order.
362	*/
363	int starts_with_utf16_bom(
364	const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
365	int *pnByte, /* OUT: The number of bytes used for the BOM. */
366	int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
367	){
368	const char *z = blob_buffer(pContent);
369	int bomSize = 2;
370	static const unsigned short bom = 0xfeff;
371	static const unsigned short bom_reversed = 0xfffe;
372	static const unsigned short null = 0;
373	int size;
374
375	if( pnByte ) *pnByte = bomSize;
376	if( pbReverse ) *pbReverse = -1; /* Unknown. */
377	size = blob_size(pContent);
378	if( (size<bomSize) || (size%2) ) return 0;
379	if( memcmp(z, &bom_reversed, bomSize)==0 ){
380	if( pbReverse ) *pbReverse = 1;
381	if( size<(2*bomSize) ) return 1;
382	if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
383	}else if( memcmp(z, &bom, bomSize)==0 ){
384	if( pbReverse ) *pbReverse = 0;
385	if( size<(2*bomSize) ) return 1;
386	if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
387	}
388	return 0;
389	}
390
391	/*
392

Fossil SCM

Keyboard Shortcuts