9b800ee41c3… — Fossil SCM

M src/attach.c

+1 -1

		--- src/attach.c
		+++ src/attach.c
		@@ -520,11 +520,11 @@
520	520	blob_zero(&attach);
521	521	if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522	522	const char *z;
523	523	const char *zLn = P("ln");
524	524	content_get(ridSrc, &attach);
525		- blob_strip_bom(&attach, 0);
	525	+ blob_to_utf8_no_bom(&attach, 0);
526	526	z = blob_str(&attach);
527	527	if( zLn ){
528	528	output_text_with_line_numbers(z, zLn);
529	529	}else{
530	530	@ <pre>
531	531

	--- src/attach.c
	+++ src/attach.c
	@@ -520,11 +520,11 @@
520	blob_zero(&attach);
521	if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522	const char *z;
523	const char *zLn = P("ln");
524	content_get(ridSrc, &attach);
525	blob_strip_bom(&attach, 0);
526	z = blob_str(&attach);
527	if( zLn ){
528	output_text_with_line_numbers(z, zLn);
529	}else{
530	@ <pre>
531

	--- src/attach.c
	+++ src/attach.c
	@@ -520,11 +520,11 @@
520	blob_zero(&attach);
521	if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522	const char *z;
523	const char *zLn = P("ln");
524	content_get(ridSrc, &attach);
525	blob_to_utf8_no_bom(&attach, 0);
526	z = blob_str(&attach);
527	if( zLn ){
528	output_text_with_line_numbers(z, zLn);
529	}else{
530	@ <pre>
531

M src/attach.c

+1 -1

		--- src/attach.c
		+++ src/attach.c
		@@ -520,11 +520,11 @@
520	520	blob_zero(&attach);
521	521	if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522	522	const char *z;
523	523	const char *zLn = P("ln");
524	524	content_get(ridSrc, &attach);
525		- blob_strip_bom(&attach, 0);
	525	+ blob_to_utf8_no_bom(&attach, 0);
526	526	z = blob_str(&attach);
527	527	if( zLn ){
528	528	output_text_with_line_numbers(z, zLn);
529	529	}else{
530	530	@ <pre>
531	531

	--- src/attach.c
	+++ src/attach.c
	@@ -520,11 +520,11 @@
520	blob_zero(&attach);
521	if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522	const char *z;
523	const char *zLn = P("ln");
524	content_get(ridSrc, &attach);
525	blob_strip_bom(&attach, 0);
526	z = blob_str(&attach);
527	if( zLn ){
528	output_text_with_line_numbers(z, zLn);
529	}else{
530	@ <pre>
531

	--- src/attach.c
	+++ src/attach.c
	@@ -520,11 +520,11 @@
520	blob_zero(&attach);
521	if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522	const char *z;
523	const char *zLn = P("ln");
524	content_get(ridSrc, &attach);
525	blob_to_utf8_no_bom(&attach, 0);
526	z = blob_str(&attach);
527	if( zLn ){
528	output_text_with_line_numbers(z, zLn);
529	}else{
530	@ <pre>
531

M src/blob.c

+43 -51

		--- src/blob.c
		+++ src/blob.c
		@@ -1088,61 +1088,53 @@
1088	1088	pLeft = pRight;
1089	1089	*pRight = swap;
1090	1090	}
1091	1091
1092	1092	/*
1093		-** Strip a possible BOM from the blob. On Windows, if there
1094		-** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095		-** to UTF-8 is done.
1096		-** If useMbcs is false and there is no BOM, the input string
1097		-** is assumed to be UTF-8 already, so no conversion is done.
1098		-*/
1099		-void blob_strip_bom(Blob *pBlob, int useMbcs){
1100		- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101		-#ifdef _WIN32
1102		- static const unsigned short ubom = 0xfeff;
1103		- static const unsigned short urbom = 0xfffe;
1104		-#endif /* _WIN32 */
1105		- char *zUtf8;
1106		- if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107		- struct Blob temp;
1108		- zUtf8 = blob_str(pBlob) + 3;
1109		- blob_zero(&temp);
1110		- blob_append(&temp, zUtf8, -1);
1111		- fossil_mbcs_free(zUtf8);
1112		- blob_swap(pBlob, &temp);
1113		- blob_reset(&temp);
1114		-#ifdef _WIN32
1115		- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116		- && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117		- /* Make sure the blob contains two terminating 0-bytes */
1118		- blob_append(pBlob, "", 1);
1119		- zUtf8 = blob_str(pBlob) + 2;
1120		- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121		- blob_zero(pBlob);
1122		- blob_append(pBlob, zUtf8, -1);
1123		- fossil_mbcs_free(zUtf8);
1124		- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125		- && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126		- unsigned int i = blob_size(pBlob);
1127		- zUtf8 = blob_buffer(pBlob);
1128		- while( i > 0 ){
1129		- /* swap bytes of unicode representation */
1130		- char temp = zUtf8[--i];
1131		- zUtf8[i] = zUtf8[i-1];
1132		- zUtf8[--i] = temp;
1133		- }
1134		- /* Make sure the blob contains two terminating 0-bytes */
1135		- blob_append(pBlob, "", 1);
1136		- zUtf8 = blob_str(pBlob) + 2;
1137		- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138		- blob_zero(pBlob);
1139		- blob_append(pBlob, zUtf8, -1);
1140		- fossil_mbcs_free(zUtf8);
1141		- }else if (useMbcs) {
1142		- zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143		- blob_zero(pBlob);
	1093	+** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
	1094	+** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
	1095	+** done. If useMbcs is false and there is no BOM, the input string is assumed
	1096	+** to be UTF-8 already, so no conversion is done.
	1097	+*/
	1098	+void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
	1099	+ char *zUtf8;
	1100	+ int bomSize = 0;
	1101	+ if( starts_with_utf8_bom(pBlob, &bomSize) ){
	1102	+ struct Blob temp;
	1103	+ zUtf8 = blob_str(pBlob) + bomSize;
	1104	+ blob_zero(&temp);
	1105	+ blob_append(&temp, zUtf8, -1);
	1106	+ blob_swap(pBlob, &temp);
	1107	+ blob_reset(&temp);
	1108	+#ifdef _WIN32
	1109	+ }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
	1110	+ /* Make sure the blob contains two terminating 0-bytes */
	1111	+ blob_append(pBlob, "", 1);
	1112	+ zUtf8 = blob_str(pBlob) + bomSize;
	1113	+ zUtf8 = fossil_unicode_to_utf8(zUtf8);
	1114	+ blob_zero(pBlob);
	1115	+ blob_append(pBlob, zUtf8, -1);
	1116	+ fossil_mbcs_free(zUtf8);
	1117	+ }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
	1118	+ unsigned int i = blob_size(pBlob);
	1119	+ zUtf8 = blob_buffer(pBlob);
	1120	+ while( i > 0 ){
	1121	+ /* swap bytes of unicode representation */
	1122	+ char zTemp = zUtf8[--i];
	1123	+ zUtf8[i] = zUtf8[i-1];
	1124	+ zUtf8[--i] = zTemp;
	1125	+ }
	1126	+ /* Make sure the blob contains two terminating 0-bytes */
	1127	+ blob_append(pBlob, "", 1);
	1128	+ zUtf8 = blob_str(pBlob) + bomSize;
	1129	+ zUtf8 = fossil_unicode_to_utf8(zUtf8);
	1130	+ blob_zero(pBlob);
	1131	+ blob_append(pBlob, zUtf8, -1);
	1132	+ fossil_mbcs_free(zUtf8);
	1133	+ }else if( useMbcs ){
	1134	+ zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
	1135	+ blob_reset(pBlob);
1144	1136	blob_append(pBlob, zUtf8, -1);
1145	1137	fossil_mbcs_free(zUtf8);
1146	1138	#endif /* _WIN32 */
1147	1139	}
1148	1140	}
1149	1141

	--- src/blob.c
	+++ src/blob.c
	@@ -1088,61 +1088,53 @@
1088	pLeft = pRight;
1089	*pRight = swap;
1090	}
1091
1092	/*
1093	** Strip a possible BOM from the blob. On Windows, if there
1094	** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095	** to UTF-8 is done.
1096	** If useMbcs is false and there is no BOM, the input string
1097	** is assumed to be UTF-8 already, so no conversion is done.
1098	*/
1099	void blob_strip_bom(Blob *pBlob, int useMbcs){
1100	static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101	#ifdef _WIN32
1102	static const unsigned short ubom = 0xfeff;
1103	static const unsigned short urbom = 0xfffe;
1104	#endif /* _WIN32 */
1105	char *zUtf8;
1106	if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107	struct Blob temp;
1108	zUtf8 = blob_str(pBlob) + 3;
1109	blob_zero(&temp);
1110	blob_append(&temp, zUtf8, -1);
1111	fossil_mbcs_free(zUtf8);
1112	blob_swap(pBlob, &temp);
1113	blob_reset(&temp);
1114	#ifdef _WIN32
1115	}else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116	&& memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117	/* Make sure the blob contains two terminating 0-bytes */
1118	blob_append(pBlob, "", 1);
1119	zUtf8 = blob_str(pBlob) + 2;
1120	zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121	blob_zero(pBlob);
1122	blob_append(pBlob, zUtf8, -1);
1123	fossil_mbcs_free(zUtf8);
1124	}else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125	&& memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126	unsigned int i = blob_size(pBlob);
1127	zUtf8 = blob_buffer(pBlob);
1128	while( i > 0 ){
1129	/* swap bytes of unicode representation */
1130	char temp = zUtf8[--i];
1131	zUtf8[i] = zUtf8[i-1];
1132	zUtf8[--i] = temp;
1133	}
1134	/* Make sure the blob contains two terminating 0-bytes */
1135	blob_append(pBlob, "", 1);
1136	zUtf8 = blob_str(pBlob) + 2;
1137	zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138	blob_zero(pBlob);
1139	blob_append(pBlob, zUtf8, -1);
1140	fossil_mbcs_free(zUtf8);
1141	}else if (useMbcs) {
1142	zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143	blob_zero(pBlob);
1144	blob_append(pBlob, zUtf8, -1);
1145	fossil_mbcs_free(zUtf8);
1146	#endif /* _WIN32 */
1147	}
1148	}
1149

	--- src/blob.c
	+++ src/blob.c
	@@ -1088,61 +1088,53 @@
1088	pLeft = pRight;
1089	*pRight = swap;
1090	}
1091
1092	/*
1093	** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1094	** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1095	** done. If useMbcs is false and there is no BOM, the input string is assumed
1096	** to be UTF-8 already, so no conversion is done.
1097	*/
1098	void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099	char *zUtf8;
1100	int bomSize = 0;
1101	if( starts_with_utf8_bom(pBlob, &bomSize) ){
1102	struct Blob temp;
1103	zUtf8 = blob_str(pBlob) + bomSize;
1104	blob_zero(&temp);
1105	blob_append(&temp, zUtf8, -1);
1106	blob_swap(pBlob, &temp);
1107	blob_reset(&temp);
1108	#ifdef _WIN32
1109	}else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
1110	/* Make sure the blob contains two terminating 0-bytes */
1111	blob_append(pBlob, "", 1);
1112	zUtf8 = blob_str(pBlob) + bomSize;
1113	zUtf8 = fossil_unicode_to_utf8(zUtf8);
1114	blob_zero(pBlob);
1115	blob_append(pBlob, zUtf8, -1);
1116	fossil_mbcs_free(zUtf8);
1117	}else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
1118	unsigned int i = blob_size(pBlob);
1119	zUtf8 = blob_buffer(pBlob);
1120	while( i > 0 ){
1121	/* swap bytes of unicode representation */
1122	char zTemp = zUtf8[--i];
1123	zUtf8[i] = zUtf8[i-1];
1124	zUtf8[--i] = zTemp;
1125	}
1126	/* Make sure the blob contains two terminating 0-bytes */
1127	blob_append(pBlob, "", 1);
1128	zUtf8 = blob_str(pBlob) + bomSize;
1129	zUtf8 = fossil_unicode_to_utf8(zUtf8);
1130	blob_zero(pBlob);
1131	blob_append(pBlob, zUtf8, -1);
1132	fossil_mbcs_free(zUtf8);
1133	}else if( useMbcs ){
1134	zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1135	blob_reset(pBlob);








1136	blob_append(pBlob, zUtf8, -1);
1137	fossil_mbcs_free(zUtf8);
1138	#endif /* _WIN32 */
1139	}
1140	}
1141

M src/blob.c

+43 -51

		--- src/blob.c
		+++ src/blob.c
		@@ -1088,61 +1088,53 @@
1088	1088	pLeft = pRight;
1089	1089	*pRight = swap;
1090	1090	}
1091	1091
1092	1092	/*
1093		-** Strip a possible BOM from the blob. On Windows, if there
1094		-** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095		-** to UTF-8 is done.
1096		-** If useMbcs is false and there is no BOM, the input string
1097		-** is assumed to be UTF-8 already, so no conversion is done.
1098		-*/
1099		-void blob_strip_bom(Blob *pBlob, int useMbcs){
1100		- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101		-#ifdef _WIN32
1102		- static const unsigned short ubom = 0xfeff;
1103		- static const unsigned short urbom = 0xfffe;
1104		-#endif /* _WIN32 */
1105		- char *zUtf8;
1106		- if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107		- struct Blob temp;
1108		- zUtf8 = blob_str(pBlob) + 3;
1109		- blob_zero(&temp);
1110		- blob_append(&temp, zUtf8, -1);
1111		- fossil_mbcs_free(zUtf8);
1112		- blob_swap(pBlob, &temp);
1113		- blob_reset(&temp);
1114		-#ifdef _WIN32
1115		- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116		- && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117		- /* Make sure the blob contains two terminating 0-bytes */
1118		- blob_append(pBlob, "", 1);
1119		- zUtf8 = blob_str(pBlob) + 2;
1120		- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121		- blob_zero(pBlob);
1122		- blob_append(pBlob, zUtf8, -1);
1123		- fossil_mbcs_free(zUtf8);
1124		- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125		- && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126		- unsigned int i = blob_size(pBlob);
1127		- zUtf8 = blob_buffer(pBlob);
1128		- while( i > 0 ){
1129		- /* swap bytes of unicode representation */
1130		- char temp = zUtf8[--i];
1131		- zUtf8[i] = zUtf8[i-1];
1132		- zUtf8[--i] = temp;
1133		- }
1134		- /* Make sure the blob contains two terminating 0-bytes */
1135		- blob_append(pBlob, "", 1);
1136		- zUtf8 = blob_str(pBlob) + 2;
1137		- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138		- blob_zero(pBlob);
1139		- blob_append(pBlob, zUtf8, -1);
1140		- fossil_mbcs_free(zUtf8);
1141		- }else if (useMbcs) {
1142		- zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143		- blob_zero(pBlob);
	1093	+** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
	1094	+** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
	1095	+** done. If useMbcs is false and there is no BOM, the input string is assumed
	1096	+** to be UTF-8 already, so no conversion is done.
	1097	+*/
	1098	+void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
	1099	+ char *zUtf8;
	1100	+ int bomSize = 0;
	1101	+ if( starts_with_utf8_bom(pBlob, &bomSize) ){
	1102	+ struct Blob temp;
	1103	+ zUtf8 = blob_str(pBlob) + bomSize;
	1104	+ blob_zero(&temp);
	1105	+ blob_append(&temp, zUtf8, -1);
	1106	+ blob_swap(pBlob, &temp);
	1107	+ blob_reset(&temp);
	1108	+#ifdef _WIN32
	1109	+ }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
	1110	+ /* Make sure the blob contains two terminating 0-bytes */
	1111	+ blob_append(pBlob, "", 1);
	1112	+ zUtf8 = blob_str(pBlob) + bomSize;
	1113	+ zUtf8 = fossil_unicode_to_utf8(zUtf8);
	1114	+ blob_zero(pBlob);
	1115	+ blob_append(pBlob, zUtf8, -1);
	1116	+ fossil_mbcs_free(zUtf8);
	1117	+ }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
	1118	+ unsigned int i = blob_size(pBlob);
	1119	+ zUtf8 = blob_buffer(pBlob);
	1120	+ while( i > 0 ){
	1121	+ /* swap bytes of unicode representation */
	1122	+ char zTemp = zUtf8[--i];
	1123	+ zUtf8[i] = zUtf8[i-1];
	1124	+ zUtf8[--i] = zTemp;
	1125	+ }
	1126	+ /* Make sure the blob contains two terminating 0-bytes */
	1127	+ blob_append(pBlob, "", 1);
	1128	+ zUtf8 = blob_str(pBlob) + bomSize;
	1129	+ zUtf8 = fossil_unicode_to_utf8(zUtf8);
	1130	+ blob_zero(pBlob);
	1131	+ blob_append(pBlob, zUtf8, -1);
	1132	+ fossil_mbcs_free(zUtf8);
	1133	+ }else if( useMbcs ){
	1134	+ zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
	1135	+ blob_reset(pBlob);
1144	1136	blob_append(pBlob, zUtf8, -1);
1145	1137	fossil_mbcs_free(zUtf8);
1146	1138	#endif /* _WIN32 */
1147	1139	}
1148	1140	}
1149	1141

	--- src/blob.c
	+++ src/blob.c
	@@ -1088,61 +1088,53 @@
1088	pLeft = pRight;
1089	*pRight = swap;
1090	}
1091
1092	/*
1093	** Strip a possible BOM from the blob. On Windows, if there
1094	** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095	** to UTF-8 is done.
1096	** If useMbcs is false and there is no BOM, the input string
1097	** is assumed to be UTF-8 already, so no conversion is done.
1098	*/
1099	void blob_strip_bom(Blob *pBlob, int useMbcs){
1100	static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101	#ifdef _WIN32
1102	static const unsigned short ubom = 0xfeff;
1103	static const unsigned short urbom = 0xfffe;
1104	#endif /* _WIN32 */
1105	char *zUtf8;
1106	if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107	struct Blob temp;
1108	zUtf8 = blob_str(pBlob) + 3;
1109	blob_zero(&temp);
1110	blob_append(&temp, zUtf8, -1);
1111	fossil_mbcs_free(zUtf8);
1112	blob_swap(pBlob, &temp);
1113	blob_reset(&temp);
1114	#ifdef _WIN32
1115	}else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116	&& memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117	/* Make sure the blob contains two terminating 0-bytes */
1118	blob_append(pBlob, "", 1);
1119	zUtf8 = blob_str(pBlob) + 2;
1120	zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121	blob_zero(pBlob);
1122	blob_append(pBlob, zUtf8, -1);
1123	fossil_mbcs_free(zUtf8);
1124	}else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125	&& memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126	unsigned int i = blob_size(pBlob);
1127	zUtf8 = blob_buffer(pBlob);
1128	while( i > 0 ){
1129	/* swap bytes of unicode representation */
1130	char temp = zUtf8[--i];
1131	zUtf8[i] = zUtf8[i-1];
1132	zUtf8[--i] = temp;
1133	}
1134	/* Make sure the blob contains two terminating 0-bytes */
1135	blob_append(pBlob, "", 1);
1136	zUtf8 = blob_str(pBlob) + 2;
1137	zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138	blob_zero(pBlob);
1139	blob_append(pBlob, zUtf8, -1);
1140	fossil_mbcs_free(zUtf8);
1141	}else if (useMbcs) {
1142	zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143	blob_zero(pBlob);
1144	blob_append(pBlob, zUtf8, -1);
1145	fossil_mbcs_free(zUtf8);
1146	#endif /* _WIN32 */
1147	}
1148	}
1149

	--- src/blob.c
	+++ src/blob.c
	@@ -1088,61 +1088,53 @@
1088	pLeft = pRight;
1089	*pRight = swap;
1090	}
1091
1092	/*
1093	** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1094	** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1095	** done. If useMbcs is false and there is no BOM, the input string is assumed
1096	** to be UTF-8 already, so no conversion is done.
1097	*/
1098	void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099	char *zUtf8;
1100	int bomSize = 0;
1101	if( starts_with_utf8_bom(pBlob, &bomSize) ){
1102	struct Blob temp;
1103	zUtf8 = blob_str(pBlob) + bomSize;
1104	blob_zero(&temp);
1105	blob_append(&temp, zUtf8, -1);
1106	blob_swap(pBlob, &temp);
1107	blob_reset(&temp);
1108	#ifdef _WIN32
1109	}else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
1110	/* Make sure the blob contains two terminating 0-bytes */
1111	blob_append(pBlob, "", 1);
1112	zUtf8 = blob_str(pBlob) + bomSize;
1113	zUtf8 = fossil_unicode_to_utf8(zUtf8);
1114	blob_zero(pBlob);
1115	blob_append(pBlob, zUtf8, -1);
1116	fossil_mbcs_free(zUtf8);
1117	}else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
1118	unsigned int i = blob_size(pBlob);
1119	zUtf8 = blob_buffer(pBlob);
1120	while( i > 0 ){
1121	/* swap bytes of unicode representation */
1122	char zTemp = zUtf8[--i];
1123	zUtf8[i] = zUtf8[i-1];
1124	zUtf8[--i] = zTemp;
1125	}
1126	/* Make sure the blob contains two terminating 0-bytes */
1127	blob_append(pBlob, "", 1);
1128	zUtf8 = blob_str(pBlob) + bomSize;
1129	zUtf8 = fossil_unicode_to_utf8(zUtf8);
1130	blob_zero(pBlob);
1131	blob_append(pBlob, zUtf8, -1);
1132	fossil_mbcs_free(zUtf8);
1133	}else if( useMbcs ){
1134	zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1135	blob_reset(pBlob);








1136	blob_append(pBlob, zUtf8, -1);
1137	fossil_mbcs_free(zUtf8);
1138	#endif /* _WIN32 */
1139	}
1140	}
1141

M src/checkin.c

+6 -5

		--- src/checkin.c
		+++ src/checkin.c
		@@ -521,11 +521,11 @@
521	521	break;
522	522	}
523	523	blob_append(&reply, zIn, -1);
524	524	}
525	525	}
526		- blob_strip_bom(&reply, 1);
	526	+ blob_to_utf8_no_bom(&reply, 1);
527	527	blob_remove_cr(&reply);
528	528	file_delete(zFile);
529	529	free(zFile);
530	530	blob_zero(pComment);
531	531	while( blob_line(&reply, &line) ){
		@@ -570,12 +570,13 @@
570	570	int parent_rid,
571	571	const char *zUserOvrd
572	572	){
573	573	Blob prompt;
574	574	#ifdef _WIN32
575		- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576		- blob_init(&prompt, (const char *) bom, 3);
	575	+ int bomSize;
	576	+ const unsigned char *bom = get_utf8_bom(&bomSize);
	577	+ blob_init(&prompt, (const char *) bom, bomSize);
577	578	if( zInit && zInit[0]) {
578	579	blob_append(&prompt, zInit, -1);
579	580	}
580	581	#else
581	582	blob_init(&prompt, zInit, -1);
		@@ -900,11 +901,11 @@
900	901	char *zMsg; /* Warning message */
901	902	Blob fname; /* Relative pathname of the file */
902	903	static int allOk = 0; /* Set to true to disable this routine */
903	904
904	905	if( allOk ) return;
905		- fUnicode = starts_with_utf16_bom(p);
	906	+ fUnicode = starts_with_utf16_bom(p, 0);
906	907	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
907	908	if( eType==0 || eType==-1 || fUnicode ){
908	909	const char *zWarning;
909	910	Blob ans;
910	911	char cReply;
		@@ -1251,11 +1252,11 @@
1251	1252	blob_zero(&comment);
1252	1253	blob_append(&comment, zComment, -1);
1253	1254	}else if( zComFile ){
1254	1255	blob_zero(&comment);
1255	1256	blob_read_from_file(&comment, zComFile);
1256		- blob_strip_bom(&comment, 1);
	1257	+ blob_to_utf8_no_bom(&comment, 1);
1257	1258	}else{
1258	1259	char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1259	1260	prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1260	1261	if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1261	1262	blob_zero(&ans);
1262	1263

	--- src/checkin.c
	+++ src/checkin.c
	@@ -521,11 +521,11 @@
521	break;
522	}
523	blob_append(&reply, zIn, -1);
524	}
525	}
526	blob_strip_bom(&reply, 1);
527	blob_remove_cr(&reply);
528	file_delete(zFile);
529	free(zFile);
530	blob_zero(pComment);
531	while( blob_line(&reply, &line) ){
	@@ -570,12 +570,13 @@
570	int parent_rid,
571	const char *zUserOvrd
572	){
573	Blob prompt;
574	#ifdef _WIN32
575	static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576	blob_init(&prompt, (const char *) bom, 3);

577	if( zInit && zInit[0]) {
578	blob_append(&prompt, zInit, -1);
579	}
580	#else
581	blob_init(&prompt, zInit, -1);
	@@ -900,11 +901,11 @@
900	char *zMsg; /* Warning message */
901	Blob fname; /* Relative pathname of the file */
902	static int allOk = 0; /* Set to true to disable this routine */
903
904	if( allOk ) return;
905	fUnicode = starts_with_utf16_bom(p);
906	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
907	if( eType==0 || eType==-1 || fUnicode ){
908	const char *zWarning;
909	Blob ans;
910	char cReply;
	@@ -1251,11 +1252,11 @@
1251	blob_zero(&comment);
1252	blob_append(&comment, zComment, -1);
1253	}else if( zComFile ){
1254	blob_zero(&comment);
1255	blob_read_from_file(&comment, zComFile);
1256	blob_strip_bom(&comment, 1);
1257	}else{
1258	char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1259	prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1260	if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1261	blob_zero(&ans);
1262

	--- src/checkin.c
	+++ src/checkin.c
	@@ -521,11 +521,11 @@
521	break;
522	}
523	blob_append(&reply, zIn, -1);
524	}
525	}
526	blob_to_utf8_no_bom(&reply, 1);
527	blob_remove_cr(&reply);
528	file_delete(zFile);
529	free(zFile);
530	blob_zero(pComment);
531	while( blob_line(&reply, &line) ){
	@@ -570,12 +570,13 @@
570	int parent_rid,
571	const char *zUserOvrd
572	){
573	Blob prompt;
574	#ifdef _WIN32
575	int bomSize;
576	const unsigned char *bom = get_utf8_bom(&bomSize);
577	blob_init(&prompt, (const char *) bom, bomSize);
578	if( zInit && zInit[0]) {
579	blob_append(&prompt, zInit, -1);
580	}
581	#else
582	blob_init(&prompt, zInit, -1);
	@@ -900,11 +901,11 @@
901	char *zMsg; /* Warning message */
902	Blob fname; /* Relative pathname of the file */
903	static int allOk = 0; /* Set to true to disable this routine */
904
905	if( allOk ) return;
906	fUnicode = starts_with_utf16_bom(p, 0);
907	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
908	if( eType==0 || eType==-1 || fUnicode ){
909	const char *zWarning;
910	Blob ans;
911	char cReply;
	@@ -1251,11 +1252,11 @@
1252	blob_zero(&comment);
1253	blob_append(&comment, zComment, -1);
1254	}else if( zComFile ){
1255	blob_zero(&comment);
1256	blob_read_from_file(&comment, zComFile);
1257	blob_to_utf8_no_bom(&comment, 1);
1258	}else{
1259	char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1260	prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1261	if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1262	blob_zero(&ans);
1263

M src/checkin.c

+6 -5

		--- src/checkin.c
		+++ src/checkin.c
		@@ -521,11 +521,11 @@
521	521	break;
522	522	}
523	523	blob_append(&reply, zIn, -1);
524	524	}
525	525	}
526		- blob_strip_bom(&reply, 1);
	526	+ blob_to_utf8_no_bom(&reply, 1);
527	527	blob_remove_cr(&reply);
528	528	file_delete(zFile);
529	529	free(zFile);
530	530	blob_zero(pComment);
531	531	while( blob_line(&reply, &line) ){
		@@ -570,12 +570,13 @@
570	570	int parent_rid,
571	571	const char *zUserOvrd
572	572	){
573	573	Blob prompt;
574	574	#ifdef _WIN32
575		- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576		- blob_init(&prompt, (const char *) bom, 3);
	575	+ int bomSize;
	576	+ const unsigned char *bom = get_utf8_bom(&bomSize);
	577	+ blob_init(&prompt, (const char *) bom, bomSize);
577	578	if( zInit && zInit[0]) {
578	579	blob_append(&prompt, zInit, -1);
579	580	}
580	581	#else
581	582	blob_init(&prompt, zInit, -1);
		@@ -900,11 +901,11 @@
900	901	char *zMsg; /* Warning message */
901	902	Blob fname; /* Relative pathname of the file */
902	903	static int allOk = 0; /* Set to true to disable this routine */
903	904
904	905	if( allOk ) return;
905		- fUnicode = starts_with_utf16_bom(p);
	906	+ fUnicode = starts_with_utf16_bom(p, 0);
906	907	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
907	908	if( eType==0 || eType==-1 || fUnicode ){
908	909	const char *zWarning;
909	910	Blob ans;
910	911	char cReply;
		@@ -1251,11 +1252,11 @@
1251	1252	blob_zero(&comment);
1252	1253	blob_append(&comment, zComment, -1);
1253	1254	}else if( zComFile ){
1254	1255	blob_zero(&comment);
1255	1256	blob_read_from_file(&comment, zComFile);
1256		- blob_strip_bom(&comment, 1);
	1257	+ blob_to_utf8_no_bom(&comment, 1);
1257	1258	}else{
1258	1259	char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1259	1260	prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1260	1261	if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1261	1262	blob_zero(&ans);
1262	1263

	--- src/checkin.c
	+++ src/checkin.c
	@@ -521,11 +521,11 @@
521	break;
522	}
523	blob_append(&reply, zIn, -1);
524	}
525	}
526	blob_strip_bom(&reply, 1);
527	blob_remove_cr(&reply);
528	file_delete(zFile);
529	free(zFile);
530	blob_zero(pComment);
531	while( blob_line(&reply, &line) ){
	@@ -570,12 +570,13 @@
570	int parent_rid,
571	const char *zUserOvrd
572	){
573	Blob prompt;
574	#ifdef _WIN32
575	static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576	blob_init(&prompt, (const char *) bom, 3);

577	if( zInit && zInit[0]) {
578	blob_append(&prompt, zInit, -1);
579	}
580	#else
581	blob_init(&prompt, zInit, -1);
	@@ -900,11 +901,11 @@
900	char *zMsg; /* Warning message */
901	Blob fname; /* Relative pathname of the file */
902	static int allOk = 0; /* Set to true to disable this routine */
903
904	if( allOk ) return;
905	fUnicode = starts_with_utf16_bom(p);
906	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
907	if( eType==0 || eType==-1 || fUnicode ){
908	const char *zWarning;
909	Blob ans;
910	char cReply;
	@@ -1251,11 +1252,11 @@
1251	blob_zero(&comment);
1252	blob_append(&comment, zComment, -1);
1253	}else if( zComFile ){
1254	blob_zero(&comment);
1255	blob_read_from_file(&comment, zComFile);
1256	blob_strip_bom(&comment, 1);
1257	}else{
1258	char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1259	prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1260	if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1261	blob_zero(&ans);
1262

	--- src/checkin.c
	+++ src/checkin.c
	@@ -521,11 +521,11 @@
521	break;
522	}
523	blob_append(&reply, zIn, -1);
524	}
525	}
526	blob_to_utf8_no_bom(&reply, 1);
527	blob_remove_cr(&reply);
528	file_delete(zFile);
529	free(zFile);
530	blob_zero(pComment);
531	while( blob_line(&reply, &line) ){
	@@ -570,12 +570,13 @@
570	int parent_rid,
571	const char *zUserOvrd
572	){
573	Blob prompt;
574	#ifdef _WIN32
575	int bomSize;
576	const unsigned char *bom = get_utf8_bom(&bomSize);
577	blob_init(&prompt, (const char *) bom, bomSize);
578	if( zInit && zInit[0]) {
579	blob_append(&prompt, zInit, -1);
580	}
581	#else
582	blob_init(&prompt, zInit, -1);
	@@ -900,11 +901,11 @@
901	char *zMsg; /* Warning message */
902	Blob fname; /* Relative pathname of the file */
903	static int allOk = 0; /* Set to true to disable this routine */
904
905	if( allOk ) return;
906	fUnicode = starts_with_utf16_bom(p, 0);
907	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
908	if( eType==0 || eType==-1 || fUnicode ){
909	const char *zWarning;
910	Blob ans;
911	char cReply;
	@@ -1251,11 +1252,11 @@
1252	blob_zero(&comment);
1253	blob_append(&comment, zComment, -1);
1254	}else if( zComFile ){
1255	blob_zero(&comment);
1256	blob_read_from_file(&comment, zComFile);
1257	blob_to_utf8_no_bom(&comment, 1);
1258	}else{
1259	char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1260	prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1261	if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1262	blob_zero(&ans);
1263

M src/diff.c

+62 -1

		--- src/diff.c
		+++ src/diff.c
		@@ -321,28 +321,89 @@
321	321	if( j>UTF16_LENGTH_MASK ){
322	322	return 0; /* Very long line -> binary */
323	323	}
324	324	return result; /* No problems seen -> not binary */
325	325	}
	326	+
	327	+/*
	328	+** This function returns an array of bytes representing the byte-order-mark
	329	+** for UTF-8.
	330	+*/
	331	+const unsigned char *get_utf8_bom(int *pnByte){
	332	+ static const unsigned char bom[] = {
	333	+ 0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
	334	+ };
	335	+ if( pnByte ) *pnByte = 3;
	336	+ return bom;
	337	+}
	338	+
	339	+/*
	340	+** This function returns non-zero if the blob starts with a UTF-8
	341	+** byte-order-mark (BOM).
	342	+*/
	343	+int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
	344	+ const char *z = blob_buffer(pContent);
	345	+ int bomSize = 0;
	346	+ const unsigned char *bom = get_utf8_bom(&bomSize);
	347	+
	348	+ if( pnByte ) *pnByte = bomSize;
	349	+ if( blob_size(pContent)<bomSize ) return 0;
	350	+ return memcmp(z, bom, bomSize)==0;
	351	+}
326	352
327	353	/*
328	354	** This function returns non-zero if the blob starts with a UTF-16le or
329	355	** UTF-16be byte-order-mark (BOM).
330	356	*/
331		-int starts_with_utf16_bom(const Blob *pContent){
	357	+int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
332	358	const char *z = blob_buffer(pContent);
333	359	int c1, c2;
334	360
	361	+ if( pnByte ) *pnByte = 2;
335	362	if( blob_size(pContent)<2 ) return 0;
336	363	c1 = z[0]; c2 = z[1];
337	364	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
338	365	return 1;
339	366	}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340	367	return 1;
341	368	}
342	369	return 0;
343	370	}
	371	+
	372	+/*
	373	+** This function returns non-zero if the blob starts with a UTF-16le
	374	+** byte-order-mark (BOM).
	375	+*/
	376	+int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
	377	+ const char *z = blob_buffer(pContent);
	378	+ int c1, c2;
	379	+
	380	+ if( pnByte ) *pnByte = 2;
	381	+ if( blob_size(pContent)<2 ) return 0;
	382	+ c1 = z[0]; c2 = z[1];
	383	+ if( (c1==(char)0xff) && (c2==(char)0xfe) ){
	384	+ return 1;
	385	+ }
	386	+ return 0;
	387	+}
	388	+
	389	+/*
	390	+** This function returns non-zero if the blob starts with a UTF-16be
	391	+** byte-order-mark (BOM).
	392	+*/
	393	+int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
	394	+ const char *z = blob_buffer(pContent);
	395	+ int c1, c2;
	396	+
	397	+ if( pnByte ) *pnByte = 2;
	398	+ if( blob_size(pContent)<2 ) return 0;
	399	+ c1 = z[0]; c2 = z[1];
	400	+ if( (c1==(char)0xfe) && (c2==(char)0xff) ){
	401	+ return 1;
	402	+ }
	403	+ return 0;
	404	+}
344	405
345	406	/*
346	407	** Return true if two DLine elements are identical.
347	408	*/
348	409	static int same_dline(DLine *pA, DLine *pB){
349	410

	--- src/diff.c
	+++ src/diff.c
	@@ -321,28 +321,89 @@
321	if( j>UTF16_LENGTH_MASK ){
322	return 0; /* Very long line -> binary */
323	}
324	return result; /* No problems seen -> not binary */
325	}


























326
327	/*
328	** This function returns non-zero if the blob starts with a UTF-16le or
329	** UTF-16be byte-order-mark (BOM).
330	*/
331	int starts_with_utf16_bom(const Blob *pContent){
332	const char *z = blob_buffer(pContent);
333	int c1, c2;
334

335	if( blob_size(pContent)<2 ) return 0;
336	c1 = z[0]; c2 = z[1];
337	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
338	return 1;
339	}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340	return 1;
341	}
342	return 0;
343	}


































344
345	/*
346	** Return true if two DLine elements are identical.
347	*/
348	static int same_dline(DLine *pA, DLine *pB){
349

	--- src/diff.c
	+++ src/diff.c
	@@ -321,28 +321,89 @@
321	if( j>UTF16_LENGTH_MASK ){
322	return 0; /* Very long line -> binary */
323	}
324	return result; /* No problems seen -> not binary */
325	}
326
327	/*
328	** This function returns an array of bytes representing the byte-order-mark
329	** for UTF-8.
330	*/
331	const unsigned char *get_utf8_bom(int *pnByte){
332	static const unsigned char bom[] = {
333	0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
334	};
335	if( pnByte ) *pnByte = 3;
336	return bom;
337	}
338
339	/*
340	** This function returns non-zero if the blob starts with a UTF-8
341	** byte-order-mark (BOM).
342	*/
343	int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
344	const char *z = blob_buffer(pContent);
345	int bomSize = 0;
346	const unsigned char *bom = get_utf8_bom(&bomSize);
347
348	if( pnByte ) *pnByte = bomSize;
349	if( blob_size(pContent)<bomSize ) return 0;
350	return memcmp(z, bom, bomSize)==0;
351	}
352
353	/*
354	** This function returns non-zero if the blob starts with a UTF-16le or
355	** UTF-16be byte-order-mark (BOM).
356	*/
357	int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
358	const char *z = blob_buffer(pContent);
359	int c1, c2;
360
361	if( pnByte ) *pnByte = 2;
362	if( blob_size(pContent)<2 ) return 0;
363	c1 = z[0]; c2 = z[1];
364	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
365	return 1;
366	}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
367	return 1;
368	}
369	return 0;
370	}
371
372	/*
373	** This function returns non-zero if the blob starts with a UTF-16le
374	** byte-order-mark (BOM).
375	*/
376	int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
377	const char *z = blob_buffer(pContent);
378	int c1, c2;
379
380	if( pnByte ) *pnByte = 2;
381	if( blob_size(pContent)<2 ) return 0;
382	c1 = z[0]; c2 = z[1];
383	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
384	return 1;
385	}
386	return 0;
387	}
388
389	/*
390	** This function returns non-zero if the blob starts with a UTF-16be
391	** byte-order-mark (BOM).
392	*/
393	int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
394	const char *z = blob_buffer(pContent);
395	int c1, c2;
396
397	if( pnByte ) *pnByte = 2;
398	if( blob_size(pContent)<2 ) return 0;
399	c1 = z[0]; c2 = z[1];
400	if( (c1==(char)0xfe) && (c2==(char)0xff) ){
401	return 1;
402	}
403	return 0;
404	}
405
406	/*
407	** Return true if two DLine elements are identical.
408	*/
409	static int same_dline(DLine *pA, DLine *pB){
410

M src/diff.c

+62 -1

		--- src/diff.c
		+++ src/diff.c
		@@ -321,28 +321,89 @@
321	321	if( j>UTF16_LENGTH_MASK ){
322	322	return 0; /* Very long line -> binary */
323	323	}
324	324	return result; /* No problems seen -> not binary */
325	325	}
	326	+
	327	+/*
	328	+** This function returns an array of bytes representing the byte-order-mark
	329	+** for UTF-8.
	330	+*/
	331	+const unsigned char *get_utf8_bom(int *pnByte){
	332	+ static const unsigned char bom[] = {
	333	+ 0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
	334	+ };
	335	+ if( pnByte ) *pnByte = 3;
	336	+ return bom;
	337	+}
	338	+
	339	+/*
	340	+** This function returns non-zero if the blob starts with a UTF-8
	341	+** byte-order-mark (BOM).
	342	+*/
	343	+int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
	344	+ const char *z = blob_buffer(pContent);
	345	+ int bomSize = 0;
	346	+ const unsigned char *bom = get_utf8_bom(&bomSize);
	347	+
	348	+ if( pnByte ) *pnByte = bomSize;
	349	+ if( blob_size(pContent)<bomSize ) return 0;
	350	+ return memcmp(z, bom, bomSize)==0;
	351	+}
326	352
327	353	/*
328	354	** This function returns non-zero if the blob starts with a UTF-16le or
329	355	** UTF-16be byte-order-mark (BOM).
330	356	*/
331		-int starts_with_utf16_bom(const Blob *pContent){
	357	+int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
332	358	const char *z = blob_buffer(pContent);
333	359	int c1, c2;
334	360
	361	+ if( pnByte ) *pnByte = 2;
335	362	if( blob_size(pContent)<2 ) return 0;
336	363	c1 = z[0]; c2 = z[1];
337	364	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
338	365	return 1;
339	366	}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340	367	return 1;
341	368	}
342	369	return 0;
343	370	}
	371	+
	372	+/*
	373	+** This function returns non-zero if the blob starts with a UTF-16le
	374	+** byte-order-mark (BOM).
	375	+*/
	376	+int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
	377	+ const char *z = blob_buffer(pContent);
	378	+ int c1, c2;
	379	+
	380	+ if( pnByte ) *pnByte = 2;
	381	+ if( blob_size(pContent)<2 ) return 0;
	382	+ c1 = z[0]; c2 = z[1];
	383	+ if( (c1==(char)0xff) && (c2==(char)0xfe) ){
	384	+ return 1;
	385	+ }
	386	+ return 0;
	387	+}
	388	+
	389	+/*
	390	+** This function returns non-zero if the blob starts with a UTF-16be
	391	+** byte-order-mark (BOM).
	392	+*/
	393	+int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
	394	+ const char *z = blob_buffer(pContent);
	395	+ int c1, c2;
	396	+
	397	+ if( pnByte ) *pnByte = 2;
	398	+ if( blob_size(pContent)<2 ) return 0;
	399	+ c1 = z[0]; c2 = z[1];
	400	+ if( (c1==(char)0xfe) && (c2==(char)0xff) ){
	401	+ return 1;
	402	+ }
	403	+ return 0;
	404	+}
344	405
345	406	/*
346	407	** Return true if two DLine elements are identical.
347	408	*/
348	409	static int same_dline(DLine *pA, DLine *pB){
349	410

	--- src/diff.c
	+++ src/diff.c
	@@ -321,28 +321,89 @@
321	if( j>UTF16_LENGTH_MASK ){
322	return 0; /* Very long line -> binary */
323	}
324	return result; /* No problems seen -> not binary */
325	}


























326
327	/*
328	** This function returns non-zero if the blob starts with a UTF-16le or
329	** UTF-16be byte-order-mark (BOM).
330	*/
331	int starts_with_utf16_bom(const Blob *pContent){
332	const char *z = blob_buffer(pContent);
333	int c1, c2;
334

335	if( blob_size(pContent)<2 ) return 0;
336	c1 = z[0]; c2 = z[1];
337	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
338	return 1;
339	}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340	return 1;
341	}
342	return 0;
343	}


































344
345	/*
346	** Return true if two DLine elements are identical.
347	*/
348	static int same_dline(DLine *pA, DLine *pB){
349

	--- src/diff.c
	+++ src/diff.c
	@@ -321,28 +321,89 @@
321	if( j>UTF16_LENGTH_MASK ){
322	return 0; /* Very long line -> binary */
323	}
324	return result; /* No problems seen -> not binary */
325	}
326
327	/*
328	** This function returns an array of bytes representing the byte-order-mark
329	** for UTF-8.
330	*/
331	const unsigned char *get_utf8_bom(int *pnByte){
332	static const unsigned char bom[] = {
333	0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
334	};
335	if( pnByte ) *pnByte = 3;
336	return bom;
337	}
338
339	/*
340	** This function returns non-zero if the blob starts with a UTF-8
341	** byte-order-mark (BOM).
342	*/
343	int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
344	const char *z = blob_buffer(pContent);
345	int bomSize = 0;
346	const unsigned char *bom = get_utf8_bom(&bomSize);
347
348	if( pnByte ) *pnByte = bomSize;
349	if( blob_size(pContent)<bomSize ) return 0;
350	return memcmp(z, bom, bomSize)==0;
351	}
352
353	/*
354	** This function returns non-zero if the blob starts with a UTF-16le or
355	** UTF-16be byte-order-mark (BOM).
356	*/
357	int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
358	const char *z = blob_buffer(pContent);
359	int c1, c2;
360
361	if( pnByte ) *pnByte = 2;
362	if( blob_size(pContent)<2 ) return 0;
363	c1 = z[0]; c2 = z[1];
364	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
365	return 1;
366	}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
367	return 1;
368	}
369	return 0;
370	}
371
372	/*
373	** This function returns non-zero if the blob starts with a UTF-16le
374	** byte-order-mark (BOM).
375	*/
376	int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
377	const char *z = blob_buffer(pContent);
378	int c1, c2;
379
380	if( pnByte ) *pnByte = 2;
381	if( blob_size(pContent)<2 ) return 0;
382	c1 = z[0]; c2 = z[1];
383	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
384	return 1;
385	}
386	return 0;
387	}
388
389	/*
390	** This function returns non-zero if the blob starts with a UTF-16be
391	** byte-order-mark (BOM).
392	*/
393	int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
394	const char *z = blob_buffer(pContent);
395	int c1, c2;
396
397	if( pnByte ) *pnByte = 2;
398	if( blob_size(pContent)<2 ) return 0;
399	c1 = z[0]; c2 = z[1];
400	if( (c1==(char)0xfe) && (c2==(char)0xff) ){
401	return 1;
402	}
403	return 0;
404	}
405
406	/*
407	** Return true if two DLine elements are identical.
408	*/
409	static int same_dline(DLine *pA, DLine *pB){
410

M src/info.c

+2 -2

		--- src/info.c
		+++ src/info.c
		@@ -1634,21 +1634,21 @@
1634	1634	content_get(rid, &content);
1635	1635	if( renderAsWiki ){
1636	1636	wiki_convert(&content, 0, 0);
1637	1637	}else if( renderAsHtml ){
1638	1638	@ <div>
1639		- blob_strip_bom(&content, 0);
	1639	+ blob_to_utf8_no_bom(&content, 0);
1640	1640	cgi_append_content(blob_buffer(&content), blob_size(&content));
1641	1641	@ </div>
1642	1642	}else{
1643	1643	style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644	1644	zMime = mimetype_from_content(&content);
1645	1645	@ <blockquote>
1646	1646	if( zMime==0 ){
1647	1647	const char *zLn = P("ln");
1648	1648	const char *z;
1649		- blob_strip_bom(&content, 0);
	1649	+ blob_to_utf8_no_bom(&content, 0);
1650	1650	z = blob_str(&content);
1651	1651	if( zLn ){
1652	1652	output_text_with_line_numbers(z, zLn);
1653	1653	}else{
1654	1654	@ <pre>
1655	1655

	--- src/info.c
	+++ src/info.c
	@@ -1634,21 +1634,21 @@
1634	content_get(rid, &content);
1635	if( renderAsWiki ){
1636	wiki_convert(&content, 0, 0);
1637	}else if( renderAsHtml ){
1638	@ <div>
1639	blob_strip_bom(&content, 0);
1640	cgi_append_content(blob_buffer(&content), blob_size(&content));
1641	@ </div>
1642	}else{
1643	style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644	zMime = mimetype_from_content(&content);
1645	@ <blockquote>
1646	if( zMime==0 ){
1647	const char *zLn = P("ln");
1648	const char *z;
1649	blob_strip_bom(&content, 0);
1650	z = blob_str(&content);
1651	if( zLn ){
1652	output_text_with_line_numbers(z, zLn);
1653	}else{
1654	@ <pre>
1655

	--- src/info.c
	+++ src/info.c
	@@ -1634,21 +1634,21 @@
1634	content_get(rid, &content);
1635	if( renderAsWiki ){
1636	wiki_convert(&content, 0, 0);
1637	}else if( renderAsHtml ){
1638	@ <div>
1639	blob_to_utf8_no_bom(&content, 0);
1640	cgi_append_content(blob_buffer(&content), blob_size(&content));
1641	@ </div>
1642	}else{
1643	style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644	zMime = mimetype_from_content(&content);
1645	@ <blockquote>
1646	if( zMime==0 ){
1647	const char *zLn = P("ln");
1648	const char *z;
1649	blob_to_utf8_no_bom(&content, 0);
1650	z = blob_str(&content);
1651	if( zLn ){
1652	output_text_with_line_numbers(z, zLn);
1653	}else{
1654	@ <pre>
1655

M src/info.c

+2 -2

		--- src/info.c
		+++ src/info.c
		@@ -1634,21 +1634,21 @@
1634	1634	content_get(rid, &content);
1635	1635	if( renderAsWiki ){
1636	1636	wiki_convert(&content, 0, 0);
1637	1637	}else if( renderAsHtml ){
1638	1638	@ <div>
1639		- blob_strip_bom(&content, 0);
	1639	+ blob_to_utf8_no_bom(&content, 0);
1640	1640	cgi_append_content(blob_buffer(&content), blob_size(&content));
1641	1641	@ </div>
1642	1642	}else{
1643	1643	style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644	1644	zMime = mimetype_from_content(&content);
1645	1645	@ <blockquote>
1646	1646	if( zMime==0 ){
1647	1647	const char *zLn = P("ln");
1648	1648	const char *z;
1649		- blob_strip_bom(&content, 0);
	1649	+ blob_to_utf8_no_bom(&content, 0);
1650	1650	z = blob_str(&content);
1651	1651	if( zLn ){
1652	1652	output_text_with_line_numbers(z, zLn);
1653	1653	}else{
1654	1654	@ <pre>
1655	1655

	--- src/info.c
	+++ src/info.c
	@@ -1634,21 +1634,21 @@
1634	content_get(rid, &content);
1635	if( renderAsWiki ){
1636	wiki_convert(&content, 0, 0);
1637	}else if( renderAsHtml ){
1638	@ <div>
1639	blob_strip_bom(&content, 0);
1640	cgi_append_content(blob_buffer(&content), blob_size(&content));
1641	@ </div>
1642	}else{
1643	style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644	zMime = mimetype_from_content(&content);
1645	@ <blockquote>
1646	if( zMime==0 ){
1647	const char *zLn = P("ln");
1648	const char *z;
1649	blob_strip_bom(&content, 0);
1650	z = blob_str(&content);
1651	if( zLn ){
1652	output_text_with_line_numbers(z, zLn);
1653	}else{
1654	@ <pre>
1655

	--- src/info.c
	+++ src/info.c
	@@ -1634,21 +1634,21 @@
1634	content_get(rid, &content);
1635	if( renderAsWiki ){
1636	wiki_convert(&content, 0, 0);
1637	}else if( renderAsHtml ){
1638	@ <div>
1639	blob_to_utf8_no_bom(&content, 0);
1640	cgi_append_content(blob_buffer(&content), blob_size(&content));
1641	@ </div>
1642	}else{
1643	style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644	zMime = mimetype_from_content(&content);
1645	@ <blockquote>
1646	if( zMime==0 ){
1647	const char *zLn = P("ln");
1648	const char *z;
1649	blob_to_utf8_no_bom(&content, 0);
1650	z = blob_str(&content);
1651	if( zLn ){
1652	output_text_with_line_numbers(z, zLn);
1653	}else{
1654	@ <pre>
1655

M src/main.c

+1 -1

		--- src/main.c
		+++ src/main.c
		@@ -519,11 +519,11 @@
519	519	if(stdin != zInFile){
520	520	fclose(zInFile);
521	521	}
522	522	zInFile = NULL;
523	523	}
524		- blob_strip_bom(&file, 1);
	524	+ blob_to_utf8_no_bom(&file, 1);
525	525	z = blob_str(&file);
526	526	for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527	527	newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528	528	for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529	529
530	530

	--- src/main.c
	+++ src/main.c
	@@ -519,11 +519,11 @@
519	if(stdin != zInFile){
520	fclose(zInFile);
521	}
522	zInFile = NULL;
523	}
524	blob_strip_bom(&file, 1);
525	z = blob_str(&file);
526	for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527	newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528	for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529
530

	--- src/main.c
	+++ src/main.c
	@@ -519,11 +519,11 @@
519	if(stdin != zInFile){
520	fclose(zInFile);
521	}
522	zInFile = NULL;
523	}
524	blob_to_utf8_no_bom(&file, 1);
525	z = blob_str(&file);
526	for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527	newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528	for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529
530

M src/main.c

+1 -1

		--- src/main.c
		+++ src/main.c
		@@ -519,11 +519,11 @@
519	519	if(stdin != zInFile){
520	520	fclose(zInFile);
521	521	}
522	522	zInFile = NULL;
523	523	}
524		- blob_strip_bom(&file, 1);
	524	+ blob_to_utf8_no_bom(&file, 1);
525	525	z = blob_str(&file);
526	526	for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527	527	newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528	528	for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529	529
530	530

	--- src/main.c
	+++ src/main.c
	@@ -519,11 +519,11 @@
519	if(stdin != zInFile){
520	fclose(zInFile);
521	}
522	zInFile = NULL;
523	}
524	blob_strip_bom(&file, 1);
525	z = blob_str(&file);
526	for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527	newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528	for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529
530

	--- src/main.c
	+++ src/main.c
	@@ -519,11 +519,11 @@
519	if(stdin != zInFile){
520	fclose(zInFile);
521	}
522	zInFile = NULL;
523	}
524	blob_to_utf8_no_bom(&file, 1);
525	z = blob_str(&file);
526	for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527	newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528	for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529
530

M src/stash.c

+3 -2

		--- src/stash.c
		+++ src/stash.c
		@@ -159,12 +159,13 @@
159	159	verify_all_options();
160	160	if( zComment==0 ){
161	161	Blob prompt; /* Prompt for stash comment */
162	162	Blob comment; /* User comment reply */
163	163	#ifdef _WIN32
164		- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
165		- blob_init(&prompt, (const char *) bom, 3);
	164	+ int bomSize;
	165	+ const unsigned char *bom = get_utf8_bom(&bomSize);
	166	+ blob_init(&prompt, (const char *) bom, bomSize);
166	167	#else
167	168	blob_zero(&prompt);
168	169	#endif
169	170	blob_append(&prompt,
170	171	"\n"
171	172

	--- src/stash.c
	+++ src/stash.c
	@@ -159,12 +159,13 @@
159	verify_all_options();
160	if( zComment==0 ){
161	Blob prompt; /* Prompt for stash comment */
162	Blob comment; /* User comment reply */
163	#ifdef _WIN32
164	static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
165	blob_init(&prompt, (const char *) bom, 3);

166	#else
167	blob_zero(&prompt);
168	#endif
169	blob_append(&prompt,
170	"\n"
171

	--- src/stash.c
	+++ src/stash.c
	@@ -159,12 +159,13 @@
159	verify_all_options();
160	if( zComment==0 ){
161	Blob prompt; /* Prompt for stash comment */
162	Blob comment; /* User comment reply */
163	#ifdef _WIN32
164	int bomSize;
165	const unsigned char *bom = get_utf8_bom(&bomSize);
166	blob_init(&prompt, (const char *) bom, bomSize);
167	#else
168	blob_zero(&prompt);
169	#endif
170	blob_append(&prompt,
171	"\n"
172

M src/wikiformat.c

+2 -2

		--- src/wikiformat.c
		+++ src/wikiformat.c
		@@ -1634,11 +1634,11 @@
1634	1634	renderer.pOut = pOut;
1635	1635	}else{
1636	1636	renderer.pOut = cgi_output_blob();
1637	1637	}
1638	1638
1639		- blob_strip_bom(pIn, 0);
	1639	+ blob_to_utf8_no_bom(pIn, 0);
1640	1640	wiki_render(&renderer, blob_str(pIn));
1641	1641	endAutoParagraph(&renderer);
1642	1642	while( renderer.nStack ){
1643	1643	popStack(&renderer);
1644	1644	}
		@@ -1698,11 +1698,11 @@
1698	1698	*/
1699	1699	int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700	1700	char *z;
1701	1701	int i;
1702	1702	int iStart;
1703		- blob_strip_bom(pIn, 0);
	1703	+ blob_to_utf8_no_bom(pIn, 0);
1704	1704	z = blob_str(pIn);
1705	1705	for(i=0; fossil_isspace(z[i]); i++){}
1706	1706	if( z[i]!='<' ) return 0;
1707	1707	i++;
1708	1708	if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709	1709

	--- src/wikiformat.c
	+++ src/wikiformat.c
	@@ -1634,11 +1634,11 @@
1634	renderer.pOut = pOut;
1635	}else{
1636	renderer.pOut = cgi_output_blob();
1637	}
1638
1639	blob_strip_bom(pIn, 0);
1640	wiki_render(&renderer, blob_str(pIn));
1641	endAutoParagraph(&renderer);
1642	while( renderer.nStack ){
1643	popStack(&renderer);
1644	}
	@@ -1698,11 +1698,11 @@
1698	*/
1699	int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700	char *z;
1701	int i;
1702	int iStart;
1703	blob_strip_bom(pIn, 0);
1704	z = blob_str(pIn);
1705	for(i=0; fossil_isspace(z[i]); i++){}
1706	if( z[i]!='<' ) return 0;
1707	i++;
1708	if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709

	--- src/wikiformat.c
	+++ src/wikiformat.c
	@@ -1634,11 +1634,11 @@
1634	renderer.pOut = pOut;
1635	}else{
1636	renderer.pOut = cgi_output_blob();
1637	}
1638
1639	blob_to_utf8_no_bom(pIn, 0);
1640	wiki_render(&renderer, blob_str(pIn));
1641	endAutoParagraph(&renderer);
1642	while( renderer.nStack ){
1643	popStack(&renderer);
1644	}
	@@ -1698,11 +1698,11 @@
1698	*/
1699	int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700	char *z;
1701	int i;
1702	int iStart;
1703	blob_to_utf8_no_bom(pIn, 0);
1704	z = blob_str(pIn);
1705	for(i=0; fossil_isspace(z[i]); i++){}
1706	if( z[i]!='<' ) return 0;
1707	i++;
1708	if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709

M src/wikiformat.c

+2 -2

		--- src/wikiformat.c
		+++ src/wikiformat.c
		@@ -1634,11 +1634,11 @@
1634	1634	renderer.pOut = pOut;
1635	1635	}else{
1636	1636	renderer.pOut = cgi_output_blob();
1637	1637	}
1638	1638
1639		- blob_strip_bom(pIn, 0);
	1639	+ blob_to_utf8_no_bom(pIn, 0);
1640	1640	wiki_render(&renderer, blob_str(pIn));
1641	1641	endAutoParagraph(&renderer);
1642	1642	while( renderer.nStack ){
1643	1643	popStack(&renderer);
1644	1644	}
		@@ -1698,11 +1698,11 @@
1698	1698	*/
1699	1699	int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700	1700	char *z;
1701	1701	int i;
1702	1702	int iStart;
1703		- blob_strip_bom(pIn, 0);
	1703	+ blob_to_utf8_no_bom(pIn, 0);
1704	1704	z = blob_str(pIn);
1705	1705	for(i=0; fossil_isspace(z[i]); i++){}
1706	1706	if( z[i]!='<' ) return 0;
1707	1707	i++;
1708	1708	if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709	1709

	--- src/wikiformat.c
	+++ src/wikiformat.c
	@@ -1634,11 +1634,11 @@
1634	renderer.pOut = pOut;
1635	}else{
1636	renderer.pOut = cgi_output_blob();
1637	}
1638
1639	blob_strip_bom(pIn, 0);
1640	wiki_render(&renderer, blob_str(pIn));
1641	endAutoParagraph(&renderer);
1642	while( renderer.nStack ){
1643	popStack(&renderer);
1644	}
	@@ -1698,11 +1698,11 @@
1698	*/
1699	int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700	char *z;
1701	int i;
1702	int iStart;
1703	blob_strip_bom(pIn, 0);
1704	z = blob_str(pIn);
1705	for(i=0; fossil_isspace(z[i]); i++){}
1706	if( z[i]!='<' ) return 0;
1707	i++;
1708	if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709

	--- src/wikiformat.c
	+++ src/wikiformat.c
	@@ -1634,11 +1634,11 @@
1634	renderer.pOut = pOut;
1635	}else{
1636	renderer.pOut = cgi_output_blob();
1637	}
1638
1639	blob_to_utf8_no_bom(pIn, 0);
1640	wiki_render(&renderer, blob_str(pIn));
1641	endAutoParagraph(&renderer);
1642	while( renderer.nStack ){
1643	popStack(&renderer);
1644	}
	@@ -1698,11 +1698,11 @@
1698	*/
1699	int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700	char *z;
1701	int i;
1702	int iStart;
1703	blob_to_utf8_no_bom(pIn, 0);
1704	z = blob_str(pIn);
1705	for(i=0; fossil_isspace(z[i]); i++){}
1706	if( z[i]!='<' ) return 0;
1707	i++;
1708	if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709

Fossil SCM

Keyboard Shortcuts