Fossil SCM

Merge byte-order-mark handling enhancements to trunk.

mistachkin 2012-11-26 22:51 trunk merge
Commit 9b800ee41c32c4e1941a3bc19798c7b90a6b9e9c
+1 -1
--- src/attach.c
+++ src/attach.c
@@ -520,11 +520,11 @@
520520
blob_zero(&attach);
521521
if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522522
const char *z;
523523
const char *zLn = P("ln");
524524
content_get(ridSrc, &attach);
525
- blob_strip_bom(&attach, 0);
525
+ blob_to_utf8_no_bom(&attach, 0);
526526
z = blob_str(&attach);
527527
if( zLn ){
528528
output_text_with_line_numbers(z, zLn);
529529
}else{
530530
@ <pre>
531531
--- src/attach.c
+++ src/attach.c
@@ -520,11 +520,11 @@
520 blob_zero(&attach);
521 if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522 const char *z;
523 const char *zLn = P("ln");
524 content_get(ridSrc, &attach);
525 blob_strip_bom(&attach, 0);
526 z = blob_str(&attach);
527 if( zLn ){
528 output_text_with_line_numbers(z, zLn);
529 }else{
530 @ <pre>
531
--- src/attach.c
+++ src/attach.c
@@ -520,11 +520,11 @@
520 blob_zero(&attach);
521 if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522 const char *z;
523 const char *zLn = P("ln");
524 content_get(ridSrc, &attach);
525 blob_to_utf8_no_bom(&attach, 0);
526 z = blob_str(&attach);
527 if( zLn ){
528 output_text_with_line_numbers(z, zLn);
529 }else{
530 @ <pre>
531
+1 -1
--- src/attach.c
+++ src/attach.c
@@ -520,11 +520,11 @@
520520
blob_zero(&attach);
521521
if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522522
const char *z;
523523
const char *zLn = P("ln");
524524
content_get(ridSrc, &attach);
525
- blob_strip_bom(&attach, 0);
525
+ blob_to_utf8_no_bom(&attach, 0);
526526
z = blob_str(&attach);
527527
if( zLn ){
528528
output_text_with_line_numbers(z, zLn);
529529
}else{
530530
@ <pre>
531531
--- src/attach.c
+++ src/attach.c
@@ -520,11 +520,11 @@
520 blob_zero(&attach);
521 if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522 const char *z;
523 const char *zLn = P("ln");
524 content_get(ridSrc, &attach);
525 blob_strip_bom(&attach, 0);
526 z = blob_str(&attach);
527 if( zLn ){
528 output_text_with_line_numbers(z, zLn);
529 }else{
530 @ <pre>
531
--- src/attach.c
+++ src/attach.c
@@ -520,11 +520,11 @@
520 blob_zero(&attach);
521 if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
522 const char *z;
523 const char *zLn = P("ln");
524 content_get(ridSrc, &attach);
525 blob_to_utf8_no_bom(&attach, 0);
526 z = blob_str(&attach);
527 if( zLn ){
528 output_text_with_line_numbers(z, zLn);
529 }else{
530 @ <pre>
531
+43 -51
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,53 @@
10881088
*pLeft = *pRight;
10891089
*pRight = swap;
10901090
}
10911091
10921092
/*
1093
-** Strip a possible BOM from the blob. On Windows, if there
1094
-** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095
-** to UTF-8 is done.
1096
-** If useMbcs is false and there is no BOM, the input string
1097
-** is assumed to be UTF-8 already, so no conversion is done.
1098
-*/
1099
-void blob_strip_bom(Blob *pBlob, int useMbcs){
1100
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101
-#ifdef _WIN32
1102
- static const unsigned short ubom = 0xfeff;
1103
- static const unsigned short urbom = 0xfffe;
1104
-#endif /* _WIN32 */
1105
- char *zUtf8;
1106
- if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107
- struct Blob temp;
1108
- zUtf8 = blob_str(pBlob) + 3;
1109
- blob_zero(&temp);
1110
- blob_append(&temp, zUtf8, -1);
1111
- fossil_mbcs_free(zUtf8);
1112
- blob_swap(pBlob, &temp);
1113
- blob_reset(&temp);
1114
-#ifdef _WIN32
1115
- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116
- && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117
- /* Make sure the blob contains two terminating 0-bytes */
1118
- blob_append(pBlob, "", 1);
1119
- zUtf8 = blob_str(pBlob) + 2;
1120
- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121
- blob_zero(pBlob);
1122
- blob_append(pBlob, zUtf8, -1);
1123
- fossil_mbcs_free(zUtf8);
1124
- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125
- && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126
- unsigned int i = blob_size(pBlob);
1127
- zUtf8 = blob_buffer(pBlob);
1128
- while( i > 0 ){
1129
- /* swap bytes of unicode representation */
1130
- char temp = zUtf8[--i];
1131
- zUtf8[i] = zUtf8[i-1];
1132
- zUtf8[--i] = temp;
1133
- }
1134
- /* Make sure the blob contains two terminating 0-bytes */
1135
- blob_append(pBlob, "", 1);
1136
- zUtf8 = blob_str(pBlob) + 2;
1137
- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138
- blob_zero(pBlob);
1139
- blob_append(pBlob, zUtf8, -1);
1140
- fossil_mbcs_free(zUtf8);
1141
- }else if (useMbcs) {
1142
- zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143
- blob_zero(pBlob);
1093
+** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1094
+** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1095
+** done. If useMbcs is false and there is no BOM, the input string is assumed
1096
+** to be UTF-8 already, so no conversion is done.
1097
+*/
1098
+void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099
+ char *zUtf8;
1100
+ int bomSize = 0;
1101
+ if( starts_with_utf8_bom(pBlob, &bomSize) ){
1102
+ struct Blob temp;
1103
+ zUtf8 = blob_str(pBlob) + bomSize;
1104
+ blob_zero(&temp);
1105
+ blob_append(&temp, zUtf8, -1);
1106
+ blob_swap(pBlob, &temp);
1107
+ blob_reset(&temp);
1108
+#ifdef _WIN32
1109
+ }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
1110
+ /* Make sure the blob contains two terminating 0-bytes */
1111
+ blob_append(pBlob, "", 1);
1112
+ zUtf8 = blob_str(pBlob) + bomSize;
1113
+ zUtf8 = fossil_unicode_to_utf8(zUtf8);
1114
+ blob_zero(pBlob);
1115
+ blob_append(pBlob, zUtf8, -1);
1116
+ fossil_mbcs_free(zUtf8);
1117
+ }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
1118
+ unsigned int i = blob_size(pBlob);
1119
+ zUtf8 = blob_buffer(pBlob);
1120
+ while( i > 0 ){
1121
+ /* swap bytes of unicode representation */
1122
+ char zTemp = zUtf8[--i];
1123
+ zUtf8[i] = zUtf8[i-1];
1124
+ zUtf8[--i] = zTemp;
1125
+ }
1126
+ /* Make sure the blob contains two terminating 0-bytes */
1127
+ blob_append(pBlob, "", 1);
1128
+ zUtf8 = blob_str(pBlob) + bomSize;
1129
+ zUtf8 = fossil_unicode_to_utf8(zUtf8);
1130
+ blob_zero(pBlob);
1131
+ blob_append(pBlob, zUtf8, -1);
1132
+ fossil_mbcs_free(zUtf8);
1133
+ }else if( useMbcs ){
1134
+ zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1135
+ blob_reset(pBlob);
11441136
blob_append(pBlob, zUtf8, -1);
11451137
fossil_mbcs_free(zUtf8);
11461138
#endif /* _WIN32 */
11471139
}
11481140
}
11491141
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,53 @@
1088 *pLeft = *pRight;
1089 *pRight = swap;
1090 }
1091
1092 /*
1093 ** Strip a possible BOM from the blob. On Windows, if there
1094 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095 ** to UTF-8 is done.
1096 ** If useMbcs is false and there is no BOM, the input string
1097 ** is assumed to be UTF-8 already, so no conversion is done.
1098 */
1099 void blob_strip_bom(Blob *pBlob, int useMbcs){
1100 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101 #ifdef _WIN32
1102 static const unsigned short ubom = 0xfeff;
1103 static const unsigned short urbom = 0xfffe;
1104 #endif /* _WIN32 */
1105 char *zUtf8;
1106 if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107 struct Blob temp;
1108 zUtf8 = blob_str(pBlob) + 3;
1109 blob_zero(&temp);
1110 blob_append(&temp, zUtf8, -1);
1111 fossil_mbcs_free(zUtf8);
1112 blob_swap(pBlob, &temp);
1113 blob_reset(&temp);
1114 #ifdef _WIN32
1115 }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116 && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117 /* Make sure the blob contains two terminating 0-bytes */
1118 blob_append(pBlob, "", 1);
1119 zUtf8 = blob_str(pBlob) + 2;
1120 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121 blob_zero(pBlob);
1122 blob_append(pBlob, zUtf8, -1);
1123 fossil_mbcs_free(zUtf8);
1124 }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125 && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126 unsigned int i = blob_size(pBlob);
1127 zUtf8 = blob_buffer(pBlob);
1128 while( i > 0 ){
1129 /* swap bytes of unicode representation */
1130 char temp = zUtf8[--i];
1131 zUtf8[i] = zUtf8[i-1];
1132 zUtf8[--i] = temp;
1133 }
1134 /* Make sure the blob contains two terminating 0-bytes */
1135 blob_append(pBlob, "", 1);
1136 zUtf8 = blob_str(pBlob) + 2;
1137 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138 blob_zero(pBlob);
1139 blob_append(pBlob, zUtf8, -1);
1140 fossil_mbcs_free(zUtf8);
1141 }else if (useMbcs) {
1142 zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143 blob_zero(pBlob);
1144 blob_append(pBlob, zUtf8, -1);
1145 fossil_mbcs_free(zUtf8);
1146 #endif /* _WIN32 */
1147 }
1148 }
1149
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,53 @@
1088 *pLeft = *pRight;
1089 *pRight = swap;
1090 }
1091
1092 /*
1093 ** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1094 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1095 ** done. If useMbcs is false and there is no BOM, the input string is assumed
1096 ** to be UTF-8 already, so no conversion is done.
1097 */
1098 void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099 char *zUtf8;
1100 int bomSize = 0;
1101 if( starts_with_utf8_bom(pBlob, &bomSize) ){
1102 struct Blob temp;
1103 zUtf8 = blob_str(pBlob) + bomSize;
1104 blob_zero(&temp);
1105 blob_append(&temp, zUtf8, -1);
1106 blob_swap(pBlob, &temp);
1107 blob_reset(&temp);
1108 #ifdef _WIN32
1109 }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
1110 /* Make sure the blob contains two terminating 0-bytes */
1111 blob_append(pBlob, "", 1);
1112 zUtf8 = blob_str(pBlob) + bomSize;
1113 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1114 blob_zero(pBlob);
1115 blob_append(pBlob, zUtf8, -1);
1116 fossil_mbcs_free(zUtf8);
1117 }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
1118 unsigned int i = blob_size(pBlob);
1119 zUtf8 = blob_buffer(pBlob);
1120 while( i > 0 ){
1121 /* swap bytes of unicode representation */
1122 char zTemp = zUtf8[--i];
1123 zUtf8[i] = zUtf8[i-1];
1124 zUtf8[--i] = zTemp;
1125 }
1126 /* Make sure the blob contains two terminating 0-bytes */
1127 blob_append(pBlob, "", 1);
1128 zUtf8 = blob_str(pBlob) + bomSize;
1129 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1130 blob_zero(pBlob);
1131 blob_append(pBlob, zUtf8, -1);
1132 fossil_mbcs_free(zUtf8);
1133 }else if( useMbcs ){
1134 zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1135 blob_reset(pBlob);
 
 
 
 
 
 
 
 
1136 blob_append(pBlob, zUtf8, -1);
1137 fossil_mbcs_free(zUtf8);
1138 #endif /* _WIN32 */
1139 }
1140 }
1141
+43 -51
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,53 @@
10881088
*pLeft = *pRight;
10891089
*pRight = swap;
10901090
}
10911091
10921092
/*
1093
-** Strip a possible BOM from the blob. On Windows, if there
1094
-** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095
-** to UTF-8 is done.
1096
-** If useMbcs is false and there is no BOM, the input string
1097
-** is assumed to be UTF-8 already, so no conversion is done.
1098
-*/
1099
-void blob_strip_bom(Blob *pBlob, int useMbcs){
1100
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101
-#ifdef _WIN32
1102
- static const unsigned short ubom = 0xfeff;
1103
- static const unsigned short urbom = 0xfffe;
1104
-#endif /* _WIN32 */
1105
- char *zUtf8;
1106
- if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107
- struct Blob temp;
1108
- zUtf8 = blob_str(pBlob) + 3;
1109
- blob_zero(&temp);
1110
- blob_append(&temp, zUtf8, -1);
1111
- fossil_mbcs_free(zUtf8);
1112
- blob_swap(pBlob, &temp);
1113
- blob_reset(&temp);
1114
-#ifdef _WIN32
1115
- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116
- && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117
- /* Make sure the blob contains two terminating 0-bytes */
1118
- blob_append(pBlob, "", 1);
1119
- zUtf8 = blob_str(pBlob) + 2;
1120
- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121
- blob_zero(pBlob);
1122
- blob_append(pBlob, zUtf8, -1);
1123
- fossil_mbcs_free(zUtf8);
1124
- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125
- && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126
- unsigned int i = blob_size(pBlob);
1127
- zUtf8 = blob_buffer(pBlob);
1128
- while( i > 0 ){
1129
- /* swap bytes of unicode representation */
1130
- char temp = zUtf8[--i];
1131
- zUtf8[i] = zUtf8[i-1];
1132
- zUtf8[--i] = temp;
1133
- }
1134
- /* Make sure the blob contains two terminating 0-bytes */
1135
- blob_append(pBlob, "", 1);
1136
- zUtf8 = blob_str(pBlob) + 2;
1137
- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138
- blob_zero(pBlob);
1139
- blob_append(pBlob, zUtf8, -1);
1140
- fossil_mbcs_free(zUtf8);
1141
- }else if (useMbcs) {
1142
- zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143
- blob_zero(pBlob);
1093
+** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1094
+** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1095
+** done. If useMbcs is false and there is no BOM, the input string is assumed
1096
+** to be UTF-8 already, so no conversion is done.
1097
+*/
1098
+void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099
+ char *zUtf8;
1100
+ int bomSize = 0;
1101
+ if( starts_with_utf8_bom(pBlob, &bomSize) ){
1102
+ struct Blob temp;
1103
+ zUtf8 = blob_str(pBlob) + bomSize;
1104
+ blob_zero(&temp);
1105
+ blob_append(&temp, zUtf8, -1);
1106
+ blob_swap(pBlob, &temp);
1107
+ blob_reset(&temp);
1108
+#ifdef _WIN32
1109
+ }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
1110
+ /* Make sure the blob contains two terminating 0-bytes */
1111
+ blob_append(pBlob, "", 1);
1112
+ zUtf8 = blob_str(pBlob) + bomSize;
1113
+ zUtf8 = fossil_unicode_to_utf8(zUtf8);
1114
+ blob_zero(pBlob);
1115
+ blob_append(pBlob, zUtf8, -1);
1116
+ fossil_mbcs_free(zUtf8);
1117
+ }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
1118
+ unsigned int i = blob_size(pBlob);
1119
+ zUtf8 = blob_buffer(pBlob);
1120
+ while( i > 0 ){
1121
+ /* swap bytes of unicode representation */
1122
+ char zTemp = zUtf8[--i];
1123
+ zUtf8[i] = zUtf8[i-1];
1124
+ zUtf8[--i] = zTemp;
1125
+ }
1126
+ /* Make sure the blob contains two terminating 0-bytes */
1127
+ blob_append(pBlob, "", 1);
1128
+ zUtf8 = blob_str(pBlob) + bomSize;
1129
+ zUtf8 = fossil_unicode_to_utf8(zUtf8);
1130
+ blob_zero(pBlob);
1131
+ blob_append(pBlob, zUtf8, -1);
1132
+ fossil_mbcs_free(zUtf8);
1133
+ }else if( useMbcs ){
1134
+ zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1135
+ blob_reset(pBlob);
11441136
blob_append(pBlob, zUtf8, -1);
11451137
fossil_mbcs_free(zUtf8);
11461138
#endif /* _WIN32 */
11471139
}
11481140
}
11491141
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,53 @@
1088 *pLeft = *pRight;
1089 *pRight = swap;
1090 }
1091
1092 /*
1093 ** Strip a possible BOM from the blob. On Windows, if there
1094 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095 ** to UTF-8 is done.
1096 ** If useMbcs is false and there is no BOM, the input string
1097 ** is assumed to be UTF-8 already, so no conversion is done.
1098 */
1099 void blob_strip_bom(Blob *pBlob, int useMbcs){
1100 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101 #ifdef _WIN32
1102 static const unsigned short ubom = 0xfeff;
1103 static const unsigned short urbom = 0xfffe;
1104 #endif /* _WIN32 */
1105 char *zUtf8;
1106 if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107 struct Blob temp;
1108 zUtf8 = blob_str(pBlob) + 3;
1109 blob_zero(&temp);
1110 blob_append(&temp, zUtf8, -1);
1111 fossil_mbcs_free(zUtf8);
1112 blob_swap(pBlob, &temp);
1113 blob_reset(&temp);
1114 #ifdef _WIN32
1115 }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116 && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117 /* Make sure the blob contains two terminating 0-bytes */
1118 blob_append(pBlob, "", 1);
1119 zUtf8 = blob_str(pBlob) + 2;
1120 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121 blob_zero(pBlob);
1122 blob_append(pBlob, zUtf8, -1);
1123 fossil_mbcs_free(zUtf8);
1124 }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125 && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126 unsigned int i = blob_size(pBlob);
1127 zUtf8 = blob_buffer(pBlob);
1128 while( i > 0 ){
1129 /* swap bytes of unicode representation */
1130 char temp = zUtf8[--i];
1131 zUtf8[i] = zUtf8[i-1];
1132 zUtf8[--i] = temp;
1133 }
1134 /* Make sure the blob contains two terminating 0-bytes */
1135 blob_append(pBlob, "", 1);
1136 zUtf8 = blob_str(pBlob) + 2;
1137 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138 blob_zero(pBlob);
1139 blob_append(pBlob, zUtf8, -1);
1140 fossil_mbcs_free(zUtf8);
1141 }else if (useMbcs) {
1142 zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143 blob_zero(pBlob);
1144 blob_append(pBlob, zUtf8, -1);
1145 fossil_mbcs_free(zUtf8);
1146 #endif /* _WIN32 */
1147 }
1148 }
1149
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,53 @@
1088 *pLeft = *pRight;
1089 *pRight = swap;
1090 }
1091
1092 /*
1093 ** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1094 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1095 ** done. If useMbcs is false and there is no BOM, the input string is assumed
1096 ** to be UTF-8 already, so no conversion is done.
1097 */
1098 void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
1099 char *zUtf8;
1100 int bomSize = 0;
1101 if( starts_with_utf8_bom(pBlob, &bomSize) ){
1102 struct Blob temp;
1103 zUtf8 = blob_str(pBlob) + bomSize;
1104 blob_zero(&temp);
1105 blob_append(&temp, zUtf8, -1);
1106 blob_swap(pBlob, &temp);
1107 blob_reset(&temp);
1108 #ifdef _WIN32
1109 }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
1110 /* Make sure the blob contains two terminating 0-bytes */
1111 blob_append(pBlob, "", 1);
1112 zUtf8 = blob_str(pBlob) + bomSize;
1113 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1114 blob_zero(pBlob);
1115 blob_append(pBlob, zUtf8, -1);
1116 fossil_mbcs_free(zUtf8);
1117 }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
1118 unsigned int i = blob_size(pBlob);
1119 zUtf8 = blob_buffer(pBlob);
1120 while( i > 0 ){
1121 /* swap bytes of unicode representation */
1122 char zTemp = zUtf8[--i];
1123 zUtf8[i] = zUtf8[i-1];
1124 zUtf8[--i] = zTemp;
1125 }
1126 /* Make sure the blob contains two terminating 0-bytes */
1127 blob_append(pBlob, "", 1);
1128 zUtf8 = blob_str(pBlob) + bomSize;
1129 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1130 blob_zero(pBlob);
1131 blob_append(pBlob, zUtf8, -1);
1132 fossil_mbcs_free(zUtf8);
1133 }else if( useMbcs ){
1134 zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1135 blob_reset(pBlob);
 
 
 
 
 
 
 
 
1136 blob_append(pBlob, zUtf8, -1);
1137 fossil_mbcs_free(zUtf8);
1138 #endif /* _WIN32 */
1139 }
1140 }
1141
+6 -5
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521521
break;
522522
}
523523
blob_append(&reply, zIn, -1);
524524
}
525525
}
526
- blob_strip_bom(&reply, 1);
526
+ blob_to_utf8_no_bom(&reply, 1);
527527
blob_remove_cr(&reply);
528528
file_delete(zFile);
529529
free(zFile);
530530
blob_zero(pComment);
531531
while( blob_line(&reply, &line) ){
@@ -570,12 +570,13 @@
570570
int parent_rid,
571571
const char *zUserOvrd
572572
){
573573
Blob prompt;
574574
#ifdef _WIN32
575
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576
- blob_init(&prompt, (const char *) bom, 3);
575
+ int bomSize;
576
+ const unsigned char *bom = get_utf8_bom(&bomSize);
577
+ blob_init(&prompt, (const char *) bom, bomSize);
577578
if( zInit && zInit[0]) {
578579
blob_append(&prompt, zInit, -1);
579580
}
580581
#else
581582
blob_init(&prompt, zInit, -1);
@@ -900,11 +901,11 @@
900901
char *zMsg; /* Warning message */
901902
Blob fname; /* Relative pathname of the file */
902903
static int allOk = 0; /* Set to true to disable this routine */
903904
904905
if( allOk ) return;
905
- fUnicode = starts_with_utf16_bom(p);
906
+ fUnicode = starts_with_utf16_bom(p, 0);
906907
eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
907908
if( eType==0 || eType==-1 || fUnicode ){
908909
const char *zWarning;
909910
Blob ans;
910911
char cReply;
@@ -1251,11 +1252,11 @@
12511252
blob_zero(&comment);
12521253
blob_append(&comment, zComment, -1);
12531254
}else if( zComFile ){
12541255
blob_zero(&comment);
12551256
blob_read_from_file(&comment, zComFile);
1256
- blob_strip_bom(&comment, 1);
1257
+ blob_to_utf8_no_bom(&comment, 1);
12571258
}else{
12581259
char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
12591260
prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
12601261
if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
12611262
blob_zero(&ans);
12621263
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521 break;
522 }
523 blob_append(&reply, zIn, -1);
524 }
525 }
526 blob_strip_bom(&reply, 1);
527 blob_remove_cr(&reply);
528 file_delete(zFile);
529 free(zFile);
530 blob_zero(pComment);
531 while( blob_line(&reply, &line) ){
@@ -570,12 +570,13 @@
570 int parent_rid,
571 const char *zUserOvrd
572 ){
573 Blob prompt;
574 #ifdef _WIN32
575 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576 blob_init(&prompt, (const char *) bom, 3);
 
577 if( zInit && zInit[0]) {
578 blob_append(&prompt, zInit, -1);
579 }
580 #else
581 blob_init(&prompt, zInit, -1);
@@ -900,11 +901,11 @@
900 char *zMsg; /* Warning message */
901 Blob fname; /* Relative pathname of the file */
902 static int allOk = 0; /* Set to true to disable this routine */
903
904 if( allOk ) return;
905 fUnicode = starts_with_utf16_bom(p);
906 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
907 if( eType==0 || eType==-1 || fUnicode ){
908 const char *zWarning;
909 Blob ans;
910 char cReply;
@@ -1251,11 +1252,11 @@
1251 blob_zero(&comment);
1252 blob_append(&comment, zComment, -1);
1253 }else if( zComFile ){
1254 blob_zero(&comment);
1255 blob_read_from_file(&comment, zComFile);
1256 blob_strip_bom(&comment, 1);
1257 }else{
1258 char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1259 prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1260 if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1261 blob_zero(&ans);
1262
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521 break;
522 }
523 blob_append(&reply, zIn, -1);
524 }
525 }
526 blob_to_utf8_no_bom(&reply, 1);
527 blob_remove_cr(&reply);
528 file_delete(zFile);
529 free(zFile);
530 blob_zero(pComment);
531 while( blob_line(&reply, &line) ){
@@ -570,12 +570,13 @@
570 int parent_rid,
571 const char *zUserOvrd
572 ){
573 Blob prompt;
574 #ifdef _WIN32
575 int bomSize;
576 const unsigned char *bom = get_utf8_bom(&bomSize);
577 blob_init(&prompt, (const char *) bom, bomSize);
578 if( zInit && zInit[0]) {
579 blob_append(&prompt, zInit, -1);
580 }
581 #else
582 blob_init(&prompt, zInit, -1);
@@ -900,11 +901,11 @@
901 char *zMsg; /* Warning message */
902 Blob fname; /* Relative pathname of the file */
903 static int allOk = 0; /* Set to true to disable this routine */
904
905 if( allOk ) return;
906 fUnicode = starts_with_utf16_bom(p, 0);
907 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
908 if( eType==0 || eType==-1 || fUnicode ){
909 const char *zWarning;
910 Blob ans;
911 char cReply;
@@ -1251,11 +1252,11 @@
1252 blob_zero(&comment);
1253 blob_append(&comment, zComment, -1);
1254 }else if( zComFile ){
1255 blob_zero(&comment);
1256 blob_read_from_file(&comment, zComFile);
1257 blob_to_utf8_no_bom(&comment, 1);
1258 }else{
1259 char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1260 prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1261 if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1262 blob_zero(&ans);
1263
+6 -5
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521521
break;
522522
}
523523
blob_append(&reply, zIn, -1);
524524
}
525525
}
526
- blob_strip_bom(&reply, 1);
526
+ blob_to_utf8_no_bom(&reply, 1);
527527
blob_remove_cr(&reply);
528528
file_delete(zFile);
529529
free(zFile);
530530
blob_zero(pComment);
531531
while( blob_line(&reply, &line) ){
@@ -570,12 +570,13 @@
570570
int parent_rid,
571571
const char *zUserOvrd
572572
){
573573
Blob prompt;
574574
#ifdef _WIN32
575
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576
- blob_init(&prompt, (const char *) bom, 3);
575
+ int bomSize;
576
+ const unsigned char *bom = get_utf8_bom(&bomSize);
577
+ blob_init(&prompt, (const char *) bom, bomSize);
577578
if( zInit && zInit[0]) {
578579
blob_append(&prompt, zInit, -1);
579580
}
580581
#else
581582
blob_init(&prompt, zInit, -1);
@@ -900,11 +901,11 @@
900901
char *zMsg; /* Warning message */
901902
Blob fname; /* Relative pathname of the file */
902903
static int allOk = 0; /* Set to true to disable this routine */
903904
904905
if( allOk ) return;
905
- fUnicode = starts_with_utf16_bom(p);
906
+ fUnicode = starts_with_utf16_bom(p, 0);
906907
eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
907908
if( eType==0 || eType==-1 || fUnicode ){
908909
const char *zWarning;
909910
Blob ans;
910911
char cReply;
@@ -1251,11 +1252,11 @@
12511252
blob_zero(&comment);
12521253
blob_append(&comment, zComment, -1);
12531254
}else if( zComFile ){
12541255
blob_zero(&comment);
12551256
blob_read_from_file(&comment, zComFile);
1256
- blob_strip_bom(&comment, 1);
1257
+ blob_to_utf8_no_bom(&comment, 1);
12571258
}else{
12581259
char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
12591260
prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
12601261
if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
12611262
blob_zero(&ans);
12621263
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521 break;
522 }
523 blob_append(&reply, zIn, -1);
524 }
525 }
526 blob_strip_bom(&reply, 1);
527 blob_remove_cr(&reply);
528 file_delete(zFile);
529 free(zFile);
530 blob_zero(pComment);
531 while( blob_line(&reply, &line) ){
@@ -570,12 +570,13 @@
570 int parent_rid,
571 const char *zUserOvrd
572 ){
573 Blob prompt;
574 #ifdef _WIN32
575 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576 blob_init(&prompt, (const char *) bom, 3);
 
577 if( zInit && zInit[0]) {
578 blob_append(&prompt, zInit, -1);
579 }
580 #else
581 blob_init(&prompt, zInit, -1);
@@ -900,11 +901,11 @@
900 char *zMsg; /* Warning message */
901 Blob fname; /* Relative pathname of the file */
902 static int allOk = 0; /* Set to true to disable this routine */
903
904 if( allOk ) return;
905 fUnicode = starts_with_utf16_bom(p);
906 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
907 if( eType==0 || eType==-1 || fUnicode ){
908 const char *zWarning;
909 Blob ans;
910 char cReply;
@@ -1251,11 +1252,11 @@
1251 blob_zero(&comment);
1252 blob_append(&comment, zComment, -1);
1253 }else if( zComFile ){
1254 blob_zero(&comment);
1255 blob_read_from_file(&comment, zComFile);
1256 blob_strip_bom(&comment, 1);
1257 }else{
1258 char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1259 prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1260 if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1261 blob_zero(&ans);
1262
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521 break;
522 }
523 blob_append(&reply, zIn, -1);
524 }
525 }
526 blob_to_utf8_no_bom(&reply, 1);
527 blob_remove_cr(&reply);
528 file_delete(zFile);
529 free(zFile);
530 blob_zero(pComment);
531 while( blob_line(&reply, &line) ){
@@ -570,12 +570,13 @@
570 int parent_rid,
571 const char *zUserOvrd
572 ){
573 Blob prompt;
574 #ifdef _WIN32
575 int bomSize;
576 const unsigned char *bom = get_utf8_bom(&bomSize);
577 blob_init(&prompt, (const char *) bom, bomSize);
578 if( zInit && zInit[0]) {
579 blob_append(&prompt, zInit, -1);
580 }
581 #else
582 blob_init(&prompt, zInit, -1);
@@ -900,11 +901,11 @@
901 char *zMsg; /* Warning message */
902 Blob fname; /* Relative pathname of the file */
903 static int allOk = 0; /* Set to true to disable this routine */
904
905 if( allOk ) return;
906 fUnicode = starts_with_utf16_bom(p, 0);
907 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
908 if( eType==0 || eType==-1 || fUnicode ){
909 const char *zWarning;
910 Blob ans;
911 char cReply;
@@ -1251,11 +1252,11 @@
1252 blob_zero(&comment);
1253 blob_append(&comment, zComment, -1);
1254 }else if( zComFile ){
1255 blob_zero(&comment);
1256 blob_read_from_file(&comment, zComFile);
1257 blob_to_utf8_no_bom(&comment, 1);
1258 }else{
1259 char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1260 prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1261 if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1262 blob_zero(&ans);
1263
+62 -1
--- src/diff.c
+++ src/diff.c
@@ -321,28 +321,89 @@
321321
if( j>UTF16_LENGTH_MASK ){
322322
return 0; /* Very long line -> binary */
323323
}
324324
return result; /* No problems seen -> not binary */
325325
}
326
+
327
+/*
328
+** This function returns an array of bytes representing the byte-order-mark
329
+** for UTF-8.
330
+*/
331
+const unsigned char *get_utf8_bom(int *pnByte){
332
+ static const unsigned char bom[] = {
333
+ 0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
334
+ };
335
+ if( pnByte ) *pnByte = 3;
336
+ return bom;
337
+}
338
+
339
+/*
340
+** This function returns non-zero if the blob starts with a UTF-8
341
+** byte-order-mark (BOM).
342
+*/
343
+int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
344
+ const char *z = blob_buffer(pContent);
345
+ int bomSize = 0;
346
+ const unsigned char *bom = get_utf8_bom(&bomSize);
347
+
348
+ if( pnByte ) *pnByte = bomSize;
349
+ if( blob_size(pContent)<bomSize ) return 0;
350
+ return memcmp(z, bom, bomSize)==0;
351
+}
326352
327353
/*
328354
** This function returns non-zero if the blob starts with a UTF-16le or
329355
** UTF-16be byte-order-mark (BOM).
330356
*/
331
-int starts_with_utf16_bom(const Blob *pContent){
357
+int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
332358
const char *z = blob_buffer(pContent);
333359
int c1, c2;
334360
361
+ if( pnByte ) *pnByte = 2;
335362
if( blob_size(pContent)<2 ) return 0;
336363
c1 = z[0]; c2 = z[1];
337364
if( (c1==(char)0xff) && (c2==(char)0xfe) ){
338365
return 1;
339366
}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340367
return 1;
341368
}
342369
return 0;
343370
}
371
+
372
+/*
373
+** This function returns non-zero if the blob starts with a UTF-16le
374
+** byte-order-mark (BOM).
375
+*/
376
+int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
377
+ const char *z = blob_buffer(pContent);
378
+ int c1, c2;
379
+
380
+ if( pnByte ) *pnByte = 2;
381
+ if( blob_size(pContent)<2 ) return 0;
382
+ c1 = z[0]; c2 = z[1];
383
+ if( (c1==(char)0xff) && (c2==(char)0xfe) ){
384
+ return 1;
385
+ }
386
+ return 0;
387
+}
388
+
389
+/*
390
+** This function returns non-zero if the blob starts with a UTF-16be
391
+** byte-order-mark (BOM).
392
+*/
393
+int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
394
+ const char *z = blob_buffer(pContent);
395
+ int c1, c2;
396
+
397
+ if( pnByte ) *pnByte = 2;
398
+ if( blob_size(pContent)<2 ) return 0;
399
+ c1 = z[0]; c2 = z[1];
400
+ if( (c1==(char)0xfe) && (c2==(char)0xff) ){
401
+ return 1;
402
+ }
403
+ return 0;
404
+}
344405
345406
/*
346407
** Return true if two DLine elements are identical.
347408
*/
348409
static int same_dline(DLine *pA, DLine *pB){
349410
--- src/diff.c
+++ src/diff.c
@@ -321,28 +321,89 @@
321 if( j>UTF16_LENGTH_MASK ){
322 return 0; /* Very long line -> binary */
323 }
324 return result; /* No problems seen -> not binary */
325 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
327 /*
328 ** This function returns non-zero if the blob starts with a UTF-16le or
329 ** UTF-16be byte-order-mark (BOM).
330 */
331 int starts_with_utf16_bom(const Blob *pContent){
332 const char *z = blob_buffer(pContent);
333 int c1, c2;
334
 
335 if( blob_size(pContent)<2 ) return 0;
336 c1 = z[0]; c2 = z[1];
337 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
338 return 1;
339 }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340 return 1;
341 }
342 return 0;
343 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
345 /*
346 ** Return true if two DLine elements are identical.
347 */
348 static int same_dline(DLine *pA, DLine *pB){
349
--- src/diff.c
+++ src/diff.c
@@ -321,28 +321,89 @@
321 if( j>UTF16_LENGTH_MASK ){
322 return 0; /* Very long line -> binary */
323 }
324 return result; /* No problems seen -> not binary */
325 }
326
327 /*
328 ** This function returns an array of bytes representing the byte-order-mark
329 ** for UTF-8.
330 */
331 const unsigned char *get_utf8_bom(int *pnByte){
332 static const unsigned char bom[] = {
333 0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
334 };
335 if( pnByte ) *pnByte = 3;
336 return bom;
337 }
338
339 /*
340 ** This function returns non-zero if the blob starts with a UTF-8
341 ** byte-order-mark (BOM).
342 */
343 int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
344 const char *z = blob_buffer(pContent);
345 int bomSize = 0;
346 const unsigned char *bom = get_utf8_bom(&bomSize);
347
348 if( pnByte ) *pnByte = bomSize;
349 if( blob_size(pContent)<bomSize ) return 0;
350 return memcmp(z, bom, bomSize)==0;
351 }
352
353 /*
354 ** This function returns non-zero if the blob starts with a UTF-16le or
355 ** UTF-16be byte-order-mark (BOM).
356 */
357 int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
358 const char *z = blob_buffer(pContent);
359 int c1, c2;
360
361 if( pnByte ) *pnByte = 2;
362 if( blob_size(pContent)<2 ) return 0;
363 c1 = z[0]; c2 = z[1];
364 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
365 return 1;
366 }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
367 return 1;
368 }
369 return 0;
370 }
371
372 /*
373 ** This function returns non-zero if the blob starts with a UTF-16le
374 ** byte-order-mark (BOM).
375 */
376 int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
377 const char *z = blob_buffer(pContent);
378 int c1, c2;
379
380 if( pnByte ) *pnByte = 2;
381 if( blob_size(pContent)<2 ) return 0;
382 c1 = z[0]; c2 = z[1];
383 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
384 return 1;
385 }
386 return 0;
387 }
388
389 /*
390 ** This function returns non-zero if the blob starts with a UTF-16be
391 ** byte-order-mark (BOM).
392 */
393 int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
394 const char *z = blob_buffer(pContent);
395 int c1, c2;
396
397 if( pnByte ) *pnByte = 2;
398 if( blob_size(pContent)<2 ) return 0;
399 c1 = z[0]; c2 = z[1];
400 if( (c1==(char)0xfe) && (c2==(char)0xff) ){
401 return 1;
402 }
403 return 0;
404 }
405
406 /*
407 ** Return true if two DLine elements are identical.
408 */
409 static int same_dline(DLine *pA, DLine *pB){
410
+62 -1
--- src/diff.c
+++ src/diff.c
@@ -321,28 +321,89 @@
321321
if( j>UTF16_LENGTH_MASK ){
322322
return 0; /* Very long line -> binary */
323323
}
324324
return result; /* No problems seen -> not binary */
325325
}
326
+
327
+/*
328
+** This function returns an array of bytes representing the byte-order-mark
329
+** for UTF-8.
330
+*/
331
+const unsigned char *get_utf8_bom(int *pnByte){
332
+ static const unsigned char bom[] = {
333
+ 0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
334
+ };
335
+ if( pnByte ) *pnByte = 3;
336
+ return bom;
337
+}
338
+
339
+/*
340
+** This function returns non-zero if the blob starts with a UTF-8
341
+** byte-order-mark (BOM).
342
+*/
343
+int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
344
+ const char *z = blob_buffer(pContent);
345
+ int bomSize = 0;
346
+ const unsigned char *bom = get_utf8_bom(&bomSize);
347
+
348
+ if( pnByte ) *pnByte = bomSize;
349
+ if( blob_size(pContent)<bomSize ) return 0;
350
+ return memcmp(z, bom, bomSize)==0;
351
+}
326352
327353
/*
328354
** This function returns non-zero if the blob starts with a UTF-16le or
329355
** UTF-16be byte-order-mark (BOM).
330356
*/
331
-int starts_with_utf16_bom(const Blob *pContent){
357
+int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
332358
const char *z = blob_buffer(pContent);
333359
int c1, c2;
334360
361
+ if( pnByte ) *pnByte = 2;
335362
if( blob_size(pContent)<2 ) return 0;
336363
c1 = z[0]; c2 = z[1];
337364
if( (c1==(char)0xff) && (c2==(char)0xfe) ){
338365
return 1;
339366
}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340367
return 1;
341368
}
342369
return 0;
343370
}
371
+
372
+/*
373
+** This function returns non-zero if the blob starts with a UTF-16le
374
+** byte-order-mark (BOM).
375
+*/
376
+int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
377
+ const char *z = blob_buffer(pContent);
378
+ int c1, c2;
379
+
380
+ if( pnByte ) *pnByte = 2;
381
+ if( blob_size(pContent)<2 ) return 0;
382
+ c1 = z[0]; c2 = z[1];
383
+ if( (c1==(char)0xff) && (c2==(char)0xfe) ){
384
+ return 1;
385
+ }
386
+ return 0;
387
+}
388
+
389
+/*
390
+** This function returns non-zero if the blob starts with a UTF-16be
391
+** byte-order-mark (BOM).
392
+*/
393
+int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
394
+ const char *z = blob_buffer(pContent);
395
+ int c1, c2;
396
+
397
+ if( pnByte ) *pnByte = 2;
398
+ if( blob_size(pContent)<2 ) return 0;
399
+ c1 = z[0]; c2 = z[1];
400
+ if( (c1==(char)0xfe) && (c2==(char)0xff) ){
401
+ return 1;
402
+ }
403
+ return 0;
404
+}
344405
345406
/*
346407
** Return true if two DLine elements are identical.
347408
*/
348409
static int same_dline(DLine *pA, DLine *pB){
349410
--- src/diff.c
+++ src/diff.c
@@ -321,28 +321,89 @@
321 if( j>UTF16_LENGTH_MASK ){
322 return 0; /* Very long line -> binary */
323 }
324 return result; /* No problems seen -> not binary */
325 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
327 /*
328 ** This function returns non-zero if the blob starts with a UTF-16le or
329 ** UTF-16be byte-order-mark (BOM).
330 */
331 int starts_with_utf16_bom(const Blob *pContent){
332 const char *z = blob_buffer(pContent);
333 int c1, c2;
334
 
335 if( blob_size(pContent)<2 ) return 0;
336 c1 = z[0]; c2 = z[1];
337 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
338 return 1;
339 }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340 return 1;
341 }
342 return 0;
343 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
345 /*
346 ** Return true if two DLine elements are identical.
347 */
348 static int same_dline(DLine *pA, DLine *pB){
349
--- src/diff.c
+++ src/diff.c
@@ -321,28 +321,89 @@
321 if( j>UTF16_LENGTH_MASK ){
322 return 0; /* Very long line -> binary */
323 }
324 return result; /* No problems seen -> not binary */
325 }
326
327 /*
328 ** This function returns an array of bytes representing the byte-order-mark
329 ** for UTF-8.
330 */
331 const unsigned char *get_utf8_bom(int *pnByte){
332 static const unsigned char bom[] = {
333 0xEF, 0xBB, 0xBF, 0x00, 0x00, 0x00
334 };
335 if( pnByte ) *pnByte = 3;
336 return bom;
337 }
338
339 /*
340 ** This function returns non-zero if the blob starts with a UTF-8
341 ** byte-order-mark (BOM).
342 */
343 int starts_with_utf8_bom(const Blob *pContent, int *pnByte){
344 const char *z = blob_buffer(pContent);
345 int bomSize = 0;
346 const unsigned char *bom = get_utf8_bom(&bomSize);
347
348 if( pnByte ) *pnByte = bomSize;
349 if( blob_size(pContent)<bomSize ) return 0;
350 return memcmp(z, bom, bomSize)==0;
351 }
352
353 /*
354 ** This function returns non-zero if the blob starts with a UTF-16le or
355 ** UTF-16be byte-order-mark (BOM).
356 */
357 int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
358 const char *z = blob_buffer(pContent);
359 int c1, c2;
360
361 if( pnByte ) *pnByte = 2;
362 if( blob_size(pContent)<2 ) return 0;
363 c1 = z[0]; c2 = z[1];
364 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
365 return 1;
366 }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
367 return 1;
368 }
369 return 0;
370 }
371
372 /*
373 ** This function returns non-zero if the blob starts with a UTF-16le
374 ** byte-order-mark (BOM).
375 */
376 int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
377 const char *z = blob_buffer(pContent);
378 int c1, c2;
379
380 if( pnByte ) *pnByte = 2;
381 if( blob_size(pContent)<2 ) return 0;
382 c1 = z[0]; c2 = z[1];
383 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
384 return 1;
385 }
386 return 0;
387 }
388
389 /*
390 ** This function returns non-zero if the blob starts with a UTF-16be
391 ** byte-order-mark (BOM).
392 */
393 int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
394 const char *z = blob_buffer(pContent);
395 int c1, c2;
396
397 if( pnByte ) *pnByte = 2;
398 if( blob_size(pContent)<2 ) return 0;
399 c1 = z[0]; c2 = z[1];
400 if( (c1==(char)0xfe) && (c2==(char)0xff) ){
401 return 1;
402 }
403 return 0;
404 }
405
406 /*
407 ** Return true if two DLine elements are identical.
408 */
409 static int same_dline(DLine *pA, DLine *pB){
410
+2 -2
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
16341634
content_get(rid, &content);
16351635
if( renderAsWiki ){
16361636
wiki_convert(&content, 0, 0);
16371637
}else if( renderAsHtml ){
16381638
@ <div>
1639
- blob_strip_bom(&content, 0);
1639
+ blob_to_utf8_no_bom(&content, 0);
16401640
cgi_append_content(blob_buffer(&content), blob_size(&content));
16411641
@ </div>
16421642
}else{
16431643
style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
16441644
zMime = mimetype_from_content(&content);
16451645
@ <blockquote>
16461646
if( zMime==0 ){
16471647
const char *zLn = P("ln");
16481648
const char *z;
1649
- blob_strip_bom(&content, 0);
1649
+ blob_to_utf8_no_bom(&content, 0);
16501650
z = blob_str(&content);
16511651
if( zLn ){
16521652
output_text_with_line_numbers(z, zLn);
16531653
}else{
16541654
@ <pre>
16551655
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
1634 content_get(rid, &content);
1635 if( renderAsWiki ){
1636 wiki_convert(&content, 0, 0);
1637 }else if( renderAsHtml ){
1638 @ <div>
1639 blob_strip_bom(&content, 0);
1640 cgi_append_content(blob_buffer(&content), blob_size(&content));
1641 @ </div>
1642 }else{
1643 style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644 zMime = mimetype_from_content(&content);
1645 @ <blockquote>
1646 if( zMime==0 ){
1647 const char *zLn = P("ln");
1648 const char *z;
1649 blob_strip_bom(&content, 0);
1650 z = blob_str(&content);
1651 if( zLn ){
1652 output_text_with_line_numbers(z, zLn);
1653 }else{
1654 @ <pre>
1655
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
1634 content_get(rid, &content);
1635 if( renderAsWiki ){
1636 wiki_convert(&content, 0, 0);
1637 }else if( renderAsHtml ){
1638 @ <div>
1639 blob_to_utf8_no_bom(&content, 0);
1640 cgi_append_content(blob_buffer(&content), blob_size(&content));
1641 @ </div>
1642 }else{
1643 style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644 zMime = mimetype_from_content(&content);
1645 @ <blockquote>
1646 if( zMime==0 ){
1647 const char *zLn = P("ln");
1648 const char *z;
1649 blob_to_utf8_no_bom(&content, 0);
1650 z = blob_str(&content);
1651 if( zLn ){
1652 output_text_with_line_numbers(z, zLn);
1653 }else{
1654 @ <pre>
1655
+2 -2
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
16341634
content_get(rid, &content);
16351635
if( renderAsWiki ){
16361636
wiki_convert(&content, 0, 0);
16371637
}else if( renderAsHtml ){
16381638
@ <div>
1639
- blob_strip_bom(&content, 0);
1639
+ blob_to_utf8_no_bom(&content, 0);
16401640
cgi_append_content(blob_buffer(&content), blob_size(&content));
16411641
@ </div>
16421642
}else{
16431643
style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
16441644
zMime = mimetype_from_content(&content);
16451645
@ <blockquote>
16461646
if( zMime==0 ){
16471647
const char *zLn = P("ln");
16481648
const char *z;
1649
- blob_strip_bom(&content, 0);
1649
+ blob_to_utf8_no_bom(&content, 0);
16501650
z = blob_str(&content);
16511651
if( zLn ){
16521652
output_text_with_line_numbers(z, zLn);
16531653
}else{
16541654
@ <pre>
16551655
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
1634 content_get(rid, &content);
1635 if( renderAsWiki ){
1636 wiki_convert(&content, 0, 0);
1637 }else if( renderAsHtml ){
1638 @ <div>
1639 blob_strip_bom(&content, 0);
1640 cgi_append_content(blob_buffer(&content), blob_size(&content));
1641 @ </div>
1642 }else{
1643 style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644 zMime = mimetype_from_content(&content);
1645 @ <blockquote>
1646 if( zMime==0 ){
1647 const char *zLn = P("ln");
1648 const char *z;
1649 blob_strip_bom(&content, 0);
1650 z = blob_str(&content);
1651 if( zLn ){
1652 output_text_with_line_numbers(z, zLn);
1653 }else{
1654 @ <pre>
1655
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
1634 content_get(rid, &content);
1635 if( renderAsWiki ){
1636 wiki_convert(&content, 0, 0);
1637 }else if( renderAsHtml ){
1638 @ <div>
1639 blob_to_utf8_no_bom(&content, 0);
1640 cgi_append_content(blob_buffer(&content), blob_size(&content));
1641 @ </div>
1642 }else{
1643 style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644 zMime = mimetype_from_content(&content);
1645 @ <blockquote>
1646 if( zMime==0 ){
1647 const char *zLn = P("ln");
1648 const char *z;
1649 blob_to_utf8_no_bom(&content, 0);
1650 z = blob_str(&content);
1651 if( zLn ){
1652 output_text_with_line_numbers(z, zLn);
1653 }else{
1654 @ <pre>
1655
+1 -1
--- src/main.c
+++ src/main.c
@@ -519,11 +519,11 @@
519519
if(stdin != zInFile){
520520
fclose(zInFile);
521521
}
522522
zInFile = NULL;
523523
}
524
- blob_strip_bom(&file, 1);
524
+ blob_to_utf8_no_bom(&file, 1);
525525
z = blob_str(&file);
526526
for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527527
newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528528
for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529529
530530
--- src/main.c
+++ src/main.c
@@ -519,11 +519,11 @@
519 if(stdin != zInFile){
520 fclose(zInFile);
521 }
522 zInFile = NULL;
523 }
524 blob_strip_bom(&file, 1);
525 z = blob_str(&file);
526 for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527 newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528 for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529
530
--- src/main.c
+++ src/main.c
@@ -519,11 +519,11 @@
519 if(stdin != zInFile){
520 fclose(zInFile);
521 }
522 zInFile = NULL;
523 }
524 blob_to_utf8_no_bom(&file, 1);
525 z = blob_str(&file);
526 for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527 newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528 for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529
530
+1 -1
--- src/main.c
+++ src/main.c
@@ -519,11 +519,11 @@
519519
if(stdin != zInFile){
520520
fclose(zInFile);
521521
}
522522
zInFile = NULL;
523523
}
524
- blob_strip_bom(&file, 1);
524
+ blob_to_utf8_no_bom(&file, 1);
525525
z = blob_str(&file);
526526
for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527527
newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528528
for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529529
530530
--- src/main.c
+++ src/main.c
@@ -519,11 +519,11 @@
519 if(stdin != zInFile){
520 fclose(zInFile);
521 }
522 zInFile = NULL;
523 }
524 blob_strip_bom(&file, 1);
525 z = blob_str(&file);
526 for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527 newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528 for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529
530
--- src/main.c
+++ src/main.c
@@ -519,11 +519,11 @@
519 if(stdin != zInFile){
520 fclose(zInFile);
521 }
522 zInFile = NULL;
523 }
524 blob_to_utf8_no_bom(&file, 1);
525 z = blob_str(&file);
526 for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
527 newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
528 for(j=0; j<i; j++) newArgv[j] = g.argv[j];
529
530
+3 -2
--- src/stash.c
+++ src/stash.c
@@ -159,12 +159,13 @@
159159
verify_all_options();
160160
if( zComment==0 ){
161161
Blob prompt; /* Prompt for stash comment */
162162
Blob comment; /* User comment reply */
163163
#ifdef _WIN32
164
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
165
- blob_init(&prompt, (const char *) bom, 3);
164
+ int bomSize;
165
+ const unsigned char *bom = get_utf8_bom(&bomSize);
166
+ blob_init(&prompt, (const char *) bom, bomSize);
166167
#else
167168
blob_zero(&prompt);
168169
#endif
169170
blob_append(&prompt,
170171
"\n"
171172
--- src/stash.c
+++ src/stash.c
@@ -159,12 +159,13 @@
159 verify_all_options();
160 if( zComment==0 ){
161 Blob prompt; /* Prompt for stash comment */
162 Blob comment; /* User comment reply */
163 #ifdef _WIN32
164 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
165 blob_init(&prompt, (const char *) bom, 3);
 
166 #else
167 blob_zero(&prompt);
168 #endif
169 blob_append(&prompt,
170 "\n"
171
--- src/stash.c
+++ src/stash.c
@@ -159,12 +159,13 @@
159 verify_all_options();
160 if( zComment==0 ){
161 Blob prompt; /* Prompt for stash comment */
162 Blob comment; /* User comment reply */
163 #ifdef _WIN32
164 int bomSize;
165 const unsigned char *bom = get_utf8_bom(&bomSize);
166 blob_init(&prompt, (const char *) bom, bomSize);
167 #else
168 blob_zero(&prompt);
169 #endif
170 blob_append(&prompt,
171 "\n"
172
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1634,11 +1634,11 @@
16341634
renderer.pOut = pOut;
16351635
}else{
16361636
renderer.pOut = cgi_output_blob();
16371637
}
16381638
1639
- blob_strip_bom(pIn, 0);
1639
+ blob_to_utf8_no_bom(pIn, 0);
16401640
wiki_render(&renderer, blob_str(pIn));
16411641
endAutoParagraph(&renderer);
16421642
while( renderer.nStack ){
16431643
popStack(&renderer);
16441644
}
@@ -1698,11 +1698,11 @@
16981698
*/
16991699
int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
17001700
char *z;
17011701
int i;
17021702
int iStart;
1703
- blob_strip_bom(pIn, 0);
1703
+ blob_to_utf8_no_bom(pIn, 0);
17041704
z = blob_str(pIn);
17051705
for(i=0; fossil_isspace(z[i]); i++){}
17061706
if( z[i]!='<' ) return 0;
17071707
i++;
17081708
if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
17091709
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1634,11 +1634,11 @@
1634 renderer.pOut = pOut;
1635 }else{
1636 renderer.pOut = cgi_output_blob();
1637 }
1638
1639 blob_strip_bom(pIn, 0);
1640 wiki_render(&renderer, blob_str(pIn));
1641 endAutoParagraph(&renderer);
1642 while( renderer.nStack ){
1643 popStack(&renderer);
1644 }
@@ -1698,11 +1698,11 @@
1698 */
1699 int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700 char *z;
1701 int i;
1702 int iStart;
1703 blob_strip_bom(pIn, 0);
1704 z = blob_str(pIn);
1705 for(i=0; fossil_isspace(z[i]); i++){}
1706 if( z[i]!='<' ) return 0;
1707 i++;
1708 if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1634,11 +1634,11 @@
1634 renderer.pOut = pOut;
1635 }else{
1636 renderer.pOut = cgi_output_blob();
1637 }
1638
1639 blob_to_utf8_no_bom(pIn, 0);
1640 wiki_render(&renderer, blob_str(pIn));
1641 endAutoParagraph(&renderer);
1642 while( renderer.nStack ){
1643 popStack(&renderer);
1644 }
@@ -1698,11 +1698,11 @@
1698 */
1699 int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700 char *z;
1701 int i;
1702 int iStart;
1703 blob_to_utf8_no_bom(pIn, 0);
1704 z = blob_str(pIn);
1705 for(i=0; fossil_isspace(z[i]); i++){}
1706 if( z[i]!='<' ) return 0;
1707 i++;
1708 if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1634,11 +1634,11 @@
16341634
renderer.pOut = pOut;
16351635
}else{
16361636
renderer.pOut = cgi_output_blob();
16371637
}
16381638
1639
- blob_strip_bom(pIn, 0);
1639
+ blob_to_utf8_no_bom(pIn, 0);
16401640
wiki_render(&renderer, blob_str(pIn));
16411641
endAutoParagraph(&renderer);
16421642
while( renderer.nStack ){
16431643
popStack(&renderer);
16441644
}
@@ -1698,11 +1698,11 @@
16981698
*/
16991699
int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
17001700
char *z;
17011701
int i;
17021702
int iStart;
1703
- blob_strip_bom(pIn, 0);
1703
+ blob_to_utf8_no_bom(pIn, 0);
17041704
z = blob_str(pIn);
17051705
for(i=0; fossil_isspace(z[i]); i++){}
17061706
if( z[i]!='<' ) return 0;
17071707
i++;
17081708
if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
17091709
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1634,11 +1634,11 @@
1634 renderer.pOut = pOut;
1635 }else{
1636 renderer.pOut = cgi_output_blob();
1637 }
1638
1639 blob_strip_bom(pIn, 0);
1640 wiki_render(&renderer, blob_str(pIn));
1641 endAutoParagraph(&renderer);
1642 while( renderer.nStack ){
1643 popStack(&renderer);
1644 }
@@ -1698,11 +1698,11 @@
1698 */
1699 int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700 char *z;
1701 int i;
1702 int iStart;
1703 blob_strip_bom(pIn, 0);
1704 z = blob_str(pIn);
1705 for(i=0; fossil_isspace(z[i]); i++){}
1706 if( z[i]!='<' ) return 0;
1707 i++;
1708 if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1634,11 +1634,11 @@
1634 renderer.pOut = pOut;
1635 }else{
1636 renderer.pOut = cgi_output_blob();
1637 }
1638
1639 blob_to_utf8_no_bom(pIn, 0);
1640 wiki_render(&renderer, blob_str(pIn));
1641 endAutoParagraph(&renderer);
1642 while( renderer.nStack ){
1643 popStack(&renderer);
1644 }
@@ -1698,11 +1698,11 @@
1698 */
1699 int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1700 char *z;
1701 int i;
1702 int iStart;
1703 blob_to_utf8_no_bom(pIn, 0);
1704 z = blob_str(pIn);
1705 for(i=0; fossil_isspace(z[i]); i++){}
1706 if( z[i]!='<' ) return 0;
1707 i++;
1708 if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1709

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button