Fossil SCM

Modularize byte-order-mark and blob UTF-8 conversion handling.

mistachkin 2012-11-19 04:39 UTC convert_before_commit
Commit d29dd5449c8b4b3f7a8b8e97d501bf7d56b9634d
+1 -1
--- src/attach.c
+++ src/attach.c
@@ -513,11 +513,11 @@
513513
blob_zero(&attach);
514514
if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
515515
const char *z;
516516
const char *zLn = P("ln");
517517
content_get(ridSrc, &attach);
518
- blob_strip_bom(&attach, 0);
518
+ blob_to_utf8_no_bom(&attach, 0);
519519
z = blob_str(&attach);
520520
if( zLn ){
521521
output_text_with_line_numbers(z, zLn);
522522
}else{
523523
@ <pre>
524524
--- src/attach.c
+++ src/attach.c
@@ -513,11 +513,11 @@
513 blob_zero(&attach);
514 if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
515 const char *z;
516 const char *zLn = P("ln");
517 content_get(ridSrc, &attach);
518 blob_strip_bom(&attach, 0);
519 z = blob_str(&attach);
520 if( zLn ){
521 output_text_with_line_numbers(z, zLn);
522 }else{
523 @ <pre>
524
--- src/attach.c
+++ src/attach.c
@@ -513,11 +513,11 @@
513 blob_zero(&attach);
514 if( zMime==0 || strncmp(zMime,"text/", 5)==0 ){
515 const char *z;
516 const char *zLn = P("ln");
517 content_get(ridSrc, &attach);
518 blob_to_utf8_no_bom(&attach, 0);
519 z = blob_str(&attach);
520 if( zLn ){
521 output_text_with_line_numbers(z, zLn);
522 }else{
523 @ <pre>
524
+15 -24
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,52 @@
10881088
*pLeft = *pRight;
10891089
*pRight = swap;
10901090
}
10911091
10921092
/*
1093
-** Strip a possible BOM from the blob. On Windows, if there
1094
-** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095
-** to UTF-8 is done.
1096
-** If useMbcs is false and there is no BOM, the input string
1097
-** is assumed to be UTF-8 already, so no conversion is done.
1093
+** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1094
+** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1095
+** done. If useMbcs is false and there is no BOM, the input string is assumed
1096
+** to be UTF-8 already, so no conversion is done.
10981097
*/
1099
-void blob_strip_bom(Blob *pBlob, int useMbcs){
1100
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101
-#ifdef _WIN32
1102
- static const unsigned short ubom = 0xfeff;
1103
- static const unsigned short urbom = 0xfffe;
1104
-#endif /* _WIN32 */
1098
+void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
11051099
char *zUtf8;
1106
- if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107
- struct Blob temp;
1100
+ if( starts_with_utf8_bom(pBlob) ){
1101
+ struct Blob temp;
11081102
zUtf8 = blob_str(pBlob) + 3;
11091103
blob_zero(&temp);
11101104
blob_append(&temp, zUtf8, -1);
1111
- fossil_mbcs_free(zUtf8);
11121105
blob_swap(pBlob, &temp);
11131106
blob_reset(&temp);
11141107
#ifdef _WIN32
1115
- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116
- && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1108
+ }else if( starts_with_utf16be_bom(pBlob) ){
11171109
/* Make sure the blob contains two terminating 0-bytes */
11181110
blob_append(pBlob, "", 1);
11191111
zUtf8 = blob_str(pBlob) + 2;
11201112
zUtf8 = fossil_unicode_to_utf8(zUtf8);
11211113
blob_zero(pBlob);
11221114
blob_append(pBlob, zUtf8, -1);
11231115
fossil_mbcs_free(zUtf8);
1124
- }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125
- && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1116
+ }else if( starts_with_utf16le_bom(pBlob) ){
11261117
unsigned int i = blob_size(pBlob);
11271118
zUtf8 = blob_buffer(pBlob);
11281119
while( i > 0 ){
1129
- /* swap bytes of unicode representation */
1130
- char temp = zUtf8[--i];
1131
- zUtf8[i] = zUtf8[i-1];
1132
- zUtf8[--i] = temp;
1120
+ /* swap bytes of unicode representation */
1121
+ char temp = zUtf8[--i];
1122
+ zUtf8[i] = zUtf8[i-1];
1123
+ zUtf8[--i] = temp;
11331124
}
11341125
/* Make sure the blob contains two terminating 0-bytes */
11351126
blob_append(pBlob, "", 1);
11361127
zUtf8 = blob_str(pBlob) + 2;
11371128
zUtf8 = fossil_unicode_to_utf8(zUtf8);
11381129
blob_zero(pBlob);
11391130
blob_append(pBlob, zUtf8, -1);
11401131
fossil_mbcs_free(zUtf8);
1141
- }else if (useMbcs) {
1132
+ }else if( useMbcs ){
11421133
zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143
- blob_zero(pBlob);
1134
+ blob_reset(pBlob);
11441135
blob_append(pBlob, zUtf8, -1);
11451136
fossil_mbcs_free(zUtf8);
11461137
#endif /* _WIN32 */
11471138
}
11481139
}
11491140
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,52 @@
1088 *pLeft = *pRight;
1089 *pRight = swap;
1090 }
1091
1092 /*
1093 ** Strip a possible BOM from the blob. On Windows, if there
1094 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion
1095 ** to UTF-8 is done.
1096 ** If useMbcs is false and there is no BOM, the input string
1097 ** is assumed to be UTF-8 already, so no conversion is done.
1098 */
1099 void blob_strip_bom(Blob *pBlob, int useMbcs){
1100 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
1101 #ifdef _WIN32
1102 static const unsigned short ubom = 0xfeff;
1103 static const unsigned short urbom = 0xfffe;
1104 #endif /* _WIN32 */
1105 char *zUtf8;
1106 if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
1107 struct Blob temp;
1108 zUtf8 = blob_str(pBlob) + 3;
1109 blob_zero(&temp);
1110 blob_append(&temp, zUtf8, -1);
1111 fossil_mbcs_free(zUtf8);
1112 blob_swap(pBlob, &temp);
1113 blob_reset(&temp);
1114 #ifdef _WIN32
1115 }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1116 && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
1117 /* Make sure the blob contains two terminating 0-bytes */
1118 blob_append(pBlob, "", 1);
1119 zUtf8 = blob_str(pBlob) + 2;
1120 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1121 blob_zero(pBlob);
1122 blob_append(pBlob, zUtf8, -1);
1123 fossil_mbcs_free(zUtf8);
1124 }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
1125 && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
1126 unsigned int i = blob_size(pBlob);
1127 zUtf8 = blob_buffer(pBlob);
1128 while( i > 0 ){
1129 /* swap bytes of unicode representation */
1130 char temp = zUtf8[--i];
1131 zUtf8[i] = zUtf8[i-1];
1132 zUtf8[--i] = temp;
1133 }
1134 /* Make sure the blob contains two terminating 0-bytes */
1135 blob_append(pBlob, "", 1);
1136 zUtf8 = blob_str(pBlob) + 2;
1137 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1138 blob_zero(pBlob);
1139 blob_append(pBlob, zUtf8, -1);
1140 fossil_mbcs_free(zUtf8);
1141 }else if (useMbcs) {
1142 zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1143 blob_zero(pBlob);
1144 blob_append(pBlob, zUtf8, -1);
1145 fossil_mbcs_free(zUtf8);
1146 #endif /* _WIN32 */
1147 }
1148 }
1149
--- src/blob.c
+++ src/blob.c
@@ -1088,61 +1088,52 @@
1088 *pLeft = *pRight;
1089 *pRight = swap;
1090 }
1091
1092 /*
1093 ** Strip a possible byte-order-mark (BOM) from the blob. On Windows, if there
1094 ** is either no BOM at all or an (le/be) UTF-16 BOM, a conversion to UTF-8 is
1095 ** done. If useMbcs is false and there is no BOM, the input string is assumed
1096 ** to be UTF-8 already, so no conversion is done.
 
1097 */
1098 void blob_to_utf8_no_bom(Blob *pBlob, int useMbcs){
 
 
 
 
 
1099 char *zUtf8;
1100 if( starts_with_utf8_bom(pBlob) ){
1101 struct Blob temp;
1102 zUtf8 = blob_str(pBlob) + 3;
1103 blob_zero(&temp);
1104 blob_append(&temp, zUtf8, -1);
 
1105 blob_swap(pBlob, &temp);
1106 blob_reset(&temp);
1107 #ifdef _WIN32
1108 }else if( starts_with_utf16be_bom(pBlob) ){
 
1109 /* Make sure the blob contains two terminating 0-bytes */
1110 blob_append(pBlob, "", 1);
1111 zUtf8 = blob_str(pBlob) + 2;
1112 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1113 blob_zero(pBlob);
1114 blob_append(pBlob, zUtf8, -1);
1115 fossil_mbcs_free(zUtf8);
1116 }else if( starts_with_utf16le_bom(pBlob) ){
 
1117 unsigned int i = blob_size(pBlob);
1118 zUtf8 = blob_buffer(pBlob);
1119 while( i > 0 ){
1120 /* swap bytes of unicode representation */
1121 char temp = zUtf8[--i];
1122 zUtf8[i] = zUtf8[i-1];
1123 zUtf8[--i] = temp;
1124 }
1125 /* Make sure the blob contains two terminating 0-bytes */
1126 blob_append(pBlob, "", 1);
1127 zUtf8 = blob_str(pBlob) + 2;
1128 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1129 blob_zero(pBlob);
1130 blob_append(pBlob, zUtf8, -1);
1131 fossil_mbcs_free(zUtf8);
1132 }else if( useMbcs ){
1133 zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
1134 blob_reset(pBlob);
1135 blob_append(pBlob, zUtf8, -1);
1136 fossil_mbcs_free(zUtf8);
1137 #endif /* _WIN32 */
1138 }
1139 }
1140
+10 -10
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521521
break;
522522
}
523523
blob_append(&reply, zIn, -1);
524524
}
525525
}
526
- blob_strip_bom(&reply, 1);
526
+ blob_to_utf8_no_bom(&reply, 1);
527527
blob_remove_cr(&reply);
528528
file_delete(zFile);
529529
free(zFile);
530530
blob_zero(pComment);
531531
while( blob_line(&reply, &line) ){
@@ -570,11 +570,11 @@
570570
int parent_rid,
571571
const char *zUserOvrd
572572
){
573573
Blob prompt;
574574
#ifdef _WIN32
575
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
575
+ const unsigned char *bom = get_utf8_bom();
576576
blob_init(&prompt, (const char *) bom, 3);
577577
if( zInit && zInit[0]) {
578578
blob_append(&prompt, zInit, -1);
579579
}
580580
#else
@@ -908,11 +908,11 @@
908908
if( allOk ) return 0;
909909
fUnicode = starts_with_utf16_bom(p);
910910
eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
911911
if( eType==0 || eType==-1 || fUnicode ){
912912
const char *zWarning;
913
- const char *c = "c=convert/";
913
+ const char *zConvert = "c=convert/";
914914
Blob ans;
915915
char cReply;
916916
917917
if( eType==-1 && fUnicode ){
918918
zWarning = "Unicode and CR/NL line endings";
@@ -924,37 +924,37 @@
924924
}else if( eType==0 ){
925925
if( binOk ){
926926
return 0; /* We don't want binary warnings for this file. */
927927
}
928928
zWarning = "binary data";
929
- c = ""; /* We cannot automatically convert binary files */
929
+ zConvert = ""; /* We cannot convert binary files. */
930930
}else{
931931
zWarning = "Unicode";
932932
#ifndef _WIN32
933
- c = ""; /* On UNIX, we cannot automatically convert unicode files */
933
+ zConvert = ""; /* On Unix, we cannot easily convert Unicode files. */
934934
#endif
935935
}
936936
file_relative_name(zFilename, &fname, 0);
937937
blob_zero(&ans);
938938
zMsg = mprintf(
939939
"%s contains %s. commit anyhow (a=all/%sy/N)? ",
940
- blob_str(&fname), zWarning, c);
940
+ blob_str(&fname), zWarning, zConvert);
941941
prompt_user(zMsg, &ans);
942942
fossil_free(zMsg);
943943
cReply = blob_str(&ans)[0];
944944
if( cReply=='a' || cReply=='A' ){
945945
allOk = 1;
946
- }else if( (c[0] != 0) && (cReply=='c' || cReply=='C') ){
946
+ }else if( *zConvert && (cReply=='c' || cReply=='C') ){
947947
char *zOrig = file_newname(zFilename, "original", 1);
948948
FILE *f;
949949
blob_write_to_file(p, zOrig);
950950
fossil_free(zOrig);
951951
f = fossil_fopen(zFilename, "wb");
952952
if( fUnicode ) {
953
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
953
+ const unsigned char *bom = get_utf8_bom();
954954
fwrite(bom, 1, 3, f);
955
- blob_strip_bom(p, 0);
955
+ blob_to_utf8_no_bom(p, 0);
956956
}
957957
blob_remove_cr(p);
958958
fwrite(blob_buffer(p), 1, blob_size(p), f);
959959
fclose(f);
960960
return 1;
@@ -1248,11 +1248,11 @@
12481248
blob_zero(&comment);
12491249
blob_append(&comment, zComment, -1);
12501250
}else if( zComFile ){
12511251
blob_zero(&comment);
12521252
blob_read_from_file(&comment, zComFile);
1253
- blob_strip_bom(&comment, 1);
1253
+ blob_to_utf8_no_bom(&comment, 1);
12541254
}else{
12551255
char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
12561256
prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
12571257
if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
12581258
blob_zero(&ans);
12591259
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521 break;
522 }
523 blob_append(&reply, zIn, -1);
524 }
525 }
526 blob_strip_bom(&reply, 1);
527 blob_remove_cr(&reply);
528 file_delete(zFile);
529 free(zFile);
530 blob_zero(pComment);
531 while( blob_line(&reply, &line) ){
@@ -570,11 +570,11 @@
570 int parent_rid,
571 const char *zUserOvrd
572 ){
573 Blob prompt;
574 #ifdef _WIN32
575 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
576 blob_init(&prompt, (const char *) bom, 3);
577 if( zInit && zInit[0]) {
578 blob_append(&prompt, zInit, -1);
579 }
580 #else
@@ -908,11 +908,11 @@
908 if( allOk ) return 0;
909 fUnicode = starts_with_utf16_bom(p);
910 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
911 if( eType==0 || eType==-1 || fUnicode ){
912 const char *zWarning;
913 const char *c = "c=convert/";
914 Blob ans;
915 char cReply;
916
917 if( eType==-1 && fUnicode ){
918 zWarning = "Unicode and CR/NL line endings";
@@ -924,37 +924,37 @@
924 }else if( eType==0 ){
925 if( binOk ){
926 return 0; /* We don't want binary warnings for this file. */
927 }
928 zWarning = "binary data";
929 c = ""; /* We cannot automatically convert binary files */
930 }else{
931 zWarning = "Unicode";
932 #ifndef _WIN32
933 c = ""; /* On UNIX, we cannot automatically convert unicode files */
934 #endif
935 }
936 file_relative_name(zFilename, &fname, 0);
937 blob_zero(&ans);
938 zMsg = mprintf(
939 "%s contains %s. commit anyhow (a=all/%sy/N)? ",
940 blob_str(&fname), zWarning, c);
941 prompt_user(zMsg, &ans);
942 fossil_free(zMsg);
943 cReply = blob_str(&ans)[0];
944 if( cReply=='a' || cReply=='A' ){
945 allOk = 1;
946 }else if( (c[0] != 0) && (cReply=='c' || cReply=='C') ){
947 char *zOrig = file_newname(zFilename, "original", 1);
948 FILE *f;
949 blob_write_to_file(p, zOrig);
950 fossil_free(zOrig);
951 f = fossil_fopen(zFilename, "wb");
952 if( fUnicode ) {
953 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
954 fwrite(bom, 1, 3, f);
955 blob_strip_bom(p, 0);
956 }
957 blob_remove_cr(p);
958 fwrite(blob_buffer(p), 1, blob_size(p), f);
959 fclose(f);
960 return 1;
@@ -1248,11 +1248,11 @@
1248 blob_zero(&comment);
1249 blob_append(&comment, zComment, -1);
1250 }else if( zComFile ){
1251 blob_zero(&comment);
1252 blob_read_from_file(&comment, zComFile);
1253 blob_strip_bom(&comment, 1);
1254 }else{
1255 char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1256 prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1257 if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1258 blob_zero(&ans);
1259
--- src/checkin.c
+++ src/checkin.c
@@ -521,11 +521,11 @@
521 break;
522 }
523 blob_append(&reply, zIn, -1);
524 }
525 }
526 blob_to_utf8_no_bom(&reply, 1);
527 blob_remove_cr(&reply);
528 file_delete(zFile);
529 free(zFile);
530 blob_zero(pComment);
531 while( blob_line(&reply, &line) ){
@@ -570,11 +570,11 @@
570 int parent_rid,
571 const char *zUserOvrd
572 ){
573 Blob prompt;
574 #ifdef _WIN32
575 const unsigned char *bom = get_utf8_bom();
576 blob_init(&prompt, (const char *) bom, 3);
577 if( zInit && zInit[0]) {
578 blob_append(&prompt, zInit, -1);
579 }
580 #else
@@ -908,11 +908,11 @@
908 if( allOk ) return 0;
909 fUnicode = starts_with_utf16_bom(p);
910 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
911 if( eType==0 || eType==-1 || fUnicode ){
912 const char *zWarning;
913 const char *zConvert = "c=convert/";
914 Blob ans;
915 char cReply;
916
917 if( eType==-1 && fUnicode ){
918 zWarning = "Unicode and CR/NL line endings";
@@ -924,37 +924,37 @@
924 }else if( eType==0 ){
925 if( binOk ){
926 return 0; /* We don't want binary warnings for this file. */
927 }
928 zWarning = "binary data";
929 zConvert = ""; /* We cannot convert binary files. */
930 }else{
931 zWarning = "Unicode";
932 #ifndef _WIN32
933 zConvert = ""; /* On Unix, we cannot easily convert Unicode files. */
934 #endif
935 }
936 file_relative_name(zFilename, &fname, 0);
937 blob_zero(&ans);
938 zMsg = mprintf(
939 "%s contains %s. commit anyhow (a=all/%sy/N)? ",
940 blob_str(&fname), zWarning, zConvert);
941 prompt_user(zMsg, &ans);
942 fossil_free(zMsg);
943 cReply = blob_str(&ans)[0];
944 if( cReply=='a' || cReply=='A' ){
945 allOk = 1;
946 }else if( *zConvert && (cReply=='c' || cReply=='C') ){
947 char *zOrig = file_newname(zFilename, "original", 1);
948 FILE *f;
949 blob_write_to_file(p, zOrig);
950 fossil_free(zOrig);
951 f = fossil_fopen(zFilename, "wb");
952 if( fUnicode ) {
953 const unsigned char *bom = get_utf8_bom();
954 fwrite(bom, 1, 3, f);
955 blob_to_utf8_no_bom(p, 0);
956 }
957 blob_remove_cr(p);
958 fwrite(blob_buffer(p), 1, blob_size(p), f);
959 fclose(f);
960 return 1;
@@ -1248,11 +1248,11 @@
1248 blob_zero(&comment);
1249 blob_append(&comment, zComment, -1);
1250 }else if( zComFile ){
1251 blob_zero(&comment);
1252 blob_read_from_file(&comment, zComFile);
1253 blob_to_utf8_no_bom(&comment, 1);
1254 }else{
1255 char *zInit = db_text(0, "SELECT value FROM vvar WHERE name='ci-comment'");
1256 prepare_commit_comment(&comment, zInit, zBranch, vid, zUserOvrd);
1257 if( zInit && zInit[0] && fossil_strcmp(zInit, blob_str(&comment))==0 ){
1258 blob_zero(&ans);
1259
+53
--- src/diff.c
+++ src/diff.c
@@ -321,10 +321,31 @@
321321
if( j>UTF16_LENGTH_MASK ){
322322
return 0; /* Very long line -> binary */
323323
}
324324
return result; /* No problems seen -> not binary */
325325
}
326
+
327
+/*
328
+** This function returns an array of bytes representing the byte-order-mark
329
+** for UTF-8.
330
+*/
331
+const unsigned char *get_utf8_bom(){
332
+ static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
333
+ return bom;
334
+}
335
+
336
+/*
337
+** This function returns non-zero if the blob starts with a UTF-8
338
+** byte-order-mark (BOM).
339
+*/
340
+int starts_with_utf8_bom(const Blob *pContent){
341
+ const char *z = blob_buffer(pContent);
342
+ const unsigned char *bom = get_utf8_bom();
343
+
344
+ if( blob_size(pContent)<3 ) return 0;
345
+ return memcmp(z, bom, 3)==0;
346
+}
326347
327348
/*
328349
** This function returns non-zero if the blob starts with a UTF-16le or
329350
** UTF-16be byte-order-mark (BOM).
330351
*/
@@ -339,10 +360,42 @@
339360
}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340361
return 1;
341362
}
342363
return 0;
343364
}
365
+
366
+/*
367
+** This function returns non-zero if the blob starts with a UTF-16le
368
+** byte-order-mark (BOM).
369
+*/
370
+int starts_with_utf16le_bom(const Blob *pContent){
371
+ const char *z = blob_buffer(pContent);
372
+ int c1, c2;
373
+
374
+ if( blob_size(pContent)<2 ) return 0;
375
+ c1 = z[0]; c2 = z[1];
376
+ if( (c1==(char)0xff) && (c2==(char)0xfe) ){
377
+ return 1;
378
+ }
379
+ return 0;
380
+}
381
+
382
+/*
383
+** This function returns non-zero if the blob starts with a UTF-16be
384
+** byte-order-mark (BOM).
385
+*/
386
+int starts_with_utf16be_bom(const Blob *pContent){
387
+ const char *z = blob_buffer(pContent);
388
+ int c1, c2;
389
+
390
+ if( blob_size(pContent)<2 ) return 0;
391
+ c1 = z[0]; c2 = z[1];
392
+ if( (c1==(char)0xfe) && (c2==(char)0xff) ){
393
+ return 1;
394
+ }
395
+ return 0;
396
+}
344397
345398
/*
346399
** Return true if two DLine elements are identical.
347400
*/
348401
static int same_dline(DLine *pA, DLine *pB){
349402
--- src/diff.c
+++ src/diff.c
@@ -321,10 +321,31 @@
321 if( j>UTF16_LENGTH_MASK ){
322 return 0; /* Very long line -> binary */
323 }
324 return result; /* No problems seen -> not binary */
325 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
327 /*
328 ** This function returns non-zero if the blob starts with a UTF-16le or
329 ** UTF-16be byte-order-mark (BOM).
330 */
@@ -339,10 +360,42 @@
339 }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
340 return 1;
341 }
342 return 0;
343 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
345 /*
346 ** Return true if two DLine elements are identical.
347 */
348 static int same_dline(DLine *pA, DLine *pB){
349
--- src/diff.c
+++ src/diff.c
@@ -321,10 +321,31 @@
321 if( j>UTF16_LENGTH_MASK ){
322 return 0; /* Very long line -> binary */
323 }
324 return result; /* No problems seen -> not binary */
325 }
326
327 /*
328 ** This function returns an array of bytes representing the byte-order-mark
329 ** for UTF-8.
330 */
331 const unsigned char *get_utf8_bom(){
332 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
333 return bom;
334 }
335
336 /*
337 ** This function returns non-zero if the blob starts with a UTF-8
338 ** byte-order-mark (BOM).
339 */
340 int starts_with_utf8_bom(const Blob *pContent){
341 const char *z = blob_buffer(pContent);
342 const unsigned char *bom = get_utf8_bom();
343
344 if( blob_size(pContent)<3 ) return 0;
345 return memcmp(z, bom, 3)==0;
346 }
347
348 /*
349 ** This function returns non-zero if the blob starts with a UTF-16le or
350 ** UTF-16be byte-order-mark (BOM).
351 */
@@ -339,10 +360,42 @@
360 }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
361 return 1;
362 }
363 return 0;
364 }
365
366 /*
367 ** This function returns non-zero if the blob starts with a UTF-16le
368 ** byte-order-mark (BOM).
369 */
370 int starts_with_utf16le_bom(const Blob *pContent){
371 const char *z = blob_buffer(pContent);
372 int c1, c2;
373
374 if( blob_size(pContent)<2 ) return 0;
375 c1 = z[0]; c2 = z[1];
376 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
377 return 1;
378 }
379 return 0;
380 }
381
382 /*
383 ** This function returns non-zero if the blob starts with a UTF-16be
384 ** byte-order-mark (BOM).
385 */
386 int starts_with_utf16be_bom(const Blob *pContent){
387 const char *z = blob_buffer(pContent);
388 int c1, c2;
389
390 if( blob_size(pContent)<2 ) return 0;
391 c1 = z[0]; c2 = z[1];
392 if( (c1==(char)0xfe) && (c2==(char)0xff) ){
393 return 1;
394 }
395 return 0;
396 }
397
398 /*
399 ** Return true if two DLine elements are identical.
400 */
401 static int same_dline(DLine *pA, DLine *pB){
402
+2 -2
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
16341634
content_get(rid, &content);
16351635
if( renderAsWiki ){
16361636
wiki_convert(&content, 0, 0);
16371637
}else if( renderAsHtml ){
16381638
@ <div>
1639
- blob_strip_bom(&content, 0);
1639
+ blob_to_utf8_no_bom(&content, 0);
16401640
cgi_append_content(blob_buffer(&content), blob_size(&content));
16411641
@ </div>
16421642
}else{
16431643
style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
16441644
zMime = mimetype_from_content(&content);
16451645
@ <blockquote>
16461646
if( zMime==0 ){
16471647
const char *zLn = P("ln");
16481648
const char *z;
1649
- blob_strip_bom(&content, 0);
1649
+ blob_to_utf8_no_bom(&content, 0);
16501650
z = blob_str(&content);
16511651
if( zLn ){
16521652
output_text_with_line_numbers(z, zLn);
16531653
}else{
16541654
@ <pre>
16551655
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
1634 content_get(rid, &content);
1635 if( renderAsWiki ){
1636 wiki_convert(&content, 0, 0);
1637 }else if( renderAsHtml ){
1638 @ <div>
1639 blob_strip_bom(&content, 0);
1640 cgi_append_content(blob_buffer(&content), blob_size(&content));
1641 @ </div>
1642 }else{
1643 style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644 zMime = mimetype_from_content(&content);
1645 @ <blockquote>
1646 if( zMime==0 ){
1647 const char *zLn = P("ln");
1648 const char *z;
1649 blob_strip_bom(&content, 0);
1650 z = blob_str(&content);
1651 if( zLn ){
1652 output_text_with_line_numbers(z, zLn);
1653 }else{
1654 @ <pre>
1655
--- src/info.c
+++ src/info.c
@@ -1634,21 +1634,21 @@
1634 content_get(rid, &content);
1635 if( renderAsWiki ){
1636 wiki_convert(&content, 0, 0);
1637 }else if( renderAsHtml ){
1638 @ <div>
1639 blob_to_utf8_no_bom(&content, 0);
1640 cgi_append_content(blob_buffer(&content), blob_size(&content));
1641 @ </div>
1642 }else{
1643 style_submenu_element("Hex","Hex", "%s/hexdump?name=%s", g.zTop, zUuid);
1644 zMime = mimetype_from_content(&content);
1645 @ <blockquote>
1646 if( zMime==0 ){
1647 const char *zLn = P("ln");
1648 const char *z;
1649 blob_to_utf8_no_bom(&content, 0);
1650 z = blob_str(&content);
1651 if( zLn ){
1652 output_text_with_line_numbers(z, zLn);
1653 }else{
1654 @ <pre>
1655
+1 -1
--- src/main.c
+++ src/main.c
@@ -516,11 +516,11 @@
516516
if(stdin != zInFile){
517517
fclose(zInFile);
518518
}
519519
zInFile = NULL;
520520
}
521
- blob_strip_bom(&file, 1);
521
+ blob_to_utf8_no_bom(&file, 1);
522522
z = blob_str(&file);
523523
for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
524524
newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
525525
for(j=0; j<i; j++) newArgv[j] = g.argv[j];
526526
527527
--- src/main.c
+++ src/main.c
@@ -516,11 +516,11 @@
516 if(stdin != zInFile){
517 fclose(zInFile);
518 }
519 zInFile = NULL;
520 }
521 blob_strip_bom(&file, 1);
522 z = blob_str(&file);
523 for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
524 newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
525 for(j=0; j<i; j++) newArgv[j] = g.argv[j];
526
527
--- src/main.c
+++ src/main.c
@@ -516,11 +516,11 @@
516 if(stdin != zInFile){
517 fclose(zInFile);
518 }
519 zInFile = NULL;
520 }
521 blob_to_utf8_no_bom(&file, 1);
522 z = blob_str(&file);
523 for(k=0, nLine=1; z[k]; k++) if( z[k]=='\n' ) nLine++;
524 newArgv = fossil_malloc( sizeof(char*)*(g.argc + nLine*2) );
525 for(j=0; j<i; j++) newArgv[j] = g.argv[j];
526
527
+1 -1
--- src/stash.c
+++ src/stash.c
@@ -159,11 +159,11 @@
159159
verify_all_options();
160160
if( zComment==0 ){
161161
Blob prompt; /* Prompt for stash comment */
162162
Blob comment; /* User comment reply */
163163
#ifdef _WIN32
164
- static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
164
+ const unsigned char *bom = get_utf8_bom();
165165
blob_init(&prompt, (const char *) bom, 3);
166166
#else
167167
blob_zero(&prompt);
168168
#endif
169169
blob_append(&prompt,
170170
--- src/stash.c
+++ src/stash.c
@@ -159,11 +159,11 @@
159 verify_all_options();
160 if( zComment==0 ){
161 Blob prompt; /* Prompt for stash comment */
162 Blob comment; /* User comment reply */
163 #ifdef _WIN32
164 static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
165 blob_init(&prompt, (const char *) bom, 3);
166 #else
167 blob_zero(&prompt);
168 #endif
169 blob_append(&prompt,
170
--- src/stash.c
+++ src/stash.c
@@ -159,11 +159,11 @@
159 verify_all_options();
160 if( zComment==0 ){
161 Blob prompt; /* Prompt for stash comment */
162 Blob comment; /* User comment reply */
163 #ifdef _WIN32
164 const unsigned char *bom = get_utf8_bom();
165 blob_init(&prompt, (const char *) bom, 3);
166 #else
167 blob_zero(&prompt);
168 #endif
169 blob_append(&prompt,
170
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1576,11 +1576,11 @@
15761576
renderer.pOut = pOut;
15771577
}else{
15781578
renderer.pOut = cgi_output_blob();
15791579
}
15801580
1581
- blob_strip_bom(pIn, 0);
1581
+ blob_to_utf8_no_bom(pIn, 0);
15821582
wiki_render(&renderer, blob_str(pIn));
15831583
endAutoParagraph(&renderer);
15841584
while( renderer.nStack ){
15851585
popStack(&renderer);
15861586
}
@@ -1619,11 +1619,11 @@
16191619
*/
16201620
int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
16211621
char *z;
16221622
int i;
16231623
int iStart;
1624
- blob_strip_bom(pIn, 0);
1624
+ blob_to_utf8_no_bom(pIn, 0);
16251625
z = blob_str(pIn);
16261626
for(i=0; fossil_isspace(z[i]); i++){}
16271627
if( z[i]!='<' ) return 0;
16281628
i++;
16291629
if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
16301630
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1576,11 +1576,11 @@
1576 renderer.pOut = pOut;
1577 }else{
1578 renderer.pOut = cgi_output_blob();
1579 }
1580
1581 blob_strip_bom(pIn, 0);
1582 wiki_render(&renderer, blob_str(pIn));
1583 endAutoParagraph(&renderer);
1584 while( renderer.nStack ){
1585 popStack(&renderer);
1586 }
@@ -1619,11 +1619,11 @@
1619 */
1620 int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1621 char *z;
1622 int i;
1623 int iStart;
1624 blob_strip_bom(pIn, 0);
1625 z = blob_str(pIn);
1626 for(i=0; fossil_isspace(z[i]); i++){}
1627 if( z[i]!='<' ) return 0;
1628 i++;
1629 if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1630
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1576,11 +1576,11 @@
1576 renderer.pOut = pOut;
1577 }else{
1578 renderer.pOut = cgi_output_blob();
1579 }
1580
1581 blob_to_utf8_no_bom(pIn, 0);
1582 wiki_render(&renderer, blob_str(pIn));
1583 endAutoParagraph(&renderer);
1584 while( renderer.nStack ){
1585 popStack(&renderer);
1586 }
@@ -1619,11 +1619,11 @@
1619 */
1620 int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1621 char *z;
1622 int i;
1623 int iStart;
1624 blob_to_utf8_no_bom(pIn, 0);
1625 z = blob_str(pIn);
1626 for(i=0; fossil_isspace(z[i]); i++){}
1627 if( z[i]!='<' ) return 0;
1628 i++;
1629 if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1630

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button