Fossil SCM

Add optional iso8859-1 to utf-8 conversion. Still to do: special cp1252 characters.

jan.nijtmans 2012-12-12 13:53 improve_commit_warning
Commit 4f060f6afb7d7a1cc268ce3b8b6a1b5180393152
2 files changed +30 +16 -3
+30
--- src/blob.c
+++ src/blob.c
@@ -1027,10 +1027,40 @@
10271027
if( z[i]!='\r' ) z[j++] = z[i];
10281028
}
10291029
z[j] = 0;
10301030
p->nUsed = j;
10311031
}
1032
+
1033
+/*
1034
+** Convert blob from cp1252 to utf-8. As cp1252 is a superset
1035
+** of iso8859-1, this is useful on UNIX as well.
1036
+**
1037
+** TODO: the bytes 0x80..0xBF need a special table, iso8859-1 works.
1038
+*/
1039
+void blob_cp1252_to_utf8(Blob *p){
1040
+ unsigned char *z = (unsigned char *)p->aData;
1041
+ int j = p->nUsed;
1042
+ int i, n;
1043
+ for(i=n=0; i<j; i++){
1044
+ if( z[i]>=0x80 ) n++;
1045
+ }
1046
+ j += n;
1047
+ if( j>=p->nAlloc ){
1048
+ blob_resize(p, j);
1049
+ z = (unsigned char *)p->aData;
1050
+ }
1051
+ p->nUsed = j;
1052
+ z[j] = 0;
1053
+ while( j>i ){
1054
+ if( z[--i]>=0x80 ){
1055
+ z[--j] = 0x80 | (z[i]&0x3F);
1056
+ z[--j] = 0xC0 | (z[i]>>6);
1057
+ }else{
1058
+ z[--j] = z[i];
1059
+ }
1060
+ }
1061
+}
10321062
10331063
/*
10341064
** Shell-escape the given string. Append the result to a blob.
10351065
*/
10361066
void shell_escape(Blob *pBlob, const char *zIn){
10371067
--- src/blob.c
+++ src/blob.c
@@ -1027,10 +1027,40 @@
1027 if( z[i]!='\r' ) z[j++] = z[i];
1028 }
1029 z[j] = 0;
1030 p->nUsed = j;
1031 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1032
1033 /*
1034 ** Shell-escape the given string. Append the result to a blob.
1035 */
1036 void shell_escape(Blob *pBlob, const char *zIn){
1037
--- src/blob.c
+++ src/blob.c
@@ -1027,10 +1027,40 @@
1027 if( z[i]!='\r' ) z[j++] = z[i];
1028 }
1029 z[j] = 0;
1030 p->nUsed = j;
1031 }
1032
1033 /*
1034 ** Convert blob from cp1252 to utf-8. As cp1252 is a superset
1035 ** of iso8859-1, this is useful on UNIX as well.
1036 **
1037 ** TODO: the bytes 0x80..0xBF need a special table, iso8859-1 works.
1038 */
1039 void blob_cp1252_to_utf8(Blob *p){
1040 unsigned char *z = (unsigned char *)p->aData;
1041 int j = p->nUsed;
1042 int i, n;
1043 for(i=n=0; i<j; i++){
1044 if( z[i]>=0x80 ) n++;
1045 }
1046 j += n;
1047 if( j>=p->nAlloc ){
1048 blob_resize(p, j);
1049 z = (unsigned char *)p->aData;
1050 }
1051 p->nUsed = j;
1052 z[j] = 0;
1053 while( j>i ){
1054 if( z[--i]>=0x80 ){
1055 z[--j] = 0x80 | (z[i]&0x3F);
1056 z[--j] = 0xC0 | (z[i]>>6);
1057 }else{
1058 z[--j] = z[i];
1059 }
1060 }
1061 }
1062
1063 /*
1064 ** Shell-escape the given string. Append the result to a blob.
1065 */
1066 void shell_escape(Blob *pBlob, const char *zIn){
1067
+16 -3
--- src/checkin.c
+++ src/checkin.c
@@ -909,27 +909,40 @@
909909
if( allOk ) return 0;
910910
fUnicode = starts_with_utf16_bom(p, 0);
911911
eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
912912
if( eType<-2){
913913
const char *zWarning;
914
+ const char *zConvert;
914915
Blob ans;
915916
char cReply;
916917
917918
if(eType==-4){
918919
zWarning = "long lines";
920
+ zConvert = "";
919921
}else{
920922
zWarning = "invalid UTF-8";
923
+ zConvert = "c=convert/";
921924
}
922925
blob_zero(&ans);
923926
file_relative_name(zFilename, &fname, 0);
924927
zMsg = mprintf(
925
- "%s appears to be text, but contains %s. commit anyhow (y/N)? ",
926
- blob_str(&fname), zWarning);
928
+ "%s appears to be text, but contains %s. commit anyhow (%sy/N)? ",
929
+ blob_str(&fname), zWarning, zConvert);
927930
prompt_user(zMsg, &ans);
928931
fossil_free(zMsg);
929932
cReply = blob_str(&ans)[0];
930
- if( cReply!='y' && cReply!='Y' ){
933
+ if( *zConvert && (cReply=='c' || cReply=='C') ){
934
+ char *zOrig = file_newname(zFilename, "original", 1);
935
+ FILE *f;
936
+ blob_write_to_file(p, zOrig);
937
+ fossil_free(zOrig);
938
+ f = fossil_fopen(zFilename, "wb");
939
+ blob_cp1252_to_utf8(p);
940
+ fwrite(blob_buffer(p), 1, blob_size(p), f);
941
+ fclose(f);
942
+ return 1;
943
+ } else if( cReply!='y' && cReply!='Y' ){
931944
fossil_fatal("Abandoning commit due to %s in %s",
932945
zWarning, blob_str(&fname));
933946
}
934947
blob_reset(&ans);
935948
eType +=4 ;
936949
--- src/checkin.c
+++ src/checkin.c
@@ -909,27 +909,40 @@
909 if( allOk ) return 0;
910 fUnicode = starts_with_utf16_bom(p, 0);
911 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
912 if( eType<-2){
913 const char *zWarning;
 
914 Blob ans;
915 char cReply;
916
917 if(eType==-4){
918 zWarning = "long lines";
 
919 }else{
920 zWarning = "invalid UTF-8";
 
921 }
922 blob_zero(&ans);
923 file_relative_name(zFilename, &fname, 0);
924 zMsg = mprintf(
925 "%s appears to be text, but contains %s. commit anyhow (y/N)? ",
926 blob_str(&fname), zWarning);
927 prompt_user(zMsg, &ans);
928 fossil_free(zMsg);
929 cReply = blob_str(&ans)[0];
930 if( cReply!='y' && cReply!='Y' ){
 
 
 
 
 
 
 
 
 
 
931 fossil_fatal("Abandoning commit due to %s in %s",
932 zWarning, blob_str(&fname));
933 }
934 blob_reset(&ans);
935 eType +=4 ;
936
--- src/checkin.c
+++ src/checkin.c
@@ -909,27 +909,40 @@
909 if( allOk ) return 0;
910 fUnicode = starts_with_utf16_bom(p, 0);
911 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
912 if( eType<-2){
913 const char *zWarning;
914 const char *zConvert;
915 Blob ans;
916 char cReply;
917
918 if(eType==-4){
919 zWarning = "long lines";
920 zConvert = "";
921 }else{
922 zWarning = "invalid UTF-8";
923 zConvert = "c=convert/";
924 }
925 blob_zero(&ans);
926 file_relative_name(zFilename, &fname, 0);
927 zMsg = mprintf(
928 "%s appears to be text, but contains %s. commit anyhow (%sy/N)? ",
929 blob_str(&fname), zWarning, zConvert);
930 prompt_user(zMsg, &ans);
931 fossil_free(zMsg);
932 cReply = blob_str(&ans)[0];
933 if( *zConvert && (cReply=='c' || cReply=='C') ){
934 char *zOrig = file_newname(zFilename, "original", 1);
935 FILE *f;
936 blob_write_to_file(p, zOrig);
937 fossil_free(zOrig);
938 f = fossil_fopen(zFilename, "wb");
939 blob_cp1252_to_utf8(p);
940 fwrite(blob_buffer(p), 1, blob_size(p), f);
941 fclose(f);
942 return 1;
943 } else if( cReply!='y' && cReply!='Y' ){
944 fossil_fatal("Abandoning commit due to %s in %s",
945 zWarning, blob_str(&fname));
946 }
947 blob_reset(&ans);
948 eType +=4 ;
949

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button