Fossil SCM
Add optional iso8859-1 to utf-8 conversion. Still to do: special cp1252 characters.
Commit
4f060f6afb7d7a1cc268ce3b8b6a1b5180393152
Parent
b70a320288c6f16…
2 files changed
+30
+16
-3
+30
| --- src/blob.c | ||
| +++ src/blob.c | ||
| @@ -1027,10 +1027,40 @@ | ||
| 1027 | 1027 | if( z[i]!='\r' ) z[j++] = z[i]; |
| 1028 | 1028 | } |
| 1029 | 1029 | z[j] = 0; |
| 1030 | 1030 | p->nUsed = j; |
| 1031 | 1031 | } |
| 1032 | + | |
| 1033 | +/* | |
| 1034 | +** Convert blob in place from cp1252 to utf-8. For now every byte >= 0x80 | |
| 1035 | +** is decoded as iso8859-1, i.e. as the code point equal to its byte value. | |
| 1036 | +** Each such byte becomes two bytes; the result stays NUL-terminated. | |
| 1037 | +** TODO: cp1252 differs from iso8859-1 in 0x80..0x9F, which need a table. | |
| 1038 | +*/ | |
| 1039 | +void blob_cp1252_to_utf8(Blob *p){ | |
| 1040 | + unsigned char *z = (unsigned char *)p->aData; | |
| 1041 | + int j = p->nUsed; | |
| 1042 | + int i, n; | |
| 1043 | + for(i=n=0; i<j; i++){ /* n = count of bytes that expand to two */ | |
| 1044 | + if( z[i]>=0x80 ) n++; | |
| 1045 | + } | |
| 1046 | + j += n; /* j is now the converted length; i is the original length */ | |
| 1047 | + if( j>=p->nAlloc ){ /* need room for j bytes plus the terminator */ | |
| 1048 | + blob_resize(p, j); | |
| 1049 | + z = (unsigned char *)p->aData; /* refetch in case blob_resize moved aData */ | |
| 1050 | + } | |
| 1051 | + p->nUsed = j; | |
| 1052 | + z[j] = 0; | |
| 1053 | + while( j>i ){ /* copy backwards; once j==i the rest is ASCII already in place */ | |
| 1054 | + if( z[--i]>=0x80 ){ | |
| 1055 | + z[--j] = 0x80 | (z[i]&0x3F); /* continuation byte: low 6 bits */ | |
| 1056 | + z[--j] = 0xC0 | (z[i]>>6); /* lead byte: 0xC2 or 0xC3 */ | |
| 1057 | + }else{ | |
| 1058 | + z[--j] = z[i]; | |
| 1059 | + } | |
| 1060 | + } | |
| 1061 | +} | |
| 1032 | 1062 | |
| 1033 | 1063 | /* |
| 1034 | 1064 | ** Shell-escape the given string. Append the result to a blob. |
| 1035 | 1065 | */ |
| 1036 | 1066 | void shell_escape(Blob *pBlob, const char *zIn){ |
| 1037 | 1067 |
| --- src/blob.c | |
| +++ src/blob.c | |
| @@ -1027,10 +1027,40 @@ | |
| 1027 | if( z[i]!='\r' ) z[j++] = z[i]; |
| 1028 | } |
| 1029 | z[j] = 0; |
| 1030 | p->nUsed = j; |
| 1031 | } |
| 1032 | |
| 1033 | /* |
| 1034 | ** Shell-escape the given string. Append the result to a blob. |
| 1035 | */ |
| 1036 | void shell_escape(Blob *pBlob, const char *zIn){ |
| 1037 |
| --- src/blob.c | |
| +++ src/blob.c | |
| @@ -1027,10 +1027,40 @@ | |
| 1027 | if( z[i]!='\r' ) z[j++] = z[i]; |
| 1028 | } |
| 1029 | z[j] = 0; |
| 1030 | p->nUsed = j; |
| 1031 | } |
| 1032 | |
| 1033 | /* |
| 1034 | ** Convert blob from cp1252 to utf-8. As cp1252 is a superset |
| 1035 | ** of iso8895-1, this is useful on UNIX as well. |
| 1036 | ** |
| 1037 | ** TODO: the bytes 0x80..0xBF need a special table, iso8895-1 works. |
| 1038 | */ |
| 1039 | void blob_cp1252_to_utf8(Blob *p){ |
| 1040 | unsigned char *z = (unsigned char *)p->aData; |
| 1041 | int j = p->nUsed; |
| 1042 | int i, n; |
| 1043 | for(i=n=0; i<j; i++){ |
| 1044 | if( z[i]>=0x80 ) n++; |
| 1045 | } |
| 1046 | j += n; |
| 1047 | if( j>=p->nAlloc ){ |
| 1048 | blob_resize(p, j); |
| 1049 | z = (unsigned char *)p->aData; |
| 1050 | } |
| 1051 | p->nUsed = j; |
| 1052 | z[j] = 0; |
| 1053 | while( j>i ){ |
| 1054 | if( z[--i]>=0x80 ){ |
| 1055 | z[--j] = 0x80 | (z[i]&0x3F); |
| 1056 | z[--j] = 0xC0 | (z[i]>>6); |
| 1057 | }else{ |
| 1058 | z[--j] = z[i]; |
| 1059 | } |
| 1060 | } |
| 1061 | } |
| 1062 | |
| 1063 | /* |
| 1064 | ** Shell-escape the given string. Append the result to a blob. |
| 1065 | */ |
| 1066 | void shell_escape(Blob *pBlob, const char *zIn){ |
| 1067 |
+16
-3
| --- src/checkin.c | ||
| +++ src/checkin.c | ||
| @@ -909,27 +909,40 @@ | ||
| 909 | 909 | if( allOk ) return 0; |
| 910 | 910 | fUnicode = starts_with_utf16_bom(p, 0); |
| 911 | 911 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 912 | 912 | if( eType<-2){ |
| 913 | 913 | const char *zWarning; |
| 914 | + const char *zConvert; | |
| 914 | 915 | Blob ans; |
| 915 | 916 | char cReply; |
| 916 | 917 | |
| 917 | 918 | if(eType==-4){ |
| 918 | 919 | zWarning = "long lines"; |
| 920 | + zConvert = ""; | |
| 919 | 921 | }else{ |
| 920 | 922 | zWarning = "invalid UTF-8"; |
| 923 | + zConvert = "c=convert/"; | |
| 921 | 924 | } |
| 922 | 925 | blob_zero(&ans); |
| 923 | 926 | file_relative_name(zFilename, &fname, 0); |
| 924 | 927 | zMsg = mprintf( |
| 925 | - "%s appears to be text, but contains %s. commit anyhow (y/N)? ", | |
| 926 | - blob_str(&fname), zWarning); | |
| 928 | + "%s appears to be text, but contains %s. commit anyhow (%sy/N)? ", | |
| 929 | + blob_str(&fname), zWarning, zConvert); | |
| 927 | 930 | prompt_user(zMsg, &ans); |
| 928 | 931 | fossil_free(zMsg); |
| 929 | 932 | cReply = blob_str(&ans)[0]; |
| 930 | - if( cReply!='y' && cReply!='Y' ){ | |
| 933 | + if( *zConvert && (cReply=='c' || cReply=='C') ){ | |
| 934 | + char *zOrig = file_newname(zFilename, "original", 1); | |
| 935 | + FILE *f; | |
| 936 | + blob_write_to_file(p, zOrig); | |
| 937 | + fossil_free(zOrig); | |
| 938 | + f = fossil_fopen(zFilename, "wb"); | |
| 939 | + blob_cp1252_to_utf8(p); | |
| 940 | + fwrite(blob_buffer(p), 1, blob_size(p), f); | |
| 941 | + fclose(f); | |
| 942 | + return 1; | |
| 943 | + } else if( cReply!='y' && cReply!='Y' ){ | |
| 931 | 944 | fossil_fatal("Abandoning commit due to %s in %s", |
| 932 | 945 | zWarning, blob_str(&fname)); |
| 933 | 946 | } |
| 934 | 947 | blob_reset(&ans); |
| 935 | 948 | eType +=4 ; |
| 936 | 949 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -909,27 +909,40 @@ | |
| 909 | if( allOk ) return 0; |
| 910 | fUnicode = starts_with_utf16_bom(p, 0); |
| 911 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 912 | if( eType<-2){ |
| 913 | const char *zWarning; |
| 914 | Blob ans; |
| 915 | char cReply; |
| 916 | |
| 917 | if(eType==-4){ |
| 918 | zWarning = "long lines"; |
| 919 | }else{ |
| 920 | zWarning = "invalid UTF-8"; |
| 921 | } |
| 922 | blob_zero(&ans); |
| 923 | file_relative_name(zFilename, &fname, 0); |
| 924 | zMsg = mprintf( |
| 925 | "%s appears to be text, but contains %s. commit anyhow (y/N)? ", |
| 926 | blob_str(&fname), zWarning); |
| 927 | prompt_user(zMsg, &ans); |
| 928 | fossil_free(zMsg); |
| 929 | cReply = blob_str(&ans)[0]; |
| 930 | if( cReply!='y' && cReply!='Y' ){ |
| 931 | fossil_fatal("Abandoning commit due to %s in %s", |
| 932 | zWarning, blob_str(&fname)); |
| 933 | } |
| 934 | blob_reset(&ans); |
| 935 | eType +=4 ; |
| 936 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -909,27 +909,40 @@ | |
| 909 | if( allOk ) return 0; |
| 910 | fUnicode = starts_with_utf16_bom(p, 0); |
| 911 | eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); |
| 912 | if( eType<-2){ |
| 913 | const char *zWarning; |
| 914 | const char *zConvert; |
| 915 | Blob ans; |
| 916 | char cReply; |
| 917 | |
| 918 | if(eType==-4){ |
| 919 | zWarning = "long lines"; |
| 920 | zConvert = ""; |
| 921 | }else{ |
| 922 | zWarning = "invalid UTF-8"; |
| 923 | zConvert = "c=convert/"; |
| 924 | } |
| 925 | blob_zero(&ans); |
| 926 | file_relative_name(zFilename, &fname, 0); |
| 927 | zMsg = mprintf( |
| 928 | "%s appears to be text, but contains %s. commit anyhow (%sy/N)? ", |
| 929 | blob_str(&fname), zWarning, zConvert); |
| 930 | prompt_user(zMsg, &ans); |
| 931 | fossil_free(zMsg); |
| 932 | cReply = blob_str(&ans)[0]; |
| 933 | if( *zConvert && (cReply=='c' || cReply=='C') ){ |
| 934 | char *zOrig = file_newname(zFilename, "original", 1); |
| 935 | FILE *f; |
| 936 | blob_write_to_file(p, zOrig); |
| 937 | fossil_free(zOrig); |
| 938 | f = fossil_fopen(zFilename, "wb"); |
| 939 | blob_cp1252_to_utf8(p); |
| 940 | fwrite(blob_buffer(p), 1, blob_size(p), f); |
| 941 | fclose(f); |
| 942 | return 1; |
| 943 | } else if( cReply!='y' && cReply!='Y' ){ |
| 944 | fossil_fatal("Abandoning commit due to %s in %s", |
| 945 | zWarning, blob_str(&fname)); |
| 946 | } |
| 947 | blob_reset(&ans); |
| 948 | eType +=4 ; |
| 949 |