Fossil SCM

Add optional iso8859-1 to utf-8 conversion. Still to do: special cp1252 characters.

jan.nijtmans 2012-12-12 13:53 improve_commit_warning

Commit 4f060f6afb7d7a1cc268ce3b8b6a1b5180393152

Parent b70a320288c6f16…

2 files changed +30 +16 -3

M src/blob.c

+30

		--- src/blob.c
		+++ src/blob.c
		@@ -1027,10 +1027,40 @@
1027	1027	if( z[i]!='\r' ) z[j++] = z[i];
1028	1028	}
1029	1029	z[j] = 0;
1030	1030	p->nUsed = j;
1031	1031	}
	1032	+
	1033	+/*
	1034	+** Convert blob from cp1252 to utf-8. As cp1252 is a superset
	1035	+** of iso8859-1, this is useful on UNIX as well.
	1036	+**
	1037	+** TODO: the bytes 0x80..0xBF need a special table, iso8859-1 works.
	1038	+*/
	1039	+void blob_cp1252_to_utf8(Blob *p){
	1040	+ unsigned char *z = (unsigned char *)p->aData;
	1041	+ int j = p->nUsed;
	1042	+ int i, n;
	1043	+ for(i=n=0; i<j; i++){
	1044	+ if( z[i]>=0x80 ) n++;
	1045	+ }
	1046	+ j += n;
	1047	+ if( j>=p->nAlloc ){
	1048	+ blob_resize(p, j);
	1049	+ z = (unsigned char *)p->aData;
	1050	+ }
	1051	+ p->nUsed = j;
	1052	+ z[j] = 0;
	1053	+ while( j>i ){
	1054	+ if( z[--i]>=0x80 ){
	1055	+ z[--j] = 0x80 | (z[i]&0x3F);
	1056	+ z[--j] = 0xC0 | (z[i]>>6);
	1057	+ }else{
	1058	+ z[--j] = z[i];
	1059	+ }
	1060	+ }
	1061	+}
1032	1062
1033	1063	/*
1034	1064	** Shell-escape the given string. Append the result to a blob.
1035	1065	*/
1036	1066	void shell_escape(Blob *pBlob, const char *zIn){
1037	1067

	--- src/blob.c
	+++ src/blob.c
	@@ -1027,10 +1027,40 @@
1027	if( z[i]!='\r' ) z[j++] = z[i];
1028	}
1029	z[j] = 0;
1030	p->nUsed = j;
1031	}






























1032
1033	/*
1034	** Shell-escape the given string. Append the result to a blob.
1035	*/
1036	void shell_escape(Blob *pBlob, const char *zIn){
1037

	--- src/blob.c
	+++ src/blob.c
	@@ -1027,10 +1027,40 @@
1027	if( z[i]!='\r' ) z[j++] = z[i];
1028	}
1029	z[j] = 0;
1030	p->nUsed = j;
1031	}
1032
1033	/*
1034	** Convert blob from cp1252 to utf-8. As cp1252 is a superset
1035	** of iso8859-1, this is useful on UNIX as well.
1036	**
1037	** TODO: the bytes 0x80..0xBF need a special table, iso8859-1 works.
1038	*/
1039	void blob_cp1252_to_utf8(Blob *p){
1040	unsigned char *z = (unsigned char *)p->aData;
1041	int j = p->nUsed;
1042	int i, n;
1043	for(i=n=0; i<j; i++){
1044	if( z[i]>=0x80 ) n++;
1045	}
1046	j += n;
1047	if( j>=p->nAlloc ){
1048	blob_resize(p, j);
1049	z = (unsigned char *)p->aData;
1050	}
1051	p->nUsed = j;
1052	z[j] = 0;
1053	while( j>i ){
1054	if( z[--i]>=0x80 ){
1055	z[--j] = 0x80 | (z[i]&0x3F);
1056	z[--j] = 0xC0 | (z[i]>>6);
1057	}else{
1058	z[--j] = z[i];
1059	}
1060	}
1061	}
1062
1063	/*
1064	** Shell-escape the given string. Append the result to a blob.
1065	*/
1066	void shell_escape(Blob *pBlob, const char *zIn){
1067

M src/checkin.c

+16 -3

		--- src/checkin.c
		+++ src/checkin.c
		@@ -909,27 +909,40 @@
909	909	if( allOk ) return 0;
910	910	fUnicode = starts_with_utf16_bom(p, 0);
911	911	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
912	912	if( eType<-2){
913	913	const char *zWarning;
	914	+ const char *zConvert;
914	915	Blob ans;
915	916	char cReply;
916	917
917	918	if(eType==-4){
918	919	zWarning = "long lines";
	920	+ zConvert = "";
919	921	}else{
920	922	zWarning = "invalid UTF-8";
	923	+ zConvert = "c=convert/";
921	924	}
922	925	blob_zero(&ans);
923	926	file_relative_name(zFilename, &fname, 0);
924	927	zMsg = mprintf(
925		- "%s appears to be text, but contains %s. commit anyhow (y/N)? ",
926		- blob_str(&fname), zWarning);
	928	+ "%s appears to be text, but contains %s. commit anyhow (%sy/N)? ",
	929	+ blob_str(&fname), zWarning, zConvert);
927	930	prompt_user(zMsg, &ans);
928	931	fossil_free(zMsg);
929	932	cReply = blob_str(&ans)[0];
930		- if( cReply!='y' && cReply!='Y' ){
	933	+ if( *zConvert && (cReply=='c' || cReply=='C') ){
	934	+ char *zOrig = file_newname(zFilename, "original", 1);
	935	+ FILE *f;
	936	+ blob_write_to_file(p, zOrig);
	937	+ fossil_free(zOrig);
	938	+ f = fossil_fopen(zFilename, "wb");
	939	+ blob_cp1252_to_utf8(p);
	940	+ fwrite(blob_buffer(p), 1, blob_size(p), f);
	941	+ fclose(f);
	942	+ return 1;
	943	+ } else if( cReply!='y' && cReply!='Y' ){
931	944	fossil_fatal("Abandoning commit due to %s in %s",
932	945	zWarning, blob_str(&fname));
933	946	}
934	947	blob_reset(&ans);
935	948	eType +=4 ;
936	949

	--- src/checkin.c
	+++ src/checkin.c
	@@ -909,27 +909,40 @@
909	if( allOk ) return 0;
910	fUnicode = starts_with_utf16_bom(p, 0);
911	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
912	if( eType<-2){
913	const char *zWarning;

914	Blob ans;
915	char cReply;
916
917	if(eType==-4){
918	zWarning = "long lines";

919	}else{
920	zWarning = "invalid UTF-8";

921	}
922	blob_zero(&ans);
923	file_relative_name(zFilename, &fname, 0);
924	zMsg = mprintf(
925	"%s appears to be text, but contains %s. commit anyhow (y/N)? ",
926	blob_str(&fname), zWarning);
927	prompt_user(zMsg, &ans);
928	fossil_free(zMsg);
929	cReply = blob_str(&ans)[0];
930	if( cReply!='y' && cReply!='Y' ){










931	fossil_fatal("Abandoning commit due to %s in %s",
932	zWarning, blob_str(&fname));
933	}
934	blob_reset(&ans);
935	eType +=4 ;
936

	--- src/checkin.c
	+++ src/checkin.c
	@@ -909,27 +909,40 @@
909	if( allOk ) return 0;
910	fUnicode = starts_with_utf16_bom(p, 0);
911	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
912	if( eType<-2){
913	const char *zWarning;
914	const char *zConvert;
915	Blob ans;
916	char cReply;
917
918	if(eType==-4){
919	zWarning = "long lines";
920	zConvert = "";
921	}else{
922	zWarning = "invalid UTF-8";
923	zConvert = "c=convert/";
924	}
925	blob_zero(&ans);
926	file_relative_name(zFilename, &fname, 0);
927	zMsg = mprintf(
928	"%s appears to be text, but contains %s. commit anyhow (%sy/N)? ",
929	blob_str(&fname), zWarning, zConvert);
930	prompt_user(zMsg, &ans);
931	fossil_free(zMsg);
932	cReply = blob_str(&ans)[0];
933	if( *zConvert && (cReply=='c' || cReply=='C') ){
934	char *zOrig = file_newname(zFilename, "original", 1);
935	FILE *f;
936	blob_write_to_file(p, zOrig);
937	fossil_free(zOrig);
938	f = fossil_fopen(zFilename, "wb");
939	blob_cp1252_to_utf8(p);
940	fwrite(blob_buffer(p), 1, blob_size(p), f);
941	fclose(f);
942	return 1;
943	} else if( cReply!='y' && cReply!='Y' ){
944	fossil_fatal("Abandoning commit due to %s in %s",
945	zWarning, blob_str(&fname));
946	}
947	blob_reset(&ans);
948	eType +=4 ;
949