Fossil SCM

Pulled in a minor (doc) cleanup for the latin1 workaround.

stephan 2013-09-06 18:09 trunk

Commit 339f9f324fca2cf0509ca979c5328ce1e3f1fd6e

Parent aa6c2b1eb7ab60c…

1 file changed +12 -4

M src/cson_amalgamation.c

+12 -4

		--- src/cson_amalgamation.c
		+++ src/cson_amalgamation.c
		@@ -3801,16 +3801,24 @@
3801	3801	f460839cff85d4e4f1360b366bb2858cef1411ea,
3802	3802	which has what appears to be latin1-encoded
3803	3803	text. file(1) thinks it's a FORTRAN program.
3804	3804	*/
3805	3805	if((*pos != ch) && (0xfffd==ch)){
3806		- ch = *pos;
	3806	+ ch = *pos
	3807	+ /* We should arguably translate to '?', and
	3808	+ will if this problem ever comes up with a
	3809	+ non-latin1 encoding. For latin1 this
	3810	+ workaround incidentally corrects the output
	3811	+ to proper UTF8-escaped characters, and only
	3812	+ for that reason is it being kept around.
	3813	+ */;
3807	3814	/* MARKER("ch=%04x, pos=%04x\n", ch, pos); */
3808		- goto two_bytes;
	3815	+ goto assume_latin1;
3809	3816	}
3810		-#endif
	3817	+#else
3811	3818	assert( *pos == ch );
	3819	+#endif
3812	3820	escChar[1] = 0;
3813	3821	switch(ch)
3814	3822	{
3815	3823	case '\t': escChar[1] = 't'; break;
3816	3824	case '\r': escChar[1] = 'r'; break;
		@@ -3862,11 +3870,11 @@
3862	3870	continue;
3863	3871	}
3864	3872	else
3865	3873	{ /* UTF: transform it to \uXXXX */
3866	3874	#if defined(CSON_FOSSIL_MODE)
3867		- two_bytes:
	3875	+ assume_latin1:
3868	3876	#endif
3869	3877	memset(ubuf,0,UBLen);
3870	3878	rc = sprintf(ubuf, "\\u%04x",ch);
3871	3879	if( rc != 6 )
3872	3880	{
3873	3881

	--- src/cson_amalgamation.c
	+++ src/cson_amalgamation.c
	@@ -3801,16 +3801,24 @@
3801	f460839cff85d4e4f1360b366bb2858cef1411ea,
3802	which has what appears to be latin1-encoded
3803	text. file(1) thinks it's a FORTRAN program.
3804	*/
3805	if((*pos != ch) && (0xfffd==ch)){
3806	ch = *pos;







3807	/* MARKER("ch=%04x, pos=%04x\n", ch, pos); */
3808	goto two_bytes;
3809	}
3810	#endif
3811	assert( *pos == ch );

3812	escChar[1] = 0;
3813	switch(ch)
3814	{
3815	case '\t': escChar[1] = 't'; break;
3816	case '\r': escChar[1] = 'r'; break;
	@@ -3862,11 +3870,11 @@
3862	continue;
3863	}
3864	else
3865	{ /* UTF: transform it to \uXXXX */
3866	#if defined(CSON_FOSSIL_MODE)
3867	two_bytes:
3868	#endif
3869	memset(ubuf,0,UBLen);
3870	rc = sprintf(ubuf, "\\u%04x",ch);
3871	if( rc != 6 )
3872	{
3873

	--- src/cson_amalgamation.c
	+++ src/cson_amalgamation.c
	@@ -3801,16 +3801,24 @@
3801	f460839cff85d4e4f1360b366bb2858cef1411ea,
3802	which has what appears to be latin1-encoded
3803	text. file(1) thinks it's a FORTRAN program.
3804	*/
3805	if((*pos != ch) && (0xfffd==ch)){
3806	ch = *pos
3807	/* We should arguably translate to '?', and
3808	will if this problem ever comes up with a
3809	non-latin1 encoding. For latin1 this
3810	workaround incidentally corrects the output
3811	to proper UTF8-escaped characters, and only
3812	for that reason is it being kept around.
3813	*/;
3814	/* MARKER("ch=%04x, pos=%04x\n", ch, pos); */
3815	goto assume_latin1;
3816	}
3817	#else
3818	assert( *pos == ch );
3819	#endif
3820	escChar[1] = 0;
3821	switch(ch)
3822	{
3823	case '\t': escChar[1] = 't'; break;
3824	case '\r': escChar[1] = 'r'; break;
	@@ -3862,11 +3870,11 @@
3870	continue;
3871	}
3872	else
3873	{ /* UTF: transform it to \uXXXX */
3874	#if defined(CSON_FOSSIL_MODE)
3875	assume_latin1:
3876	#endif
3877	memset(ubuf,0,UBLen);
3878	rc = sprintf(ubuf, "\\u%04x",ch);
3879	if( rc != 6 )
3880	{
3881