Fossil SCM

Pulled in a minor (doc) cleanup for the latin1 workaround.

stephan 2013-09-06 18:09 trunk
Commit 339f9f324fca2cf0509ca979c5328ce1e3f1fd6e
1 file changed +12 -4
--- src/cson_amalgamation.c
+++ src/cson_amalgamation.c
@@ -3801,16 +3801,24 @@
38013801
f460839cff85d4e4f1360b366bb2858cef1411ea,
38023802
which has what appears to be latin1-encoded
38033803
text. file(1) thinks it's a FORTRAN program.
38043804
*/
38053805
if((*pos != ch) && (0xfffd==ch)){
3806
- ch = *pos;
3806
+ ch = *pos
3807
+ /* We should arguably translate to '?', and
3808
+ will if this problem ever comes up with a
3809
+ non-latin1 encoding. For latin1 this
3810
+ workaround incidentally corrects the output
3811
+ to proper UTF8-escaped characters, and only
3812
+ for that reason is it being kept around.
3813
+ */;
38073814
/* MARKER("ch=%04x, *pos=%04x\n", ch, *pos); */
3808
- goto two_bytes;
3815
+ goto assume_latin1;
38093816
}
3810
-#endif
3817
+#else
38113818
assert( *pos == ch );
3819
+#endif
38123820
escChar[1] = 0;
38133821
switch(ch)
38143822
{
38153823
case '\t': escChar[1] = 't'; break;
38163824
case '\r': escChar[1] = 'r'; break;
@@ -3862,11 +3870,11 @@
38623870
continue;
38633871
}
38643872
else
38653873
{ /* UTF: transform it to \uXXXX */
38663874
#if defined(CSON_FOSSIL_MODE)
3867
- two_bytes:
3875
+ assume_latin1:
38683876
#endif
38693877
memset(ubuf,0,UBLen);
38703878
rc = sprintf(ubuf, "\\u%04x",ch);
38713879
if( rc != 6 )
38723880
{
38733881
--- src/cson_amalgamation.c
+++ src/cson_amalgamation.c
@@ -3801,16 +3801,24 @@
3801 f460839cff85d4e4f1360b366bb2858cef1411ea,
3802 which has what appears to be latin1-encoded
3803 text. file(1) thinks it's a FORTRAN program.
3804 */
3805 if((*pos != ch) && (0xfffd==ch)){
3806 ch = *pos;
 
 
 
 
 
 
 
3807 /* MARKER("ch=%04x, *pos=%04x\n", ch, *pos); */
3808 goto two_bytes;
3809 }
3810 #endif
3811 assert( *pos == ch );
 
3812 escChar[1] = 0;
3813 switch(ch)
3814 {
3815 case '\t': escChar[1] = 't'; break;
3816 case '\r': escChar[1] = 'r'; break;
@@ -3862,11 +3870,11 @@
3862 continue;
3863 }
3864 else
3865 { /* UTF: transform it to \uXXXX */
3866 #if defined(CSON_FOSSIL_MODE)
3867 two_bytes:
3868 #endif
3869 memset(ubuf,0,UBLen);
3870 rc = sprintf(ubuf, "\\u%04x",ch);
3871 if( rc != 6 )
3872 {
3873
--- src/cson_amalgamation.c
+++ src/cson_amalgamation.c
@@ -3801,16 +3801,24 @@
3801 f460839cff85d4e4f1360b366bb2858cef1411ea,
3802 which has what appears to be latin1-encoded
3803 text. file(1) thinks it's a FORTRAN program.
3804 */
3805 if((*pos != ch) && (0xfffd==ch)){
3806 ch = *pos
3807 /* We should arguably translate to '?', and
3808 will if this problem ever comes up with a
3809 non-latin1 encoding. For latin1 this
3810 workaround incidentally corrects the output
3811 to proper UTF8-escaped characters, and only
3812 for that reason is it being kept around.
3813 */;
3814 /* MARKER("ch=%04x, *pos=%04x\n", ch, *pos); */
3815 goto assume_latin1;
3816 }
3817 #else
3818 assert( *pos == ch );
3819 #endif
3820 escChar[1] = 0;
3821 switch(ch)
3822 {
3823 case '\t': escChar[1] = 't'; break;
3824 case '\r': escChar[1] = 'r'; break;
@@ -3862,11 +3870,11 @@
3870 continue;
3871 }
3872 else
3873 { /* UTF: transform it to \uXXXX */
3874 #if defined(CSON_FOSSIL_MODE)
3875 assume_latin1:
3876 #endif
3877 memset(ubuf,0,UBLen);
3878 rc = sprintf(ubuf, "\\u%04x",ch);
3879 if( rc != 6 )
3880 {
3881

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button