Fossil SCM
pulled in a minor (doc) cleanup for the latin1 workaround.
Commit
339f9f324fca2cf0509ca979c5328ce1e3f1fd6e
Parent
aa6c2b1eb7ab60c…
1 file changed: +12 -4
| --- src/cson_amalgamation.c | ||
| +++ src/cson_amalgamation.c | ||
| @@ -3801,16 +3801,24 @@ | ||
| 3801 | 3801 | f460839cff85d4e4f1360b366bb2858cef1411ea, |
| 3802 | 3802 | which has what appears to be latin1-encoded |
| 3803 | 3803 | text. file(1) thinks it's a FORTRAN program. |
| 3804 | 3804 | */ |
| 3805 | 3805 | if((*pos != ch) && (0xfffd==ch)){ |
| 3806 | - ch = *pos; | |
| 3806 | + ch = *pos | |
| 3807 | + /* We should arguably translate to '?', and | |
| 3808 | + will if this problem ever comes up with a | |
| 3809 | + non-latin1 encoding. For latin1 this | |
| 3810 | + workaround incidentally corrects the output | |
| 3811 | + to proper UTF8-escaped characters, and only | |
| 3812 | + for that reason is it being kept around. | |
| 3813 | + */; | |
| 3807 | 3814 | /* MARKER("ch=%04x, *pos=%04x\n", ch, *pos); */ |
| 3808 | - goto two_bytes; | |
| 3815 | + goto assume_latin1; | |
| 3809 | 3816 | } |
| 3810 | -#endif | |
| 3817 | +#else | |
| 3811 | 3818 | assert( *pos == ch ); |
| 3819 | +#endif | |
| 3812 | 3820 | escChar[1] = 0; |
| 3813 | 3821 | switch(ch) |
| 3814 | 3822 | { |
| 3815 | 3823 | case '\t': escChar[1] = 't'; break; |
| 3816 | 3824 | case '\r': escChar[1] = 'r'; break; |
| @@ -3862,11 +3870,11 @@ | ||
| 3862 | 3870 | continue; |
| 3863 | 3871 | } |
| 3864 | 3872 | else |
| 3865 | 3873 | { /* UTF: transform it to \uXXXX */ |
| 3866 | 3874 | #if defined(CSON_FOSSIL_MODE) |
| 3867 | - two_bytes: | |
| 3875 | + assume_latin1: | |
| 3868 | 3876 | #endif |
| 3869 | 3877 | memset(ubuf,0,UBLen); |
| 3870 | 3878 | rc = sprintf(ubuf, "\\u%04x",ch); |
| 3871 | 3879 | if( rc != 6 ) |
| 3872 | 3880 | { |
| 3873 | 3881 |
| --- src/cson_amalgamation.c | |
| +++ src/cson_amalgamation.c | |
| @@ -3801,16 +3801,24 @@ | |
| 3801 | f460839cff85d4e4f1360b366bb2858cef1411ea, |
| 3802 | which has what appears to be latin1-encoded |
| 3803 | text. file(1) thinks it's a FORTRAN program. |
| 3804 | */ |
| 3805 | if((*pos != ch) && (0xfffd==ch)){ |
| 3806 | ch = *pos; |
| 3807 | /* MARKER("ch=%04x, *pos=%04x\n", ch, *pos); */ |
| 3808 | goto two_bytes; |
| 3809 | } |
| 3810 | #endif |
| 3811 | assert( *pos == ch ); |
| 3812 | escChar[1] = 0; |
| 3813 | switch(ch) |
| 3814 | { |
| 3815 | case '\t': escChar[1] = 't'; break; |
| 3816 | case '\r': escChar[1] = 'r'; break; |
| @@ -3862,11 +3870,11 @@ | |
| 3862 | continue; |
| 3863 | } |
| 3864 | else |
| 3865 | { /* UTF: transform it to \uXXXX */ |
| 3866 | #if defined(CSON_FOSSIL_MODE) |
| 3867 | two_bytes: |
| 3868 | #endif |
| 3869 | memset(ubuf,0,UBLen); |
| 3870 | rc = sprintf(ubuf, "\\u%04x",ch); |
| 3871 | if( rc != 6 ) |
| 3872 | { |
| 3873 |
| --- src/cson_amalgamation.c | |
| +++ src/cson_amalgamation.c | |
| @@ -3801,16 +3801,24 @@ | |
| 3801 | f460839cff85d4e4f1360b366bb2858cef1411ea, |
| 3802 | which has what appears to be latin1-encoded |
| 3803 | text. file(1) thinks it's a FORTRAN program. |
| 3804 | */ |
| 3805 | if((*pos != ch) && (0xfffd==ch)){ |
| 3806 | ch = *pos |
| 3807 | /* We should arguably translate to '?', and |
| 3808 | will if this problem ever comes up with a |
| 3809 | non-latin1 encoding. For latin1 this |
| 3810 | workaround incidentally corrects the output |
| 3811 | to proper UTF8-escaped characters, and only |
| 3812 | for that reason is it being kept around. |
| 3813 | */; |
| 3814 | /* MARKER("ch=%04x, *pos=%04x\n", ch, *pos); */ |
| 3815 | goto assume_latin1; |
| 3816 | } |
| 3817 | #else |
| 3818 | assert( *pos == ch ); |
| 3819 | #endif |
| 3820 | escChar[1] = 0; |
| 3821 | switch(ch) |
| 3822 | { |
| 3823 | case '\t': escChar[1] = 't'; break; |
| 3824 | case '\r': escChar[1] = 'r'; break; |
| @@ -3862,11 +3870,11 @@ | |
| 3870 | continue; |
| 3871 | } |
| 3872 | else |
| 3873 | { /* UTF: transform it to \uXXXX */ |
| 3874 | #if defined(CSON_FOSSIL_MODE) |
| 3875 | assume_latin1: |
| 3876 | #endif |
| 3877 | memset(ubuf,0,UBLen); |
| 3878 | rc = sprintf(ubuf, "\\u%04x",ch); |
| 3879 | if( rc != 6 ) |
| 3880 | { |
| 3881 |