Fossil SCM
Shrank the lead-byte table for invalid_utf8 by dropping the entries for 0x80 through 0xBF, and added a shortcut that rejects any lead byte in the range 0x80 to 0xBF inclusive without a table lookup
Commit
69328517f5726df77c55f842ad934a992f849036
Parent
5be2e9cf7d44838…
1 file changed
+4
-10
+4
-10
| --- src/lookslike.c | ||
| +++ src/lookslike.c | ||
| @@ -160,18 +160,10 @@ | ||
| 160 | 160 | #define US0A 0xFF, 0x00 /* for any other lead byte */ |
| 161 | 161 | |
| 162 | 162 | /* a table used for quick lookup of the definition that goes with a |
| 163 | 163 | * particular lead byte */ |
| 164 | 164 | static const unsigned char lb_tab[] = { |
| 165 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 166 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 167 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 168 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 169 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 170 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 171 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 172 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 173 | 165 | US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B, |
| 174 | 166 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 175 | 167 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 176 | 168 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 177 | 169 | US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B, |
| @@ -190,20 +182,22 @@ | ||
| 190 | 182 | if( n==0 ) return 0; /* Empty file -> OK */ |
| 191 | 183 | c = *z; |
| 192 | 184 | while( --n>0 ){ |
| 193 | 185 | c2 = c; |
| 194 | 186 | c = *++z; |
| 195 | - if( c2>=0x80 ){ | |
| 196 | - const unsigned char *def = &lb_tab[(2*c2)-0x100]; | |
| 187 | + if( c2>=0xC0 ){ | |
| 188 | + const unsigned char *def = &lb_tab[(2*c2)-0x180]; | |
| 197 | 189 | if( (c<*def) || (c>*++def) ){ |
| 198 | 190 | return LOOK_INVALID; /* Invalid UTF-8 */ |
| 199 | 191 | } |
| 200 | 192 | if( c2>=0xe0 ){ |
| 201 | 193 | c = (c2<<1)|3; |
| 202 | 194 | }else{ |
| 203 | 195 | c = ' '; |
| 204 | 196 | } |
| 197 | + }else if( c2>=0x80 ){ | |
| 198 | + return LOOK_INVALID; | |
| 205 | 199 | } |
| 206 | 200 | } |
| 207 | 201 | return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */ |
| 208 | 202 | } |
| 209 | 203 | |
| 210 | 204 |
| --- src/lookslike.c | |
| +++ src/lookslike.c | |
| @@ -160,18 +160,10 @@ | |
| 160 | #define US0A 0xFF, 0x00 /* for any other lead byte */ |
| 161 | |
| 162 | /* a table used for quick lookup of the definition that goes with a |
| 163 | * particular lead byte */ |
| 164 | static const unsigned char lb_tab[] = { |
| 165 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 166 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 167 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 168 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 169 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 170 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 171 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 172 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 173 | US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B, |
| 174 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 175 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 176 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 177 | US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B, |
| @@ -190,20 +182,22 @@ | |
| 190 | if( n==0 ) return 0; /* Empty file -> OK */ |
| 191 | c = *z; |
| 192 | while( --n>0 ){ |
| 193 | c2 = c; |
| 194 | c = *++z; |
| 195 | if( c2>=0x80 ){ |
| 196 | const unsigned char *def = &lb_tab[(2*c2)-0x100]; |
| 197 | if( (c<*def) || (c>*++def) ){ |
| 198 | return LOOK_INVALID; /* Invalid UTF-8 */ |
| 199 | } |
| 200 | if( c2>=0xe0 ){ |
| 201 | c = (c2<<1)|3; |
| 202 | }else{ |
| 203 | c = ' '; |
| 204 | } |
| 205 | } |
| 206 | } |
| 207 | return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */ |
| 208 | } |
| 209 | |
| 210 |
| --- src/lookslike.c | |
| +++ src/lookslike.c | |
| @@ -160,18 +160,10 @@ | |
| 160 | #define US0A 0xFF, 0x00 /* for any other lead byte */ |
| 161 | |
| 162 | /* a table used for quick lookup of the definition that goes with a |
| 163 | * particular lead byte */ |
| 164 | static const unsigned char lb_tab[] = { |
| 165 | US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B, |
| 166 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 167 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 168 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 169 | US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B, |
| @@ -190,20 +182,22 @@ | |
| 182 | if( n==0 ) return 0; /* Empty file -> OK */ |
| 183 | c = *z; |
| 184 | while( --n>0 ){ |
| 185 | c2 = c; |
| 186 | c = *++z; |
| 187 | if( c2>=0xC0 ){ |
| 188 | const unsigned char *def = &lb_tab[(2*c2)-0x180]; |
| 189 | if( (c<*def) || (c>*++def) ){ |
| 190 | return LOOK_INVALID; /* Invalid UTF-8 */ |
| 191 | } |
| 192 | if( c2>=0xe0 ){ |
| 193 | c = (c2<<1)|3; |
| 194 | }else{ |
| 195 | c = ' '; |
| 196 | } |
| 197 | }else if( c2>=0x80 ){ |
| 198 | return LOOK_INVALID; |
| 199 | } |
| 200 | } |
| 201 | return (c>=0x80) ? LOOK_INVALID : 0; /* Last byte must be ASCII. */ |
| 202 | } |
| 203 | |
| 204 |