Fossil SCM
More optimizations: every byte from 0x80 through 0xBF is invalid as a lead byte, so reject those with a simple range check; this also allows the invalid_utf8 lead-byte table to shrink even further.
Commit
6eb9a30c085d755122b7f875ade5636fdb7300b2
Parent
ec7f6b2e71c5001…
1 file changed
+3
-9
+3
-9
| --- src/lookslike.c | ||
| +++ src/lookslike.c | ||
| @@ -155,18 +155,10 @@ | ||
| 155 | 155 | #define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */ |
| 156 | 156 | |
| 157 | 157 | /* a table used for quick lookup of the definition that goes with a |
| 158 | 158 | * particular lead byte */ |
| 159 | 159 | static const unsigned char lb_tab[] = { |
| 160 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 161 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 162 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 163 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 164 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 165 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 166 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 167 | - US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, | |
| 168 | 160 | US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B, |
| 169 | 161 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 170 | 162 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 171 | 163 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 172 | 164 | US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B, |
| @@ -186,13 +178,15 @@ | ||
| 186 | 178 | while( n>0 ){ |
| 187 | 179 | /* ascii is trivial */ |
| 188 | 180 | if( *z<0x80 ){ |
| 189 | 181 | ++z; |
| 190 | 182 | --n; |
| 183 | + }else if( *z<0xC0 ){ | |
| 184 | + return LOOK_INVALID; | |
| 191 | 185 | }else{ |
| 192 | 186 | /* get the definition for this lead byte */ |
| 193 | - const unsigned char* def = &lb_tab[(3 * *z++)-0x180]; | |
| 187 | + const unsigned char* def = &lb_tab[(3 * *z++)-0x240]; | |
| 194 | 188 | unsigned char len; |
| 195 | 189 | |
| 196 | 190 | /* get the expected sequence length */ |
| 197 | 191 | len = *def; |
| 198 | 192 | /* if there aren't enough bytes left, return invalid */ |
| 199 | 193 |
| --- src/lookslike.c | |
| +++ src/lookslike.c | |
| @@ -155,18 +155,10 @@ | |
| 155 | #define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */ |
| 156 | |
| 157 | /* a table used for quick lookup of the definition that goes with a |
| 158 | * particular lead byte */ |
| 159 | static const unsigned char lb_tab[] = { |
| 160 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 161 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 162 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 163 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 164 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 165 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 166 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 167 | US0A, US0A, US0A, US0A, US0A, US0A, US0A, US0A, |
| 168 | US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B, |
| 169 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 170 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 171 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 172 | US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B, |
| @@ -186,13 +178,15 @@ | |
| 186 | while( n>0 ){ |
| 187 | /* ascii is trivial */ |
| 188 | if( *z<0x80 ){ |
| 189 | ++z; |
| 190 | --n; |
| 191 | }else{ |
| 192 | /* get the definition for this lead byte */ |
| 193 | const unsigned char* def = &lb_tab[(3 * *z++)-0x180]; |
| 194 | unsigned char len; |
| 195 | |
| 196 | /* get the expected sequence length */ |
| 197 | len = *def; |
| 198 | /* if there aren't enough bytes left, return invalid */ |
| 199 |
| --- src/lookslike.c | |
| +++ src/lookslike.c | |
| @@ -155,18 +155,10 @@ | |
| 155 | #define US0A 0xFF, 0xFF, 0x00 /* for any other lead byte */ |
| 156 | |
| 157 | /* a table used for quick lookup of the definition that goes with a |
| 158 | * particular lead byte */ |
| 159 | static const unsigned char lb_tab[] = { |
| 160 | US2A, US0A, US2B, US2B, US2B, US2B, US2B, US2B, |
| 161 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 162 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 163 | US2B, US2B, US2B, US2B, US2B, US2B, US2B, US2B, |
| 164 | US3A, US3B, US3B, US3B, US3B, US3B, US3B, US3B, |
| @@ -186,13 +178,15 @@ | |
| 178 | while( n>0 ){ |
| 179 | /* ascii is trivial */ |
| 180 | if( *z<0x80 ){ |
| 181 | ++z; |
| 182 | --n; |
| 183 | }else if( *z<0xC0 ){ |
| 184 | return LOOK_INVALID; |
| 185 | }else{ |
| 186 | /* get the definition for this lead byte */ |
| 187 | const unsigned char* def = &lb_tab[(3 * *z++)-0x240]; |
| 188 | unsigned char len; |
| 189 | |
| 190 | /* get the expected sequence length */ |
| 191 | len = *def; |
| 192 | /* if there aren't enough bytes left, return invalid */ |
| 193 |