Fossil SCM
make local aDia[] and aChar[] tables "static const".
Commit
5b26a50e1ba7bd016266758876e3ebc99317e32f
Parent
c2a9c1f7f859925…
1 file changed
+40
-40
+40
-40
| --- src/unicode.c | ||
| +++ src/unicode.c | ||
| @@ -31,15 +31,15 @@ | ||
| 31 | 31 | int unicode_isalnum(int c){ |
| 32 | 32 | /* Each unsigned integer in the following array corresponds to a contiguous |
| 33 | 33 | ** range of unicode codepoints that are not either letters or numbers (i.e. |
| 34 | 34 | ** codepoints for which this function should return 0). |
| 35 | 35 | ** |
| 36 | - ** The most significant 22 bits in each 32-bit value contain the first | |
| 36 | + ** The most significant 22 bits in each 32-bit value contain the first | |
| 37 | 37 | ** codepoint in the range. The least significant 10 bits are used to store |
| 38 | - ** the size of the range (always at least 1). In other words, the value | |
| 39 | - ** ((C<<22) + N) represents a range of N codepoints starting with codepoint | |
| 40 | - ** C. It is not possible to represent a range larger than 1023 codepoints | |
| 38 | + ** the size of the range (always at least 1). In other words, the value | |
| 39 | + ** ((C<<22) + N) represents a range of N codepoints starting with codepoint | |
| 40 | + ** C. It is not possible to represent a range larger than 1023 codepoints | |
| 41 | 41 | ** using this format. |
| 42 | 42 | */ |
| 43 | 43 | static const unsigned int aEntry[] = { |
| 44 | 44 | 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, |
| 45 | 45 | 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, |
| @@ -160,35 +160,35 @@ | ||
| 160 | 160 | ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER |
| 161 | 161 | ** E"). The results of passing a codepoint that corresponds to an |
| 162 | 162 | ** uppercase letter are undefined. |
| 163 | 163 | */ |
| 164 | 164 | static int unicode_remove_diacritic(int c){ |
| 165 | - unsigned short aDia[] = { | |
| 166 | - 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, | |
| 167 | - 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, | |
| 168 | - 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, | |
| 169 | - 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, | |
| 170 | - 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, | |
| 171 | - 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, | |
| 172 | - 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, | |
| 173 | - 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, | |
| 174 | - 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, | |
| 175 | - 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, | |
| 176 | - 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, | |
| 177 | - 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, | |
| 178 | - 62924, 63050, 63082, 63274, 63390, | |
| 165 | + static const unsigned short aDia[] = { | |
| 166 | + 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, | |
| 167 | + 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, | |
| 168 | + 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, | |
| 169 | + 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, | |
| 170 | + 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, | |
| 171 | + 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, | |
| 172 | + 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, | |
| 173 | + 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, | |
| 174 | + 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, | |
| 175 | + 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, | |
| 176 | + 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, | |
| 177 | + 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, | |
| 178 | + 62924, 63050, 63082, 63274, 63390, | |
| 179 | 179 | }; |
| 180 | - char aChar[] = { | |
| 181 | - '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', | |
| 182 | - 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', | |
| 183 | - 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', | |
| 184 | - 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', | |
| 185 | - 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', | |
| 186 | - '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', | |
| 187 | - 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', | |
| 188 | - 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', | |
| 189 | - 'e', 'i', 'o', 'u', 'y', | |
| 180 | + static const char aChar[] = { | |
| 181 | + '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', | |
| 182 | + 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', | |
| 183 | + 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', | |
| 184 | + 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', | |
| 185 | + 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', | |
| 186 | + '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', | |
| 187 | + 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', | |
| 188 | + 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', | |
| 189 | + 'e', 'i', 'o', 'u', 'y', | |
| 190 | 190 | }; |
| 191 | 191 | |
| 192 | 192 | unsigned int key = (((unsigned int)c)<<3) | 0x00000007; |
| 193 | 193 | int iRes = 0; |
| 194 | 194 | int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; |
| @@ -305,23 +305,23 @@ | ||
| 305 | 305 | {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, |
| 306 | 306 | {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, |
| 307 | 307 | {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, |
| 308 | 308 | {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, |
| 309 | 309 | {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, |
| 310 | - {65313, 14, 26}, | |
| 310 | + {65313, 14, 26}, | |
| 311 | 311 | }; |
| 312 | 312 | static const unsigned short aiOff[] = { |
| 313 | - 1, 2, 8, 15, 16, 26, 28, 32, | |
| 314 | - 37, 38, 40, 48, 63, 64, 69, 71, | |
| 315 | - 79, 80, 116, 202, 203, 205, 206, 207, | |
| 316 | - 209, 210, 211, 213, 214, 217, 218, 219, | |
| 317 | - 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, | |
| 318 | - 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, | |
| 319 | - 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, | |
| 320 | - 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, | |
| 321 | - 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, | |
| 322 | - 65514, 65521, 65527, 65528, 65529, | |
| 313 | + 1, 2, 8, 15, 16, 26, 28, 32, | |
| 314 | + 37, 38, 40, 48, 63, 64, 69, 71, | |
| 315 | + 79, 80, 116, 202, 203, 205, 206, 207, | |
| 316 | + 209, 210, 211, 213, 214, 217, 218, 219, | |
| 317 | + 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, | |
| 318 | + 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, | |
| 319 | + 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, | |
| 320 | + 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, | |
| 321 | + 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, | |
| 322 | + 65514, 65521, 65527, 65528, 65529, | |
| 323 | 323 | }; |
| 324 | 324 | |
| 325 | 325 | int ret = c; |
| 326 | 326 | |
| 327 | 327 | assert( c>=0 ); |
| @@ -354,12 +354,12 @@ | ||
| 354 | 354 | } |
| 355 | 355 | } |
| 356 | 356 | |
| 357 | 357 | if( bRemoveDiacritic ) ret = unicode_remove_diacritic(ret); |
| 358 | 358 | } |
| 359 | - | |
| 359 | + | |
| 360 | 360 | else if( c>=66560 && c<66600 ){ |
| 361 | 361 | ret = c + 40; |
| 362 | 362 | } |
| 363 | 363 | |
| 364 | 364 | return ret; |
| 365 | 365 | } |
| 366 | 366 |
| --- src/unicode.c | |
| +++ src/unicode.c | |
| @@ -31,15 +31,15 @@ | |
| 31 | int unicode_isalnum(int c){ |
| 32 | /* Each unsigned integer in the following array corresponds to a contiguous |
| 33 | ** range of unicode codepoints that are not either letters or numbers (i.e. |
| 34 | ** codepoints for which this function should return 0). |
| 35 | ** |
| 36 | ** The most significant 22 bits in each 32-bit value contain the first |
| 37 | ** codepoint in the range. The least significant 10 bits are used to store |
| 38 | ** the size of the range (always at least 1). In other words, the value |
| 39 | ** ((C<<22) + N) represents a range of N codepoints starting with codepoint |
| 40 | ** C. It is not possible to represent a range larger than 1023 codepoints |
| 41 | ** using this format. |
| 42 | */ |
| 43 | static const unsigned int aEntry[] = { |
| 44 | 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, |
| 45 | 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, |
| @@ -160,35 +160,35 @@ | |
| 160 | ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER |
| 161 | ** E"). The results of passing a codepoint that corresponds to an |
| 162 | ** uppercase letter are undefined. |
| 163 | */ |
| 164 | static int unicode_remove_diacritic(int c){ |
| 165 | unsigned short aDia[] = { |
| 166 | 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, |
| 167 | 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, |
| 168 | 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, |
| 169 | 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, |
| 170 | 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, |
| 171 | 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, |
| 172 | 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, |
| 173 | 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, |
| 174 | 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, |
| 175 | 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, |
| 176 | 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, |
| 177 | 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, |
| 178 | 62924, 63050, 63082, 63274, 63390, |
| 179 | }; |
| 180 | char aChar[] = { |
| 181 | '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', |
| 182 | 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', |
| 183 | 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', |
| 184 | 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', |
| 185 | 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', |
| 186 | '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', |
| 187 | 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', |
| 188 | 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', |
| 189 | 'e', 'i', 'o', 'u', 'y', |
| 190 | }; |
| 191 | |
| 192 | unsigned int key = (((unsigned int)c)<<3) | 0x00000007; |
| 193 | int iRes = 0; |
| 194 | int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; |
| @@ -305,23 +305,23 @@ | |
| 305 | {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, |
| 306 | {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, |
| 307 | {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, |
| 308 | {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, |
| 309 | {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, |
| 310 | {65313, 14, 26}, |
| 311 | }; |
| 312 | static const unsigned short aiOff[] = { |
| 313 | 1, 2, 8, 15, 16, 26, 28, 32, |
| 314 | 37, 38, 40, 48, 63, 64, 69, 71, |
| 315 | 79, 80, 116, 202, 203, 205, 206, 207, |
| 316 | 209, 210, 211, 213, 214, 217, 218, 219, |
| 317 | 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, |
| 318 | 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, |
| 319 | 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, |
| 320 | 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, |
| 321 | 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, |
| 322 | 65514, 65521, 65527, 65528, 65529, |
| 323 | }; |
| 324 | |
| 325 | int ret = c; |
| 326 | |
| 327 | assert( c>=0 ); |
| @@ -354,12 +354,12 @@ | |
| 354 | } |
| 355 | } |
| 356 | |
| 357 | if( bRemoveDiacritic ) ret = unicode_remove_diacritic(ret); |
| 358 | } |
| 359 | |
| 360 | else if( c>=66560 && c<66600 ){ |
| 361 | ret = c + 40; |
| 362 | } |
| 363 | |
| 364 | return ret; |
| 365 | } |
| 366 |
| --- src/unicode.c | |
| +++ src/unicode.c | |
| @@ -31,15 +31,15 @@ | |
| 31 | int unicode_isalnum(int c){ |
| 32 | /* Each unsigned integer in the following array corresponds to a contiguous |
| 33 | ** range of unicode codepoints that are not either letters or numbers (i.e. |
| 34 | ** codepoints for which this function should return 0). |
| 35 | ** |
| 36 | ** The most significant 22 bits in each 32-bit value contain the first |
| 37 | ** codepoint in the range. The least significant 10 bits are used to store |
| 38 | ** the size of the range (always at least 1). In other words, the value |
| 39 | ** ((C<<22) + N) represents a range of N codepoints starting with codepoint |
| 40 | ** C. It is not possible to represent a range larger than 1023 codepoints |
| 41 | ** using this format. |
| 42 | */ |
| 43 | static const unsigned int aEntry[] = { |
| 44 | 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, |
| 45 | 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, |
| @@ -160,35 +160,35 @@ | |
| 160 | ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER |
| 161 | ** E"). The results of passing a codepoint that corresponds to an |
| 162 | ** uppercase letter are undefined. |
| 163 | */ |
| 164 | static int unicode_remove_diacritic(int c){ |
| 165 | static const unsigned short aDia[] = { |
| 166 | 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, |
| 167 | 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, |
| 168 | 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, |
| 169 | 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, |
| 170 | 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, |
| 171 | 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, |
| 172 | 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, |
| 173 | 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, |
| 174 | 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, |
| 175 | 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, |
| 176 | 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, |
| 177 | 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, |
| 178 | 62924, 63050, 63082, 63274, 63390, |
| 179 | }; |
| 180 | static const char aChar[] = { |
| 181 | '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', |
| 182 | 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', |
| 183 | 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', |
| 184 | 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', |
| 185 | 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', |
| 186 | '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', |
| 187 | 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', |
| 188 | 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', |
| 189 | 'e', 'i', 'o', 'u', 'y', |
| 190 | }; |
| 191 | |
| 192 | unsigned int key = (((unsigned int)c)<<3) | 0x00000007; |
| 193 | int iRes = 0; |
| 194 | int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; |
| @@ -305,23 +305,23 @@ | |
| 305 | {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, |
| 306 | {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, |
| 307 | {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, |
| 308 | {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, |
| 309 | {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, |
| 310 | {65313, 14, 26}, |
| 311 | }; |
| 312 | static const unsigned short aiOff[] = { |
| 313 | 1, 2, 8, 15, 16, 26, 28, 32, |
| 314 | 37, 38, 40, 48, 63, 64, 69, 71, |
| 315 | 79, 80, 116, 202, 203, 205, 206, 207, |
| 316 | 209, 210, 211, 213, 214, 217, 218, 219, |
| 317 | 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, |
| 318 | 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, |
| 319 | 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, |
| 320 | 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, |
| 321 | 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, |
| 322 | 65514, 65521, 65527, 65528, 65529, |
| 323 | }; |
| 324 | |
| 325 | int ret = c; |
| 326 | |
| 327 | assert( c>=0 ); |
| @@ -354,12 +354,12 @@ | |
| 354 | } |
| 355 | } |
| 356 | |
| 357 | if( bRemoveDiacritic ) ret = unicode_remove_diacritic(ret); |
| 358 | } |
| 359 | |
| 360 | else if( c>=66560 && c<66600 ){ |
| 361 | ret = c + 40; |
| 362 | } |
| 363 | |
| 364 | return ret; |
| 365 | } |
| 366 |