|
1
|
/* |
|
2
|
** Copyright (c) 2013 D. Richard Hipp |
|
3
|
** |
|
4
|
** This program is free software; you can redistribute it and/or |
|
5
|
** modify it under the terms of the Simplified BSD License (also |
|
6
|
** known as the "2-Clause License" or "FreeBSD License".) |
|
7
|
** |
|
8
|
** This program is distributed in the hope that it will be useful, |
|
9
|
** but without any warranty; without even the implied warranty of |
|
10
|
** merchantability or fitness for a particular purpose. |
|
11
|
** |
|
12
|
** Author contact information: |
|
13
|
** [email protected] |
|
14
|
** http://www.hwaci.com/drh/ |
|
15
|
** |
|
16
|
******************************************************************************* |
|
17
|
** |
|
18
|
** This file is copied from ext/fts5/fts5_unicode2.c of SQLite3 with |
|
19
|
** minor changes. |
|
20
|
*/ |
|
21
|
#include "config.h" |
|
22
|
#include "unicode.h" |
|
23
|
|
|
24
|
/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
** is less than zero.
**
** Codepoints at or above (1<<22) are not covered by the lookup table
** and are always reported as alphanumeric (the function returns 1).
*/
int unicode_isalnum(int c){
  /* Each unsigned integer in the following array corresponds to a contiguous
  ** range of unicode codepoints that are not either letters or numbers (i.e.
  ** codepoints for which this function should return 0).
  **
  ** The most significant 22 bits in each 32-bit value contain the first
  ** codepoint in the range. The least significant 10 bits are used to store
  ** the size of the range (always at least 1). In other words, the value
  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
  ** C. It is not possible to represent a range larger than 1023 codepoints
  ** using this format.
  **
  ** The array is sorted in ascending order so that it can be binary
  ** searched.
  */
  static const unsigned int aEntry[] = {
    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163403,
    0x00164437, 0x0017CC02, 0x00180020, 0x00192C15, 0x0019A804,
    0x0019C001, 0x001B5001, 0x001B580F, 0x001B9C07, 0x001BF402,
    0x001C000E, 0x001C3C01, 0x001C4401, 0x001CC01B, 0x001E980B,
    0x001FAC09, 0x001FD804, 0x001FF403, 0x00205804, 0x00206C09,
    0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, 0x00217801,
    0x00222001, 0x00224002, 0x00225C09, 0x0023283A, 0x0024E803,
    0x0024F812, 0x00254407, 0x00258804, 0x0025C001, 0x00260403,
    0x0026F001, 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01,
    0x00278802, 0x0027C802, 0x0027E802, 0x0027F402, 0x00280403,
    0x0028F001, 0x0028F805, 0x00291C02, 0x00292C03, 0x00294401,
    0x0029C002, 0x0029D402, 0x002A0403, 0x002AF001, 0x002AF808,
    0x002B1C03, 0x002B2C03, 0x002B8802, 0x002BC002, 0x002BE806,
    0x002C0403, 0x002CF001, 0x002CF807, 0x002D1C02, 0x002D2C03,
    0x002D5403, 0x002D8802, 0x002DC001, 0x002E0801, 0x002EF805,
    0x002F1803, 0x002F2804, 0x002F5C01, 0x002FCC08, 0x00300005,
    0x0030F001, 0x0030F807, 0x00311803, 0x00312804, 0x00315402,
    0x00318802, 0x0031DC01, 0x0031FC01, 0x00320404, 0x0032F001,
    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
    0x0033CC01, 0x00340004, 0x0034EC02, 0x0034F807, 0x00351803,
    0x00352804, 0x00353C01, 0x00355C01, 0x00358802, 0x0035E401,
    0x00360403, 0x00372801, 0x00373C06, 0x00375801, 0x00376008,
    0x0037C803, 0x0038C401, 0x0038D007, 0x0038FC01, 0x00391C09,
    0x00396802, 0x003AC401, 0x003AD009, 0x003B2007, 0x003C041F,
    0x003CD00C, 0x003DC417, 0x003E340B, 0x003E6424, 0x003EF80F,
    0x003F380D, 0x0040AC14, 0x00412806, 0x00415804, 0x00417803,
    0x00418803, 0x00419C07, 0x0041C404, 0x0042080C, 0x00423C01,
    0x00426806, 0x0043EC01, 0x004D740C, 0x004E400A, 0x00500001,
    0x0059B402, 0x005A0001, 0x005A6C02, 0x005BAC03, 0x005C4804,
    0x005CC805, 0x005D4802, 0x005DC802, 0x005ED023, 0x005F6004,
    0x005F7401, 0x00600010, 0x00621402, 0x0062A401, 0x0064800C,
    0x0064C00C, 0x00650001, 0x00651002, 0x00677822, 0x00685C05,
    0x00687802, 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007,
    0x006AA006, 0x006AC02E, 0x006B800C, 0x006C0005, 0x006CD011,
    0x006D3802, 0x006D6829, 0x006E840D, 0x006F980E, 0x006FF004,
    0x00709014, 0x0070EC05, 0x0071F802, 0x00730008, 0x00734019,
    0x0073B401, 0x0073D001, 0x0073DC03, 0x00770040, 0x007EF401,
    0x007EFC03, 0x007F3403, 0x007F7403, 0x007FB403, 0x007FF402,
    0x00800065, 0x0081980A, 0x0081E805, 0x00822805, 0x00828022,
    0x00834021, 0x00840002, 0x00840C04, 0x00842002, 0x00845001,
    0x00845803, 0x00847806, 0x00849401, 0x00849C01, 0x0084A401,
    0x0084B801, 0x0084E802, 0x00850005, 0x00852804, 0x00853C01,
    0x00862802, 0x0086429A, 0x0091000B, 0x0092704E, 0x00940276,
    0x009E53E0, 0x00ADD88A, 0x00B39406, 0x00B3BC03, 0x00B3E404,
    0x00B3F802, 0x00B5C001, 0x00B5FC01, 0x00B7804F, 0x00B8C02E,
    0x00BA001A, 0x00BA6C59, 0x00BC00D6, 0x00BFC015, 0x00C02019,
    0x00C0A807, 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001,
    0x00C3EC01, 0x00C64002, 0x00C6580A, 0x00C70026, 0x00C7BC01,
    0x00C8001F, 0x00C8A81E, 0x00C94001, 0x00C98020, 0x00CA2827,
    0x00CB0140, 0x01370040, 0x02924037, 0x0293F802, 0x02983403,
    0x0299BC10, 0x029A7802, 0x029BC008, 0x029C0017, 0x029C8002,
    0x029E2402, 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C0A,
    0x02A0D804, 0x02A1D004, 0x02A20002, 0x02A2D012, 0x02A33802,
    0x02A38012, 0x02A3E003, 0x02A3F001, 0x02A3FC01, 0x02A4980A,
    0x02A51C0D, 0x02A57C01, 0x02A60004, 0x02A6CC1B, 0x02A77802,
    0x02A79401, 0x02A8A40E, 0x02A90C01, 0x02A93002, 0x02A97004,
    0x02A9DC03, 0x02A9EC03, 0x02AAC001, 0x02AAC803, 0x02AADC02,
    0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, 0x02ABD402,
    0x02AD6C01, 0x02ADA802, 0x02AF8C0B, 0x03600001, 0x036DFC02,
    0x036FFC02, 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC821,
    0x03F4F812, 0x03F64002, 0x03F72008, 0x03F7F01E, 0x03F88033,
    0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807,
    0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405,
    0x04040003, 0x0404DC09, 0x0405E411, 0x04063003, 0x0406400D,
    0x04068001, 0x0407402E, 0x040B8001, 0x040DD805, 0x040E7C01,
    0x040F4001, 0x0415BC01, 0x04215C01, 0x0421DC02, 0x04247C01,
    0x0424FC01, 0x04280403, 0x04281402, 0x04283004, 0x0428E003,
    0x0428FC01, 0x04294009, 0x0429FC01, 0x042B2001, 0x042B9402,
    0x042BC007, 0x042CE407, 0x042E6404, 0x04349004, 0x0435A406,
    0x04363802, 0x043AAC03, 0x043B4009, 0x043BE806, 0x043D180B,
    0x043D5405, 0x043E0808, 0x04400003, 0x0440E016, 0x0441C001,
    0x0441CC02, 0x0441FC04, 0x0442C013, 0x04433401, 0x04440003,
    0x04449C0E, 0x04450004, 0x04451402, 0x0445CC03, 0x04460003,
    0x0446CC0E, 0x0447140B, 0x04476C01, 0x04477403, 0x0448B013,
    0x04490401, 0x044AA401, 0x044B7C0C, 0x044C0004, 0x044CEC02,
    0x044CF807, 0x044D1C02, 0x044D2C03, 0x044D5C01, 0x044D8802,
    0x044D9807, 0x044DC005, 0x044EE009, 0x044F0801, 0x044F1401,
    0x044F1C04, 0x044F3005, 0x044F4801, 0x044F5002, 0x044F5C02,
    0x044F8402, 0x0450D412, 0x04512C05, 0x04516802, 0x04517402,
    0x0452C014, 0x04531801, 0x0456BC07, 0x0456E020, 0x04577002,
    0x0458C014, 0x0459800D, 0x045AAC0D, 0x045AE401, 0x045C740F,
    0x045CF004, 0x0460B010, 0x0464C006, 0x0464DC02, 0x0464EC04,
    0x04650001, 0x04650805, 0x04674407, 0x04676807, 0x04678801,
    0x04679001, 0x0468040A, 0x0468CC07, 0x0468EC0D, 0x0469440B,
    0x046A2813, 0x046A7805, 0x046C000A, 0x046D8008, 0x046F8401,
    0x0470BC08, 0x0470E008, 0x04710405, 0x0471C002, 0x04724816,
    0x0472A40E, 0x0474C406, 0x0474E801, 0x0474F002, 0x0474FC07,
    0x04751C01, 0x04762805, 0x04764002, 0x04764C05, 0x047BCC06,
    0x047C0002, 0x047C0C01, 0x047CD007, 0x047CF812, 0x047D6801,
    0x047F541D, 0x047FFC01, 0x0491C005, 0x04BFC402, 0x04D0C011,
    0x04D11C0F, 0x05847812, 0x05A9B802, 0x05ABC006, 0x05ACC010,
    0x05AD1002, 0x05B5B403, 0x05BA5C04, 0x05BD3C01, 0x05BD4437,
    0x05BE3C04, 0x05BF8801, 0x05BF9001, 0x05BFC002, 0x06F27008,
    0x073000F0, 0x0733E803, 0x073401B4, 0x073AE817, 0x073B8011,
    0x073C002E, 0x073CC017, 0x073D4074, 0x074000F6, 0x07440027,
    0x0744A4C2, 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01,
    0x075BEC01, 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01,
    0x075E2401, 0x075EA401, 0x075F0C01, 0x0760028C, 0x076A6C05,
    0x076A840F, 0x07800007, 0x07802011, 0x07806C07, 0x07808C02,
    0x07809805, 0x07823C01, 0x0784C007, 0x07853C01, 0x078AB801,
    0x078BB004, 0x078BFC01, 0x0793B004, 0x0797B802, 0x0797FC01,
    0x079B8C01, 0x079B9801, 0x079BB802, 0x079BD401, 0x07A34007,
    0x07A51007, 0x07A57802, 0x07B2B001, 0x07B2C001, 0x07B4B801,
    0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, 0x07C2C40F,
    0x07C3040F, 0x07C34425, 0x07C434A1, 0x07C7981D, 0x07C8402C,
    0x07C90009, 0x07C94002, 0x07C98006, 0x07CC03D9, 0x07DB7011,
    0x07DBC00D, 0x07DC00DA, 0x07DF800C, 0x07DFC001, 0x07E0000C,
    0x07E04038, 0x07E1400A, 0x07E18028, 0x07E2401E, 0x07E2C00C,
    0x07E30002, 0x07E34009, 0x07E40158, 0x07E9800E, 0x07E9C00D,
    0x07EA000B, 0x07EA3839, 0x07EB2001, 0x07EB3410, 0x07EB7C0C,
    0x07EBBC0A, 0x07EC0093, 0x07EE505C, 0x07EFE801, 0x38000401,
    0x38008060, 0x380400F0,
  };

  /* Bitmap covering codepoints 0..127, 32 codepoints per word. A set bit
  ** marks a codepoint that is NOT a letter or a number. */
  static const unsigned int aAscii[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };

  if( (unsigned int)c<128 ){
    /* ASCII fast path: test the bit for c in the bitmap. */
    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
  }else if( (unsigned int)c<(1<<22) ){
    /* Build a key that compares greater than or equal to every table entry
    ** whose first codepoint is <= c, regardless of that entry's size field,
    ** then binary search for the last such entry. */
    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
    int iRes = 0;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( key >= aEntry[iTest] ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( aEntry[0]<key );
    assert( key>=aEntry[iRes] );
    /* c is alphanumeric iff it lies past the end of the located range. */
    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
  }
  return 1;
}
|
187
|
|
|
188
|
|
|
189
|
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** If bComplex is zero, codepoints whose table entry is flagged with
** HIBIT are returned unchanged; if bComplex is non-zero they are mapped
** to their base letter like any other entry.
*/
static int unicode_remove_diacritic(int c, int bComplex){
  /* Sorted table of codepoint ranges. For each entry the upper bits
  ** (value>>3) hold the first codepoint of the range and the low 3 bits
  ** hold the number of additional codepoints that follow it, so the entry
  ** covers (value>>3) through (value>>3)+(value&7) inclusive. aDia[i]
  ** pairs with aChar[i]. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
  /* Replacement letter for each aDia[] range, stored in the low 7 bits.
  ** An entry with HIBIT set is only applied when bComplex is non-zero. */
#define HIBIT ((unsigned char)0x80)
  static const unsigned char aChar[] = {
    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  /* Key compares >= every entry whose first codepoint is <= c, whatever
  ** that entry's low 3 bits are. Binary search for the last such entry. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  /* HIBIT entries are skipped unless the caller asked for complex mode. */
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
  /* If c lies beyond the end of the located range it has no mapping;
  ** otherwise return the base letter with the HIBIT flag masked off. */
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}
|
258
|
|
|
259
|
|
|
260
|
/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
**
** Only codepoints 768 through 817 are examined; anything outside that
** range yields 0. For a match the return value is the selected mask bit
** (non-zero), not necessarily 1.
*/
int unicode_is_diacritic(int c){
  /* Bit i of mask0 covers codepoint 768+i; bit i of mask1 covers
  ** codepoint 800+i. A set bit marks a diacritic. */
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}
|
272
|
|
|
273
|
|
|
274
|
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** If eRemoveDiacritic is non-zero and c is in the range 128..65535, the
** folded result is additionally passed through unicode_remove_diacritic(),
** with its bComplex argument set to true only when eRemoveDiacritic==2.
** Codepoints below 128 and at or above 65536 are never passed to
** unicode_remove_diacritic().
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int unicode_fold(int c, int eRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
  ** every second codepoint in the range, starting with codepoint C.
  **
  ** The 7 most significant bits in flags are an index into the aiOff[]
  ** array. If a specific codepoint C does require folding, then its lower
  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 16, 26},          {181, 70, 1},          {192, 16, 23},
    {216, 16, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 168, 1},
    {377, 1, 6},           {383, 156, 1},         {385, 56, 1},
    {386, 1, 4},           {390, 50, 1},          {391, 0, 1},
    {393, 48, 2},          {395, 0, 1},           {398, 38, 1},
    {399, 44, 1},          {400, 46, 1},          {401, 0, 1},
    {403, 48, 1},          {404, 52, 1},          {406, 58, 1},
    {407, 54, 1},          {408, 0, 1},           {412, 58, 1},
    {413, 60, 1},          {415, 62, 1},          {416, 1, 6},
    {422, 66, 1},          {423, 0, 1},           {425, 66, 1},
    {428, 0, 1},           {430, 66, 1},          {431, 0, 1},
    {433, 64, 2},          {435, 1, 4},           {439, 68, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 174, 1},
    {503, 186, 1},         {504, 1, 40},          {544, 162, 1},
    {546, 1, 18},          {570, 78, 1},          {571, 0, 1},
    {573, 160, 1},         {574, 76, 1},          {577, 0, 1},
    {579, 158, 1},         {580, 34, 1},          {581, 36, 1},
    {582, 1, 10},          {837, 42, 1},          {880, 1, 4},
    {886, 0, 1},           {895, 42, 1},          {902, 22, 1},
    {904, 20, 3},          {908, 32, 1},          {910, 30, 2},
    {913, 16, 17},         {931, 16, 9},          {962, 0, 1},
    {975, 4, 1},           {976, 192, 1},         {977, 194, 1},
    {981, 198, 1},         {982, 196, 1},         {984, 1, 24},
    {1008, 188, 1},        {1009, 190, 1},        {1012, 182, 1},
    {1013, 180, 1},        {1015, 0, 1},          {1017, 204, 1},
    {1018, 0, 1},          {1021, 162, 3},        {1024, 40, 16},
    {1040, 16, 32},        {1120, 1, 34},         {1162, 1, 54},
    {1216, 6, 1},          {1217, 1, 14},         {1232, 1, 96},
    {1329, 28, 38},        {4256, 74, 38},        {4295, 74, 1},
    {4301, 74, 1},         {5112, 202, 6},        {7296, 138, 1},
    {7297, 140, 1},        {7298, 142, 1},        {7299, 146, 2},
    {7301, 144, 1},        {7302, 148, 1},        {7303, 150, 1},
    {7304, 108, 1},        {7305, 0, 1},          {7312, 154, 43},
    {7357, 154, 3},        {7680, 1, 150},        {7835, 184, 1},
    {7838, 128, 1},        {7840, 1, 96},         {7944, 202, 8},
    {7960, 202, 6},        {7976, 202, 8},        {7992, 202, 8},
    {8008, 202, 6},        {8025, 203, 8},        {8040, 202, 8},
    {8072, 202, 8},        {8088, 202, 8},        {8104, 202, 8},
    {8120, 202, 2},        {8122, 178, 2},        {8124, 200, 1},
    {8126, 136, 1},        {8136, 176, 4},        {8140, 200, 1},
    {8147, 132, 1},        {8152, 202, 2},        {8154, 172, 2},
    {8163, 134, 1},        {8168, 202, 2},        {8170, 170, 2},
    {8172, 204, 1},        {8184, 164, 2},        {8186, 166, 2},
    {8188, 200, 1},        {8486, 130, 1},        {8490, 124, 1},
    {8491, 126, 1},        {8498, 14, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 28, 48},
    {11360, 0, 1},         {11362, 120, 1},       {11363, 152, 1},
    {11364, 122, 1},       {11367, 1, 6},         {11373, 116, 1},
    {11374, 118, 1},       {11375, 112, 1},       {11376, 114, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 110, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 28},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 106, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 96, 1},
    {42896, 1, 4},         {42902, 1, 20},        {42922, 88, 1},
    {42923, 84, 1},        {42924, 86, 1},        {42925, 92, 1},
    {42926, 88, 1},        {42928, 100, 1},       {42929, 94, 1},
    {42930, 98, 1},        {42931, 72, 1},        {42932, 1, 16},
    {42948, 190, 1},       {42949, 90, 1},        {42950, 104, 1},
    {42951, 1, 4},         {42955, 82, 1},        {42956, 1, 16},
    {42972, 80, 1},        {42997, 0, 1},         {43888, 102, 80},
    {64261, 0, 1},         {65313, 16, 26},
  };
  /* Offsets added (modulo 65536) to a codepoint to fold it; indexed by
  ** (flags>>1) from the table above. */
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    27,    28,
   32,    34,    37,    38,    39,    40,    48,    63,
   64,    69,    71,    79,    80,    116,   202,   203,
   205,   206,   207,   209,   210,   211,   213,   214,
   217,   218,   219,   775,   928,   7264,  10792, 10795,
   22975, 23193, 23217, 23221, 23228, 23229, 23231, 23254,
   23256, 23275, 23278, 26672, 30152, 30204, 35267, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58301, 58317, 58363, 59314, 59315, 59324,
   59325, 59326, 59332, 59356, 61722, 62528, 65268, 65341,
   65373, 65406, 65408, 65410, 65415, 65424, 65436, 65439,
   65450, 65462, 65472, 65476, 65478, 65480, 65482, 65488,
   65506, 65511, 65514, 65521, 65527, 65528, 65529,
  };

  int ret = c;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    /* ASCII: only 'A'..'Z' change. */
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    /* Binary search for the last rule whose iCode is <= c. */
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    int iRes = -1;

    assert( c>aEntry[0].iCode );
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      int cmp = (c - aEntry[iTest].iCode);
      if( cmp>=0 ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }

    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    /* Apply the rule if c is inside its range and, for "every second
    ** codepoint" rules (flags bit 0 set), c has the same parity as iCode. */
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }

    if( eRemoveDiacritic ){
      ret = unicode_remove_diacritic(ret, eRemoveDiacritic==2);
    }
  }

  /* Codepoints at or above 65536: fixed-offset ranges. */
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }
  else if( c>=66736 && c<66772 ){
    ret = c + 40;
  }
  else if( c>=66928 && c<66939 ){
    ret = c + 39;
  }
  else if( c>=66940 && c<66955 ){
    ret = c + 39;
  }
  else if( c>=66956 && c<66963 ){
    ret = c + 39;
  }
  else if( c>=66964 && c<66966 ){
    ret = c + 39;
  }
  else if( c>=68736 && c<68787 ){
    ret = c + 64;
  }
  else if( c>=68944 && c<68966 ){
    ret = c + 32;
  }
  else if( c>=71840 && c<71872 ){
    ret = c + 32;
  }
  else if( c>=93760 && c<93792 ){
    ret = c + 32;
  }
  else if( c>=93856 && c<93881 ){
    ret = c + 27;
  }
  else if( c>=125184 && c<125218 ){
    ret = c + 34;
  }

  return ret;
}
|
464
|
|