Fossil SCM

fossil-scm / src / unicode.c
Blame History Raw 464 lines
1
/*
2
** Copyright (c) 2013 D. Richard Hipp
3
**
4
** This program is free software; you can redistribute it and/or
5
** modify it under the terms of the Simplified BSD License (also
6
** known as the "2-Clause License" or "FreeBSD License".)
7
**
8
** This program is distributed in the hope that it will be useful,
9
** but without any warranty; without even the implied warranty of
10
** merchantability or fitness for a particular purpose.
11
**
12
** Author contact information:
13
** drh@hwaci.com
14
** http://www.hwaci.com/drh/
15
**
16
*******************************************************************************
17
**
18
** This file is copied from ext/fts5/fts5_unicode2.c of SQLite3 with
19
** minor changes.
20
*/
21
#include "config.h"
22
#include "unicode.h"
23
24
/*
** Classify the unicode codepoint c.  Return 1 if c is a letter or a
** number, or 0 if it is not.
**
** Codepoints of (1<<22) or greater are outside the range covered by the
** lookup table and are always reported as alphanumeric (1).
**
** The result is undefined if c is less than zero.
*/
int unicode_isalnum(int c){
  /* Sorted table of ranges of codepoints that are NOT letters or numbers,
  ** i.e. codepoints for which this function returns 0.
  **
  ** Each 32-bit value packs one range.  The most significant 22 bits hold
  ** the first codepoint of the range and the least significant 10 bits
  ** hold the number of codepoints in the range (always at least 1).  In
  ** other words, the value ((C<<10) + N) describes a range of N codepoints
  ** starting with codepoint C.  A single entry cannot describe more than
  ** 1023 codepoints.
  */
  static const unsigned int aRange[] = {
    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163403,
    0x00164437, 0x0017CC02, 0x00180020, 0x00192C15, 0x0019A804,
    0x0019C001, 0x001B5001, 0x001B580F, 0x001B9C07, 0x001BF402,
    0x001C000E, 0x001C3C01, 0x001C4401, 0x001CC01B, 0x001E980B,
    0x001FAC09, 0x001FD804, 0x001FF403, 0x00205804, 0x00206C09,
    0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, 0x00217801,
    0x00222001, 0x00224002, 0x00225C09, 0x0023283A, 0x0024E803,
    0x0024F812, 0x00254407, 0x00258804, 0x0025C001, 0x00260403,
    0x0026F001, 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01,
    0x00278802, 0x0027C802, 0x0027E802, 0x0027F402, 0x00280403,
    0x0028F001, 0x0028F805, 0x00291C02, 0x00292C03, 0x00294401,
    0x0029C002, 0x0029D402, 0x002A0403, 0x002AF001, 0x002AF808,
    0x002B1C03, 0x002B2C03, 0x002B8802, 0x002BC002, 0x002BE806,
    0x002C0403, 0x002CF001, 0x002CF807, 0x002D1C02, 0x002D2C03,
    0x002D5403, 0x002D8802, 0x002DC001, 0x002E0801, 0x002EF805,
    0x002F1803, 0x002F2804, 0x002F5C01, 0x002FCC08, 0x00300005,
    0x0030F001, 0x0030F807, 0x00311803, 0x00312804, 0x00315402,
    0x00318802, 0x0031DC01, 0x0031FC01, 0x00320404, 0x0032F001,
    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
    0x0033CC01, 0x00340004, 0x0034EC02, 0x0034F807, 0x00351803,
    0x00352804, 0x00353C01, 0x00355C01, 0x00358802, 0x0035E401,
    0x00360403, 0x00372801, 0x00373C06, 0x00375801, 0x00376008,
    0x0037C803, 0x0038C401, 0x0038D007, 0x0038FC01, 0x00391C09,
    0x00396802, 0x003AC401, 0x003AD009, 0x003B2007, 0x003C041F,
    0x003CD00C, 0x003DC417, 0x003E340B, 0x003E6424, 0x003EF80F,
    0x003F380D, 0x0040AC14, 0x00412806, 0x00415804, 0x00417803,
    0x00418803, 0x00419C07, 0x0041C404, 0x0042080C, 0x00423C01,
    0x00426806, 0x0043EC01, 0x004D740C, 0x004E400A, 0x00500001,
    0x0059B402, 0x005A0001, 0x005A6C02, 0x005BAC03, 0x005C4804,
    0x005CC805, 0x005D4802, 0x005DC802, 0x005ED023, 0x005F6004,
    0x005F7401, 0x00600010, 0x00621402, 0x0062A401, 0x0064800C,
    0x0064C00C, 0x00650001, 0x00651002, 0x00677822, 0x00685C05,
    0x00687802, 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007,
    0x006AA006, 0x006AC02E, 0x006B800C, 0x006C0005, 0x006CD011,
    0x006D3802, 0x006D6829, 0x006E840D, 0x006F980E, 0x006FF004,
    0x00709014, 0x0070EC05, 0x0071F802, 0x00730008, 0x00734019,
    0x0073B401, 0x0073D001, 0x0073DC03, 0x00770040, 0x007EF401,
    0x007EFC03, 0x007F3403, 0x007F7403, 0x007FB403, 0x007FF402,
    0x00800065, 0x0081980A, 0x0081E805, 0x00822805, 0x00828022,
    0x00834021, 0x00840002, 0x00840C04, 0x00842002, 0x00845001,
    0x00845803, 0x00847806, 0x00849401, 0x00849C01, 0x0084A401,
    0x0084B801, 0x0084E802, 0x00850005, 0x00852804, 0x00853C01,
    0x00862802, 0x0086429A, 0x0091000B, 0x0092704E, 0x00940276,
    0x009E53E0, 0x00ADD88A, 0x00B39406, 0x00B3BC03, 0x00B3E404,
    0x00B3F802, 0x00B5C001, 0x00B5FC01, 0x00B7804F, 0x00B8C02E,
    0x00BA001A, 0x00BA6C59, 0x00BC00D6, 0x00BFC015, 0x00C02019,
    0x00C0A807, 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001,
    0x00C3EC01, 0x00C64002, 0x00C6580A, 0x00C70026, 0x00C7BC01,
    0x00C8001F, 0x00C8A81E, 0x00C94001, 0x00C98020, 0x00CA2827,
    0x00CB0140, 0x01370040, 0x02924037, 0x0293F802, 0x02983403,
    0x0299BC10, 0x029A7802, 0x029BC008, 0x029C0017, 0x029C8002,
    0x029E2402, 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C0A,
    0x02A0D804, 0x02A1D004, 0x02A20002, 0x02A2D012, 0x02A33802,
    0x02A38012, 0x02A3E003, 0x02A3F001, 0x02A3FC01, 0x02A4980A,
    0x02A51C0D, 0x02A57C01, 0x02A60004, 0x02A6CC1B, 0x02A77802,
    0x02A79401, 0x02A8A40E, 0x02A90C01, 0x02A93002, 0x02A97004,
    0x02A9DC03, 0x02A9EC03, 0x02AAC001, 0x02AAC803, 0x02AADC02,
    0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, 0x02ABD402,
    0x02AD6C01, 0x02ADA802, 0x02AF8C0B, 0x03600001, 0x036DFC02,
    0x036FFC02, 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC821,
    0x03F4F812, 0x03F64002, 0x03F72008, 0x03F7F01E, 0x03F88033,
    0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807,
    0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405,
    0x04040003, 0x0404DC09, 0x0405E411, 0x04063003, 0x0406400D,
    0x04068001, 0x0407402E, 0x040B8001, 0x040DD805, 0x040E7C01,
    0x040F4001, 0x0415BC01, 0x04215C01, 0x0421DC02, 0x04247C01,
    0x0424FC01, 0x04280403, 0x04281402, 0x04283004, 0x0428E003,
    0x0428FC01, 0x04294009, 0x0429FC01, 0x042B2001, 0x042B9402,
    0x042BC007, 0x042CE407, 0x042E6404, 0x04349004, 0x0435A406,
    0x04363802, 0x043AAC03, 0x043B4009, 0x043BE806, 0x043D180B,
    0x043D5405, 0x043E0808, 0x04400003, 0x0440E016, 0x0441C001,
    0x0441CC02, 0x0441FC04, 0x0442C013, 0x04433401, 0x04440003,
    0x04449C0E, 0x04450004, 0x04451402, 0x0445CC03, 0x04460003,
    0x0446CC0E, 0x0447140B, 0x04476C01, 0x04477403, 0x0448B013,
    0x04490401, 0x044AA401, 0x044B7C0C, 0x044C0004, 0x044CEC02,
    0x044CF807, 0x044D1C02, 0x044D2C03, 0x044D5C01, 0x044D8802,
    0x044D9807, 0x044DC005, 0x044EE009, 0x044F0801, 0x044F1401,
    0x044F1C04, 0x044F3005, 0x044F4801, 0x044F5002, 0x044F5C02,
    0x044F8402, 0x0450D412, 0x04512C05, 0x04516802, 0x04517402,
    0x0452C014, 0x04531801, 0x0456BC07, 0x0456E020, 0x04577002,
    0x0458C014, 0x0459800D, 0x045AAC0D, 0x045AE401, 0x045C740F,
    0x045CF004, 0x0460B010, 0x0464C006, 0x0464DC02, 0x0464EC04,
    0x04650001, 0x04650805, 0x04674407, 0x04676807, 0x04678801,
    0x04679001, 0x0468040A, 0x0468CC07, 0x0468EC0D, 0x0469440B,
    0x046A2813, 0x046A7805, 0x046C000A, 0x046D8008, 0x046F8401,
    0x0470BC08, 0x0470E008, 0x04710405, 0x0471C002, 0x04724816,
    0x0472A40E, 0x0474C406, 0x0474E801, 0x0474F002, 0x0474FC07,
    0x04751C01, 0x04762805, 0x04764002, 0x04764C05, 0x047BCC06,
    0x047C0002, 0x047C0C01, 0x047CD007, 0x047CF812, 0x047D6801,
    0x047F541D, 0x047FFC01, 0x0491C005, 0x04BFC402, 0x04D0C011,
    0x04D11C0F, 0x05847812, 0x05A9B802, 0x05ABC006, 0x05ACC010,
    0x05AD1002, 0x05B5B403, 0x05BA5C04, 0x05BD3C01, 0x05BD4437,
    0x05BE3C04, 0x05BF8801, 0x05BF9001, 0x05BFC002, 0x06F27008,
    0x073000F0, 0x0733E803, 0x073401B4, 0x073AE817, 0x073B8011,
    0x073C002E, 0x073CC017, 0x073D4074, 0x074000F6, 0x07440027,
    0x0744A4C2, 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01,
    0x075BEC01, 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01,
    0x075E2401, 0x075EA401, 0x075F0C01, 0x0760028C, 0x076A6C05,
    0x076A840F, 0x07800007, 0x07802011, 0x07806C07, 0x07808C02,
    0x07809805, 0x07823C01, 0x0784C007, 0x07853C01, 0x078AB801,
    0x078BB004, 0x078BFC01, 0x0793B004, 0x0797B802, 0x0797FC01,
    0x079B8C01, 0x079B9801, 0x079BB802, 0x079BD401, 0x07A34007,
    0x07A51007, 0x07A57802, 0x07B2B001, 0x07B2C001, 0x07B4B801,
    0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, 0x07C2C40F,
    0x07C3040F, 0x07C34425, 0x07C434A1, 0x07C7981D, 0x07C8402C,
    0x07C90009, 0x07C94002, 0x07C98006, 0x07CC03D9, 0x07DB7011,
    0x07DBC00D, 0x07DC00DA, 0x07DF800C, 0x07DFC001, 0x07E0000C,
    0x07E04038, 0x07E1400A, 0x07E18028, 0x07E2401E, 0x07E2C00C,
    0x07E30002, 0x07E34009, 0x07E40158, 0x07E9800E, 0x07E9C00D,
    0x07EA000B, 0x07EA3839, 0x07EB2001, 0x07EB3410, 0x07EB7C0C,
    0x07EBBC0A, 0x07EC0093, 0x07EE505C, 0x07EFE801, 0x38000401,
    0x38008060, 0x380400F0,
  };
  /* One bit per ASCII codepoint; a set bit marks a non-alphanumeric. */
  static const unsigned int aAsciiMask[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };
  const unsigned int cp = (unsigned int)c;

  /* Anything beyond the table's 22-bit codepoint space counts as alnum. */
  if( cp>=(1<<22) ) return 1;

  /* ASCII is answered straight from the bitmap. */
  if( cp<128 ){
    unsigned int bit = (unsigned int)1 << (cp & 0x001F);
    return (aAsciiMask[cp >> 5] & bit)==0;
  }

  {
    /* Setting all ten low bits makes the key compare greater than or equal
    ** to every table entry whose first codepoint is <= cp, regardless of
    ** that entry's length field. */
    const unsigned int key = (cp<<10) | 0x000003FF;
    int lo = 0;
    int hi = (int)(sizeof(aRange)/sizeof(aRange[0]));
    unsigned int first;
    unsigned int count;

    /* Upper-bound search: on exit, lo is the number of entries <= key, so
    ** aRange[lo-1] is the last range that starts at or before cp. */
    while( lo<hi ){
      int mid = lo + (hi - lo)/2;
      if( aRange[mid]<=key ){
        lo = mid + 1;
      }else{
        hi = mid;
      }
    }
    assert( aRange[0]<key );
    assert( lo>0 && key>=aRange[lo-1] );

    first = aRange[lo-1]>>10;
    count = aRange[lo-1] & 0x3FF;
    /* cp is alphanumeric unless it lies inside [first, first+count). */
    return cp >= first + count;
  }
}
187
188
189
/*
** If c is the codepoint of a lowercase ASCII letter with a diacritic
** added, return the codepoint of the plain ASCII letter.  For example, if
** passed 235 - "LATIN SMALL LETTER E WITH DIAERESIS" - return 101
** ("LATIN SMALL LETTER E").  Codepoints that are not covered by the
** lookup table are returned unchanged.
**
** Some table entries are flagged with the high bit of their replacement
** character.  Those entries are only applied when bComplex is non-zero;
** when bComplex is zero, c is returned unchanged for them.
**
** The results of passing a codepoint that corresponds to an uppercase
** letter are undefined.
*/
static int unicode_remove_diacritic(int c, int bComplex){
  /* Sorted table of codepoint ranges.  Each value is (first<<3) + extra,
  ** describing the inclusive range first .. first+extra.  aChar[] holds
  ** the replacement character for the range at the same index. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
#define HIBIT ((unsigned char)0x80)
  static const unsigned char aChar[] = {
    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  /* With the three low bits set, the key compares >= every entry whose
  ** first codepoint is <= c, whatever that entry's length bits are. */
  const unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int lo = 0;
  int hi = (int)(sizeof(aDia)/sizeof(aDia[0]));
  int idx;
  int last;

  /* Upper-bound search for the last entry that is <= key. */
  while( lo<hi ){
    int mid = lo + (hi - lo)/2;
    if( aDia[mid]<=key ){
      lo = mid + 1;
    }else{
      hi = mid;
    }
  }
  idx = lo>0 ? lo-1 : 0;
  assert( key>=aDia[idx] );

  /* Flagged entries are skipped unless the caller asked for them. */
  if( !bComplex && (aChar[idx] & HIBIT)!=0 ) return c;

  /* Past the end of the matched range: nothing to strip. */
  last = (aDia[idx]>>3) + (aDia[idx]&0x07);
  if( c>last ) return c;

  return (int)aChar[idx] & 0x7F;
}
258
259
260
/*
** Return non-zero if the argument, interpreted as a unicode codepoint, is
** a diacritical modifier character, or zero if it is not.
**
** Only codepoints 768 through 817 are ever reported as diacritics.  The
** non-zero value returned is the matching mask bit, not necessarily 1.
*/
int unicode_is_diacritic(int c){
  /* Bit i of aMask[w] is set if codepoint 768 + 32*w + i is a diacritic. */
  static const unsigned int aMask[2] = { 0x08029FDF, 0x000361F8 };
  unsigned int iBit;

  if( c<768 || c>817 ) return 0;
  iBit = (unsigned int)(c - 768);
  return aMask[iBit>>5] & ((unsigned int)1 << (iBit & 31));
}
272
273
274
/*
** Interpret c as a unicode codepoint.  If it is an upper case character
** that has a lower case equivalent, return the codepoint of the lower
** case version.  Otherwise, return a copy of the argument.
**
** If eRemoveDiacritic is non-zero and c is in the range 128..65535, the
** folded result is additionally passed through
** unicode_remove_diacritic(), with its bComplex argument set when
** eRemoveDiacritic is exactly 2.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int unicode_fold(int c, int eRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case.  The rule covers nRange codepoints
  ** starting at codepoint iCode.
  **
  ** If the least significant bit of flags is clear, every codepoint in the
  ** range is upper case and is folded.  If it is set, only every second
  ** codepoint is folded, starting with iCode itself.
  **
  ** The 7 most significant bits of flags are an index into aiOff[].  The
  ** lower case equivalent of a codepoint C that needs folding is
  ** ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database".  See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 16, 26},          {181, 70, 1},          {192, 16, 23},
    {216, 16, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 168, 1},
    {377, 1, 6},           {383, 156, 1},         {385, 56, 1},
    {386, 1, 4},           {390, 50, 1},          {391, 0, 1},
    {393, 48, 2},          {395, 0, 1},           {398, 38, 1},
    {399, 44, 1},          {400, 46, 1},          {401, 0, 1},
    {403, 48, 1},          {404, 52, 1},          {406, 58, 1},
    {407, 54, 1},          {408, 0, 1},           {412, 58, 1},
    {413, 60, 1},          {415, 62, 1},          {416, 1, 6},
    {422, 66, 1},          {423, 0, 1},           {425, 66, 1},
    {428, 0, 1},           {430, 66, 1},          {431, 0, 1},
    {433, 64, 2},          {435, 1, 4},           {439, 68, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 174, 1},
    {503, 186, 1},         {504, 1, 40},          {544, 162, 1},
    {546, 1, 18},          {570, 78, 1},          {571, 0, 1},
    {573, 160, 1},         {574, 76, 1},          {577, 0, 1},
    {579, 158, 1},         {580, 34, 1},          {581, 36, 1},
    {582, 1, 10},          {837, 42, 1},          {880, 1, 4},
    {886, 0, 1},           {895, 42, 1},          {902, 22, 1},
    {904, 20, 3},          {908, 32, 1},          {910, 30, 2},
    {913, 16, 17},         {931, 16, 9},          {962, 0, 1},
    {975, 4, 1},           {976, 192, 1},         {977, 194, 1},
    {981, 198, 1},         {982, 196, 1},         {984, 1, 24},
    {1008, 188, 1},        {1009, 190, 1},        {1012, 182, 1},
    {1013, 180, 1},        {1015, 0, 1},          {1017, 204, 1},
    {1018, 0, 1},          {1021, 162, 3},        {1024, 40, 16},
    {1040, 16, 32},        {1120, 1, 34},         {1162, 1, 54},
    {1216, 6, 1},          {1217, 1, 14},         {1232, 1, 96},
    {1329, 28, 38},        {4256, 74, 38},        {4295, 74, 1},
    {4301, 74, 1},         {5112, 202, 6},        {7296, 138, 1},
    {7297, 140, 1},        {7298, 142, 1},        {7299, 146, 2},
    {7301, 144, 1},        {7302, 148, 1},        {7303, 150, 1},
    {7304, 108, 1},        {7305, 0, 1},          {7312, 154, 43},
    {7357, 154, 3},        {7680, 1, 150},        {7835, 184, 1},
    {7838, 128, 1},        {7840, 1, 96},         {7944, 202, 8},
    {7960, 202, 6},        {7976, 202, 8},        {7992, 202, 8},
    {8008, 202, 6},        {8025, 203, 8},        {8040, 202, 8},
    {8072, 202, 8},        {8088, 202, 8},        {8104, 202, 8},
    {8120, 202, 2},        {8122, 178, 2},        {8124, 200, 1},
    {8126, 136, 1},        {8136, 176, 4},        {8140, 200, 1},
    {8147, 132, 1},        {8152, 202, 2},        {8154, 172, 2},
    {8163, 134, 1},        {8168, 202, 2},        {8170, 170, 2},
    {8172, 204, 1},        {8184, 164, 2},        {8186, 166, 2},
    {8188, 200, 1},        {8486, 130, 1},        {8490, 124, 1},
    {8491, 126, 1},        {8498, 14, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 28, 48},
    {11360, 0, 1},         {11362, 120, 1},       {11363, 152, 1},
    {11364, 122, 1},       {11367, 1, 6},         {11373, 116, 1},
    {11374, 118, 1},       {11375, 112, 1},       {11376, 114, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 110, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 28},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 106, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 96, 1},
    {42896, 1, 4},         {42902, 1, 20},        {42922, 88, 1},
    {42923, 84, 1},        {42924, 86, 1},        {42925, 92, 1},
    {42926, 88, 1},        {42928, 100, 1},       {42929, 94, 1},
    {42930, 98, 1},        {42931, 72, 1},        {42932, 1, 16},
    {42948, 190, 1},       {42949, 90, 1},        {42950, 104, 1},
    {42951, 1, 4},         {42955, 82, 1},        {42956, 1, 16},
    {42972, 80, 1},        {42997, 0, 1},         {43888, 102, 80},
    {64261, 0, 1},         {65313, 16, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    27,    28,
   32,    34,    37,    38,    39,    40,    48,    63,
   64,    69,    71,    79,    80,    116,   202,   203,
   205,   206,   207,   209,   210,   211,   213,   214,
   217,   218,   219,   775,   928,   7264,  10792, 10795,
   22975, 23193, 23217, 23221, 23228, 23229, 23231, 23254,
   23256, 23275, 23278, 26672, 30152, 30204, 35267, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58301, 58317, 58363, 59314, 59315, 59324,
   59325, 59326, 59332, 59356, 61722, 62528, 65268, 65341,
   65373, 65406, 65408, 65410, 65415, 65424, 65436, 65439,
   65450, 65462, 65472, 65476, 65478, 65480, 65482, 65488,
   65506, 65511, 65514, 65521, 65527, 65528, 65529,
  };
  /* Folding rules for codepoints of 65536 and above: every codepoint in
  ** the half-open range [iFirst, iLimit) folds to itself plus nDelta. */
  static const struct SuppFold {
    int iFirst;
    int iLimit;
    int nDelta;
  } aSupp[] = {
    {66560,  66600,  40},  {66736,  66772,  40},  {66928,  66939,  39},
    {66940,  66955,  39},  {66956,  66963,  39},  {66964,  66966,  39},
    {68736,  68787,  64},  {68944,  68966,  32},  {71840,  71872,  32},
    {93760,  93792,  32},  {93856,  93881,  27},  {125184, 125218, 34},
  };

  const struct TableEntry *pRule;
  unsigned int i;
  int lo;
  int hi;
  int folded;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* ASCII: only 'A'..'Z' change. */
  if( c<128 ){
    return (c>='A' && c<='Z') ? c + ('a' - 'A') : c;
  }

  /* Beyond the 16-bit table: consult the short list of fixed offsets.
  ** Diacritic removal is not applied to these codepoints. */
  if( c>=65536 ){
    for(i=0; i<sizeof(aSupp)/sizeof(aSupp[0]); i++){
      if( c>=aSupp[i].iFirst && c<aSupp[i].iLimit ){
        return c + aSupp[i].nDelta;
      }
    }
    return c;
  }

  /* 128..65535: find the last rule whose first codepoint is <= c. */
  assert( c>aEntry[0].iCode );
  lo = 0;
  hi = (int)(sizeof(aEntry)/sizeof(aEntry[0]));
  while( lo<hi ){
    int mid = lo + (hi - lo)/2;
    if( aEntry[mid].iCode<=c ){
      lo = mid + 1;
    }else{
      hi = mid;
    }
  }
  assert( lo>0 && c>=aEntry[lo-1].iCode );
  pRule = &aEntry[lo-1];

  folded = c;
  if( c<(pRule->iCode + pRule->nRange) ){
    /* An every-second-codepoint rule only matches codepoints with the
    ** same parity as the first codepoint of the range. */
    int bAlternating = (pRule->flags & 0x01)!=0;
    int bOffParity = ((pRule->iCode ^ c) & 0x01)!=0;
    if( !(bAlternating && bOffParity) ){
      folded = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
      assert( folded>0 );
    }
  }

  if( eRemoveDiacritic ){
    folded = unicode_remove_diacritic(folded, eRemoveDiacritic==2);
  }
  return folded;
}
464

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button