Fossil SCM

added a few comments

sdr 2016-06-15 15:00 invalid_utf8_table
Commit 63313a5f168933ddae7dec33807e8282d77573d7
1 file changed +7 -7
+7 -7
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,29 +143,29 @@
143 143   ** the derivatives CESU-8 & WTF-8 (as described in the same
144 144   ** wikipedia article referenced previously).
145 145   */
146 146
147 147   /* definitions for various UTF-8 sequence lengths */
148     - static const unsigned char us2a[] = {
    148 + static const unsigned char us2a[] = { /* for lead byte 0xC0 */
149 149   2, 0x80, 0x80
150 150   };
151     - static const unsigned char us2b[] = {
    151 + static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152 152   2, 0x80, 0xBF
153 153   };
154     - static const unsigned char us3a[] = {
    154 + static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155 155   3, 0xA0, 0xBF, 0x80, 0xBF
156 156   };
157     - static const unsigned char us3b[] = {
    157 + static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158 158   3, 0x80, 0xBF, 0x80, 0xBF
159 159   };
160     - static const unsigned char us4a[] = {
    160 + static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161 161   4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162 162   };
163     - static const unsigned char us4b[] = {
    163 + static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164 164   4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165 165   };
166     - static const unsigned char us4c[] = {
    166 + static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167 167   4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168 168   };
169 169
170 170   /* a table used for quick lookup of the definition that goes with a
171 171   * particular lead byte */
172 172
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,29 +143,29 @@
143 ** the derivatives CESU-8 & WTF-8 (as described in the same
144 ** wikipedia article referenced previously).
145 */
146
147 /* definitions for various UTF-8 sequence lengths */
148 static const unsigned char us2a[] = {
149 2, 0x80, 0x80
150 };
151 static const unsigned char us2b[] = {
152 2, 0x80, 0xBF
153 };
154 static const unsigned char us3a[] = {
155 3, 0xA0, 0xBF, 0x80, 0xBF
156 };
157 static const unsigned char us3b[] = {
158 3, 0x80, 0xBF, 0x80, 0xBF
159 };
160 static const unsigned char us4a[] = {
161 4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162 };
163 static const unsigned char us4b[] = {
164 4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165 };
166 static const unsigned char us4c[] = {
167 4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168 };
169
170 /* a table used for quick lookup of the definition that goes with a
171 * particular lead byte */
172
--- src/lookslike.c
+++ src/lookslike.c
@@ -143,29 +143,29 @@
143 ** the derivatives CESU-8 & WTF-8 (as described in the same
144 ** wikipedia article referenced previously).
145 */
146
147 /* definitions for various UTF-8 sequence lengths */
148 static const unsigned char us2a[] = { /* for lead byte 0xC0 */
149 2, 0x80, 0x80
150 };
151 static const unsigned char us2b[] = { /* for lead bytes 0xC2-0xDF */
152 2, 0x80, 0xBF
153 };
154 static const unsigned char us3a[] = { /* for lead byte 0xE0 */
155 3, 0xA0, 0xBF, 0x80, 0xBF
156 };
157 static const unsigned char us3b[] = { /* for lead bytes 0xE1-0xEF */
158 3, 0x80, 0xBF, 0x80, 0xBF
159 };
160 static const unsigned char us4a[] = { /* for lead byte 0xF0 */
161 4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162 };
163 static const unsigned char us4b[] = { /* for lead bytes 0xF1-0xF3 */
164 4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165 };
166 static const unsigned char us4c[] = { /* for lead byte 0xF4 */
167 4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168 };
169
170 /* a table used for quick lookup of the definition that goes with a
171 * particular lead byte */
172

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button