Fossil SCM

Simplifications and constification

jan.nijtmans 2016-06-15 08:19 invalid_utf8_table

Commit 6051c441edda95837da361f9bb764befb93954b0

Parent 12675ab79449571…

1 file changed +10 -12

M src/lookslike.c

+10 -12

		--- src/lookslike.c
		+++ src/lookslike.c
		@@ -144,34 +144,34 @@
144	144	** wikipedia article referenced previously).
145	145	*/
146	146
147	147	/* definitions for various UTF-8 sequence lengths */
148	148	static const unsigned char us2a[] = {
149		- 2, 0xC0, 0xC0, 0x80, 0x80
	149	+ 2, 0x80, 0x80
150	150	};
151	151	static const unsigned char us2b[] = {
152		- 2, 0xC2, 0xDF, 0x80, 0xBF
	152	+ 2, 0x80, 0xBF
153	153	};
154	154	static const unsigned char us3a[] = {
155		- 3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF
	155	+ 3, 0xA0, 0xBF, 0x80, 0xBF
156	156	};
157	157	static const unsigned char us3b[] = {
158		- 3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF
	158	+ 3, 0x80, 0xBF, 0x80, 0xBF
159	159	};
160	160	static const unsigned char us4a[] = {
161		- 4, 0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
	161	+ 4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162	162	};
163	163	static const unsigned char us4b[] = {
164		- 4, 0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
	164	+ 4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165	165	};
166	166	static const unsigned char us4c[] = {
167		- 4, 0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
	167	+ 4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168	168	};
169	169
170	170	/* a table used for quick lookup of the definition that goes with a
171	171	* particular lead byte */
172		-static const unsigned char* lb_tab[] = {
	172	+static const unsigned char* const lb_tab[] = {
173	173	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174	174	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175	175	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176	176	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177	177	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		@@ -201,21 +201,19 @@
201	201	if( *z<0x80 ){
202	202	++z;
203	203	--n;
204	204	}else{
205	205	/* get the definition for this lead byte */
206		- unsigned char* def = lb_tab[(*z++)-0x80];
	206	+ const unsigned char* def = lb_tab[(*z++)-0x80];
207	207	unsigned char i, len;
208	208
209	209	/* if the definition doesn't exist, return invalid */
210	210	if( !def ) return LOOK_INVALID;
211	211	/* get the expected sequence length */
212		- len = *def;
	212	+ len = *def++;
213	213	/* if there aren't enough bytes left, return invalid */
214	214	if( n<len ) return LOOK_INVALID;
215		- /* skip the length & lead byte range */
216		- def += 3;
217	215	/* we already know byte #0 is good, so check the remaining bytes */
218	216	for(i=1; i<len; ++i){
219	217	/* if the byte is outside the allowed range for this definition,
220	218	* return invalid */
221	219	if( (*z<*def++) || (*z++>*def++) ){
222	220

	--- src/lookslike.c
	+++ src/lookslike.c
	@@ -144,34 +144,34 @@
144	** wikipedia article referenced previously).
145	*/
146
147	/* definitions for various UTF-8 sequence lengths */
148	static const unsigned char us2a[] = {
149	2, 0xC0, 0xC0, 0x80, 0x80
150	};
151	static const unsigned char us2b[] = {
152	2, 0xC2, 0xDF, 0x80, 0xBF
153	};
154	static const unsigned char us3a[] = {
155	3, 0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF
156	};
157	static const unsigned char us3b[] = {
158	3, 0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF
159	};
160	static const unsigned char us4a[] = {
161	4, 0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162	};
163	static const unsigned char us4b[] = {
164	4, 0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165	};
166	static const unsigned char us4c[] = {
167	4, 0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168	};
169
170	/* a table used for quick lookup of the definition that goes with a
171	* particular lead byte */
172	static const unsigned char* lb_tab[] = {
173	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	@@ -201,21 +201,19 @@
201	if( *z<0x80 ){
202	++z;
203	--n;
204	}else{
205	/* get the definition for this lead byte */
206	unsigned char* def = lb_tab[(*z++)-0x80];
207	unsigned char i, len;
208
209	/* if the definition doesn't exist, return invalid */
210	if( !def ) return LOOK_INVALID;
211	/* get the expected sequence length */
212	len = *def;
213	/* if there aren't enough bytes left, return invalid */
214	if( n<len ) return LOOK_INVALID;
215	/* skip the length & lead byte range */
216	def += 3;
217	/* we already know byte #0 is good, so check the remaining bytes */
218	for(i=1; i<len; ++i){
219	/* if the byte is outside the allowed range for this definition,
220	* return invalid */
221	if( (*z<*def++) || (*z++>*def++) ){
222

	--- src/lookslike.c
	+++ src/lookslike.c
	@@ -144,34 +144,34 @@
144	** wikipedia article referenced previously).
145	*/
146
147	/* definitions for various UTF-8 sequence lengths */
148	static const unsigned char us2a[] = {
149	2, 0x80, 0x80
150	};
151	static const unsigned char us2b[] = {
152	2, 0x80, 0xBF
153	};
154	static const unsigned char us3a[] = {
155	3, 0xA0, 0xBF, 0x80, 0xBF
156	};
157	static const unsigned char us3b[] = {
158	3, 0x80, 0xBF, 0x80, 0xBF
159	};
160	static const unsigned char us4a[] = {
161	4, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF
162	};
163	static const unsigned char us4b[] = {
164	4, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF
165	};
166	static const unsigned char us4c[] = {
167	4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF
168	};
169
170	/* a table used for quick lookup of the definition that goes with a
171	* particular lead byte */
172	static const unsigned char* const lb_tab[] = {
173	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
174	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
175	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
176	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
177	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	@@ -201,21 +201,19 @@
201	if( *z<0x80 ){
202	++z;
203	--n;
204	}else{
205	/* get the definition for this lead byte */
206	const unsigned char* def = lb_tab[(*z++)-0x80];
207	unsigned char i, len;
208
209	/* if the definition doesn't exist, return invalid */
210	if( !def ) return LOOK_INVALID;
211	/* get the expected sequence length */
212	len = *def++;
213	/* if there aren't enough bytes left, return invalid */
214	if( n<len ) return LOOK_INVALID;


215	/* we already know byte #0 is good, so check the remaining bytes */
216	for(i=1; i<len; ++i){
217	/* if the byte is outside the allowed range for this definition,
218	* return invalid */
219	if( (*z<*def++) || (*z++>*def++) ){
220

Fossil SCM

Keyboard Shortcuts