Fossil SCM

Divide blob length check (even number of bytes) and UTF-32 check in the 3 versions of the UTF-16 BOM functions.

jan.nijtmans 2013-02-07 15:28 trunk

Commit be6756e26b9c150e9126418cc57a0fbb66079773

Parent 43c452262344bf1…

1 file changed +21 -17

M src/diff.c

+21 -17

		--- src/diff.c
		+++ src/diff.c
		@@ -359,19 +359,19 @@
359	359	** This function returns non-zero if the blob starts with a UTF-16le or
360	360	** UTF-16be byte-order-mark (BOM).
361	361	*/
362	362	int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
363	363	const char *z = blob_buffer(pContent);
364		- int c1, c2;
	364	+ int c1;
365	365
366	366	if( pnByte ) *pnByte = 2;
367		- if( blob_size(pContent)<2 ) return 0;
368		- c1 = z[0]; c2 = z[1];
369		- if( (c1==(char)0xff) && (c2==(char)0xfe) ){
370		- return 1;
371		- }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
372		- return 1;
	367	+ if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
	368	+ c1 = ((unsigned short *)z)[0];
	369	+ if( (c1==0xfeff) || (c1==0xfffe) ){
	370	+ if( blob_size(pContent) < 4 ) return 1;
	371	+ c1 = ((unsigned short *)z)[1];
	372	+ if( c1 != 0 ) return 1;
373	373	}
374	374	return 0;
375	375	}
376	376
377	377	/*
		@@ -378,17 +378,19 @@
378	378	** This function returns non-zero if the blob starts with a UTF-16le
379	379	** byte-order-mark (BOM).
380	380	*/
381	381	int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
382	382	const char *z = blob_buffer(pContent);
383		- int c1, c2;
	383	+ int c1;
384	384
385	385	if( pnByte ) *pnByte = 2;
386		- if( blob_size(pContent)<2 ) return 0;
387		- c1 = z[0]; c2 = z[1];
388		- if( (c1==(char)0xff) && (c2==(char)0xfe) ){
389		- return 1;
	386	+ if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
	387	+ c1 = ((unsigned short *)z)[0];
	388	+ if( c1==0xfeff ){
	389	+ if( blob_size(pContent) < 4 ) return 1;
	390	+ c1 = ((unsigned short *)z)[1];
	391	+ if( c1 != 0 ) return 1;
390	392	}
391	393	return 0;
392	394	}
393	395
394	396	/*
		@@ -395,17 +397,19 @@
395	397	** This function returns non-zero if the blob starts with a UTF-16be
396	398	** byte-order-mark (BOM).
397	399	*/
398	400	int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
399	401	const char *z = blob_buffer(pContent);
400		- int c1, c2;
	402	+ int c1;
401	403
402	404	if( pnByte ) *pnByte = 2;
403		- if( blob_size(pContent)<2 ) return 0;
404		- c1 = z[0]; c2 = z[1];
405		- if( (c1==(char)0xfe) && (c2==(char)0xff) ){
406		- return 1;
	405	+ if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
	406	+ c1 = ((unsigned short *)z)[0];
	407	+ if( c1==0xfffe ){
	408	+ if( blob_size(pContent) < 4 ) return 1;
	409	+ c1 = ((unsigned short *)z)[1];
	410	+ if( c1 != 0 ) return 1;
407	411	}
408	412	return 0;
409	413	}
410	414
411	415	/*
412	416

	--- src/diff.c
	+++ src/diff.c
	@@ -359,19 +359,19 @@
359	** This function returns non-zero if the blob starts with a UTF-16le or
360	** UTF-16be byte-order-mark (BOM).
361	*/
362	int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
363	const char *z = blob_buffer(pContent);
364	int c1, c2;
365
366	if( pnByte ) *pnByte = 2;
367	if( blob_size(pContent)<2 ) return 0;
368	c1 = z[0]; c2 = z[1];
369	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
370	return 1;
371	}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
372	return 1;
373	}
374	return 0;
375	}
376
377	/*
	@@ -378,17 +378,19 @@
378	** This function returns non-zero if the blob starts with a UTF-16le
379	** byte-order-mark (BOM).
380	*/
381	int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
382	const char *z = blob_buffer(pContent);
383	int c1, c2;
384
385	if( pnByte ) *pnByte = 2;
386	if( blob_size(pContent)<2 ) return 0;
387	c1 = z[0]; c2 = z[1];
388	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
389	return 1;


390	}
391	return 0;
392	}
393
394	/*
	@@ -395,17 +397,19 @@
395	** This function returns non-zero if the blob starts with a UTF-16be
396	** byte-order-mark (BOM).
397	*/
398	int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
399	const char *z = blob_buffer(pContent);
400	int c1, c2;
401
402	if( pnByte ) *pnByte = 2;
403	if( blob_size(pContent)<2 ) return 0;
404	c1 = z[0]; c2 = z[1];
405	if( (c1==(char)0xfe) && (c2==(char)0xff) ){
406	return 1;


407	}
408	return 0;
409	}
410
411	/*
412

	--- src/diff.c
	+++ src/diff.c
	@@ -359,19 +359,19 @@
359	** This function returns non-zero if the blob starts with a UTF-16le or
360	** UTF-16be byte-order-mark (BOM).
361	*/
362	int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
363	const char *z = blob_buffer(pContent);
364	int c1;
365
366	if( pnByte ) *pnByte = 2;
367	if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368	c1 = ((unsigned short *)z)[0];
369	if( (c1==0xfeff) || (c1==0xfffe) ){
370	if( blob_size(pContent) < 4 ) return 1;
371	c1 = ((unsigned short *)z)[1];
372	if( c1 != 0 ) return 1;
373	}
374	return 0;
375	}
376
377	/*
	@@ -378,17 +378,19 @@
378	** This function returns non-zero if the blob starts with a UTF-16le
379	** byte-order-mark (BOM).
380	*/
381	int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
382	const char *z = blob_buffer(pContent);
383	int c1;
384
385	if( pnByte ) *pnByte = 2;
386	if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
387	c1 = ((unsigned short *)z)[0];
388	if( c1==0xfeff ){
389	if( blob_size(pContent) < 4 ) return 1;
390	c1 = ((unsigned short *)z)[1];
391	if( c1 != 0 ) return 1;
392	}
393	return 0;
394	}
395
396	/*
	@@ -395,17 +397,19 @@
397	** This function returns non-zero if the blob starts with a UTF-16be
398	** byte-order-mark (BOM).
399	*/
400	int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
401	const char *z = blob_buffer(pContent);
402	int c1;
403
404	if( pnByte ) *pnByte = 2;
405	if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
406	c1 = ((unsigned short *)z)[0];
407	if( c1==0xfffe ){
408	if( blob_size(pContent) < 4 ) return 1;
409	c1 = ((unsigned short *)z)[1];
410	if( c1 != 0 ) return 1;
411	}
412	return 0;
413	}
414
415	/*
416

Fossil SCM

Keyboard Shortcuts