Fossil SCM

Merge commit warning and looks_like_text() enhancements to trunk. Further changes based on these will occur on a branch.

mistachkin 2012-11-01 03:44 trunk merge

Commit 618258421767778c41b643302f73e82954946b89

Parent fc0bffd995d8ee0…

2 files changed +10 -6 +22 -11

~ src/checkin.c ~ src/diff.c

M src/checkin.c

+10 -6

		--- src/checkin.c
		+++ src/checkin.c
		@@ -887,22 +887,26 @@
887	887	** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888	888	** is seen in a text file.
889	889	*/
890	890	static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
891	891	int eType; /* return value of looks_like_text() */
	892	+ int fUnicode; /* return value of starts_with_utf16_bom() */
892	893	char *zMsg; /* Warning message */
893	894	Blob fname; /* Relative pathname of the file */
894	895	static int allOk = 0; /* Set to true to disable this routine */
895	896
896	897	if( allOk ) return;
897	898	eType = looks_like_text(p);
898		- if( eType<0 ){
899		- const char *zWarning ;
	899	+ fUnicode = starts_with_utf16_bom(p);
	900	+ if( eType==-1 || fUnicode ){
	901	+ const char *zWarning;
900	902	Blob ans;
901	903	char cReply;
902	904
903		- if( eType&1 ){
	905	+ if( eType==-1 && fUnicode ){
	906	+ zWarning = "Unicode and CR/NL line endings";
	907	+ }else if( eType==-1 ){
904	908	if( crnlOk ){
905	909	return; /* We don't want CR/NL warnings for this file. */
906	910	}
907	911	zWarning = "CR/NL line endings";
908	912	}else{
		@@ -909,20 +913,20 @@
909	913	zWarning = "Unicode";
910	914	}
911	915	file_relative_name(zFilename, &fname, 0);
912	916	blob_zero(&ans);
913	917	zMsg = mprintf(
914		- "%s contains %s. commit anyhow (a=all/y/N)? ",
915		- blob_str(&fname), zWarning );
	918	+ "%s contains %s; commit anyhow (a=all/y/N)?",
	919	+ blob_str(&fname), zWarning);
916	920	prompt_user(zMsg, &ans);
917	921	fossil_free(zMsg);
918	922	cReply = blob_str(&ans)[0];
919	923	if( cReply=='a' || cReply=='A' ){
920	924	allOk = 1;
921	925	}else if( cReply!='y' && cReply!='Y' ){
922	926	fossil_fatal("Abandoning commit due to %s in %s",
923		- zWarning , blob_str(&fname));
	927	+ zWarning, blob_str(&fname));
924	928	}
925	929	blob_reset(&ans);
926	930	blob_reset(&fname);
927	931	}
928	932	}
929	933

	--- src/checkin.c
	+++ src/checkin.c
	@@ -887,22 +887,26 @@
887	** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888	** is seen in a text file.
889	*/
890	static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
891	int eType; /* return value of looks_like_text() */

892	char *zMsg; /* Warning message */
893	Blob fname; /* Relative pathname of the file */
894	static int allOk = 0; /* Set to true to disable this routine */
895
896	if( allOk ) return;
897	eType = looks_like_text(p);
898	if( eType<0 ){
899	const char *zWarning ;

900	Blob ans;
901	char cReply;
902
903	if( eType&1 ){


904	if( crnlOk ){
905	return; /* We don't want CR/NL warnings for this file. */
906	}
907	zWarning = "CR/NL line endings";
908	}else{
	@@ -909,20 +913,20 @@
909	zWarning = "Unicode";
910	}
911	file_relative_name(zFilename, &fname, 0);
912	blob_zero(&ans);
913	zMsg = mprintf(
914	"%s contains %s. commit anyhow (a=all/y/N)? ",
915	blob_str(&fname), zWarning );
916	prompt_user(zMsg, &ans);
917	fossil_free(zMsg);
918	cReply = blob_str(&ans)[0];
919	if( cReply=='a' || cReply=='A' ){
920	allOk = 1;
921	}else if( cReply!='y' && cReply!='Y' ){
922	fossil_fatal("Abandoning commit due to %s in %s",
923	zWarning , blob_str(&fname));
924	}
925	blob_reset(&ans);
926	blob_reset(&fname);
927	}
928	}
929

	--- src/checkin.c
	+++ src/checkin.c
	@@ -887,22 +887,26 @@
887	** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888	** is seen in a text file.
889	*/
890	static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
891	int eType; /* return value of looks_like_text() */
892	int fUnicode; /* return value of starts_with_utf16_bom() */
893	char *zMsg; /* Warning message */
894	Blob fname; /* Relative pathname of the file */
895	static int allOk = 0; /* Set to true to disable this routine */
896
897	if( allOk ) return;
898	eType = looks_like_text(p);
899	fUnicode = starts_with_utf16_bom(p);
900	if( eType==-1 || fUnicode ){
901	const char *zWarning;
902	Blob ans;
903	char cReply;
904
905	if( eType==-1 && fUnicode ){
906	zWarning = "Unicode and CR/NL line endings";
907	}else if( eType==-1 ){
908	if( crnlOk ){
909	return; /* We don't want CR/NL warnings for this file. */
910	}
911	zWarning = "CR/NL line endings";
912	}else{
	@@ -909,20 +913,20 @@
913	zWarning = "Unicode";
914	}
915	file_relative_name(zFilename, &fname, 0);
916	blob_zero(&ans);
917	zMsg = mprintf(
918	"%s contains %s; commit anyhow (a=all/y/N)?",
919	blob_str(&fname), zWarning);
920	prompt_user(zMsg, &ans);
921	fossil_free(zMsg);
922	cReply = blob_str(&ans)[0];
923	if( cReply=='a' || cReply=='A' ){
924	allOk = 1;
925	}else if( cReply!='y' && cReply!='Y' ){
926	fossil_fatal("Abandoning commit due to %s in %s",
927	zWarning, blob_str(&fname));
928	}
929	blob_reset(&ans);
930	blob_reset(&fname);
931	}
932	}
933

M src/diff.c

+22 -11

		--- src/diff.c
		+++ src/diff.c
		@@ -48,11 +48,11 @@
48	48	"cannot compute difference between binary files\n"
49	49
50	50	#define DIFF_CANNOT_COMPUTE_SYMLINK \
51	51	"cannot compute difference between symlink and regular file\n"
52	52
53		-#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
	53	+#define looks_like_binary(blob) (looks_like_text((blob)) == 0)
54	54	#endif /* INTERFACE */
55	55
56	56	/*
57	57	** Maximum length of a line in a text file. (8192)
58	58	*/
		@@ -179,18 +179,18 @@
179	179	** (1) -- The content appears to consist entirely of text, with lines
180	180	** delimited by line-feed characters; however, the encoding may
181	181	** not be UTF-8.
182	182	**
183	183	** (0) -- The content appears to be binary because it contains embedded
184		-** NUL (\000) characters or an extremely long line.
	184	+** NUL (\000) characters or an extremely long line. Since this
	185	+** function does not understand UTF-16, it may falsely consider
	186	+** UTF-16 text to be binary.
185	187	**
186	188	** (-1) -- The content appears to consist entirely of text, with lines
187	189	** delimited by carriage-return, line-feed pairs; however, the
188	190	** encoding may not be UTF-8.
189	191	**
190		-** (-2) -- The content appears to consist entirely of text, in the
191		-** UTF-16 (BE or LE) encoding.
192	192	*/
193	193	int looks_like_text(const Blob *pContent){
194	194	const char *z = blob_buffer(pContent);
195	195	unsigned int n = blob_size(pContent);
196	196	int j, c;
		@@ -199,17 +199,10 @@
199	199	/* Check individual lines.
200	200	*/
201	201	if( n==0 ) return result; /* Empty file -> text */
202	202	c = *z;
203	203	if( c==0 ) return 0; /* \000 byte in a file -> binary */
204		- if ( n > 1 ){
205		- if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
206		- return -2;
207		- } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
208		- return -2;
209		- }
210		- }
211	204	j = (c!='\n');
212	205	while( --n>0 ){
213	206	c = *++z; ++j;
214	207	if( c==0 ) return 0; /* \000 byte in a file -> binary */
215	208	if( c=='\n' ){
		@@ -225,10 +218,28 @@
225	218	if( j>LENGTH_MASK ){
226	219	return 0; /* Very long line -> binary */
227	220	}
228	221	return result; /* No problems seen -> not binary */
229	222	}
	223	+
	224	+/*
	225	+** This function returns non-zero if the blob starts with a UTF-16le
	226	+** (FF FE) or UTF-16be (FE FF) byte-order-mark (BOM), and zero otherwise.
	227	+*/
	228	+int starts_with_utf16_bom(const Blob *pContent){
	229	+ const char *z = blob_buffer(pContent);
	230	+ int c1, c2;
	231	+
	232	+ if( blob_size(pContent)<2 ) return 0; /* Too short to hold a BOM */
	233	+ c1 = z[0]; c2 = z[1];
	234	+ if( (c1==(char)0xff) && (c2==(char)0xfe) ){
	235	+ return 1; /* UTF-16le BOM */
	236	+ }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
	237	+ return 1; /* UTF-16be BOM */
	238	+ }
	239	+ return 0;
	240	+}
230	241
231	242	/*
232	243	** Return true if two DLine elements are identical.
233	244	*/
234	245	static int same_dline(DLine *pA, DLine *pB){
235	246

	--- src/diff.c
	+++ src/diff.c
	@@ -48,11 +48,11 @@
48	"cannot compute difference between binary files\n"
49
50	#define DIFF_CANNOT_COMPUTE_SYMLINK \
51	"cannot compute difference between symlink and regular file\n"
52
53	#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
54	#endif /* INTERFACE */
55
56	/*
57	** Maximum length of a line in a text file. (8192)
58	*/
	@@ -179,18 +179,18 @@
179	** (1) -- The content appears to consist entirely of text, with lines
180	** delimited by line-feed characters; however, the encoding may
181	** not be UTF-8.
182	**
183	** (0) -- The content appears to be binary because it contains embedded
184	** NUL (\000) characters or an extremely long line.


185	**
186	** (-1) -- The content appears to consist entirely of text, with lines
187	** delimited by carriage-return, line-feed pairs; however, the
188	** encoding may not be UTF-8.
189	**
190	** (-2) -- The content appears to consist entirely of text, in the
191	** UTF-16 (BE or LE) encoding.
192	*/
193	int looks_like_text(const Blob *pContent){
194	const char *z = blob_buffer(pContent);
195	unsigned int n = blob_size(pContent);
196	int j, c;
	@@ -199,17 +199,10 @@
199	/* Check individual lines.
200	*/
201	if( n==0 ) return result; /* Empty file -> text */
202	c = *z;
203	if( c==0 ) return 0; /* \000 byte in a file -> binary */
204	if ( n > 1 ){
205	if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
206	return -2;
207	} else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
208	return -2;
209	}
210	}
211	j = (c!='\n');
212	while( --n>0 ){
213	c = *++z; ++j;
214	if( c==0 ) return 0; /* \000 byte in a file -> binary */
215	if( c=='\n' ){
	@@ -225,10 +218,28 @@
225	if( j>LENGTH_MASK ){
226	return 0; /* Very long line -> binary */
227	}
228	return result; /* No problems seen -> not binary */
229	}


















230
231	/*
232	** Return true if two DLine elements are identical.
233	*/
234	static int same_dline(DLine *pA, DLine *pB){
235

	--- src/diff.c
	+++ src/diff.c
	@@ -48,11 +48,11 @@
48	"cannot compute difference between binary files\n"
49
50	#define DIFF_CANNOT_COMPUTE_SYMLINK \
51	"cannot compute difference between symlink and regular file\n"
52
53	#define looks_like_binary(blob) (looks_like_text((blob)) == 0)
54	#endif /* INTERFACE */
55
56	/*
57	** Maximum length of a line in a text file. (8192)
58	*/
	@@ -179,18 +179,18 @@
179	** (1) -- The content appears to consist entirely of text, with lines
180	** delimited by line-feed characters; however, the encoding may
181	** not be UTF-8.
182	**
183	** (0) -- The content appears to be binary because it contains embedded
184	** NUL (\000) characters or an extremely long line. Since this
185	** function does not understand UTF-16, it may falsely consider
186	** UTF-16 text to be binary.
187	**
188	** (-1) -- The content appears to consist entirely of text, with lines
189	** delimited by carriage-return, line-feed pairs; however, the
190	** encoding may not be UTF-8.
191	**


192	*/
193	int looks_like_text(const Blob *pContent){
194	const char *z = blob_buffer(pContent);
195	unsigned int n = blob_size(pContent);
196	int j, c;
	@@ -199,17 +199,10 @@
199	/* Check individual lines.
200	*/
201	if( n==0 ) return result; /* Empty file -> text */
202	c = *z;
203	if( c==0 ) return 0; /* \000 byte in a file -> binary */







204	j = (c!='\n');
205	while( --n>0 ){
206	c = *++z; ++j;
207	if( c==0 ) return 0; /* \000 byte in a file -> binary */
208	if( c=='\n' ){
	@@ -225,10 +218,28 @@
218	if( j>LENGTH_MASK ){
219	return 0; /* Very long line -> binary */
220	}
221	return result; /* No problems seen -> not binary */
222	}
223
224	/*
225	** This function returns non-zero if the blob starts with a UTF-16le
226	** (FF FE) or UTF-16be (FE FF) byte-order-mark (BOM), and zero otherwise.
227	*/
228	int starts_with_utf16_bom(const Blob *pContent){
229	const char *z = blob_buffer(pContent);
230	int c1, c2;
231
232	if( blob_size(pContent)<2 ) return 0; /* Too short to hold a BOM */
233	c1 = z[0]; c2 = z[1];
234	if( (c1==(char)0xff) && (c2==(char)0xfe) ){
235	return 1; /* UTF-16le BOM */
236	}else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
237	return 1; /* UTF-16be BOM */
238	}
239	return 0;
240	}
241
242	/*
243	** Return true if two DLine elements are identical.
244	*/
245	static int same_dline(DLine *pA, DLine *pB){
246

Fossil SCM

Keyboard Shortcuts