Fossil SCM

New warning when file is considered binary due to long lines only.

jan.nijtmans 2013-03-03 15:32 trunk

Commit 3a74f9fe52c3a6b1e97a03cf8a0e5171d0e88f7a

Parent 69fe4237cee86c0…

2 files changed +25 +27 -18

M src/checkin.c

+25

		--- src/checkin.c
		+++ src/checkin.c
		@@ -909,10 +909,35 @@
909	909	static int allOk = 0; /* Set to true to disable this routine */
910	910
911	911	if( allOk ) return 0;
912	912	fUnicode = starts_with_utf16_bom(p, 0, 0);
913	913	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
	914	+ if( eType==-4){
	915	+ const char *zWarning;
	916	+ const char *zDisable;
	917	+ Blob ans;
	918	+ char cReply;
	919	+
	920	+ if (!binOk) {
	921	+ zWarning = "long lines";
	922	+ zDisable = "\"binary-glob\" setting";
	923	+ blob_zero(&ans);
	924	+ file_relative_name(zFilename, &fname, 0);
	925	+ zMsg = mprintf(
	926	+ "%s appears to be text, but contains %s. Use --no-warnings or the"
	927	+ " %s to disable this warning.\nCommit anyhow (a=all/y/N)? ",
	928	+ blob_str(&fname), zWarning, zDisable);
	929	+ prompt_user(zMsg, &ans);
	930	+ fossil_free(zMsg);
	931	+ cReply = blob_str(&ans)[0];
	932	+ if( cReply!='y' && cReply!='Y' ){
	933	+ fossil_fatal("Abandoning commit due to %s in %s",
	934	+ zWarning, blob_str(&fname));
	935	+ }
	936	+ blob_reset(&ans);
	937	+ }
	938	+ }
914	939	if( eType==0 \|\| eType==-1 \|\| fUnicode ){
915	940	const char *zWarning;
916	941	const char *zDisable;
917	942	const char *zConvert = "c=convert/";
918	943	Blob ans;
919	944

	--- src/checkin.c
	+++ src/checkin.c
	@@ -909,10 +909,35 @@
909	static int allOk = 0; /* Set to true to disable this routine */
910
911	if( allOk ) return 0;
912	fUnicode = starts_with_utf16_bom(p, 0, 0);
913	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);

























914	if( eType==0 \|\| eType==-1 \|\| fUnicode ){
915	const char *zWarning;
916	const char *zDisable;
917	const char *zConvert = "c=convert/";
918	Blob ans;
919

	--- src/checkin.c
	+++ src/checkin.c
	@@ -909,10 +909,35 @@
909	static int allOk = 0; /* Set to true to disable this routine */
910
911	if( allOk ) return 0;
912	fUnicode = starts_with_utf16_bom(p, 0, 0);
913	eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
914	if( eType==-4){
915	const char *zWarning;
916	const char *zDisable;
917	Blob ans;
918	char cReply;
919
920	if (!binOk) {
921	zWarning = "long lines";
922	zDisable = "\"binary-glob\" setting";
923	blob_zero(&ans);
924	file_relative_name(zFilename, &fname, 0);
925	zMsg = mprintf(
926	"%s appears to be text, but contains %s. Use --no-warnings or the"
927	" %s to disable this warning.\nCommit anyhow (a=all/y/N)? ",
928	blob_str(&fname), zWarning, zDisable);
929	prompt_user(zMsg, &ans);
930	fossil_free(zMsg);
931	cReply = blob_str(&ans)[0];
932	if( cReply!='y' && cReply!='Y' ){
933	fossil_fatal("Abandoning commit due to %s in %s",
934	zWarning, blob_str(&fname));
935	}
936	blob_reset(&ans);
937	}
938	}
939	if( eType==0 \|\| eType==-1 \|\| fUnicode ){
940	const char *zWarning;
941	const char *zDisable;
942	const char *zConvert = "c=convert/";
943	Blob ans;
944

M src/diff.c

+27 -18

		--- src/diff.c
		+++ src/diff.c
		@@ -57,11 +57,11 @@
57	57	"more than 10,000 changes\n"
58	58
59	59	#define DIFF_TOO_MANY_CHANGES_HTML \
60	60	"<p class='generalError'>More than 10,000 changes</p>\n"
61	61
62		-#define looks_like_binary(blob) (looks_like_utf8((blob)) == 0)
	62	+#define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0)
63	63	#endif /* INTERFACE */
64	64
65	65	/*
66	66	** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67	67	*/
		@@ -198,10 +198,14 @@
198	198	** to be binary.
199	199	**
200	200	** (-1) -- The content appears to consist entirely of text, with lines
201	201	** delimited by carriage-return, line-feed pairs; however, the
202	202	** encoding may not be UTF-8.
	203	+**
	204	+** (-4) -- The same as 0, but the determination is based on the fact that
	205	+** the blob might be text (any encoding) but it has a line length
	206	+** bigger than the diff logic in fossil can handle.
203	207	**
204	208	********************************** WARNING ********************************
205	209	**
206	210	** This function does not validate that the blob content is properly formed
207	211	** UTF-8. It assumes that all code points are the same size. It does not
		@@ -215,36 +219,37 @@
215	219	*/
216	220	int looks_like_utf8(const Blob *pContent){
217	221	const char *z = blob_buffer(pContent);
218	222	unsigned int n = blob_size(pContent);
219	223	int j, c;
220		- int result = 1; /* Assume UTF-8 text with no CR/NL */
	224	+ int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
221	225
222	226	/* Check individual lines.
223	227	*/
224		- if( n==0 ) return result; /* Empty file -> text */
	228	+ if( n==0 ) return 1; /* Empty file -> text */
225	229	c = *z;
226		- if( c==0 ) return 0; /* Zero byte in a file -> binary */
227	230	j = (c!='\n');
	231	+ if( c==0 ){
	232	+ return 0; /* Zero byte in a file -> binary */
	233	+ }
228	234	while( --n>0 ){
229	235	c = *++z; ++j;
230	236	if( c==0 ) return 0; /* Zero byte in a file -> binary */
231	237	if( c=='\n' ){
232		- int c2 = z[-1];
233		- if( c2=='\r' ){
234		- result = -1; /* Contains CR/NL, continue */
	238	+ if( z[-1]=='\r' ){
	239	+ flags \|= 2; /* Contains CR/NL, continue */
235	240	}
236	241	if( j>LENGTH_MASK ){
237		- return 0; /* Very long line -> binary */
	242	+ flags \|= 1; /* Very long line, continue */
238	243	}
239	244	j = 0;
240	245	}
241	246	}
242		- if( j>LENGTH_MASK ){
243		- return 0; /* Very long line -> binary */
	247	+ if( (flags&1) \|\| (j>LENGTH_MASK) ){
	248	+ return -4; /* Very long line -> binary */
244	249	}
245		- return result; /* No problems seen -> not binary */
	250	+ return 1-flags; /* No problems seen -> not binary */
246	251	}
247	252
248	253	/*
249	254	** Define the type needed to represent a Unicode (UTF-16) character.
250	255	*/
		@@ -288,10 +293,14 @@
288	293	** to be binary.
289	294	**
290	295	** (-1) -- The content appears to consist entirely of text, with lines
291	296	** delimited by carriage-return, line-feed pairs; however, the
292	297	** encoding may not be UTF-16.
	298	+**
	299	+** (-4) -- The same as 0, but the determination is based on the fact that
	300	+** the blob might be text (any encoding) but it has a line length
	301	+** bigger than the diff logic in fossil can handle.
293	302	**
294	303	********************************** WARNING ********************************
295	304	**
296	305	** This function does not validate that the blob content is properly formed
297	306	** UTF-16. It assumes that all code points are the same size. It does not
		@@ -305,15 +314,15 @@
305	314	*/
306	315	int looks_like_utf16(const Blob *pContent){
307	316	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
308	317	unsigned int n = blob_size(pContent);
309	318	int j, c;
310		- int result = 1; /* Assume UTF-16 text with no CR/NL */
	319	+ int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
311	320
312	321	/* Check individual lines.
313	322	*/
314		- if( n==0 ) return result; /* Empty file -> text */
	323	+ if( n==0 ) return 1; /* Empty file -> text */
315	324	if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
316	325	c = *z;
317	326	if( c==0 ) return 0; /* NUL character in a file -> binary */
318	327	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
319	328	while( (n-=2)>0 ){
		@@ -320,22 +329,22 @@
320	329	c = *++z; ++j;
321	330	if( c==0 ) return 0; /* NUL character in a file -> binary */
322	331	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
323	332	int c2 = z[-1];
324	333	if( c2==UTF16BE_CR \|\| c2==UTF16LE_CR ){
325		- result = -1; /* Contains CR/NL, continue */
	334	+ flags \|= 2; /* Contains CR/NL, continue */
326	335	}
327	336	if( j>UTF16_LENGTH_MASK ){
328		- return 0; /* Very long line -> binary */
	337	+ flags \|= 1; /* Very long line, continue */
329	338	}
330	339	j = 0;
331	340	}
332	341	}
333		- if( j>UTF16_LENGTH_MASK ){
334		- return 0; /* Very long line -> binary */
	342	+ if( (flags&1) \|\| (j>LENGTH_MASK) ){
	343	+ return -4; /* Very long line -> binary */
335	344	}
336		- return result; /* No problems seen -> not binary */
	345	+ return 1-flags; /* No problems seen -> not binary */
337	346	}
338	347
339	348	/*
340	349	** This function returns an array of bytes representing the byte-order-mark
341	350	** for UTF-8.
342	351

	--- src/diff.c
	+++ src/diff.c
	@@ -57,11 +57,11 @@
57	"more than 10,000 changes\n"
58
59	#define DIFF_TOO_MANY_CHANGES_HTML \
60	"<p class='generalError'>More than 10,000 changes</p>\n"
61
62	#define looks_like_binary(blob) (looks_like_utf8((blob)) == 0)
63	#endif /* INTERFACE */
64
65	/*
66	** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67	*/
	@@ -198,10 +198,14 @@
198	** to be binary.
199	**
200	** (-1) -- The content appears to consist entirely of text, with lines
201	** delimited by carriage-return, line-feed pairs; however, the
202	** encoding may not be UTF-8.




203	**
204	********************************** WARNING ********************************
205	**
206	** This function does not validate that the blob content is properly formed
207	** UTF-8. It assumes that all code points are the same size. It does not
	@@ -215,36 +219,37 @@
215	*/
216	int looks_like_utf8(const Blob *pContent){
217	const char *z = blob_buffer(pContent);
218	unsigned int n = blob_size(pContent);
219	int j, c;
220	int result = 1; /* Assume UTF-8 text with no CR/NL */
221
222	/* Check individual lines.
223	*/
224	if( n==0 ) return result; /* Empty file -> text */
225	c = *z;
226	if( c==0 ) return 0; /* Zero byte in a file -> binary */
227	j = (c!='\n');



228	while( --n>0 ){
229	c = *++z; ++j;
230	if( c==0 ) return 0; /* Zero byte in a file -> binary */
231	if( c=='\n' ){
232	int c2 = z[-1];
233	if( c2=='\r' ){
234	result = -1; /* Contains CR/NL, continue */
235	}
236	if( j>LENGTH_MASK ){
237	return 0; /* Very long line -> binary */
238	}
239	j = 0;
240	}
241	}
242	if( j>LENGTH_MASK ){
243	return 0; /* Very long line -> binary */
244	}
245	return result; /* No problems seen -> not binary */
246	}
247
248	/*
249	** Define the type needed to represent a Unicode (UTF-16) character.
250	*/
	@@ -288,10 +293,14 @@
288	** to be binary.
289	**
290	** (-1) -- The content appears to consist entirely of text, with lines
291	** delimited by carriage-return, line-feed pairs; however, the
292	** encoding may not be UTF-16.




293	**
294	********************************** WARNING ********************************
295	**
296	** This function does not validate that the blob content is properly formed
297	** UTF-16. It assumes that all code points are the same size. It does not
	@@ -305,15 +314,15 @@
305	*/
306	int looks_like_utf16(const Blob *pContent){
307	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
308	unsigned int n = blob_size(pContent);
309	int j, c;
310	int result = 1; /* Assume UTF-16 text with no CR/NL */
311
312	/* Check individual lines.
313	*/
314	if( n==0 ) return result; /* Empty file -> text */
315	if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
316	c = *z;
317	if( c==0 ) return 0; /* NUL character in a file -> binary */
318	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
319	while( (n-=2)>0 ){
	@@ -320,22 +329,22 @@
320	c = *++z; ++j;
321	if( c==0 ) return 0; /* NUL character in a file -> binary */
322	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
323	int c2 = z[-1];
324	if( c2==UTF16BE_CR \|\| c2==UTF16LE_CR ){
325	result = -1; /* Contains CR/NL, continue */
326	}
327	if( j>UTF16_LENGTH_MASK ){
328	return 0; /* Very long line -> binary */
329	}
330	j = 0;
331	}
332	}
333	if( j>UTF16_LENGTH_MASK ){
334	return 0; /* Very long line -> binary */
335	}
336	return result; /* No problems seen -> not binary */
337	}
338
339	/*
340	** This function returns an array of bytes representing the byte-order-mark
341	** for UTF-8.
342

	--- src/diff.c
	+++ src/diff.c
	@@ -57,11 +57,11 @@
57	"more than 10,000 changes\n"
58
59	#define DIFF_TOO_MANY_CHANGES_HTML \
60	"<p class='generalError'>More than 10,000 changes</p>\n"
61
62	#define looks_like_binary(blob) ((looks_like_utf8((blob))&3) == 0)
63	#endif /* INTERFACE */
64
65	/*
66	** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
67	*/
	@@ -198,10 +198,14 @@
198	** to be binary.
199	**
200	** (-1) -- The content appears to consist entirely of text, with lines
201	** delimited by carriage-return, line-feed pairs; however, the
202	** encoding may not be UTF-8.
203	**
204	** (-4) -- The same as 0, but the determination is based on the fact that
205	** the blob might be text (any encoding) but it has a line length
206	** bigger than the diff logic in fossil can handle.
207	**
208	********************************** WARNING ********************************
209	**
210	** This function does not validate that the blob content is properly formed
211	** UTF-8. It assumes that all code points are the same size. It does not
	@@ -215,36 +219,37 @@
219	*/
220	int looks_like_utf8(const Blob *pContent){
221	const char *z = blob_buffer(pContent);
222	unsigned int n = blob_size(pContent);
223	int j, c;
224	int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
225
226	/* Check individual lines.
227	*/
228	if( n==0 ) return 1; /* Empty file -> text */
229	c = *z;

230	j = (c!='\n');
231	if( c==0 ){
232	return 0; /* Zero byte in a file -> binary */
233	}
234	while( --n>0 ){
235	c = *++z; ++j;
236	if( c==0 ) return 0; /* Zero byte in a file -> binary */
237	if( c=='\n' ){
238	if( z[-1]=='\r' ){
239	flags \|= 2; /* Contains CR/NL, continue */

240	}
241	if( j>LENGTH_MASK ){
242	flags \|= 1; /* Very long line, continue */
243	}
244	j = 0;
245	}
246	}
247	if( (flags&1) \|\| (j>LENGTH_MASK) ){
248	return -4; /* Very long line -> binary */
249	}
250	return 1-flags; /* No problems seen -> not binary */
251	}
252
253	/*
254	** Define the type needed to represent a Unicode (UTF-16) character.
255	*/
	@@ -288,10 +293,14 @@
293	** to be binary.
294	**
295	** (-1) -- The content appears to consist entirely of text, with lines
296	** delimited by carriage-return, line-feed pairs; however, the
297	** encoding may not be UTF-16.
298	**
299	** (-4) -- The same as 0, but the determination is based on the fact that
300	** the blob might be text (any encoding) but it has a line length
301	** bigger than the diff logic in fossil can handle.
302	**
303	********************************** WARNING ********************************
304	**
305	** This function does not validate that the blob content is properly formed
306	** UTF-16. It assumes that all code points are the same size. It does not
	@@ -305,15 +314,15 @@
314	*/
315	int looks_like_utf16(const Blob *pContent){
316	const WCHAR_T z = (WCHAR_T )blob_buffer(pContent);
317	unsigned int n = blob_size(pContent);
318	int j, c;
319	int flags = 0; /* bit 0 = long lines found, 1 = CR/NL found. */
320
321	/* Check individual lines.
322	*/
323	if( n==0 ) return 1; /* Empty file -> text */
324	if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
325	c = *z;
326	if( c==0 ) return 0; /* NUL character in a file -> binary */
327	j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
328	while( (n-=2)>0 ){
	@@ -320,22 +329,22 @@
329	c = *++z; ++j;
330	if( c==0 ) return 0; /* NUL character in a file -> binary */
331	if( c==UTF16BE_LF \|\| c==UTF16LE_LF ){
332	int c2 = z[-1];
333	if( c2==UTF16BE_CR \|\| c2==UTF16LE_CR ){
334	flags \|= 2; /* Contains CR/NL, continue */
335	}
336	if( j>UTF16_LENGTH_MASK ){
337	flags \|= 1; /* Very long line, continue */
338	}
339	j = 0;
340	}
341	}
342	if( (flags&1) \|\| (j>LENGTH_MASK) ){
343	return -4; /* Very long line -> binary */
344	}
345	return 1-flags; /* No problems seen -> not binary */
346	}
347
348	/*
349	** This function returns an array of bytes representing the byte-order-mark
350	** for UTF-8.
351

Fossil SCM

Keyboard Shortcuts