Fossil SCM

Improvements to the command-line comment formatter so that it works better with non-ASCII characters.

drh 2018-11-29 11:09 trunk merge

Commit 1c84a0c14ac3e00e830309881ebf89077a39de942b3f770a4a9fc3afc3bfc31e

Parent 7d034d34bac7521…

1 file changed +132 -28

M src/comformat.c

+132 -28

		--- src/comformat.c
		+++ src/comformat.c
		@@ -2,11 +2,11 @@
2	2	** Copyright (c) 2007 D. Richard Hipp
3	3	**
4	4	** This program is free software; you can redistribute it and/or
5	5	** modify it under the terms of the Simplified BSD License (also
6	6	** known as the "2-Clause License" or "FreeBSD License".)
7		-
	7	+**
8	8	** This program is distributed in the hope that it will be useful,
9	9	** but without any warranty; without even the implied warranty of
10	10	** merchantability or fitness for a particular purpose.
11	11	**
12	12	** Author contact information:
		@@ -95,21 +95,20 @@
95	95	#endif
96	96	}
97	97
98	98	/*
99	99	** This function checks the current line being printed against the original
100		-** comment text. Upon matching, it emits a new line and updates the provided
101		-** character and line counts, if applicable.
	100	+** comment text. Upon matching, it updates the provided character and line
	101	+** counts, if applicable. The caller needs to emit a new line, if desired.
102	102	*/
103	103	static int comment_check_orig(
104	104	const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
105	105	const char *zLine, /* [in] The comment line to print. */
106	106	int *pCharCnt, /* [in/out] Pointer to the line character count. */
107	107	int *pLineCnt /* [in/out] Pointer to the total line count. */
108	108	){
109	109	if( zOrigText && fossil_strcmp(zLine, zOrigText)==0 ){
110		- fossil_print("\n");
111	110	if( pCharCnt ) *pCharCnt = 0;
112	111	if( pLineCnt ) (*pLineCnt)++;
113	112	return 1;
114	113	}
115	114	return 0;
		@@ -121,37 +120,76 @@
121	120	** zero if such a character cannot be found. For the purposes of this
122	121	** algorithm, the NUL character is treated the same as a spacing character.
123	122	*/
124	123	static int comment_next_space(
125	124	const char *zLine, /* [in] The comment line being printed. */
126		- int index /* [in] The current character index being handled. */
	125	+ int index, /* [in] The current character index being handled. */
	126	+ int *distUTF8 /* [out] Distance to next space in UTF-8 sequences. */
127	127	){
128	128	int nextIndex = index + 1;
	129	+ int fNonASCII=0;
129	130	for(;;){
130	131	char c = zLine[nextIndex];
	132	+ if( (c&0x80)==0x80 ) fNonASCII=1;
131	133	if( c==0 || fossil_isspace(c) ){
	134	+ if( distUTF8 ){
	135	+ if( fNonASCII!=0 ){
	136	+ *distUTF8 = strlen_utf8(&zLine[index], nextIndex-index);
	137	+ }else{
	138	+ *distUTF8 = nextIndex-index;
	139	+ }
	140	+ }
132	141	return nextIndex;
133	142	}
134	143	nextIndex++;
135	144	}
136	145	return 0; /* NOT REACHED */
137	146	}
138	147
139	148	/*
140		-** This function is called when printing a logical comment line to perform
141		-** the necessary indenting.
	149	+** Count the number of UTF-8 sequences in a string. Incomplete, ill-formed and
	150	+** overlong sequences are counted as one sequence. The invalid lead bytes 0xC0
	151	+** to 0xC1 and 0xF5 to 0xF7 are allowed to initiate (ill-formed) 2- and 4-byte
	152	+** sequences, respectively, the other invalid lead bytes 0xF8 to 0xFF are
	153	+** treated as invalid 1-byte sequences (as lone trail bytes).
	154	+** Combining characters and East Asian Wide and Fullwidth characters are counted
	155	+** as one, so this function does not calculate the effective "display width".
	156	+*/
	157	+int strlen_utf8(const char *zString, int lengthBytes){
	158	+ int i; /* Counted bytes. */
	159	+ int lengthUTF8; /* Counted UTF-8 sequences. */
	160	+#if 0
	161	+ assert( lengthBytes>=0 );
	162	+#endif
	163	+ for(i=0, lengthUTF8=0; i<lengthBytes; i++, lengthUTF8++){
	164	+ char c = zString[i];
	165	+ int cchUTF8=1; /* Code units consumed. */
	166	+ int maxUTF8=1; /* Expected sequence length. */
	167	+ if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
	168	+ else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
	169	+ else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
	170	+ while( cchUTF8<maxUTF8 &&
	171	+ i<lengthBytes-1 &&
	172	+ (zString[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
	173	+ cchUTF8++;
	174	+ i++;
	175	+ }
	176	+ }
	177	+ return lengthUTF8;
	178	+}
	179	+
	180	+/*
	181	+** This function is called when printing a logical comment line to calculate
	182	+** the necessary indenting. The caller needs to emit the indenting spaces.
142	183	*/
143		-static void comment_print_indent(
	184	+static void comment_calc_indent(
144	185	const char *zLine, /* [in] The comment line being printed. */
145	186	int indent, /* [in] Number of spaces to indent, zero for none. */
146	187	int trimCrLf, /* [in] Non-zero to trim leading/trailing CR/LF. */
147	188	int trimSpace, /* [in] Non-zero to trim leading/trailing spaces. */
148	189	int *piIndex /* [in/out] Pointer to first non-space character. */
149	190	){
150		- if( indent>0 ){
151		- fossil_print("%*s", indent, "");
152		- }
153	191	if( zLine && piIndex ){
154	192	int index = *piIndex;
155	193	if( trimCrLf ){
156	194	while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
157	195	}
		@@ -179,26 +217,56 @@
179	217	int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */
180	218	int origBreak, /* [in] Non-zero to break before original comment. */
181	219	int *pLineCnt, /* [in/out] Pointer to the total line count. */
182	220	const char **pzLine /* [out] Pointer to the end of the logical line. */
183	221	){
184		- int index = 0, charCnt = 0, lineCnt = 0, maxChars;
	222	+ int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
	223	+ char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
	224	+ int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
185	225	if( !zLine ) return;
186	226	if( lineChars<=0 ) return;
187		- comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index);
	227	+#if 0
	228	+ assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */
	229	+ assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */
	230	+#endif
	231	+ if( indent>sizeof(zBuf)-6 ){
	232	+ /* Limit initial indent to fit output buffer. */
	233	+ indent = sizeof(zBuf)-6;
	234	+ }
	235	+ comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);
	236	+ if( indent>0 ){
	237	+ for(i=0; i<indent; i++){
	238	+ zBuf[iBuf++] = ' ';
	239	+ }
	240	+ }
	241	+ if( origIndent>sizeof(zBuf)-6 ){
	242	+ /* Limit line indent to fit output buffer. */
	243	+ origIndent = sizeof(zBuf)-6;
	244	+ }
188	245	maxChars = lineChars;
189	246	for(;;){
190	247	int useChars = 1;
191	248	char c = zLine[index];
	249	+ /* Flush the output buffer if there's no space left for at least one more
	250	+ ** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,
	251	+ ** a new line, and a terminating NULL. */
	252	+ if( iBuf>sizeof(zBuf)-origIndent-6 ){
	253	+ zBuf[iBuf]=0;
	254	+ iBuf=0;
	255	+ fossil_print("%s", zBuf);
	256	+ }
192	257	if( c==0 ){
193	258	break;
194	259	}else{
195	260	if( origBreak && index>0 ){
196	261	const char *zCurrent = &zLine[index];
197	262	if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
198		- comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace,
199		- &index);
	263	+ zBuf[iBuf++] = '\n';
	264	+ comment_calc_indent(zLine, origIndent, trimCrLf, trimSpace, &index);
	265	+ for( i=0; i<origIndent; i++ ){
	266	+ zBuf[iBuf++] = ' ';
	267	+ }
200	268	maxChars = lineChars;
201	269	}
202	270	}
203	271	index++;
204	272	}
		@@ -205,38 +273,57 @@
205	273	if( c=='\n' ){
206	274	lineCnt++;
207	275	charCnt = 0;
208	276	useChars = 0;
209	277	}else if( c=='\t' ){
210		- int nextIndex = comment_next_space(zLine, index);
211		- if( nextIndex<=0 || (nextIndex-index)>maxChars ){
	278	+ int distUTF8;
	279	+ int nextIndex = comment_next_space(zLine, index, &distUTF8);
	280	+ if( nextIndex<=0 || distUTF8>maxChars ){
212	281	break;
213	282	}
214	283	charCnt++;
215	284	useChars = COMMENT_TAB_WIDTH;
216	285	if( maxChars<useChars ){
217		- fossil_print(" ");
	286	+ zBuf[iBuf++] = ' ';
218	287	break;
219	288	}
220	289	}else if( wordBreak && fossil_isspace(c) ){
221		- int nextIndex = comment_next_space(zLine, index);
222		- if( nextIndex<=0 || (nextIndex-index)>maxChars ){
	290	+ int distUTF8;
	291	+ int nextIndex = comment_next_space(zLine, index, &distUTF8);
	292	+ if( nextIndex<=0 || distUTF8>maxChars ){
223	293	break;
224	294	}
225	295	charCnt++;
226	296	}else{
227	297	charCnt++;
228	298	}
229	299	assert( c!='\n' || charCnt==0 );
230		- fossil_print("%c", c);
231		- if( (c&0x80)==0 \|\| (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;
	300	+ zBuf[iBuf++] = c;
	301	+ /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
	302	+ cchUTF8=1; /* Code units consumed. */
	303	+ maxUTF8=1; /* Expected sequence length. */
	304	+ if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
	305	+ else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
	306	+ else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
	307	+ while( cchUTF8<maxUTF8 &&
	308	+ (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
	309	+ cchUTF8++;
	310	+ zBuf[iBuf++] = zLine[index++];
	311	+ }
	312	+ maxChars -= useChars;
232	313	if( maxChars<=0 ) break;
233	314	if( c=='\n' ) break;
234	315	}
235	316	if( charCnt>0 ){
236		- fossil_print("\n");
	317	+ zBuf[iBuf++] = '\n';
237	318	lineCnt++;
	319	+ }
	320	+ /* Flush the remaining output buffer. */
	321	+ if( iBuf>0 ){
	322	+ zBuf[iBuf]=0;
	323	+ iBuf=0;
	324	+ fossil_print("%s", zBuf);
238	325	}
239	326	if( pLineCnt ){
240	327	*pLineCnt += lineCnt;
241	328	}
242	329	if( pzLine ){
		@@ -259,25 +346,27 @@
259	346	const char *zText, /* The comment text to be printed. */
260	347	int indent, /* Number of spaces to indent each non-initial line. */
261	348	int width /* Maximum number of characters per line. */
262	349	){
263	350	int maxChars = width - indent;
264		- int si, sk, i, k;
	351	+ int si, sk, i, k, kc;
265	352	int doIndent = 0;
266	353	char *zBuf;
267	354	char zBuffer[400];
268	355	int lineCnt = 0;
	356	+ int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
269	357
270	358	if( width<0 ){
271	359	comment_set_maxchars(indent, &maxChars);
272	360	}
273	361	if( zText==0 ) zText = "(NULL)";
274	362	if( maxChars<=0 ){
275	363	maxChars = strlen(zText);
276	364	}
277		- if( maxChars >= (sizeof(zBuffer)) ){
278		- zBuf = fossil_malloc(maxChars+1);
	365	+ /* Ensure the buffer can hold the longest-possible UTF-8 sequences. */
	366	+ if( maxChars >= (sizeof(zBuffer)/4-1) ){
	367	+ zBuf = fossil_malloc(maxChars*4+1);
279	368	}else{
280	369	zBuf = zBuffer;
281	370	}
282	371	for(;;){
283	372	while( fossil_isspace(zText[0]) ){ zText++; }
		@@ -287,13 +376,28 @@
287	376	lineCnt = 1;
288	377	}
289	378	if( zBuf!=zBuffer) fossil_free(zBuf);
290	379	return lineCnt;
291	380	}
292		- for(sk=si=i=k=0; zText[i] && k<maxChars; i++){
	381	+ for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
293	382	char c = zText[i];
294		- if( fossil_isspace(c) ){
	383	+ kc++; /* Count complete UTF-8 sequences. */
	384	+ /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
	385	+ cchUTF8=1; /* Code units consumed. */
	386	+ maxUTF8=1; /* Expected sequence length. */
	387	+ if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
	388	+ else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
	389	+ else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
	390	+ if( maxUTF8>1 ){
	391	+ zBuf[k++] = c;
	392	+ while( cchUTF8<maxUTF8 &&
	393	+ (zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
	394	+ cchUTF8++;
	395	+ zBuf[k++] = zText[++i];
	396	+ }
	397	+ }
	398	+ else if( fossil_isspace(c) ){
295	399	si = i;
296	400	sk = k;
297	401	if( k==0 || zBuf[k-1]!=' ' ){
298	402	zBuf[k++] = ' ';
299	403	}
300	404

	--- src/comformat.c
	+++ src/comformat.c
	@@ -2,11 +2,11 @@
2	** Copyright (c) 2007 D. Richard Hipp
3	**
4	** This program is free software; you can redistribute it and/or
5	** modify it under the terms of the Simplified BSD License (also
6	** known as the "2-Clause License" or "FreeBSD License".)
7
8	** This program is distributed in the hope that it will be useful,
9	** but without any warranty; without even the implied warranty of
10	** merchantability or fitness for a particular purpose.
11	**
12	** Author contact information:
	@@ -95,21 +95,20 @@
95	#endif
96	}
97
98	/*
99	** This function checks the current line being printed against the original
100	** comment text. Upon matching, it emits a new line and updates the provided
101	** character and line counts, if applicable.
102	*/
103	static int comment_check_orig(
104	const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
105	const char *zLine, /* [in] The comment line to print. */
106	int *pCharCnt, /* [in/out] Pointer to the line character count. */
107	int *pLineCnt /* [in/out] Pointer to the total line count. */
108	){
109	if( zOrigText && fossil_strcmp(zLine, zOrigText)==0 ){
110	fossil_print("\n");
111	if( pCharCnt ) *pCharCnt = 0;
112	if( pLineCnt ) (*pLineCnt)++;
113	return 1;
114	}
115	return 0;
	@@ -121,37 +120,76 @@
121	** zero if such a character cannot be found. For the purposes of this
122	** algorithm, the NUL character is treated the same as a spacing character.
123	*/
124	static int comment_next_space(
125	const char *zLine, /* [in] The comment line being printed. */
126	int index /* [in] The current character index being handled. */

127	){
128	int nextIndex = index + 1;

129	for(;;){
130	char c = zLine[nextIndex];

131	if( c==0 || fossil_isspace(c) ){







132	return nextIndex;
133	}
134	nextIndex++;
135	}
136	return 0; /* NOT REACHED */
137	}
138
139	/*
140	** This function is called when printing a logical comment line to perform
141	** the necessary indenting.
































142	*/
143	static void comment_print_indent(
144	const char *zLine, /* [in] The comment line being printed. */
145	int indent, /* [in] Number of spaces to indent, zero for none. */
146	int trimCrLf, /* [in] Non-zero to trim leading/trailing CR/LF. */
147	int trimSpace, /* [in] Non-zero to trim leading/trailing spaces. */
148	int *piIndex /* [in/out] Pointer to first non-space character. */
149	){
150	if( indent>0 ){
151	fossil_print("%*s", indent, "");
152	}
153	if( zLine && piIndex ){
154	int index = *piIndex;
155	if( trimCrLf ){
156	while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
157	}
	@@ -179,26 +217,56 @@
179	int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */
180	int origBreak, /* [in] Non-zero to break before original comment. */
181	int *pLineCnt, /* [in/out] Pointer to the total line count. */
182	const char **pzLine /* [out] Pointer to the end of the logical line. */
183	){
184	int index = 0, charCnt = 0, lineCnt = 0, maxChars;


185	if( !zLine ) return;
186	if( lineChars<=0 ) return;
187	comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index);

















188	maxChars = lineChars;
189	for(;;){
190	int useChars = 1;
191	char c = zLine[index];








192	if( c==0 ){
193	break;
194	}else{
195	if( origBreak && index>0 ){
196	const char *zCurrent = &zLine[index];
197	if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
198	comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace,
199	&index);



200	maxChars = lineChars;
201	}
202	}
203	index++;
204	}
	@@ -205,38 +273,57 @@
205	if( c=='\n' ){
206	lineCnt++;
207	charCnt = 0;
208	useChars = 0;
209	}else if( c=='\t' ){
210	int nextIndex = comment_next_space(zLine, index);
211	if( nextIndex<=0 || (nextIndex-index)>maxChars ){

212	break;
213	}
214	charCnt++;
215	useChars = COMMENT_TAB_WIDTH;
216	if( maxChars<useChars ){
217	fossil_print(" ");
218	break;
219	}
220	}else if( wordBreak && fossil_isspace(c) ){
221	int nextIndex = comment_next_space(zLine, index);
222	if( nextIndex<=0 || (nextIndex-index)>maxChars ){

223	break;
224	}
225	charCnt++;
226	}else{
227	charCnt++;
228	}
229	assert( c!='\n' || charCnt==0 );
230	fossil_print("%c", c);
231	if( (c&0x80)==0 \|\| (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;











232	if( maxChars<=0 ) break;
233	if( c=='\n' ) break;
234	}
235	if( charCnt>0 ){
236	fossil_print("\n");
237	lineCnt++;






238	}
239	if( pLineCnt ){
240	*pLineCnt += lineCnt;
241	}
242	if( pzLine ){
	@@ -259,25 +346,27 @@
259	const char *zText, /* The comment text to be printed. */
260	int indent, /* Number of spaces to indent each non-initial line. */
261	int width /* Maximum number of characters per line. */
262	){
263	int maxChars = width - indent;
264	int si, sk, i, k;
265	int doIndent = 0;
266	char *zBuf;
267	char zBuffer[400];
268	int lineCnt = 0;

269
270	if( width<0 ){
271	comment_set_maxchars(indent, &maxChars);
272	}
273	if( zText==0 ) zText = "(NULL)";
274	if( maxChars<=0 ){
275	maxChars = strlen(zText);
276	}
277	if( maxChars >= (sizeof(zBuffer)) ){
278	zBuf = fossil_malloc(maxChars+1);

279	}else{
280	zBuf = zBuffer;
281	}
282	for(;;){
283	while( fossil_isspace(zText[0]) ){ zText++; }
	@@ -287,13 +376,28 @@
287	lineCnt = 1;
288	}
289	if( zBuf!=zBuffer) fossil_free(zBuf);
290	return lineCnt;
291	}
292	for(sk=si=i=k=0; zText[i] && k<maxChars; i++){
293	char c = zText[i];
294	if( fossil_isspace(c) ){















295	si = i;
296	sk = k;
297	if( k==0 || zBuf[k-1]!=' ' ){
298	zBuf[k++] = ' ';
299	}
300

	--- src/comformat.c
	+++ src/comformat.c
	@@ -2,11 +2,11 @@
2	** Copyright (c) 2007 D. Richard Hipp
3	**
4	** This program is free software; you can redistribute it and/or
5	** modify it under the terms of the Simplified BSD License (also
6	** known as the "2-Clause License" or "FreeBSD License".)
7	**
8	** This program is distributed in the hope that it will be useful,
9	** but without any warranty; without even the implied warranty of
10	** merchantability or fitness for a particular purpose.
11	**
12	** Author contact information:
	@@ -95,21 +95,20 @@
95	#endif
96	}
97
98	/*
99	** This function checks the current line being printed against the original
100	** comment text. Upon matching, it updates the provided character and line
101	** counts, if applicable. The caller needs to emit a new line, if desired.
102	*/
103	static int comment_check_orig(
104	const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
105	const char *zLine, /* [in] The comment line to print. */
106	int *pCharCnt, /* [in/out] Pointer to the line character count. */
107	int *pLineCnt /* [in/out] Pointer to the total line count. */
108	){
109	if( zOrigText && fossil_strcmp(zLine, zOrigText)==0 ){

110	if( pCharCnt ) *pCharCnt = 0;
111	if( pLineCnt ) (*pLineCnt)++;
112	return 1;
113	}
114	return 0;
	@@ -121,37 +120,76 @@
120	** zero if such a character cannot be found. For the purposes of this
121	** algorithm, the NUL character is treated the same as a spacing character.
122	*/
123	static int comment_next_space(
124	const char *zLine, /* [in] The comment line being printed. */
125	int index, /* [in] The current character index being handled. */
126	int *distUTF8 /* [out] Distance to next space in UTF-8 sequences. */
127	){
128	int nextIndex = index + 1;
129	int fNonASCII=0;
130	for(;;){
131	char c = zLine[nextIndex];
132	if( (c&0x80)==0x80 ) fNonASCII=1;
133	if( c==0 || fossil_isspace(c) ){
134	if( distUTF8 ){
135	if( fNonASCII!=0 ){
136	*distUTF8 = strlen_utf8(&zLine[index], nextIndex-index);
137	}else{
138	*distUTF8 = nextIndex-index;
139	}
140	}
141	return nextIndex;
142	}
143	nextIndex++;
144	}
145	return 0; /* NOT REACHED */
146	}
147
148	/*
149	** Count the number of UTF-8 sequences in a string. Incomplete, ill-formed and
150	** overlong sequences are counted as one sequence. The invalid lead bytes 0xC0
151	** to 0xC1 and 0xF5 to 0xF7 are allowed to initiate (ill-formed) 2- and 4-byte
152	** sequences, respectively, the other invalid lead bytes 0xF8 to 0xFF are
153	** treated as invalid 1-byte sequences (as lone trail bytes).
154	** Combining characters and East Asian Wide and Fullwidth characters are counted
155	** as one, so this function does not calculate the effective "display width".
156	*/
157	int strlen_utf8(const char *zString, int lengthBytes){
158	int i; /* Counted bytes. */
159	int lengthUTF8; /* Counted UTF-8 sequences. */
160	#if 0
161	assert( lengthBytes>=0 );
162	#endif
163	for(i=0, lengthUTF8=0; i<lengthBytes; i++, lengthUTF8++){
164	char c = zString[i];
165	int cchUTF8=1; /* Code units consumed. */
166	int maxUTF8=1; /* Expected sequence length. */
167	if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
168	else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
169	else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
170	while( cchUTF8<maxUTF8 &&
171	i<lengthBytes-1 &&
172	(zString[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
173	cchUTF8++;
174	i++;
175	}
176	}
177	return lengthUTF8;
178	}
179
180	/*
181	** This function is called when printing a logical comment line to calculate
182	** the necessary indenting. The caller needs to emit the indenting spaces.
183	*/
184	static void comment_calc_indent(
185	const char *zLine, /* [in] The comment line being printed. */
186	int indent, /* [in] Number of spaces to indent, zero for none. */
187	int trimCrLf, /* [in] Non-zero to trim leading/trailing CR/LF. */
188	int trimSpace, /* [in] Non-zero to trim leading/trailing spaces. */
189	int *piIndex /* [in/out] Pointer to first non-space character. */
190	){



191	if( zLine && piIndex ){
192	int index = *piIndex;
193	if( trimCrLf ){
194	while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
195	}
	@@ -179,26 +217,56 @@
217	int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */
218	int origBreak, /* [in] Non-zero to break before original comment. */
219	int *pLineCnt, /* [in/out] Pointer to the total line count. */
220	const char **pzLine /* [out] Pointer to the end of the logical line. */
221	){
222	int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
223	char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
224	int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
225	if( !zLine ) return;
226	if( lineChars<=0 ) return;
227	#if 0
228	assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */
229	assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */
230	#endif
231	if( indent>sizeof(zBuf)-6 ){
232	/* Limit initial indent to fit output buffer. */
233	indent = sizeof(zBuf)-6;
234	}
235	comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);
236	if( indent>0 ){
237	for(i=0; i<indent; i++){
238	zBuf[iBuf++] = ' ';
239	}
240	}
241	if( origIndent>sizeof(zBuf)-6 ){
242	/* Limit line indent to fit output buffer. */
243	origIndent = sizeof(zBuf)-6;
244	}
245	maxChars = lineChars;
246	for(;;){
247	int useChars = 1;
248	char c = zLine[index];
249	/* Flush the output buffer if there's no space left for at least one more
250	** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,
251	** a new line, and a terminating NULL. */
252	if( iBuf>sizeof(zBuf)-origIndent-6 ){
253	zBuf[iBuf]=0;
254	iBuf=0;
255	fossil_print("%s", zBuf);
256	}
257	if( c==0 ){
258	break;
259	}else{
260	if( origBreak && index>0 ){
261	const char *zCurrent = &zLine[index];
262	if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
263	zBuf[iBuf++] = '\n';
264	comment_calc_indent(zLine, origIndent, trimCrLf, trimSpace, &index);
265	for( i=0; i<origIndent; i++ ){
266	zBuf[iBuf++] = ' ';
267	}
268	maxChars = lineChars;
269	}
270	}
271	index++;
272	}
	@@ -205,38 +273,57 @@
273	if( c=='\n' ){
274	lineCnt++;
275	charCnt = 0;
276	useChars = 0;
277	}else if( c=='\t' ){
278	int distUTF8;
279	int nextIndex = comment_next_space(zLine, index, &distUTF8);
280	if( nextIndex<=0 || distUTF8>maxChars ){
281	break;
282	}
283	charCnt++;
284	useChars = COMMENT_TAB_WIDTH;
285	if( maxChars<useChars ){
286	zBuf[iBuf++] = ' ';
287	break;
288	}
289	}else if( wordBreak && fossil_isspace(c) ){
290	int distUTF8;
291	int nextIndex = comment_next_space(zLine, index, &distUTF8);
292	if( nextIndex<=0 || distUTF8>maxChars ){
293	break;
294	}
295	charCnt++;
296	}else{
297	charCnt++;
298	}
299	assert( c!='\n' || charCnt==0 );
300	zBuf[iBuf++] = c;
301	/* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
302	cchUTF8=1; /* Code units consumed. */
303	maxUTF8=1; /* Expected sequence length. */
304	if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
305	else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
306	else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
307	while( cchUTF8<maxUTF8 &&
308	(zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
309	cchUTF8++;
310	zBuf[iBuf++] = zLine[index++];
311	}
312	maxChars -= useChars;
313	if( maxChars<=0 ) break;
314	if( c=='\n' ) break;
315	}
316	if( charCnt>0 ){
317	zBuf[iBuf++] = '\n';
318	lineCnt++;
319	}
320	/* Flush the remaining output buffer. */
321	if( iBuf>0 ){
322	zBuf[iBuf]=0;
323	iBuf=0;
324	fossil_print("%s", zBuf);
325	}
326	if( pLineCnt ){
327	*pLineCnt += lineCnt;
328	}
329	if( pzLine ){
	@@ -259,25 +346,27 @@
346	const char *zText, /* The comment text to be printed. */
347	int indent, /* Number of spaces to indent each non-initial line. */
348	int width /* Maximum number of characters per line. */
349	){
350	int maxChars = width - indent;
351	int si, sk, i, k, kc;
352	int doIndent = 0;
353	char *zBuf;
354	char zBuffer[400];
355	int lineCnt = 0;
356	int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
357
358	if( width<0 ){
359	comment_set_maxchars(indent, &maxChars);
360	}
361	if( zText==0 ) zText = "(NULL)";
362	if( maxChars<=0 ){
363	maxChars = strlen(zText);
364	}
365	/* Ensure the buffer can hold the longest-possible UTF-8 sequences. */
366	if( maxChars >= (sizeof(zBuffer)/4-1) ){
367	zBuf = fossil_malloc(maxChars*4+1);
368	}else{
369	zBuf = zBuffer;
370	}
371	for(;;){
372	while( fossil_isspace(zText[0]) ){ zText++; }
	@@ -287,13 +376,28 @@
376	lineCnt = 1;
377	}
378	if( zBuf!=zBuffer) fossil_free(zBuf);
379	return lineCnt;
380	}
381	for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
382	char c = zText[i];
383	kc++; /* Count complete UTF-8 sequences. */
384	/* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
385	cchUTF8=1; /* Code units consumed. */
386	maxUTF8=1; /* Expected sequence length. */
387	if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
388	else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
389	else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
390	if( maxUTF8>1 ){
391	zBuf[k++] = c;
392	while( cchUTF8<maxUTF8 &&
393	(zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
394	cchUTF8++;
395	zBuf[k++] = zText[++i];
396	}
397	}
398	else if( fossil_isspace(c) ){
399	si = i;
400	sk = k;
401	if( k==0 || zBuf[k-1]!=' ' ){
402	zBuf[k++] = ' ';
403	}
404

Fossil SCM

Keyboard Shortcuts