Fossil SCM

Improvements to the command-line comment formatter so that it works better with non-ASCII characters.

drh 2018-11-29 11:09 trunk merge
Commit 1c84a0c14ac3e00e830309881ebf89077a39de942b3f770a4a9fc3afc3bfc31e
1 file changed +132 -28
+132 -28
--- src/comformat.c
+++ src/comformat.c
@@ -2,11 +2,11 @@
22
** Copyright (c) 2007 D. Richard Hipp
33
**
44
** This program is free software; you can redistribute it and/or
55
** modify it under the terms of the Simplified BSD License (also
66
** known as the "2-Clause License" or "FreeBSD License".)
7
-
7
+**
88
** This program is distributed in the hope that it will be useful,
99
** but without any warranty; without even the implied warranty of
1010
** merchantability or fitness for a particular purpose.
1111
**
1212
** Author contact information:
@@ -95,21 +95,20 @@
9595
#endif
9696
}
9797
9898
/*
9999
** This function checks the current line being printed against the original
100
-** comment text. Upon matching, it emits a new line and updates the provided
101
-** character and line counts, if applicable.
100
+** comment text. Upon matching, it updates the provided character and line
101
+** counts, if applicable. The caller needs to emit a new line, if desired.
102102
*/
103103
static int comment_check_orig(
104104
const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
105105
const char *zLine, /* [in] The comment line to print. */
106106
int *pCharCnt, /* [in/out] Pointer to the line character count. */
107107
int *pLineCnt /* [in/out] Pointer to the total line count. */
108108
){
109109
if( zOrigText && fossil_strcmp(zLine, zOrigText)==0 ){
110
- fossil_print("\n");
111110
if( pCharCnt ) *pCharCnt = 0;
112111
if( pLineCnt ) (*pLineCnt)++;
113112
return 1;
114113
}
115114
return 0;
@@ -121,37 +120,76 @@
121120
** zero if such a character cannot be found. For the purposes of this
122121
** algorithm, the NUL character is treated the same as a spacing character.
123122
*/
124123
static int comment_next_space(
125124
const char *zLine, /* [in] The comment line being printed. */
126
- int index /* [in] The current character index being handled. */
125
+ int index, /* [in] The current character index being handled. */
126
+ int *distUTF8 /* [out] Distance to next space in UTF-8 sequences. */
127127
){
128128
int nextIndex = index + 1;
129
+ int fNonASCII=0;
129130
for(;;){
130131
char c = zLine[nextIndex];
132
+ if( (c&0x80)==0x80 ) fNonASCII=1;
131133
if( c==0 || fossil_isspace(c) ){
134
+ if( distUTF8 ){
135
+ if( fNonASCII!=0 ){
136
+ *distUTF8 = strlen_utf8(&zLine[index], nextIndex-index);
137
+ }else{
138
+ *distUTF8 = nextIndex-index;
139
+ }
140
+ }
132141
return nextIndex;
133142
}
134143
nextIndex++;
135144
}
136145
return 0; /* NOT REACHED */
137146
}
138147
139148
/*
140
-** This function is called when printing a logical comment line to perform
141
-** the necessary indenting.
149
+** Count the number of UTF-8 sequences in a string. Incomplete, ill-formed and
150
+** overlong sequences are counted as one sequence. The invalid lead bytes 0xC0
151
+** to 0xC1 and 0xF5 to 0xF7 are allowed to initiate (ill-formed) 2- and 4-byte
152
+** sequences, respectively, the other invalid lead bytes 0xF8 to 0xFF are
153
+** treated as invalid 1-byte sequences (as lone trail bytes).
154
+** Combining characters and East Asian Wide and Fullwidth characters are counted
155
+** as one, so this function does not calculate the effective "display width".
156
+*/
157
+int strlen_utf8(const char *zString, int lengthBytes){
158
+ int i; /* Counted bytes. */
159
+ int lengthUTF8; /* Counted UTF-8 sequences. */
160
+#if 0
161
+ assert( lengthBytes>=0 );
162
+#endif
163
+ for(i=0, lengthUTF8=0; i<lengthBytes; i++, lengthUTF8++){
164
+ char c = zString[i];
165
+ int cchUTF8=1; /* Code units consumed. */
166
+ int maxUTF8=1; /* Expected sequence length. */
167
+ if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
168
+ else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
169
+ else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
170
+ while( cchUTF8<maxUTF8 &&
171
+ i<lengthBytes-1 &&
172
+ (zString[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
173
+ cchUTF8++;
174
+ i++;
175
+ }
176
+ }
177
+ return lengthUTF8;
178
+}
179
+
180
+/*
181
+** This function is called when printing a logical comment line to calculate
182
+** the necessary indenting. The caller needs to emit the indenting spaces.
142183
*/
143
-static void comment_print_indent(
184
+static void comment_calc_indent(
144185
const char *zLine, /* [in] The comment line being printed. */
145186
int indent, /* [in] Number of spaces to indent, zero for none. */
146187
int trimCrLf, /* [in] Non-zero to trim leading/trailing CR/LF. */
147188
int trimSpace, /* [in] Non-zero to trim leading/trailing spaces. */
148189
int *piIndex /* [in/out] Pointer to first non-space character. */
149190
){
150
- if( indent>0 ){
151
- fossil_print("%*s", indent, "");
152
- }
153191
if( zLine && piIndex ){
154192
int index = *piIndex;
155193
if( trimCrLf ){
156194
while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
157195
}
@@ -179,26 +217,56 @@
179217
int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */
180218
int origBreak, /* [in] Non-zero to break before original comment. */
181219
int *pLineCnt, /* [in/out] Pointer to the total line count. */
182220
const char **pzLine /* [out] Pointer to the end of the logical line. */
183221
){
184
- int index = 0, charCnt = 0, lineCnt = 0, maxChars;
222
+ int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
223
+ char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
224
+ int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
185225
if( !zLine ) return;
186226
if( lineChars<=0 ) return;
187
- comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index);
227
+#if 0
228
+ assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */
229
+ assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */
230
+#endif
231
+ if( indent>sizeof(zBuf)-6 ){
232
+ /* Limit initial indent to fit output buffer. */
233
+ indent = sizeof(zBuf)-6;
234
+ }
235
+ comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);
236
+ if( indent>0 ){
237
+ for(i=0; i<indent; i++){
238
+ zBuf[iBuf++] = ' ';
239
+ }
240
+ }
241
+ if( origIndent>sizeof(zBuf)-6 ){
242
+ /* Limit line indent to fit output buffer. */
243
+ origIndent = sizeof(zBuf)-6;
244
+ }
188245
maxChars = lineChars;
189246
for(;;){
190247
int useChars = 1;
191248
char c = zLine[index];
249
+ /* Flush the output buffer if there's no space left for at least one more
250
+ ** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,
251
+ ** a new line, and a terminating NULL. */
252
+ if( iBuf>sizeof(zBuf)-origIndent-6 ){
253
+ zBuf[iBuf]=0;
254
+ iBuf=0;
255
+ fossil_print("%s", zBuf);
256
+ }
192257
if( c==0 ){
193258
break;
194259
}else{
195260
if( origBreak && index>0 ){
196261
const char *zCurrent = &zLine[index];
197262
if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
198
- comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace,
199
- &index);
263
+ zBuf[iBuf++] = '\n';
264
+ comment_calc_indent(zLine, origIndent, trimCrLf, trimSpace, &index);
265
+ for( i=0; i<origIndent; i++ ){
266
+ zBuf[iBuf++] = ' ';
267
+ }
200268
maxChars = lineChars;
201269
}
202270
}
203271
index++;
204272
}
@@ -205,38 +273,57 @@
205273
if( c=='\n' ){
206274
lineCnt++;
207275
charCnt = 0;
208276
useChars = 0;
209277
}else if( c=='\t' ){
210
- int nextIndex = comment_next_space(zLine, index);
211
- if( nextIndex<=0 || (nextIndex-index)>maxChars ){
278
+ int distUTF8;
279
+ int nextIndex = comment_next_space(zLine, index, &distUTF8);
280
+ if( nextIndex<=0 || distUTF8>maxChars ){
212281
break;
213282
}
214283
charCnt++;
215284
useChars = COMMENT_TAB_WIDTH;
216285
if( maxChars<useChars ){
217
- fossil_print(" ");
286
+ zBuf[iBuf++] = ' ';
218287
break;
219288
}
220289
}else if( wordBreak && fossil_isspace(c) ){
221
- int nextIndex = comment_next_space(zLine, index);
222
- if( nextIndex<=0 || (nextIndex-index)>maxChars ){
290
+ int distUTF8;
291
+ int nextIndex = comment_next_space(zLine, index, &distUTF8);
292
+ if( nextIndex<=0 || distUTF8>maxChars ){
223293
break;
224294
}
225295
charCnt++;
226296
}else{
227297
charCnt++;
228298
}
229299
assert( c!='\n' || charCnt==0 );
230
- fossil_print("%c", c);
231
- if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;
300
+ zBuf[iBuf++] = c;
301
+ /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
302
+ cchUTF8=1; /* Code units consumed. */
303
+ maxUTF8=1; /* Expected sequence length. */
304
+ if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
305
+ else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
306
+ else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
307
+ while( cchUTF8<maxUTF8 &&
308
+ (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
309
+ cchUTF8++;
310
+ zBuf[iBuf++] = zLine[index++];
311
+ }
312
+ maxChars -= useChars;
232313
if( maxChars<=0 ) break;
233314
if( c=='\n' ) break;
234315
}
235316
if( charCnt>0 ){
236
- fossil_print("\n");
317
+ zBuf[iBuf++] = '\n';
237318
lineCnt++;
319
+ }
320
+ /* Flush the remaining output buffer. */
321
+ if( iBuf>0 ){
322
+ zBuf[iBuf]=0;
323
+ iBuf=0;
324
+ fossil_print("%s", zBuf);
238325
}
239326
if( pLineCnt ){
240327
*pLineCnt += lineCnt;
241328
}
242329
if( pzLine ){
@@ -259,25 +346,27 @@
259346
const char *zText, /* The comment text to be printed. */
260347
int indent, /* Number of spaces to indent each non-initial line. */
261348
int width /* Maximum number of characters per line. */
262349
){
263350
int maxChars = width - indent;
264
- int si, sk, i, k;
351
+ int si, sk, i, k, kc;
265352
int doIndent = 0;
266353
char *zBuf;
267354
char zBuffer[400];
268355
int lineCnt = 0;
356
+ int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
269357
270358
if( width<0 ){
271359
comment_set_maxchars(indent, &maxChars);
272360
}
273361
if( zText==0 ) zText = "(NULL)";
274362
if( maxChars<=0 ){
275363
maxChars = strlen(zText);
276364
}
277
- if( maxChars >= (sizeof(zBuffer)) ){
278
- zBuf = fossil_malloc(maxChars+1);
365
+ /* Ensure the buffer can hold the longest-possible UTF-8 sequences. */
366
+ if( maxChars >= (sizeof(zBuffer)/4-1) ){
367
+ zBuf = fossil_malloc(maxChars*4+1);
279368
}else{
280369
zBuf = zBuffer;
281370
}
282371
for(;;){
283372
while( fossil_isspace(zText[0]) ){ zText++; }
@@ -287,13 +376,28 @@
287376
lineCnt = 1;
288377
}
289378
if( zBuf!=zBuffer) fossil_free(zBuf);
290379
return lineCnt;
291380
}
292
- for(sk=si=i=k=0; zText[i] && k<maxChars; i++){
381
+ for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
293382
char c = zText[i];
294
- if( fossil_isspace(c) ){
383
+ kc++; /* Count complete UTF-8 sequences. */
384
+ /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
385
+ cchUTF8=1; /* Code units consumed. */
386
+ maxUTF8=1; /* Expected sequence length. */
387
+ if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
388
+ else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
389
+ else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
390
+ if( maxUTF8>1 ){
391
+ zBuf[k++] = c;
392
+ while( cchUTF8<maxUTF8 &&
393
+ (zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
394
+ cchUTF8++;
395
+ zBuf[k++] = zText[++i];
396
+ }
397
+ }
398
+ else if( fossil_isspace(c) ){
295399
si = i;
296400
sk = k;
297401
if( k==0 || zBuf[k-1]!=' ' ){
298402
zBuf[k++] = ' ';
299403
}
300404
--- src/comformat.c
+++ src/comformat.c
@@ -2,11 +2,11 @@
2 ** Copyright (c) 2007 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
@@ -95,21 +95,20 @@
95 #endif
96 }
97
98 /*
99 ** This function checks the current line being printed against the original
100 ** comment text. Upon matching, it emits a new line and updates the provided
101 ** character and line counts, if applicable.
102 */
103 static int comment_check_orig(
104 const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
105 const char *zLine, /* [in] The comment line to print. */
106 int *pCharCnt, /* [in/out] Pointer to the line character count. */
107 int *pLineCnt /* [in/out] Pointer to the total line count. */
108 ){
109 if( zOrigText && fossil_strcmp(zLine, zOrigText)==0 ){
110 fossil_print("\n");
111 if( pCharCnt ) *pCharCnt = 0;
112 if( pLineCnt ) (*pLineCnt)++;
113 return 1;
114 }
115 return 0;
@@ -121,37 +120,76 @@
121 ** zero if such a character cannot be found. For the purposes of this
122 ** algorithm, the NUL character is treated the same as a spacing character.
123 */
124 static int comment_next_space(
125 const char *zLine, /* [in] The comment line being printed. */
126 int index /* [in] The current character index being handled. */
 
127 ){
128 int nextIndex = index + 1;
 
129 for(;;){
130 char c = zLine[nextIndex];
 
131 if( c==0 || fossil_isspace(c) ){
 
 
 
 
 
 
 
132 return nextIndex;
133 }
134 nextIndex++;
135 }
136 return 0; /* NOT REACHED */
137 }
138
139 /*
140 ** This function is called when printing a logical comment line to perform
141 ** the necessary indenting.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142 */
143 static void comment_print_indent(
144 const char *zLine, /* [in] The comment line being printed. */
145 int indent, /* [in] Number of spaces to indent, zero for none. */
146 int trimCrLf, /* [in] Non-zero to trim leading/trailing CR/LF. */
147 int trimSpace, /* [in] Non-zero to trim leading/trailing spaces. */
148 int *piIndex /* [in/out] Pointer to first non-space character. */
149 ){
150 if( indent>0 ){
151 fossil_print("%*s", indent, "");
152 }
153 if( zLine && piIndex ){
154 int index = *piIndex;
155 if( trimCrLf ){
156 while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
157 }
@@ -179,26 +217,56 @@
179 int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */
180 int origBreak, /* [in] Non-zero to break before original comment. */
181 int *pLineCnt, /* [in/out] Pointer to the total line count. */
182 const char **pzLine /* [out] Pointer to the end of the logical line. */
183 ){
184 int index = 0, charCnt = 0, lineCnt = 0, maxChars;
 
 
185 if( !zLine ) return;
186 if( lineChars<=0 ) return;
187 comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188 maxChars = lineChars;
189 for(;;){
190 int useChars = 1;
191 char c = zLine[index];
 
 
 
 
 
 
 
 
192 if( c==0 ){
193 break;
194 }else{
195 if( origBreak && index>0 ){
196 const char *zCurrent = &zLine[index];
197 if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
198 comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace,
199 &index);
 
 
 
200 maxChars = lineChars;
201 }
202 }
203 index++;
204 }
@@ -205,38 +273,57 @@
205 if( c=='\n' ){
206 lineCnt++;
207 charCnt = 0;
208 useChars = 0;
209 }else if( c=='\t' ){
210 int nextIndex = comment_next_space(zLine, index);
211 if( nextIndex<=0 || (nextIndex-index)>maxChars ){
 
212 break;
213 }
214 charCnt++;
215 useChars = COMMENT_TAB_WIDTH;
216 if( maxChars<useChars ){
217 fossil_print(" ");
218 break;
219 }
220 }else if( wordBreak && fossil_isspace(c) ){
221 int nextIndex = comment_next_space(zLine, index);
222 if( nextIndex<=0 || (nextIndex-index)>maxChars ){
 
223 break;
224 }
225 charCnt++;
226 }else{
227 charCnt++;
228 }
229 assert( c!='\n' || charCnt==0 );
230 fossil_print("%c", c);
231 if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;
 
 
 
 
 
 
 
 
 
 
 
232 if( maxChars<=0 ) break;
233 if( c=='\n' ) break;
234 }
235 if( charCnt>0 ){
236 fossil_print("\n");
237 lineCnt++;
 
 
 
 
 
 
238 }
239 if( pLineCnt ){
240 *pLineCnt += lineCnt;
241 }
242 if( pzLine ){
@@ -259,25 +346,27 @@
259 const char *zText, /* The comment text to be printed. */
260 int indent, /* Number of spaces to indent each non-initial line. */
261 int width /* Maximum number of characters per line. */
262 ){
263 int maxChars = width - indent;
264 int si, sk, i, k;
265 int doIndent = 0;
266 char *zBuf;
267 char zBuffer[400];
268 int lineCnt = 0;
 
269
270 if( width<0 ){
271 comment_set_maxchars(indent, &maxChars);
272 }
273 if( zText==0 ) zText = "(NULL)";
274 if( maxChars<=0 ){
275 maxChars = strlen(zText);
276 }
277 if( maxChars >= (sizeof(zBuffer)) ){
278 zBuf = fossil_malloc(maxChars+1);
 
279 }else{
280 zBuf = zBuffer;
281 }
282 for(;;){
283 while( fossil_isspace(zText[0]) ){ zText++; }
@@ -287,13 +376,28 @@
287 lineCnt = 1;
288 }
289 if( zBuf!=zBuffer) fossil_free(zBuf);
290 return lineCnt;
291 }
292 for(sk=si=i=k=0; zText[i] && k<maxChars; i++){
293 char c = zText[i];
294 if( fossil_isspace(c) ){
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295 si = i;
296 sk = k;
297 if( k==0 || zBuf[k-1]!=' ' ){
298 zBuf[k++] = ' ';
299 }
300
--- src/comformat.c
+++ src/comformat.c
@@ -2,11 +2,11 @@
2 ** Copyright (c) 2007 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7 **
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
@@ -95,21 +95,20 @@
95 #endif
96 }
97
98 /*
99 ** This function checks the current line being printed against the original
100 ** comment text. Upon matching, it updates the provided character and line
101 ** counts, if applicable. The caller needs to emit a new line, if desired.
102 */
103 static int comment_check_orig(
104 const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
105 const char *zLine, /* [in] The comment line to print. */
106 int *pCharCnt, /* [in/out] Pointer to the line character count. */
107 int *pLineCnt /* [in/out] Pointer to the total line count. */
108 ){
109 if( zOrigText && fossil_strcmp(zLine, zOrigText)==0 ){
 
110 if( pCharCnt ) *pCharCnt = 0;
111 if( pLineCnt ) (*pLineCnt)++;
112 return 1;
113 }
114 return 0;
@@ -121,37 +120,76 @@
120 ** zero if such a character cannot be found. For the purposes of this
121 ** algorithm, the NUL character is treated the same as a spacing character.
122 */
/*
** Locate the next spacing character (or the NUL terminator) in zLine that
** lies strictly after position "index" and return its byte index.
**
** If distUTF8 is not NULL, it receives the distance from "index" to the
** returned position.  The distance is the plain byte difference when only
** ASCII bytes were scanned; otherwise it is the number of UTF-8 sequences
** as counted by strlen_utf8().
**
** If zLine[index] is itself the NUL terminator there is nothing left to
** scan: the function returns "index" and reports a distance of zero rather
** than reading beyond the end of the string.
*/
static int comment_next_space(
  const char *zLine, /* [in] The comment line being printed. */
  int index,         /* [in] The current character index being handled. */
  int *distUTF8      /* [out] Distance to next space in UTF-8 sequences. */
){
  int nextIndex;
  int fNonASCII = 0;   /* Set once a byte with the high bit is seen. */

  if( zLine[index]==0 ){
    /* Already at the terminator; zLine[index+1] would be out of bounds. */
    if( distUTF8 ) *distUTF8 = 0;
    return index;
  }
  for(nextIndex=index+1; ; nextIndex++){
    char c = zLine[nextIndex];
    if( (c&0x80)!=0 ) fNonASCII = 1;
    if( c==0 || fossil_isspace(c) ) break;
  }
  if( distUTF8 ){
    /* Pure ASCII runs need no decoding: bytes and sequences coincide. */
    *distUTF8 = fNonASCII ? strlen_utf8(&zLine[index], nextIndex-index)
                          : nextIndex-index;
  }
  return nextIndex;
}
147
/*
** Count the number of UTF-8 sequences in the first lengthBytes bytes of
** zString.  Returns 0 if zString is NULL or lengthBytes is not positive.
**
** Incomplete, ill-formed and overlong sequences are counted as one sequence.
** The invalid lead bytes 0xC0 to 0xC1 and 0xF5 to 0xF7 are allowed to
** initiate (ill-formed) 2- and 4-byte sequences, respectively.  The other
** invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte sequences,
** in the same way as lone trail bytes.
**
** Combining characters and East Asian Wide and Fullwidth characters are
** counted as one, so this function does not calculate the effective
** "display width".
*/
int strlen_utf8(const char *zString, int lengthBytes){
  int i = 0;            /* Offset of the next byte to examine. */
  int lengthUTF8 = 0;   /* Counted UTF-8 sequences. */

  if( zString==0 ) return 0;
  while( i<lengthBytes ){
    char c = zString[i++];
    int nTrail = 0;     /* Trail bytes the lead byte announces. */
    if( (c&0xe0)==0xc0 ) nTrail = 1;        /* UTF-8 lead byte 110vvvvv */
    else if( (c&0xf0)==0xe0 ) nTrail = 2;   /* UTF-8 lead byte 1110vvvv */
    else if( (c&0xf8)==0xf0 ) nTrail = 3;   /* UTF-8 lead byte 11110vvv */
    /* Consume only the trail bytes (10vvvvvv) that are really present and
    ** inside the given length; a short sequence still counts as one. */
    while( nTrail>0 && i<lengthBytes && (zString[i]&0xc0)==0x80 ){
      i++;
      nTrail--;
    }
    lengthUTF8++;
  }
  return lengthUTF8;
}
179
180 /*
181 ** This function is called when printing a logical comment line to calculate
182 ** the necessary indenting. The caller needs to emit the indenting spaces.
183 */
184 static void comment_calc_indent(
185 const char *zLine, /* [in] The comment line being printed. */
186 int indent, /* [in] Number of spaces to indent, zero for none. */
187 int trimCrLf, /* [in] Non-zero to trim leading/trailing CR/LF. */
188 int trimSpace, /* [in] Non-zero to trim leading/trailing spaces. */
189 int *piIndex /* [in/out] Pointer to first non-space character. */
190 ){
 
 
 
191 if( zLine && piIndex ){
192 int index = *piIndex;
193 if( trimCrLf ){
194 while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
195 }
@@ -179,26 +217,56 @@
217 int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */
218 int origBreak, /* [in] Non-zero to break before original comment. */
219 int *pLineCnt, /* [in/out] Pointer to the total line count. */
220 const char **pzLine /* [out] Pointer to the end of the logical line. */
221 ){
222 int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
223 char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
224 int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
225 if( !zLine ) return;
226 if( lineChars<=0 ) return;
227 #if 0
228 assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */
229 assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */
230 #endif
231 if( indent>sizeof(zBuf)-6 ){
232 /* Limit initial indent to fit output buffer. */
233 indent = sizeof(zBuf)-6;
234 }
235 comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);
236 if( indent>0 ){
237 for(i=0; i<indent; i++){
238 zBuf[iBuf++] = ' ';
239 }
240 }
241 if( origIndent>sizeof(zBuf)-6 ){
242 /* Limit line indent to fit output buffer. */
243 origIndent = sizeof(zBuf)-6;
244 }
245 maxChars = lineChars;
246 for(;;){
247 int useChars = 1;
248 char c = zLine[index];
249 /* Flush the output buffer if there's no space left for at least one more
250 ** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,
251 ** a new line, and a terminating NULL. */
252 if( iBuf>sizeof(zBuf)-origIndent-6 ){
253 zBuf[iBuf]=0;
254 iBuf=0;
255 fossil_print("%s", zBuf);
256 }
257 if( c==0 ){
258 break;
259 }else{
260 if( origBreak && index>0 ){
261 const char *zCurrent = &zLine[index];
262 if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
263 zBuf[iBuf++] = '\n';
264 comment_calc_indent(zLine, origIndent, trimCrLf, trimSpace, &index);
265 for( i=0; i<origIndent; i++ ){
266 zBuf[iBuf++] = ' ';
267 }
268 maxChars = lineChars;
269 }
270 }
271 index++;
272 }
@@ -205,38 +273,57 @@
273 if( c=='\n' ){
274 lineCnt++;
275 charCnt = 0;
276 useChars = 0;
277 }else if( c=='\t' ){
278 int distUTF8;
279 int nextIndex = comment_next_space(zLine, index, &distUTF8);
280 if( nextIndex<=0 || distUTF8>maxChars ){
281 break;
282 }
283 charCnt++;
284 useChars = COMMENT_TAB_WIDTH;
285 if( maxChars<useChars ){
286 zBuf[iBuf++] = ' ';
287 break;
288 }
289 }else if( wordBreak && fossil_isspace(c) ){
290 int distUTF8;
291 int nextIndex = comment_next_space(zLine, index, &distUTF8);
292 if( nextIndex<=0 || distUTF8>maxChars ){
293 break;
294 }
295 charCnt++;
296 }else{
297 charCnt++;
298 }
299 assert( c!='\n' || charCnt==0 );
300 zBuf[iBuf++] = c;
301 /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
302 cchUTF8=1; /* Code units consumed. */
303 maxUTF8=1; /* Expected sequence length. */
304 if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
305 else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
306 else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
307 while( cchUTF8<maxUTF8 &&
308 (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
309 cchUTF8++;
310 zBuf[iBuf++] = zLine[index++];
311 }
312 maxChars -= useChars;
313 if( maxChars<=0 ) break;
314 if( c=='\n' ) break;
315 }
316 if( charCnt>0 ){
317 zBuf[iBuf++] = '\n';
318 lineCnt++;
319 }
320 /* Flush the remaining output buffer. */
321 if( iBuf>0 ){
322 zBuf[iBuf]=0;
323 iBuf=0;
324 fossil_print("%s", zBuf);
325 }
326 if( pLineCnt ){
327 *pLineCnt += lineCnt;
328 }
329 if( pzLine ){
@@ -259,25 +346,27 @@
346 const char *zText, /* The comment text to be printed. */
347 int indent, /* Number of spaces to indent each non-initial line. */
348 int width /* Maximum number of characters per line. */
349 ){
350 int maxChars = width - indent;
351 int si, sk, i, k, kc;
352 int doIndent = 0;
353 char *zBuf;
354 char zBuffer[400];
355 int lineCnt = 0;
356 int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */
357
358 if( width<0 ){
359 comment_set_maxchars(indent, &maxChars);
360 }
361 if( zText==0 ) zText = "(NULL)";
362 if( maxChars<=0 ){
363 maxChars = strlen(zText);
364 }
365 /* Ensure the buffer can hold the longest-possible UTF-8 sequences. */
366 if( maxChars >= (sizeof(zBuffer)/4-1) ){
367 zBuf = fossil_malloc(maxChars*4+1);
368 }else{
369 zBuf = zBuffer;
370 }
371 for(;;){
372 while( fossil_isspace(zText[0]) ){ zText++; }
@@ -287,13 +376,28 @@
376 lineCnt = 1;
377 }
378 if( zBuf!=zBuffer) fossil_free(zBuf);
379 return lineCnt;
380 }
381 for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
382 char c = zText[i];
383 kc++; /* Count complete UTF-8 sequences. */
384 /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */
385 cchUTF8=1; /* Code units consumed. */
386 maxUTF8=1; /* Expected sequence length. */
387 if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
388 else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
389 else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
390 if( maxUTF8>1 ){
391 zBuf[k++] = c;
392 while( cchUTF8<maxUTF8 &&
393 (zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
394 cchUTF8++;
395 zBuf[k++] = zText[++i];
396 }
397 }
398 else if( fossil_isspace(c) ){
399 si = i;
400 sk = k;
401 if( k==0 || zBuf[k-1]!=' ' ){
402 zBuf[k++] = ' ';
403 }
404

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button