Fossil SCM

Add output buffering to the (non-legacy) comment printing algorithm, to reduce calls to fossil_print(). The resulting performance improvement can be up to a factor of 10, with a perceptible difference even for short comments (measured and tested on Windows with MSVC builds, and on Ubuntu with GCC builds). (For comparison: for the legacy comment printing algorithm, the extra UTF-8 checks added by this branch impair performance by 0.12-1.8%, depending on whether the input contains predominantly multi-byte or ASCII-only sequences.)

florian 2018-11-15 12:43 UTC comment-formatter-utf8
Commit 16fde3ff666cf0733102f7a061756c718597a299
1 file changed +26 -8
+26 -8
--- src/comformat.c
+++ src/comformat.c
@@ -180,23 +180,37 @@
180180
int origBreak, /* [in] Non-zero to break before original comment. */
181181
int *pLineCnt, /* [in/out] Pointer to the total line count. */
182182
const char **pzLine /* [out] Pointer to the end of the logical line. */
183183
){
184184
int index = 0, charCnt = 0, lineCnt = 0, maxChars;
185
+ char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
185186
if( !zLine ) return;
186187
if( lineChars<=0 ) return;
187188
comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index);
188189
maxChars = lineChars;
189190
for(;;){
190191
int useChars = 1;
191192
char c = zLine[index];
193
+ /* Flush the output buffer if there's no space left for at least one more
194
+ ** (potentially 4-byte) UTF-8 sequence and a terminating NULL. */
195
+ if ( iBuf>sizeof(zBuf)-5 ){
196
+ zBuf[iBuf]=0;
197
+ iBuf=0;
198
+ fossil_print("%s", zBuf);
199
+ }
192200
if( c==0 ){
193201
break;
194202
}else{
195203
if( origBreak && index>0 ){
196204
const char *zCurrent = &zLine[index];
197205
if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
206
+ /* Flush the output buffer before printing the indentation. */
207
+ if ( iBuf>0 ){
208
+ zBuf[iBuf]=0;
209
+ iBuf=0;
210
+ fossil_print("%s", zBuf);
211
+ }
198212
comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace,
199213
&index);
200214
maxChars = lineChars;
201215
}
202216
}
@@ -212,11 +226,11 @@
212226
break;
213227
}
214228
charCnt++;
215229
useChars = COMMENT_TAB_WIDTH;
216230
if( maxChars<useChars ){
217
- fossil_print(" ");
231
+ zBuf[iBuf++] = ' ';
218232
break;
219233
}
220234
}else if( wordBreak && fossil_isspace(c) ){
221235
int nextIndex = comment_next_space(zLine, index);
222236
if( nextIndex<=0 || (nextIndex-index)>maxChars ){
@@ -234,33 +248,37 @@
234248
** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively,
235249
** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte
236250
** sequences (as lone trail bytes).
237251
*/
238252
if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */
239
- char zUTF8[5]; /* Buffer to hold a UTF-8 sequence. */
240253
int cchUTF8=1; /* Code units consumed. */
241254
int maxUTF8=1; /* Expected sequence length. */
242
- zUTF8[0]=c;
255
+ zBuf[iBuf++]=c;
243256
if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
244257
else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
245258
else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
246259
while( cchUTF8<maxUTF8 &&
247260
(zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
248
- zUTF8[cchUTF8++] = zLine[index++];
261
+ cchUTF8++;
262
+ zBuf[iBuf++] = zLine[index++];
249263
}
250
- zUTF8[cchUTF8]=0;
251
- fossil_print("%s", zUTF8);
252264
}
253265
else
254
- fossil_print("%c", c);
266
+ zBuf[iBuf++] = c;
255267
if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;
256268
if( maxChars<=0 ) break;
257269
if( c=='\n' ) break;
258270
}
259271
if( charCnt>0 ){
260
- fossil_print("\n");
272
+ zBuf[iBuf++] = '\n';
261273
lineCnt++;
274
+ }
275
+ /* Flush the remaining output buffer. */
276
+ if ( iBuf>0 ) {
277
+ zBuf[iBuf]=0;
278
+ iBuf=0;
279
+ fossil_print("%s", zBuf);
262280
}
263281
if( pLineCnt ){
264282
*pLineCnt += lineCnt;
265283
}
266284
if( pzLine ){
267285
--- src/comformat.c
+++ src/comformat.c
@@ -180,23 +180,37 @@
180 int origBreak, /* [in] Non-zero to break before original comment. */
181 int *pLineCnt, /* [in/out] Pointer to the total line count. */
182 const char **pzLine /* [out] Pointer to the end of the logical line. */
183 ){
184 int index = 0, charCnt = 0, lineCnt = 0, maxChars;
 
185 if( !zLine ) return;
186 if( lineChars<=0 ) return;
187 comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index);
188 maxChars = lineChars;
189 for(;;){
190 int useChars = 1;
191 char c = zLine[index];
 
 
 
 
 
 
 
192 if( c==0 ){
193 break;
194 }else{
195 if( origBreak && index>0 ){
196 const char *zCurrent = &zLine[index];
197 if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
 
 
 
 
 
 
198 comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace,
199 &index);
200 maxChars = lineChars;
201 }
202 }
@@ -212,11 +226,11 @@
212 break;
213 }
214 charCnt++;
215 useChars = COMMENT_TAB_WIDTH;
216 if( maxChars<useChars ){
217 fossil_print(" ");
218 break;
219 }
220 }else if( wordBreak && fossil_isspace(c) ){
221 int nextIndex = comment_next_space(zLine, index);
222 if( nextIndex<=0 || (nextIndex-index)>maxChars ){
@@ -234,33 +248,37 @@
234 ** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively,
235 ** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte
236 ** sequences (as lone trail bytes).
237 */
238 if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */
239 char zUTF8[5]; /* Buffer to hold a UTF-8 sequence. */
240 int cchUTF8=1; /* Code units consumed. */
241 int maxUTF8=1; /* Expected sequence length. */
242 zUTF8[0]=c;
243 if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
244 else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
245 else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
246 while( cchUTF8<maxUTF8 &&
247 (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
248 zUTF8[cchUTF8++] = zLine[index++];
 
249 }
250 zUTF8[cchUTF8]=0;
251 fossil_print("%s", zUTF8);
252 }
253 else
254 fossil_print("%c", c);
255 if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;
256 if( maxChars<=0 ) break;
257 if( c=='\n' ) break;
258 }
259 if( charCnt>0 ){
260 fossil_print("\n");
261 lineCnt++;
 
 
 
 
 
 
262 }
263 if( pLineCnt ){
264 *pLineCnt += lineCnt;
265 }
266 if( pzLine ){
267
--- src/comformat.c
+++ src/comformat.c
@@ -180,23 +180,37 @@
180 int origBreak, /* [in] Non-zero to break before original comment. */
181 int *pLineCnt, /* [in/out] Pointer to the total line count. */
182 const char **pzLine /* [out] Pointer to the end of the logical line. */
183 ){
184 int index = 0, charCnt = 0, lineCnt = 0, maxChars;
185 char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
186 if( !zLine ) return;
187 if( lineChars<=0 ) return;
188 comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index);
189 maxChars = lineChars;
190 for(;;){
191 int useChars = 1;
192 char c = zLine[index];
193 /* Flush the output buffer if there's no space left for at least one more
194 ** (potentially 4-byte) UTF-8 sequence and a terminating NULL. */
195 if ( iBuf>sizeof(zBuf)-5 ){
196 zBuf[iBuf]=0;
197 iBuf=0;
198 fossil_print("%s", zBuf);
199 }
200 if( c==0 ){
201 break;
202 }else{
203 if( origBreak && index>0 ){
204 const char *zCurrent = &zLine[index];
205 if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
206 /* Flush the output buffer before printing the indentation. */
207 if ( iBuf>0 ){
208 zBuf[iBuf]=0;
209 iBuf=0;
210 fossil_print("%s", zBuf);
211 }
212 comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace,
213 &index);
214 maxChars = lineChars;
215 }
216 }
@@ -212,11 +226,11 @@
226 break;
227 }
228 charCnt++;
229 useChars = COMMENT_TAB_WIDTH;
230 if( maxChars<useChars ){
231 zBuf[iBuf++] = ' ';
232 break;
233 }
234 }else if( wordBreak && fossil_isspace(c) ){
235 int nextIndex = comment_next_space(zLine, index);
236 if( nextIndex<=0 || (nextIndex-index)>maxChars ){
@@ -234,33 +248,37 @@
248 ** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively,
249 ** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte
250 ** sequences (as lone trail bytes).
251 */
252 if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */
 
253 int cchUTF8=1; /* Code units consumed. */
254 int maxUTF8=1; /* Expected sequence length. */
255 zBuf[iBuf++]=c;
256 if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */
257 else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */
258 else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */
259 while( cchUTF8<maxUTF8 &&
260 (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */
261 cchUTF8++;
262 zBuf[iBuf++] = zLine[index++];
263 }
 
 
264 }
265 else
266 zBuf[iBuf++] = c;
267 if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars;
268 if( maxChars<=0 ) break;
269 if( c=='\n' ) break;
270 }
271 if( charCnt>0 ){
272 zBuf[iBuf++] = '\n';
273 lineCnt++;
274 }
275 /* Flush the remaining output buffer. */
276 if ( iBuf>0 ) {
277 zBuf[iBuf]=0;
278 iBuf=0;
279 fossil_print("%s", zBuf);
280 }
281 if( pLineCnt ){
282 *pLineCnt += lineCnt;
283 }
284 if( pzLine ){
285

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button