Fossil SCM
Add output buffering to the (non-legacy) comment printing algorithm, to reduce calls to fossil_print(). The resulting performance improvement can be up to a factor of 10, with a perceptible difference even for short comments (measured and tested on Windows with MSVC builds, and on Ubuntu with GCC builds). (For comparison: for the legacy comment printing algorithm, the extra UTF-8 checks added by this branch impair performance by 0.12-1.8%, depending on whether the input consists predominantly of multi-byte or ASCII-only sequences.)
Commit
16fde3ff666cf0733102f7a061756c718597a299
Parent
1bbca2c3f89b826…
1 file changed
+26
-8
+26
-8
| --- src/comformat.c | ||
| +++ src/comformat.c | ||
| @@ -180,23 +180,37 @@ | ||
| 180 | 180 | int origBreak, /* [in] Non-zero to break before original comment. */ |
| 181 | 181 | int *pLineCnt, /* [in/out] Pointer to the total line count. */ |
| 182 | 182 | const char **pzLine /* [out] Pointer to the end of the logical line. */ |
| 183 | 183 | ){ |
| 184 | 184 | int index = 0, charCnt = 0, lineCnt = 0, maxChars; |
| 185 | + char zBuf[400]; int iBuf=0; /* Output buffer and counter. */ | |
| 185 | 186 | if( !zLine ) return; |
| 186 | 187 | if( lineChars<=0 ) return; |
| 187 | 188 | comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index); |
| 188 | 189 | maxChars = lineChars; |
| 189 | 190 | for(;;){ |
| 190 | 191 | int useChars = 1; |
| 191 | 192 | char c = zLine[index]; |
| 193 | + /* Flush the output buffer if there's no space left for at least one more | |
| 194 | + ** (potentially 4-byte) UTF-8 sequence and a terminating NULL. */ | |
| 195 | + if ( iBuf>sizeof(zBuf)-5 ){ | |
| 196 | + zBuf[iBuf]=0; | |
| 197 | + iBuf=0; | |
| 198 | + fossil_print("%s", zBuf); | |
| 199 | + } | |
| 192 | 200 | if( c==0 ){ |
| 193 | 201 | break; |
| 194 | 202 | }else{ |
| 195 | 203 | if( origBreak && index>0 ){ |
| 196 | 204 | const char *zCurrent = &zLine[index]; |
| 197 | 205 | if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){ |
| 206 | + /* Flush the output buffer before printing the indentation. */ | |
| 207 | + if ( iBuf>0 ){ | |
| 208 | + zBuf[iBuf]=0; | |
| 209 | + iBuf=0; | |
| 210 | + fossil_print("%s", zBuf); | |
| 211 | + } | |
| 198 | 212 | comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace, |
| 199 | 213 | &index); |
| 200 | 214 | maxChars = lineChars; |
| 201 | 215 | } |
| 202 | 216 | } |
| @@ -212,11 +226,11 @@ | ||
| 212 | 226 | break; |
| 213 | 227 | } |
| 214 | 228 | charCnt++; |
| 215 | 229 | useChars = COMMENT_TAB_WIDTH; |
| 216 | 230 | if( maxChars<useChars ){ |
| 217 | - fossil_print(" "); | |
| 231 | + zBuf[iBuf++] = ' '; | |
| 218 | 232 | break; |
| 219 | 233 | } |
| 220 | 234 | }else if( wordBreak && fossil_isspace(c) ){ |
| 221 | 235 | int nextIndex = comment_next_space(zLine, index); |
| 222 | 236 | if( nextIndex<=0 || (nextIndex-index)>maxChars ){ |
| @@ -234,33 +248,37 @@ | ||
| 234 | 248 | ** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively, |
| 235 | 249 | ** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte |
| 236 | 250 | ** sequences (as lone trail bytes). |
| 237 | 251 | */ |
| 238 | 252 | if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */ |
| 239 | - char zUTF8[5]; /* Buffer to hold a UTF-8 sequence. */ | |
| 240 | 253 | int cchUTF8=1; /* Code units consumed. */ |
| 241 | 254 | int maxUTF8=1; /* Expected sequence length. */ |
| 242 | - zUTF8[0]=c; | |
| 255 | + zBuf[iBuf++]=c; | |
| 243 | 256 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 244 | 257 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 245 | 258 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 246 | 259 | while( cchUTF8<maxUTF8 && |
| 247 | 260 | (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 248 | - zUTF8[cchUTF8++] = zLine[index++]; | |
| 261 | + cchUTF8++; | |
| 262 | + zBuf[iBuf++] = zLine[index++]; | |
| 249 | 263 | } |
| 250 | - zUTF8[cchUTF8]=0; | |
| 251 | - fossil_print("%s", zUTF8); | |
| 252 | 264 | } |
| 253 | 265 | else |
| 254 | - fossil_print("%c", c); | |
| 266 | + zBuf[iBuf++] = c; | |
| 255 | 267 | if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars; |
| 256 | 268 | if( maxChars<=0 ) break; |
| 257 | 269 | if( c=='\n' ) break; |
| 258 | 270 | } |
| 259 | 271 | if( charCnt>0 ){ |
| 260 | - fossil_print("\n"); | |
| 272 | + zBuf[iBuf++] = '\n'; | |
| 261 | 273 | lineCnt++; |
| 274 | + } | |
| 275 | + /* Flush the remaining output buffer. */ | |
| 276 | + if ( iBuf>0 ) { | |
| 277 | + zBuf[iBuf]=0; | |
| 278 | + iBuf=0; | |
| 279 | + fossil_print("%s", zBuf); | |
| 262 | 280 | } |
| 263 | 281 | if( pLineCnt ){ |
| 264 | 282 | *pLineCnt += lineCnt; |
| 265 | 283 | } |
| 266 | 284 | if( pzLine ){ |
| 267 | 285 |
| --- src/comformat.c | |
| +++ src/comformat.c | |
| @@ -180,23 +180,37 @@ | |
| 180 | int origBreak, /* [in] Non-zero to break before original comment. */ |
| 181 | int *pLineCnt, /* [in/out] Pointer to the total line count. */ |
| 182 | const char **pzLine /* [out] Pointer to the end of the logical line. */ |
| 183 | ){ |
| 184 | int index = 0, charCnt = 0, lineCnt = 0, maxChars; |
| 185 | if( !zLine ) return; |
| 186 | if( lineChars<=0 ) return; |
| 187 | comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index); |
| 188 | maxChars = lineChars; |
| 189 | for(;;){ |
| 190 | int useChars = 1; |
| 191 | char c = zLine[index]; |
| 192 | if( c==0 ){ |
| 193 | break; |
| 194 | }else{ |
| 195 | if( origBreak && index>0 ){ |
| 196 | const char *zCurrent = &zLine[index]; |
| 197 | if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){ |
| 198 | comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace, |
| 199 | &index); |
| 200 | maxChars = lineChars; |
| 201 | } |
| 202 | } |
| @@ -212,11 +226,11 @@ | |
| 212 | break; |
| 213 | } |
| 214 | charCnt++; |
| 215 | useChars = COMMENT_TAB_WIDTH; |
| 216 | if( maxChars<useChars ){ |
| 217 | fossil_print(" "); |
| 218 | break; |
| 219 | } |
| 220 | }else if( wordBreak && fossil_isspace(c) ){ |
| 221 | int nextIndex = comment_next_space(zLine, index); |
| 222 | if( nextIndex<=0 || (nextIndex-index)>maxChars ){ |
| @@ -234,33 +248,37 @@ | |
| 234 | ** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively, |
| 235 | ** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte |
| 236 | ** sequences (as lone trail bytes). |
| 237 | */ |
| 238 | if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */ |
| 239 | char zUTF8[5]; /* Buffer to hold a UTF-8 sequence. */ |
| 240 | int cchUTF8=1; /* Code units consumed. */ |
| 241 | int maxUTF8=1; /* Expected sequence length. */ |
| 242 | zUTF8[0]=c; |
| 243 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 244 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 245 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 246 | while( cchUTF8<maxUTF8 && |
| 247 | (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 248 | zUTF8[cchUTF8++] = zLine[index++]; |
| 249 | } |
| 250 | zUTF8[cchUTF8]=0; |
| 251 | fossil_print("%s", zUTF8); |
| 252 | } |
| 253 | else |
| 254 | fossil_print("%c", c); |
| 255 | if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars; |
| 256 | if( maxChars<=0 ) break; |
| 257 | if( c=='\n' ) break; |
| 258 | } |
| 259 | if( charCnt>0 ){ |
| 260 | fossil_print("\n"); |
| 261 | lineCnt++; |
| 262 | } |
| 263 | if( pLineCnt ){ |
| 264 | *pLineCnt += lineCnt; |
| 265 | } |
| 266 | if( pzLine ){ |
| 267 |
| --- src/comformat.c | |
| +++ src/comformat.c | |
| @@ -180,23 +180,37 @@ | |
| 180 | int origBreak, /* [in] Non-zero to break before original comment. */ |
| 181 | int *pLineCnt, /* [in/out] Pointer to the total line count. */ |
| 182 | const char **pzLine /* [out] Pointer to the end of the logical line. */ |
| 183 | ){ |
| 184 | int index = 0, charCnt = 0, lineCnt = 0, maxChars; |
| 185 | char zBuf[400]; int iBuf=0; /* Output buffer and counter. */ |
| 186 | if( !zLine ) return; |
| 187 | if( lineChars<=0 ) return; |
| 188 | comment_print_indent(zLine, indent, trimCrLf, trimSpace, &index); |
| 189 | maxChars = lineChars; |
| 190 | for(;;){ |
| 191 | int useChars = 1; |
| 192 | char c = zLine[index]; |
| 193 | /* Flush the output buffer if there's no space left for at least one more |
| 194 | ** (potentially 4-byte) UTF-8 sequence and a terminating NULL. */ |
| 195 | if ( iBuf>sizeof(zBuf)-5 ){ |
| 196 | zBuf[iBuf]=0; |
| 197 | iBuf=0; |
| 198 | fossil_print("%s", zBuf); |
| 199 | } |
| 200 | if( c==0 ){ |
| 201 | break; |
| 202 | }else{ |
| 203 | if( origBreak && index>0 ){ |
| 204 | const char *zCurrent = &zLine[index]; |
| 205 | if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){ |
| 206 | /* Flush the output buffer before printing the indentation. */ |
| 207 | if ( iBuf>0 ){ |
| 208 | zBuf[iBuf]=0; |
| 209 | iBuf=0; |
| 210 | fossil_print("%s", zBuf); |
| 211 | } |
| 212 | comment_print_indent(zCurrent, origIndent, trimCrLf, trimSpace, |
| 213 | &index); |
| 214 | maxChars = lineChars; |
| 215 | } |
| 216 | } |
| @@ -212,11 +226,11 @@ | |
| 226 | break; |
| 227 | } |
| 228 | charCnt++; |
| 229 | useChars = COMMENT_TAB_WIDTH; |
| 230 | if( maxChars<useChars ){ |
| 231 | zBuf[iBuf++] = ' '; |
| 232 | break; |
| 233 | } |
| 234 | }else if( wordBreak && fossil_isspace(c) ){ |
| 235 | int nextIndex = comment_next_space(zLine, index); |
| 236 | if( nextIndex<=0 || (nextIndex-index)>maxChars ){ |
| @@ -234,33 +248,37 @@ | |
| 248 | ** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively, |
| 249 | ** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte |
| 250 | ** sequences (as lone trail bytes). |
| 251 | */ |
| 252 | if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */ |
| 253 | int cchUTF8=1; /* Code units consumed. */ |
| 254 | int maxUTF8=1; /* Expected sequence length. */ |
| 255 | zBuf[iBuf++]=c; |
| 256 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 257 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 258 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 259 | while( cchUTF8<maxUTF8 && |
| 260 | (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 261 | cchUTF8++; |
| 262 | zBuf[iBuf++] = zLine[index++]; |
| 263 | } |
| 264 | } |
| 265 | else |
| 266 | zBuf[iBuf++] = c; |
| 267 | if( (c&0x80)==0 || (zLine[index+1]&0xc0)!=0xc0 ) maxChars -= useChars; |
| 268 | if( maxChars<=0 ) break; |
| 269 | if( c=='\n' ) break; |
| 270 | } |
| 271 | if( charCnt>0 ){ |
| 272 | zBuf[iBuf++] = '\n'; |
| 273 | lineCnt++; |
| 274 | } |
| 275 | /* Flush the remaining output buffer. */ |
| 276 | if ( iBuf>0 ) { |
| 277 | zBuf[iBuf]=0; |
| 278 | iBuf=0; |
| 279 | fossil_print("%s", zBuf); |
| 280 | } |
| 281 | if( pLineCnt ){ |
| 282 | *pLineCnt += lineCnt; |
| 283 | } |
| 284 | if( pzLine ){ |
| 285 |