Fossil SCM

fossil-scm / src / comformat.c
Blame History Raw 854 lines
1
/*
2
** Copyright (c) 2007 D. Richard Hipp
3
**
4
** This program is free software; you can redistribute it and/or
5
** modify it under the terms of the Simplified BSD License (also
6
** known as the "2-Clause License" or "FreeBSD License".)
7
**
8
** This program is distributed in the hope that it will be useful,
9
** but without any warranty; without even the implied warranty of
10
** merchantability or fitness for a particular purpose.
11
**
12
** Author contact information:
13
** [email protected]
14
** http://www.hwaci.com/drh/
15
**
16
*******************************************************************************
17
**
18
** This file contains code used to format and print comments or other
19
** text on a TTY.
20
*/
21
#include "config.h"
22
#include "comformat.h"
23
#include <assert.h>
24
25
#if INTERFACE
/*
** Flag bits understood by comment_print().  COMMENT_PRINT_UNSET is not a
** flag bit; it is the sentinel compared against a flags value that has not
** been initialized yet.
*/
#define COMMENT_PRINT_NONE       ((u32)0x00000000) /* No flags */
#define COMMENT_PRINT_CANONICAL  ((u32)0x00000001) /* Use canonical algorithm */
#define COMMENT_PRINT_DEFAULT    COMMENT_PRINT_CANONICAL  /* Default */
#define COMMENT_PRINT_UNSET      (-1)              /* Not initialized */

/*
** The canonical comment printing algorithm is recommended.  There is
** no promise of on-going support for any of the following flags:
*/
#define COMMENT_PRINT_TRIM_CRLF  ((u32)0x00000002) /* Trim leading CR/LF. */
#define COMMENT_PRINT_TRIM_SPACE ((u32)0x00000004) /* Trim leading/trailing. */
#define COMMENT_PRINT_WORD_BREAK ((u32)0x00000008) /* Break lines on words. */
#define COMMENT_PRINT_ORIG_BREAK ((u32)0x00000010) /* Break before original. */
#endif
39
40
/********* Code copied from SQLite src/shell.c.in on 2024-09-30 **********/
41
/*
** Lookup table used to estimate how many terminal columns a Unicode
** character occupies.  Each entry opens a span of code points that share
** the same width; the span runs up to (but not including) the iFirst value
** of the next entry.  Entries are sorted by ascending iFirst, which is what
** allows cli_wcwidth() to binary-search the table.
*/
static const struct {
  unsigned char w;    /* Width of the character in columns */
  int iFirst;         /* First character in a span having this width */
} aUWidth[] = {
   /* {1, 0x00000}, */
  {0, 0x00300},  {1, 0x00370},  {0, 0x00483},  {1, 0x00487},  {0, 0x00488},
  {1, 0x0048a},  {0, 0x00591},  {1, 0x005be},  {0, 0x005bf},  {1, 0x005c0},
  {0, 0x005c1},  {1, 0x005c3},  {0, 0x005c4},  {1, 0x005c6},  {0, 0x005c7},
  {1, 0x005c8},  {0, 0x00600},  {1, 0x00604},  {0, 0x00610},  {1, 0x00616},
  {0, 0x0064b},  {1, 0x0065f},  {0, 0x00670},  {1, 0x00671},  {0, 0x006d6},
  {1, 0x006e5},  {0, 0x006e7},  {1, 0x006e9},  {0, 0x006ea},  {1, 0x006ee},
  {0, 0x0070f},  {1, 0x00710},  {0, 0x00711},  {1, 0x00712},  {0, 0x00730},
  {1, 0x0074b},  {0, 0x007a6},  {1, 0x007b1},  {0, 0x007eb},  {1, 0x007f4},
  {0, 0x00901},  {1, 0x00903},  {0, 0x0093c},  {1, 0x0093d},  {0, 0x00941},
  {1, 0x00949},  {0, 0x0094d},  {1, 0x0094e},  {0, 0x00951},  {1, 0x00955},
  {0, 0x00962},  {1, 0x00964},  {0, 0x00981},  {1, 0x00982},  {0, 0x009bc},
  {1, 0x009bd},  {0, 0x009c1},  {1, 0x009c5},  {0, 0x009cd},  {1, 0x009ce},
  {0, 0x009e2},  {1, 0x009e4},  {0, 0x00a01},  {1, 0x00a03},  {0, 0x00a3c},
  {1, 0x00a3d},  {0, 0x00a41},  {1, 0x00a43},  {0, 0x00a47},  {1, 0x00a49},
  {0, 0x00a4b},  {1, 0x00a4e},  {0, 0x00a70},  {1, 0x00a72},  {0, 0x00a81},
  {1, 0x00a83},  {0, 0x00abc},  {1, 0x00abd},  {0, 0x00ac1},  {1, 0x00ac6},
  {0, 0x00ac7},  {1, 0x00ac9},  {0, 0x00acd},  {1, 0x00ace},  {0, 0x00ae2},
  {1, 0x00ae4},  {0, 0x00b01},  {1, 0x00b02},  {0, 0x00b3c},  {1, 0x00b3d},
  {0, 0x00b3f},  {1, 0x00b40},  {0, 0x00b41},  {1, 0x00b44},  {0, 0x00b4d},
  {1, 0x00b4e},  {0, 0x00b56},  {1, 0x00b57},  {0, 0x00b82},  {1, 0x00b83},
  {0, 0x00bc0},  {1, 0x00bc1},  {0, 0x00bcd},  {1, 0x00bce},  {0, 0x00c3e},
  {1, 0x00c41},  {0, 0x00c46},  {1, 0x00c49},  {0, 0x00c4a},  {1, 0x00c4e},
  {0, 0x00c55},  {1, 0x00c57},  {0, 0x00cbc},  {1, 0x00cbd},  {0, 0x00cbf},
  {1, 0x00cc0},  {0, 0x00cc6},  {1, 0x00cc7},  {0, 0x00ccc},  {1, 0x00cce},
  {0, 0x00ce2},  {1, 0x00ce4},  {0, 0x00d41},  {1, 0x00d44},  {0, 0x00d4d},
  {1, 0x00d4e},  {0, 0x00dca},  {1, 0x00dcb},  {0, 0x00dd2},  {1, 0x00dd5},
  {0, 0x00dd6},  {1, 0x00dd7},  {0, 0x00e31},  {1, 0x00e32},  {0, 0x00e34},
  {1, 0x00e3b},  {0, 0x00e47},  {1, 0x00e4f},  {0, 0x00eb1},  {1, 0x00eb2},
  {0, 0x00eb4},  {1, 0x00eba},  {0, 0x00ebb},  {1, 0x00ebd},  {0, 0x00ec8},
  {1, 0x00ece},  {0, 0x00f18},  {1, 0x00f1a},  {0, 0x00f35},  {1, 0x00f36},
  {0, 0x00f37},  {1, 0x00f38},  {0, 0x00f39},  {1, 0x00f3a},  {0, 0x00f71},
  {1, 0x00f7f},  {0, 0x00f80},  {1, 0x00f85},  {0, 0x00f86},  {1, 0x00f88},
  {0, 0x00f90},  {1, 0x00f98},  {0, 0x00f99},  {1, 0x00fbd},  {0, 0x00fc6},
  {1, 0x00fc7},  {0, 0x0102d},  {1, 0x01031},  {0, 0x01032},  {1, 0x01033},
  {0, 0x01036},  {1, 0x0103b},  {0, 0x01058},
  {1, 0x0105a},  {2, 0x01100},  {0, 0x01160},  {1, 0x01200},  {0, 0x0135f},
  {1, 0x01360},  {0, 0x01712},  {1, 0x01715},  {0, 0x01732},  {1, 0x01735},
  {0, 0x01752},  {1, 0x01754},  {0, 0x01772},  {1, 0x01774},  {0, 0x017b4},
  {1, 0x017b6},  {0, 0x017b7},  {1, 0x017be},  {0, 0x017c6},  {1, 0x017c7},
  {0, 0x017c9},  {1, 0x017d4},  {0, 0x017dd},  {1, 0x017de},  {0, 0x0180b},
  {1, 0x0180e},  {0, 0x018a9},  {1, 0x018aa},  {0, 0x01920},  {1, 0x01923},
  {0, 0x01927},  {1, 0x01929},  {0, 0x01932},  {1, 0x01933},  {0, 0x01939},
  {1, 0x0193c},  {0, 0x01a17},  {1, 0x01a19},  {0, 0x01b00},  {1, 0x01b04},
  {0, 0x01b34},  {1, 0x01b35},  {0, 0x01b36},  {1, 0x01b3b},  {0, 0x01b3c},
  {1, 0x01b3d},  {0, 0x01b42},  {1, 0x01b43},  {0, 0x01b6b},  {1, 0x01b74},
  {0, 0x01dc0},  {1, 0x01dcb},  {0, 0x01dfe},  {1, 0x01e00},  {0, 0x0200b},
  {1, 0x02010},  {0, 0x0202a},  {1, 0x0202f},  {0, 0x02060},  {1, 0x02064},
  {0, 0x0206a},  {1, 0x02070},  {0, 0x020d0},  {1, 0x020f0},  {2, 0x02329},
  {1, 0x0232b},  {2, 0x02e80},  {0, 0x0302a},  {2, 0x03030},  {1, 0x0303f},
  {2, 0x03040},  {0, 0x03099},  {2, 0x0309b},  {1, 0x0a4d0},  {0, 0x0a806},
  {1, 0x0a807},  {0, 0x0a80b},  {1, 0x0a80c},  {0, 0x0a825},  {1, 0x0a827},
  {2, 0x0ac00},  {1, 0x0d7a4},  {2, 0x0f900},  {1, 0x0fb00},  {0, 0x0fb1e},
  {1, 0x0fb1f},  {0, 0x0fe00},  {2, 0x0fe10},  {1, 0x0fe1a},  {0, 0x0fe20},
  {1, 0x0fe24},  {2, 0x0fe30},  {1, 0x0fe70},  {0, 0x0feff},  {2, 0x0ff00},
  {1, 0x0ff61},  {2, 0x0ffe0},  {1, 0x0ffe7},  {0, 0x0fff9},  {1, 0x0fffc},
  {0, 0x10a01},  {1, 0x10a04},  {0, 0x10a05},  {1, 0x10a07},  {0, 0x10a0c},
  {1, 0x10a10},  {0, 0x10a38},  {1, 0x10a3b},  {0, 0x10a3f},  {1, 0x10a40},
  {0, 0x1d167},  {1, 0x1d16a},  {0, 0x1d173},  {1, 0x1d183},  {0, 0x1d185},
  {1, 0x1d18c},  {0, 0x1d1aa},  {1, 0x1d1ae},  {0, 0x1d242},  {1, 0x1d245},
  {2, 0x20000},  {1, 0x2fffe},  {2, 0x30000},  {1, 0x3fffe},  {0, 0xe0001},
  {1, 0xe0002},  {0, 0xe0020},  {1, 0xe0080},  {0, 0xe0100},  {1, 0xe01f0}
};

/*
** Estimate the display width, in columns, of the single Unicode
** character c.  Ordinary characters are 1 column wide; zero-width and
** double-width characters yield 0 and 2 respectively.
**
** Display devices disagree about the width of some characters, so it is
** impossible to know the true display width with 100% accuracy.  A wrong
** estimate may cause columns to be misaligned; nothing can be done about
** that here.
*/
static int cli_wcwidth(int c){
  int lo;   /* Index of an entry known to start at or before c */
  int hi;   /* Every entry at index hi or beyond starts after c */

  /* Everything below the first table entry (negative values included)
  ** is a single column. */
  if( c<0x300 ) return 1;

  /* Binary search for the last entry whose span starts at or before c.
  ** aUWidth[0].iFirst is 0x300, so the invariant holds on entry. */
  lo = 0;
  hi = (int)(sizeof(aUWidth)/sizeof(aUWidth[0]));
  while( hi-lo>1 ){
    int mid = lo + (hi-lo)/2;
    if( aUWidth[mid].iFirst<=c ){
      lo = mid;
    }else{
      hi = mid;
    }
  }
  return aUWidth[lo].w;
}
145
/******* End of code copied from SQLite *************************************/
146
147
/*
** Historical default for the maximum line length.  Most external callers
** used to pass this value to comment_print(); it is now the fallback when
** the terminal width is reported as zero.  May be overridden at build time.
*/
#if !defined(COMMENT_LEGACY_LINE_LENGTH)
#  define COMMENT_LEGACY_LINE_LENGTH    (78)
#endif
155
156
/*
** Number of columns charged against the line budget when a tab character
** is seen.  May be overridden at build time.
*/
#if !defined(COMMENT_TAB_WIDTH)
#  define COMMENT_TAB_WIDTH             (8)
#endif
162
163
/*
164
** This function sets the maximum number of characters to print per line
165
** based on the detected terminal line width, if available; otherwise, it
166
** uses the legacy default terminal line width minus the amount to indent.
167
**
168
** Zero is returned to indicate any failure. One is returned to indicate
169
** the successful detection of the terminal line width. Negative one is
170
** returned to indicate the terminal line width is using the hard-coded
171
** legacy default value.
172
*/
173
static int comment_set_maxchars(
174
int indent,
175
int *pMaxChars
176
){
177
struct TerminalSize ts;
178
if ( !terminal_get_size(&ts) ){
179
return 0;
180
}
181
182
if( ts.nColumns ){
183
*pMaxChars = ts.nColumns - indent;
184
return 1;
185
}else{
186
/*
187
** Fallback to using more-or-less the "legacy semantics" of hard-coding
188
** the maximum line length to a value reasonable for the vast majority
189
** of supported systems.
190
*/
191
*pMaxChars = COMMENT_LEGACY_LINE_LENGTH - indent;
192
return -1;
193
}
194
}
195
196
/*
** Compare the remaining text of the line being printed against the
** original comment text.  On an exact match (per fossil_strcmp) the
** character count is reset to zero, the line count is incremented, and 1
** is returned; otherwise nothing is modified and 0 is returned.  Either
** counter pointer may be NULL.  Emitting the actual new line is the
** caller's job.
*/
static int comment_check_orig(
  const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
  const char *zLine,     /* [in] The comment line to print. */
  int *pCharCnt,         /* [in/out] Pointer to the line character count. */
  int *pLineCnt          /* [in/out] Pointer to the total line count. */
){
  if( zOrigText==0 ) return 0;
  if( fossil_strcmp(zLine, zOrigText)!=0 ) return 0;
  if( pCharCnt ) *pCharCnt = 0;
  if( pLineCnt ) *pLineCnt += 1;
  return 1;
}
214
215
/*
** Scan the comment line forward from byte offset "index" and return the
** byte offset at which the scan stopped.  The scan consumes at least one
** character and stops at the first position that holds a spacing character
** or the NUL terminator, or as soon as the accumulated display width
** exceeds maxChars (an early-out: the caller only needs to know that the
** word does not fit).
**
** *sumWidth always receives the accumulated display width of the
** characters that were scanned.  If zLine[index] is already the NUL
** terminator, nothing is scanned: index is returned and *sumWidth is 0.
*/
static int comment_next_space(
  const char *zLine, /* [in] The comment line being printed. */
  int index,         /* [in] The current character index being handled. */
  int maxChars,      /* [in] Optimization hint to abort before space found. */
  int *sumWidth      /* [out] Summated width of all characters to next space. */
){
  int cchUTF8, utf32, wcwidth = 0;
  int nextIndex = index;
  if( zLine[index]==0 ){
    /* Callers compare *sumWidth against their remaining budget right after
    ** this call, so it must be defined on this path too. */
    *sumWidth = 0;
    return index;
  }
  for(;;){
    char_info_utf8(&zLine[nextIndex],&cchUTF8,&utf32);
    nextIndex += cchUTF8;
    wcwidth += cli_wcwidth(utf32);
    if( zLine[nextIndex]==0 || fossil_isspace(zLine[nextIndex]) ||
        wcwidth>maxChars ){
      *sumWidth = wcwidth;
      return nextIndex;
    }
  }
  return 0; /* NOT REACHED */
}
242
243
/*
** Return information about the next (single- or multi-byte) character in
** z[0].  Two values are computed:
**
**   *  The number of bytes needed to represent the character.
**   *  The UTF code point value.
**
** Incomplete, ill-formed and overlong sequences are consumed together as
** one invalid code point.  The invalid lead bytes 0xC0 to 0xC1 and 0xF5 to
** 0xF7 are allowed to initiate (ill-formed) 2- and 4-byte sequences,
** respectively, the other invalid lead bytes 0xF8 to 0xFF are treated
** as invalid 1-byte sequences (as lone trail bytes), all resulting
** in one invalid code point.  Invalid UTF-8 sequences encoding a
** non-scalar code point (UTF-16 surrogates U+D800 to U+DFFF) are consumed
** and decoded like any other 3-byte sequence.
**
** ANSI escape sequences of the form "\033[...X" are interpreted as a
** single zero-width character whose byte count spans the whole sequence.
**
** When FOSSIL_DEBUG is defined, invalid sequences and decoded values that
** are out of range or surrogates trip an assert().
*/
void char_info_utf8(
  const char *z,     /* The character to be analyzed */
  int *pCchUTF8,     /* OUT: The number of bytes used by this character */
  int *pUtf32        /* OUT: The UTF8 code point (used to determine width) */
){
  int i = 0;                              /* Counted bytes. */
  int cchUTF8 = 1;                        /* Code units consumed. */
  int maxUTF8 = 1;                        /* Expected sequence length. */
  char c = z[i++];
  if( c==0x1b && z[i]=='[' ){
    /* ESC [ parameter-bytes intermediate-bytes final-byte */
    i++;
    while( z[i]>=0x30 && z[i]<=0x3f ){ i++; }
    while( z[i]>=0x20 && z[i]<=0x2f ){ i++; }
    if( z[i]>=0x40 && z[i]<=0x7e ){
      *pCchUTF8 = i+1;
      *pUtf32 = 0x301;  /* A zero-width character */
      return;
    }
    /* Not a complete escape sequence: fall through and report the ESC
    ** byte on its own as an ordinary ASCII character. */
  }
  if( (c&0x80)==0x00 ){                   /* 7-bit ASCII character. */
    *pCchUTF8 = 1;
    *pUtf32 = (int)z[0];
    return;
  }
  else if( (c&0xe0)==0xc0 ) maxUTF8 = 2;  /* UTF-8 lead byte 110vvvvv */
  else if( (c&0xf0)==0xe0 ) maxUTF8 = 3;  /* UTF-8 lead byte 1110vvvv */
  else if( (c&0xf8)==0xf0 ) maxUTF8 = 4;  /* UTF-8 lead byte 11110vvv */
  while( cchUTF8<maxUTF8 &&
          (z[i]&0xc0)==0x80 ){            /* UTF-8 trail byte 10vvvvvv */
    cchUTF8++;
    i++;
  }
  *pCchUTF8 = cchUTF8;
  if( cchUTF8!=maxUTF8 ||                 /* Incomplete UTF-8 sequence. */
      ( cchUTF8==1 && (c&0x80)==0x80 )){  /* Lone UTF-8 trail byte. */
    *pUtf32 = 0xfffd;                     /* U+FFFD Replacement Character */
#ifdef FOSSIL_DEBUG
    /* Always fails here: flags invalid UTF-8 input in debug builds. */
    assert( *pUtf32!=0xfffd );            /* Invalid UTF-8 sequence. */
#endif
    return;
  }
  switch( cchUTF8 ){
    case 4:
      /* The lead byte matched 11110vvv, so bit 3 is clear and masking
      ** with 0x0f keeps exactly the three payload bits. */
      *pUtf32 =
        ( (z[0] & 0x0f)<<18 ) |
        ( (z[1] & 0x3f)<<12 ) |
        ( (z[2] & 0x3f)<< 6 ) |
        ( (z[3] & 0x3f)<< 0 ) ;
      break;
    case 3:
      *pUtf32 =
        ( (z[0] & 0x0f)<<12 ) |
        ( (z[1] & 0x3f)<< 6 ) |
        ( (z[2] & 0x3f)<< 0 ) ;
      break;
    case 2:
      *pUtf32 =
        ( (z[0] & 0x1f)<< 6 ) |
        ( (z[1] & 0x3f)<< 0 ) ;
      break;
    default:
      *pUtf32 = 0xfffd;                   /* U+FFFD Replacement Character */
      break;
  }
#ifdef FOSSIL_DEBUG
  /* A scalar value lies inside U+0000..U+10FFFF and outside the surrogate
  ** block, i.e. below U+D800 OR above U+DFFF. */
  assert(
    *pUtf32>=0 && *pUtf32<=0x10ffff &&    /* Valid range U+0000 to U+10FFFF. */
    ( *pUtf32<0xd800 || *pUtf32>0xdfff )  /* Not a UTF-16 surrogate. */
  );
#endif
}
332
333
/*
** Helper used while printing a logical comment line: advance *piIndex
** past leading CR/LF characters (when trimCrLf is set) and then past
** leading whitespace (when trimSpace is set).  Nothing happens if either
** zLine or piIndex is NULL.  The indent argument is not consulted; the
** caller emits the indenting spaces itself.
*/
static void comment_calc_indent(
  const char *zLine, /* [in] The comment line being printed. */
  int indent,        /* [in] Number of spaces to indent, zero for none. */
  int trimCrLf,      /* [in] Non-zero to trim leading/trailing CR/LF. */
  int trimSpace,     /* [in] Non-zero to trim leading/trailing spaces. */
  int *piIndex       /* [in/out] Pointer to first non-space character. */
){
  int iCur;
  (void)indent;
  if( zLine==0 || piIndex==0 ) return;
  iCur = *piIndex;
  if( trimCrLf ){
    for(;;){
      char ch = zLine[iCur];
      if( ch!='\r' && ch!='\n' ) break;
      iCur++;
    }
  }
  if( trimSpace ){
    while( fossil_isspace(zLine[iCur]) ) iCur++;
  }
  *piIndex = iCur;
}
355
356
/*
357
** This function prints one logical line of a comment, stopping when it hits
358
** a new line -OR- runs out of space on the logical line.
359
*/
360
static void comment_print_line(
361
const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
362
const char *zLine, /* [in] The comment line to print. */
363
int origIndent, /* [in] Number of spaces to indent before the original
364
** comment. */
365
int indent, /* [in] Number of spaces to indent, before the line
366
** to print. */
367
int lineChars, /* [in] Maximum number of characters to print. */
368
int trimCrLf, /* [in] Non-zero to trim leading/trailing CR/LF. */
369
int trimSpace, /* [in] Non-zero to trim leading/trailing spaces. */
370
int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */
371
int origBreak, /* [in] Non-zero to break before original comment. */
372
int *pLineCnt, /* [in/out] Pointer to the total line count. */
373
const char **pzLine /* [out] Pointer to the end of the logical line. */
374
){
375
int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;
376
char zBuf[400]; int iBuf=0; /* Output buffer and counter. */
377
if( !zLine ) return;
378
if( lineChars<=0 ) return;
379
#if 0
380
assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */
381
assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */
382
#endif
383
if( indent>(int)sizeof(zBuf)-6 ){
384
/* Limit initial indent to fit output buffer. */
385
indent = sizeof(zBuf)-6;
386
}
387
comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);
388
if( indent>0 ){
389
for(i=0; i<indent; i++){
390
zBuf[iBuf++] = ' ';
391
}
392
}
393
if( origIndent>(int)sizeof(zBuf)-6 ){
394
/* Limit line indent to fit output buffer. */
395
origIndent = sizeof(zBuf)-6;
396
}
397
maxChars = lineChars;
398
for(;;){
399
int cchUTF8, utf32;
400
int useChars = 1;
401
char c = zLine[index];
402
/* Flush the output buffer if there's no space left for at least one more
403
** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,
404
** a new line, and a terminating NULL. */
405
if( iBuf>(int)sizeof(zBuf)-origIndent-6 ){
406
zBuf[iBuf]=0;
407
iBuf=0;
408
fossil_print("%s", zBuf);
409
}
410
if( c==0 ){
411
break;
412
}else{
413
if( origBreak && index>0 ){
414
const char *zCurrent = &zLine[index];
415
if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){
416
zBuf[iBuf++] = '\n';
417
comment_calc_indent(zLine, origIndent, trimCrLf, trimSpace, &index);
418
for( i=0; i<origIndent; i++ ){
419
zBuf[iBuf++] = ' ';
420
}
421
maxChars = lineChars;
422
}
423
}
424
index++;
425
}
426
if( c=='\n' ){
427
lineCnt++;
428
charCnt = 0;
429
useChars = 0;
430
}else if( c=='\t' ){
431
int sumWidth;
432
int nextIndex = comment_next_space(zLine, index, maxChars, &sumWidth);
433
if( nextIndex<=0 || sumWidth>maxChars ){
434
break;
435
}
436
charCnt++;
437
useChars = COMMENT_TAB_WIDTH;
438
if( maxChars<useChars ){
439
zBuf[iBuf++] = ' ';
440
break;
441
}
442
}else if( wordBreak && fossil_isspace(c) ){
443
int sumWidth;
444
int nextIndex = comment_next_space(zLine, index, maxChars, &sumWidth);
445
if( nextIndex<=0 || sumWidth>=maxChars ){
446
break;
447
}
448
charCnt++;
449
}else{
450
charCnt++;
451
}
452
assert( c!='\n' || charCnt==0 );
453
zBuf[iBuf++] = c;
454
char_info_utf8(&zLine[index-1],&cchUTF8,&utf32);
455
if( cchUTF8>1 ){
456
int wcwidth;
457
wcwidth = cli_wcwidth(utf32);
458
if( wcwidth>maxChars && lineChars>=wcwidth ){ /* rollback */
459
index--;
460
iBuf--;
461
zBuf[iBuf] = 0;
462
break;
463
}
464
for( ; cchUTF8>1; cchUTF8-- ){
465
zBuf[iBuf++] = zLine[index++];
466
}
467
useChars += wcwidth - 1;
468
}
469
maxChars -= useChars;
470
if( maxChars<=0 ) break;
471
if( c=='\n' ) break;
472
}
473
if( charCnt>0 ){
474
zBuf[iBuf++] = '\n';
475
lineCnt++;
476
}
477
/* Flush the remaining output buffer. */
478
if( iBuf>0 ){
479
zBuf[iBuf]=0;
480
iBuf=0;
481
fossil_print("%s", zBuf);
482
}
483
if( pLineCnt ){
484
*pLineCnt += lineCnt;
485
}
486
if( pzLine ){
487
*pzLine = zLine + index;
488
}
489
}
490
491
/*
** This is the canonical comment printing algorithm.  This is the algorithm
** that is recommended and that is used unless the administrator has made
** special arrangements to use a customized algorithm.
**
** Given a comment string, format that string for printing on a TTY.
** Assume that the output cursor is indent spaces from the left margin
** and that a single line can contain no more than 'width' characters.
** Indent all subsequent lines by 'indent'.  A negative width asks
** comment_set_maxchars() for the line length; if the resulting length is
** not positive, the byte length of the text is used instead.  A NULL
** zText is printed as "(NULL)".
**
** Formatting features:
**
**   *  Leading whitespace is removed.
**   *  Internal whitespace sequences are changed into a single space (0x20)
**      character.
**   *  Lines are broken at a space, or at a hyphen ("-") whenever possible.
**
** Returns the number of new lines emitted.
*/
static int comment_print_canonical(
  const char *zText, /* The comment text to be printed. */
  int indent,        /* Number of spaces to indent each non-initial line. */
  int width          /* Maximum number of characters per line. */
){
  int maxChars = width - indent;
  int si, sk, i, k, kc;
  int doIndent = 0;
  char *zBuf;
  char zBuffer[400];
  int lineCnt = 0;
  size_t nText;

  if( width<0 ){
    comment_set_maxchars(indent, &maxChars);
  }
  if( zText==0 ) zText = "(NULL)";
  nText = strlen(zText);
  if( maxChars<=0 ){
    maxChars = (int)nText;
  }
  /* Size the line buffer from the text, not from maxChars.  Zero-width
  ** characters and ANSI escape sequences add bytes to a line without
  ** advancing the column count, so no multiple of maxChars bounds the bytes
  ** in one line.  Each input byte yields at most one output byte, so the
  ** text length (plus the terminator) is always enough. */
  if( nText>=sizeof(zBuffer) ){
    zBuf = fossil_malloc(nText+1);
  }else{
    zBuf = zBuffer;
  }
  for(;;){
    while( fossil_isspace(zText[0]) ){ zText++; }
    if( zText[0]==0 ){
      if( doIndent==0 ){
        /* Nothing was printed at all: still emit one (empty) line. */
        fossil_print("\n");
        lineCnt = 1;
      }
      if( zBuf!=zBuffer) fossil_free(zBuf);
      return lineCnt;
    }
    /* Gather one output line.  i indexes the input and k the output; kc
    ** counts columns; si/sk remember the input/output positions of the
    ** most recent place where the line may be broken. */
    for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
      int cchUTF8, utf32;
      char c = zText[i];
      kc++; /* Count complete UTF-8 sequences. */
      char_info_utf8(&zText[i],&cchUTF8,&utf32);
      if( cchUTF8>1 ){
        int wcwidth;
        wcwidth = cli_wcwidth(utf32);
        if( kc+wcwidth-1>maxChars && maxChars>=wcwidth ){ /* rollback */
          kc--;
          break;
        }
        for( i--; cchUTF8>0; cchUTF8-- ){
          zBuf[k++] = zText[++i];
        }
        kc += wcwidth - 1;
      }
      else if( fossil_isspace(c) ){
        si = i;
        sk = k;
        if( k==0 || zBuf[k-1]!=' ' ){
          zBuf[k++] = ' ';
        }
      }else{
        zBuf[k] = c;
        if( c=='-' && k>0 && fossil_isalpha(zBuf[k-1]) ){
          si = i+1;
          sk = k+1;
        }
        k++;
      }
    }
    if( doIndent ){
      fossil_print("%*s", indent, "");
    }
    doIndent = 1;
    if( sk>0 && zText[i] ){
      /* More text follows and a break point exists: cut the line there. */
      zText += si;
      zBuf[sk] = 0;
    }else{
      zText += i;
      zBuf[k] = 0;
    }
    fossil_print("%s\n", zBuf);
    lineCnt++;
  }
}
592
593
/*
594
** This is the comment printing function. The comment printing algorithm
595
** contained within it attempts to preserve the formatting present within
596
** the comment string itself while honoring line width limitations. There
597
** are several flags that modify the default behavior of this function:
598
**
599
** COMMENT_PRINT_CANONICAL: Use the canonical printing algorithm:
600
** * Omit leading and trailing whitespace
601
** * Collapse internal whitespace into a
602
** single space (0x20) character.
603
** * Attempt to break lines at whitespace
604
** or hyphens.
605
** This is the recommended algorithm and is
606
** used in most cases.
607
**
608
** COMMENT_PRINT_TRIM_CRLF: Trims leading and trailing carriage-returns
609
** and line-feeds where they do not materially
610
** impact pre-existing formatting (i.e. at the
611
** start of the comment string -AND- right
612
** before line indentation). This flag does
613
** not apply to the legacy comment printing
614
** algorithm. This flag may be combined with
615
** COMMENT_PRINT_TRIM_SPACE.
616
**
617
** COMMENT_PRINT_TRIM_SPACE: Trims leading and trailing spaces where they
618
** do not materially impact the pre-existing
619
** formatting (i.e. at the start of the comment
620
** string -AND- right before line indentation).
621
** This flag does not apply to the legacy
622
** comment printing algorithm. This flag may
623
** be combined with COMMENT_PRINT_TRIM_CRLF.
624
**
625
** COMMENT_PRINT_WORD_BREAK: Attempts to break lines on word boundaries
626
** while honoring the logical line length.
627
** If this flag is not specified, honoring the
628
** logical line length may result in breaking
629
** lines in the middle of words. This flag
630
** does not apply to the legacy comment
631
** printing algorithm.
632
**
633
** COMMENT_PRINT_ORIG_BREAK: Looks for the original comment text within
634
** the text being printed. Upon matching, a
635
** new line will be emitted, thus preserving
636
** more of the pre-existing formatting.
637
**
638
** Given a comment string, format that string for printing on a TTY.
639
** Assume that the output cursors is indent spaces from the left margin
640
** and that a single line can contain no more than 'width' characters.
641
** Indent all subsequent lines by 'indent'.
642
**
643
** Returns the number of new lines emitted.
644
*/
645
int comment_print(
646
const char *zText, /* The comment text to be printed. */
647
const char *zOrigText, /* Original comment text ONLY, may be NULL. */
648
int indent, /* Spaces to indent each non-initial line. */
649
int width, /* Maximum number of characters per line. */
650
int flags /* Zero or more "COMMENT_PRINT_*" flags. */
651
){
652
int maxChars = width - indent;
653
654
if( flags & COMMENT_PRINT_CANONICAL ){
655
/* Use the canonical algorithm. This is what happens in almost
656
** all cases. */
657
return comment_print_canonical(zText, indent, width);
658
}else{
659
/* The remaining is a more complex formatting algorithm that is very
660
** seldom used and is considered deprecated.
661
*/
662
int trimCrLf = flags & COMMENT_PRINT_TRIM_CRLF;
663
int trimSpace = flags & COMMENT_PRINT_TRIM_SPACE;
664
int wordBreak = flags & COMMENT_PRINT_WORD_BREAK;
665
int origBreak = flags & COMMENT_PRINT_ORIG_BREAK;
666
int lineCnt = 0;
667
const char *zLine;
668
669
if( width<0 ){
670
comment_set_maxchars(indent, &maxChars);
671
}
672
if( zText==0 ) zText = "(NULL)";
673
if( maxChars<=0 ){
674
maxChars = strlen(zText);
675
}
676
if( trimSpace ){
677
while( fossil_isspace(zText[0]) ){ zText++; }
678
}
679
if( zText[0]==0 ){
680
fossil_print("\n");
681
lineCnt++;
682
return lineCnt;
683
}
684
zLine = zText;
685
for(;;){
686
comment_print_line(zOrigText, zLine, indent, zLine>zText ? indent : 0,
687
maxChars, trimCrLf, trimSpace, wordBreak, origBreak,
688
&lineCnt, &zLine);
689
if( zLine==0 ) break;
690
while( fossil_isspace(zLine[0]) ) zLine++;
691
if( zLine[0]==0 ) break;
692
}
693
return lineCnt;
694
}
695
}
696
697
/*
698
** Return the "COMMENT_PRINT_*" flags specified by the following sources,
699
** evaluated in the following cascading order:
700
**
701
** 1. The local (per-repository) "comment-format" setting.
702
** 2. The global (all-repositories) "comment-format" setting.
703
** 3. The default value COMMENT_PRINT_DEFAULT.
704
*/
705
int get_comment_format(){
706
int comFmtFlags;
707
708
/* We must cache this result, else running the timeline can end up
709
** querying the comment-format setting from the global db once per
710
** timeline entry, which brings it to a crawl if that db is
711
** network-mounted. Discussed in:
712
** https://fossil-scm.org/forum/forumpost/9aaefe4e536e01bf */
713
714
/* The global command-line option is present, or the value has been cached. */
715
if( g.comFmtFlags!=COMMENT_PRINT_UNSET ){
716
return g.comFmtFlags;
717
}
718
/* Load the local (per-repository) or global (all-repositories) value, and use
719
** g.comFmtFlags as a cache. */
720
comFmtFlags = db_get_int("comment-format", COMMENT_PRINT_UNSET);
721
if( comFmtFlags!=COMMENT_PRINT_UNSET ){
722
g.comFmtFlags = comFmtFlags;
723
return comFmtFlags;
724
}
725
/* Fallback to the default value. */
726
g.comFmtFlags = COMMENT_PRINT_DEFAULT;
727
return g.comFmtFlags;
728
}
729
730
/*
731
**
732
** COMMAND: test-comment-format
733
**
734
** Usage: %fossil test-comment-format [OPTIONS] TEXT [PREFIX] [ORIGTEXT]
735
**
736
** Test comment formatting and printing. Use for testing only.
737
**
738
** The default (canonical) formatting algorithm is:
739
**
740
** * Omit leading/trailing whitespace
741
** * Collapse internal whitespace into a single space character.
742
** * Attempt to break lines at whitespace or at a hyphen.
743
**
744
** Use --whitespace, --origbreak, --trimcrlf, --trimspace,
745
** and/or --wordbreak to disable the canonical processing and do
746
** the special processing specified by those other options.
747
**
748
** Options:
749
** --decode Decode the text using the same method used when
750
** handling the value of a C-card from a manifest.
751
** --file FILE Omit the TEXT argument and read the comment text
752
** from FILE.
753
** --indent Number of spaces to indent (default (-1) is to
754
** auto-detect). Zero means no indent.
755
** --orig FILE Take the value for the ORIGTEXT argument from FILE.
756
** --origbreak Attempt to break when the original comment text
757
** is detected.
758
** --trimcrlf Enable trimming of leading/trailing CR/LF.
759
** --trimspace Enable trimming of leading/trailing spaces.
760
** --whitespace Keep all internal whitespace.
761
** --wordbreak Attempt to break lines on word boundaries.
762
** -W|--width NUM Width of lines (default (-1) is to auto-detect).
763
** Zero means no limit.
764
*/
765
void test_comment_format(void){
766
const char *zWidth;
767
const char *zIndent;
768
const char *zPrefix = 0;
769
char *zText = 0;
770
char *zOrigText = 0;
771
int indent, width;
772
int i;
773
const char *fromFile = find_option("file", 0, 1);
774
int decode = find_option("decode", 0, 0)!=0;
775
int flags = COMMENT_PRINT_CANONICAL;
776
const char *fromOrig = find_option("orig", 0, 1);
777
if( find_option("whitespace",0,0) ){
778
flags = 0;
779
}
780
if( find_option("trimcrlf", 0, 0) ){
781
flags = COMMENT_PRINT_TRIM_CRLF;
782
}
783
if( find_option("trimspace", 0, 0) ){
784
flags |= COMMENT_PRINT_TRIM_SPACE;
785
flags &= COMMENT_PRINT_CANONICAL;
786
}
787
if( find_option("wordbreak", 0, 0) ){
788
flags |= COMMENT_PRINT_WORD_BREAK;
789
flags &= COMMENT_PRINT_CANONICAL;
790
}
791
if( find_option("origbreak", 0, 0) ){
792
flags |= COMMENT_PRINT_ORIG_BREAK;
793
flags &= COMMENT_PRINT_CANONICAL;
794
}
795
zWidth = find_option("width","W",1);
796
if( zWidth ){
797
width = atoi(zWidth);
798
}else{
799
width = -1; /* automatic */
800
}
801
zIndent = find_option("indent",0,1);
802
if( zIndent ){
803
indent = atoi(zIndent);
804
}else{
805
indent = -1; /* automatic */
806
}
807
verify_all_options();
808
zPrefix = zText = zOrigText = 0;
809
if( fromFile ){
810
Blob fileData;
811
blob_read_from_file(&fileData, fromFile, ExtFILE);
812
zText = fossil_strdup(blob_str(&fileData));
813
blob_reset(&fileData);
814
}
815
if( fromOrig ){
816
Blob fileData;
817
blob_read_from_file(&fileData, fromOrig, ExtFILE);
818
zOrigText = fossil_strdup(blob_str(&fileData));
819
blob_reset(&fileData);
820
}
821
for(i=2; i<g.argc; i++){
822
if( zText==0 ){
823
zText = g.argv[i];
824
continue;
825
}
826
if( zPrefix==0 ){
827
zPrefix = g.argv[i];
828
continue;
829
}
830
if( zOrigText==0 ){
831
zOrigText = g.argv[i];
832
continue;
833
}
834
usage("[OPTIONS] TEXT [PREFIX] [ORIGTEXT]");
835
}
836
if( decode ){
837
zText = mprintf(fromFile?"%z":"%s" /*works-like:"%s"*/, zText);
838
defossilize(zText);
839
if( zOrigText ){
840
zOrigText = mprintf(fromFile?"%z":"%s" /*works-like:"%s"*/, zOrigText);
841
defossilize(zOrigText);
842
}
843
}
844
if( zPrefix==0 ) zPrefix = "00:00:00 ";
845
if( indent<0 ){
846
indent = strlen(zPrefix);
847
}
848
if( zPrefix && *zPrefix ){
849
fossil_print("%s", zPrefix);
850
}
851
fossil_print("(%d lines output)\n",
852
comment_print(zText, zOrigText, indent, width, flags));
853
}
854

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button