Fossil SCM

fossil-scm / src / comformat.c

Blame History Raw 854 lines

1	`/*`
2	`** Copyright (c) 2007 D. Richard Hipp`
3	`**`
4	`** This program is free software; you can redistribute it and/or`
5	`** modify it under the terms of the Simplified BSD License (also`
6	`** known as the "2-Clause License" or "FreeBSD License".)`
7	`**`
8	`** This program is distributed in the hope that it will be useful,`
9	`** but without any warranty; without even the implied warranty of`
10	`** merchantability or fitness for a particular purpose.`
11	`**`
12	`** Author contact information:`
13	`** drh@hwaci.com`
14	`** http://www.hwaci.com/drh/`
15	`**`
16	`*******************************************************************************`
17	`**`
18	`** This file contains code used to format and print comments or other`
19	`** text on a TTY.`
20	`*/`
21	`#include "config.h"`
22	`#include "comformat.h"`
23	`#include <assert.h>`
24
25	`#if INTERFACE`
26	`#define COMMENT_PRINT_NONE ((u32)0x00000000) /* No flags */`
27	`#define COMMENT_PRINT_CANONICAL ((u32)0x00000001) /* Use canonical algorithm */`
28	`#define COMMENT_PRINT_DEFAULT COMMENT_PRINT_CANONICAL /* Default */`
29	`#define COMMENT_PRINT_UNSET (-1) /* Not initialized */`
30
31	`/* The canonical comment printing algorithm is recommended. We make`
32	`** no promise of on-going support for any of the following flags:`
33	`*/`
34	`#define COMMENT_PRINT_TRIM_CRLF ((u32)0x00000002) /* Trim leading CR/LF. */`
35	`#define COMMENT_PRINT_TRIM_SPACE ((u32)0x00000004) /* Trim leading/trailing. */`
36	`#define COMMENT_PRINT_WORD_BREAK ((u32)0x00000008) /* Break lines on words. */`
37	`#define COMMENT_PRINT_ORIG_BREAK ((u32)0x00000010) /* Break before original. */`
38	`#endif`
39
40	`/******* Code copied from SQLite src/shell.c.in on 2024-09-30 ********/`
41	`/* Lookup table to estimate the number of columns consumed by a Unicode`
42	`** character.`
43	`*/`
static const struct {
  unsigned char w;    /* Width of the character in columns */
  int iFirst;         /* First character in a span having this width */
} aUWidth[] = {
  /* Each entry opens a span that extends up to (but not including) the
  ** iFirst of the next entry.  Entries are sorted by ascending iFirst.
  ** The implicit leading span {1, 0x00000} is handled by the fast path
  ** in cli_wcwidth(). */
  {0, 0x00300}, {1, 0x00370}, {0, 0x00483}, {1, 0x00487}, {0, 0x00488},
  {1, 0x0048a}, {0, 0x00591}, {1, 0x005be}, {0, 0x005bf}, {1, 0x005c0},
  {0, 0x005c1}, {1, 0x005c3}, {0, 0x005c4}, {1, 0x005c6}, {0, 0x005c7},
  {1, 0x005c8}, {0, 0x00600}, {1, 0x00604}, {0, 0x00610}, {1, 0x00616},
  {0, 0x0064b}, {1, 0x0065f}, {0, 0x00670}, {1, 0x00671}, {0, 0x006d6},
  {1, 0x006e5}, {0, 0x006e7}, {1, 0x006e9}, {0, 0x006ea}, {1, 0x006ee},
  {0, 0x0070f}, {1, 0x00710}, {0, 0x00711}, {1, 0x00712}, {0, 0x00730},
  {1, 0x0074b}, {0, 0x007a6}, {1, 0x007b1}, {0, 0x007eb}, {1, 0x007f4},
  {0, 0x00901}, {1, 0x00903}, {0, 0x0093c}, {1, 0x0093d}, {0, 0x00941},
  {1, 0x00949}, {0, 0x0094d}, {1, 0x0094e}, {0, 0x00951}, {1, 0x00955},
  {0, 0x00962}, {1, 0x00964}, {0, 0x00981}, {1, 0x00982}, {0, 0x009bc},
  {1, 0x009bd}, {0, 0x009c1}, {1, 0x009c5}, {0, 0x009cd}, {1, 0x009ce},
  {0, 0x009e2}, {1, 0x009e4}, {0, 0x00a01}, {1, 0x00a03}, {0, 0x00a3c},
  {1, 0x00a3d}, {0, 0x00a41}, {1, 0x00a43}, {0, 0x00a47}, {1, 0x00a49},
  {0, 0x00a4b}, {1, 0x00a4e}, {0, 0x00a70}, {1, 0x00a72}, {0, 0x00a81},
  {1, 0x00a83}, {0, 0x00abc}, {1, 0x00abd}, {0, 0x00ac1}, {1, 0x00ac6},
  {0, 0x00ac7}, {1, 0x00ac9}, {0, 0x00acd}, {1, 0x00ace}, {0, 0x00ae2},
  {1, 0x00ae4}, {0, 0x00b01}, {1, 0x00b02}, {0, 0x00b3c}, {1, 0x00b3d},
  {0, 0x00b3f}, {1, 0x00b40}, {0, 0x00b41}, {1, 0x00b44}, {0, 0x00b4d},
  {1, 0x00b4e}, {0, 0x00b56}, {1, 0x00b57}, {0, 0x00b82}, {1, 0x00b83},
  {0, 0x00bc0}, {1, 0x00bc1}, {0, 0x00bcd}, {1, 0x00bce}, {0, 0x00c3e},
  {1, 0x00c41}, {0, 0x00c46}, {1, 0x00c49}, {0, 0x00c4a}, {1, 0x00c4e},
  {0, 0x00c55}, {1, 0x00c57}, {0, 0x00cbc}, {1, 0x00cbd}, {0, 0x00cbf},
  {1, 0x00cc0}, {0, 0x00cc6}, {1, 0x00cc7}, {0, 0x00ccc}, {1, 0x00cce},
  {0, 0x00ce2}, {1, 0x00ce4}, {0, 0x00d41}, {1, 0x00d44}, {0, 0x00d4d},
  {1, 0x00d4e}, {0, 0x00dca}, {1, 0x00dcb}, {0, 0x00dd2}, {1, 0x00dd5},
  {0, 0x00dd6}, {1, 0x00dd7}, {0, 0x00e31}, {1, 0x00e32}, {0, 0x00e34},
  {1, 0x00e3b}, {0, 0x00e47}, {1, 0x00e4f}, {0, 0x00eb1}, {1, 0x00eb2},
  {0, 0x00eb4}, {1, 0x00eba}, {0, 0x00ebb}, {1, 0x00ebd}, {0, 0x00ec8},
  {1, 0x00ece}, {0, 0x00f18}, {1, 0x00f1a}, {0, 0x00f35}, {1, 0x00f36},
  {0, 0x00f37}, {1, 0x00f38}, {0, 0x00f39}, {1, 0x00f3a}, {0, 0x00f71},
  {1, 0x00f7f}, {0, 0x00f80}, {1, 0x00f85}, {0, 0x00f86}, {1, 0x00f88},
  {0, 0x00f90}, {1, 0x00f98}, {0, 0x00f99}, {1, 0x00fbd}, {0, 0x00fc6},
  {1, 0x00fc7}, {0, 0x0102d}, {1, 0x01031}, {0, 0x01032}, {1, 0x01033},
  {0, 0x01036}, {1, 0x0103b}, {0, 0x01058},
  {1, 0x0105a}, {2, 0x01100}, {0, 0x01160}, {1, 0x01200}, {0, 0x0135f},
  {1, 0x01360}, {0, 0x01712}, {1, 0x01715}, {0, 0x01732}, {1, 0x01735},
  {0, 0x01752}, {1, 0x01754}, {0, 0x01772}, {1, 0x01774}, {0, 0x017b4},
  {1, 0x017b6}, {0, 0x017b7}, {1, 0x017be}, {0, 0x017c6}, {1, 0x017c7},
  {0, 0x017c9}, {1, 0x017d4}, {0, 0x017dd}, {1, 0x017de}, {0, 0x0180b},
  {1, 0x0180e}, {0, 0x018a9}, {1, 0x018aa}, {0, 0x01920}, {1, 0x01923},
  {0, 0x01927}, {1, 0x01929}, {0, 0x01932}, {1, 0x01933}, {0, 0x01939},
  {1, 0x0193c}, {0, 0x01a17}, {1, 0x01a19}, {0, 0x01b00}, {1, 0x01b04},
  {0, 0x01b34}, {1, 0x01b35}, {0, 0x01b36}, {1, 0x01b3b}, {0, 0x01b3c},
  {1, 0x01b3d}, {0, 0x01b42}, {1, 0x01b43}, {0, 0x01b6b}, {1, 0x01b74},
  {0, 0x01dc0}, {1, 0x01dcb}, {0, 0x01dfe}, {1, 0x01e00}, {0, 0x0200b},
  {1, 0x02010}, {0, 0x0202a}, {1, 0x0202f}, {0, 0x02060}, {1, 0x02064},
  {0, 0x0206a}, {1, 0x02070}, {0, 0x020d0}, {1, 0x020f0}, {2, 0x02329},
  {1, 0x0232b}, {2, 0x02e80}, {0, 0x0302a}, {2, 0x03030}, {1, 0x0303f},
  {2, 0x03040}, {0, 0x03099}, {2, 0x0309b}, {1, 0x0a4d0}, {0, 0x0a806},
  {1, 0x0a807}, {0, 0x0a80b}, {1, 0x0a80c}, {0, 0x0a825}, {1, 0x0a827},
  {2, 0x0ac00}, {1, 0x0d7a4}, {2, 0x0f900}, {1, 0x0fb00}, {0, 0x0fb1e},
  {1, 0x0fb1f}, {0, 0x0fe00}, {2, 0x0fe10}, {1, 0x0fe1a}, {0, 0x0fe20},
  {1, 0x0fe24}, {2, 0x0fe30}, {1, 0x0fe70}, {0, 0x0feff}, {2, 0x0ff00},
  {1, 0x0ff61}, {2, 0x0ffe0}, {1, 0x0ffe7}, {0, 0x0fff9}, {1, 0x0fffc},
  {0, 0x10a01}, {1, 0x10a04}, {0, 0x10a05}, {1, 0x10a07}, {0, 0x10a0c},
  {1, 0x10a10}, {0, 0x10a38}, {1, 0x10a3b}, {0, 0x10a3f}, {1, 0x10a40},
  {0, 0x1d167}, {1, 0x1d16a}, {0, 0x1d173}, {1, 0x1d183}, {0, 0x1d185},
  {1, 0x1d18c}, {0, 0x1d1aa}, {1, 0x1d1ae}, {0, 0x1d242}, {1, 0x1d245},
  {2, 0x20000}, {1, 0x2fffe}, {2, 0x30000}, {1, 0x3fffe}, {0, 0xe0001},
  {1, 0xe0002}, {0, 0xe0020}, {1, 0xe0080}, {0, 0xe0100}, {1, 0xe01f0}
};

/*
** Estimate how many terminal columns the single Unicode character c
** occupies.  Ordinary characters yield 1; zero-width and double-width
** characters yield 0 and 2 respectively.
**
** Display devices disagree about the rendered width of some characters,
** so the true display width cannot be known with complete accuracy.  A
** wrong estimate may misalign columns; nothing can be done about that
** here.
*/
static int cli_wcwidth(int c){
  int lo;   /* Lowest table index that may still hold the answer */
  int hi;   /* Highest table index that may still hold the answer */

  /* Everything below the first table entry is a single column. */
  if( c<0x300 ) return 1;

  /* Locate the last span whose starting code point is <= c.  Entry 0
  ** starts at 0x300, so at least one such span always exists here. */
  lo = 0;
  hi = (int)(sizeof(aUWidth)/sizeof(aUWidth[0])) - 1;
  while( lo<hi ){
    int mid = lo + (hi - lo + 1)/2;   /* Round up so that lo advances */
    if( aUWidth[mid].iFirst<=c ){
      lo = mid;
    }else{
      hi = mid - 1;
    }
  }
  return aUWidth[lo].w;
}
145	`/***** End of code copied from SQLite ***********************************/`
146
147	`/*`
148	`** This is the previous value used by most external callers when they`
149	`** needed to specify a default maximum line length to be used with the`
150	`** comment_print() function.`
151	`*/`
152	`#ifndef COMMENT_LEGACY_LINE_LENGTH`
153	`# define COMMENT_LEGACY_LINE_LENGTH (78)`
154	`#endif`
155
156	`/*`
157	`** This is the number of spaces to print when a tab character is seen.`
158	`*/`
159	`#ifndef COMMENT_TAB_WIDTH`
160	`# define COMMENT_TAB_WIDTH (8)`
161	`#endif`
162
163	`/*`
164	`** This function sets the maximum number of characters to print per line`
165	`** based on the detected terminal line width, if available; otherwise, it`
166	`** uses the legacy default terminal line width minus the amount to indent.`
167	`**`
168	`** Zero is returned to indicate any failure. One is returned to indicate`
169	`** the successful detection of the terminal line width. Negative one is`
170	`** returned to indicate the terminal line width is using the hard-coded`
171	`** legacy default value.`
172	`*/`
173	`static int comment_set_maxchars(`
174	`int indent,`
175	`int *pMaxChars`
176	`){`
177	`struct TerminalSize ts;`
178	`if ( !terminal_get_size(&ts) ){`
179	`return 0;`
180	`}`
181
182	`if( ts.nColumns ){`
183	`*pMaxChars = ts.nColumns - indent;`
184	`return 1;`
185	`}else{`
186	`/*`
187	`** Fallback to using more-or-less the "legacy semantics" of hard-coding`
188	`** the maximum line length to a value reasonable for the vast majority`
189	`** of supported systems.`
190	`*/`
191	`*pMaxChars = COMMENT_LEGACY_LINE_LENGTH - indent;`
192	`return -1;`
193	`}`
194	`}`
195
196	`/*`
197	`** This function checks the current line being printed against the original`
198	`** comment text. Upon matching, it updates the provided character and line`
199	`** counts, if applicable. The caller needs to emit a new line, if desired.`
200	`*/`
static int comment_check_orig(
  const char *zOrigText, /* [in] Original comment text ONLY, may be NULL. */
  const char *zLine,     /* [in] The comment line to print. */
  int *pCharCnt,         /* [in/out] Pointer to the line character count. */
  int *pLineCnt          /* [in/out] Pointer to the total line count. */
){
  /* No original text supplied, or the remaining line is not exactly the
  ** original text: nothing to do. */
  if( zOrigText==0 || fossil_strcmp(zLine, zOrigText)!=0 ){
    return 0;
  }
  /* Match: a new output line begins here, so reset the per-line character
  ** count and bump the line count.  Either counter may be omitted. */
  if( pCharCnt ) *pCharCnt = 0;
  if( pLineCnt ) (*pLineCnt)++;
  return 1;
}
214
215	`/*`
216	`** This function scans the specified comment line starting just after the`
217	`** initial index and returns the index of the next spacing character -OR-`
218	`** zero if such a character cannot be found. For the purposes of this`
219	`** algorithm, the NUL character is treated the same as a spacing character.`
220	`*/`
static int comment_next_space(
  const char *zLine, /* [in] The comment line being printed. */
  int index,         /* [in] The current character index being handled. */
  int maxChars,      /* [in] Optimization hint to abort before space found. */
  int *sumWidth      /* [out] Summated width of all characters to next space. */
){
  int cchUTF8, utf32, wcwidth = 0;
  int nextIndex = index;

  /* Always give the output a defined value.  Callers read *sumWidth
  ** right after the call, including when the scan starts at the
  ** terminating NUL and returns immediately. */
  *sumWidth = 0;
  if( zLine[index]==0 ) return index;
  for(;;){
    /* Step over one complete character and accumulate its column width. */
    char_info_utf8(&zLine[nextIndex],&cchUTF8,&utf32);
    nextIndex += cchUTF8;
    wcwidth += cli_wcwidth(utf32);
    /* Stop at NUL, at a spacing character, or once the accumulated width
    ** already exceeds what the caller can use. */
    if( zLine[nextIndex]==0 || fossil_isspace(zLine[nextIndex]) ||
        wcwidth>maxChars ){
      *sumWidth = wcwidth;
      return nextIndex;
    }
  }
  return 0; /* NOT REACHED */
}
242
243	`/*`
244	`** Return information about the next (single- or multi-byte) character in`
245	`** z[0]. Two values are computed:`
246	`**`
247	`** * The number of bytes needed to represent the character.`
248	`** * The UTF code point value.`
249	`**`
250	`** Incomplete, ill-formed and overlong sequences are consumed together as`
251	`** one invalid code point. The invalid lead bytes 0xC0 to 0xC1 and 0xF5 to`
252	`** 0xF7 are allowed to initiate (ill-formed) 2- and 4-byte sequences,`
253	`** respectively, the other invalid lead bytes 0xF8 to 0xFF are treated`
254	`** as invalid 1-byte sequences (as lone trail bytes), all resulting`
255	`** in one invalid code point. Invalid UTF-8 sequences encoding a`
256	`** non-scalar code point (UTF-16 surrogates U+D800 to U+DFFF) are allowed.`
257	`**`
258	`** ANSI escape sequences of the form "\033[...X" are interpreted as a`
259	`** zero-width character.`
260	`*/`
void char_info_utf8(
  const char *z,    /* The character to be analyzed */
  int *pCchUTF8,    /* OUT: The number of bytes used by this character */
  int *pUtf32       /* OUT: The UTF8 code point (used to determine width) */
){
  int i = 0;                              /* Counted bytes. */
  int cchUTF8 = 1;                        /* Code units consumed. */
  int maxUTF8 = 1;                        /* Expected sequence length. */
  char c = z[i++];

  /* An ANSI escape "\033[" + parameter bytes (0x30-0x3f) + intermediate
  ** bytes (0x20-0x2f) + one final byte (0x40-0x7e) is reported as a single
  ** zero-width character spanning the whole sequence.  If the final byte
  ** is missing, fall through and treat the ESC as plain ASCII. */
  if( c==0x1b && z[i]=='[' ){
    i++;
    while( z[i]>=0x30 && z[i]<=0x3f ){ i++; }
    while( z[i]>=0x20 && z[i]<=0x2f ){ i++; }
    if( z[i]>=0x40 && z[i]<=0x7e ){
      *pCchUTF8 = i+1;
      *pUtf32 = 0x301;                    /* A zero-width character */
      return;
    }
  }
  if( (c&0x80)==0x00 ){                   /* 7-bit ASCII character. */
    *pCchUTF8 = 1;
    *pUtf32 = (int)z[0];
    return;
  }
  else if( (c&0xe0)==0xc0 ) maxUTF8 = 2;  /* UTF-8 lead byte 110vvvvv */
  else if( (c&0xf0)==0xe0 ) maxUTF8 = 3;  /* UTF-8 lead byte 1110vvvv */
  else if( (c&0xf8)==0xf0 ) maxUTF8 = 4;  /* UTF-8 lead byte 11110vvv */

  /* Consume trail bytes, stopping early at the first non-trail byte
  ** (which includes the terminating NUL). */
  while( cchUTF8<maxUTF8 &&
          (z[i]&0xc0)==0x80 ){            /* UTF-8 trail byte 10vvvvvv */
    cchUTF8++;
    i++;
  }
  *pCchUTF8 = cchUTF8;
  if( cchUTF8!=maxUTF8 ||                 /* Incomplete UTF-8 sequence. */
      ( cchUTF8==1 && (c&0x80)==0x80 )){  /* Lone UTF-8 trail byte. */
    *pUtf32 = 0xfffd;                     /* U+FFFD Replacement Character */
#ifdef FOSSIL_DEBUG
    /* Deliberately trips in debug builds to flag the invalid input. */
    assert( *pUtf32!=0xfffd );            /* Invalid UTF-8 sequence. */
#endif
    return;
  }
  switch( cchUTF8 ){
    case 4:
      /* For a lead byte 11110vvv bit 3 is always clear, so the 0x0f mask
      ** extracts the same bits as 0x07 would. */
      *pUtf32 =
        ( (z[0] & 0x0f)<<18 ) |
        ( (z[1] & 0x3f)<<12 ) |
        ( (z[2] & 0x3f)<< 6 ) |
        ( (z[3] & 0x3f)<< 0 ) ;
      break;
    case 3:
      *pUtf32 =
        ( (z[0] & 0x0f)<<12 ) |
        ( (z[1] & 0x3f)<< 6 ) |
        ( (z[2] & 0x3f)<< 0 ) ;
      break;
    case 2:
      *pUtf32 =
        ( (z[0] & 0x1f)<< 6 ) |
        ( (z[1] & 0x3f)<< 0 ) ;
      break;
    default:
      *pUtf32 = 0xfffd;                   /* U+FFFD Replacement Character */
      break;
  }
#ifdef FOSSIL_DEBUG
  /* Debug builds flag out-of-range and surrogate code points.  The two
  ** surrogate tests must be OR-ed: no value is both below 0xd800 and
  ** above 0xdfff, so AND-ing them would fail on every character. */
  assert(
    *pUtf32>=0 && *pUtf32<=0x10ffff &&    /* Valid range U+0000 to U+10FFFF. */
    ( *pUtf32<0xd800 || *pUtf32>0xdfff )  /* Non-scalar (UTF-16 surrogates). */
  );
#endif
}
332
333	`/*`
334	`** This function is called when printing a logical comment line to calculate`
335	`** the necessary indenting. The caller needs to emit the indenting spaces.`
336	`*/`
static void comment_calc_indent(
  const char *zLine, /* [in] The comment line being printed. */
  int indent,        /* [in] Number of spaces to indent, zero for none. */
  int trimCrLf,      /* [in] Non-zero to trim leading/trailing CR/LF. */
  int trimSpace,     /* [in] Non-zero to trim leading/trailing spaces. */
  int *piIndex       /* [in/out] Pointer to first non-space character. */
){
  /* NOTE: "indent" is accepted but not consulted here; this routine only
  ** advances *piIndex past text that should be trimmed. */
  if( zLine && piIndex ){
    int index = *piIndex;
    if( trimCrLf ){
      /* Skip any run of CR/LF characters at the current position. */
      while( zLine[index]=='\r' || zLine[index]=='\n' ){ index++; }
    }
    if( trimSpace ){
      /* Skip any run of spacing characters at the current position. */
      while( fossil_isspace(zLine[index]) ){ index++; }
    }
    *piIndex = index;
  }
}
355
356	`/*`
357	`** This function prints one logical line of a comment, stopping when it hits`
358	`** a new line -OR- runs out of space on the logical line.`
359	`*/`
360	`static void comment_print_line(`
361	`const char zOrigText, / [in] Original comment text ONLY, may be NULL. */`
362	`const char zLine, / [in] The comment line to print. */`
363	`int origIndent, /* [in] Number of spaces to indent before the original`
364	`** comment. */`
365	`int indent, /* [in] Number of spaces to indent, before the line`
366	`** to print. */`
367	`int lineChars, /* [in] Maximum number of characters to print. */`
368	`int trimCrLf, /* [in] Non-zero to trim leading/trailing CR/LF. */`
369	`int trimSpace, /* [in] Non-zero to trim leading/trailing spaces. */`
370	`int wordBreak, /* [in] Non-zero to try breaking on word boundaries. */`
371	`int origBreak, /* [in] Non-zero to break before original comment. */`
372	`int pLineCnt, / [in/out] Pointer to the total line count. */`
373	`const char *pzLine / [out] Pointer to the end of the logical line. */`
374	`){`
375	`int index = 0, charCnt = 0, lineCnt = 0, maxChars, i;`
376	`char zBuf[400]; int iBuf=0; /* Output buffer and counter. */`
377	`if( !zLine ) return;`
378	`if( lineChars<=0 ) return;`
379	`#if 0`
380	`assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */`
381	`assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */`
382	`#endif`
383	`if( indent>(int)sizeof(zBuf)-6 ){`
384	`/* Limit initial indent to fit output buffer. */`
385	`indent = sizeof(zBuf)-6;`
386	`}`
387	`comment_calc_indent(zLine, indent, trimCrLf, trimSpace, &index);`
388	`if( indent>0 ){`
389	`for(i=0; i<indent; i++){`
390	`zBuf[iBuf++] = ' ';`
391	`}`
392	`}`
393	`if( origIndent>(int)sizeof(zBuf)-6 ){`
394	`/* Limit line indent to fit output buffer. */`
395	`origIndent = sizeof(zBuf)-6;`
396	`}`
397	`maxChars = lineChars;`
398	`for(;;){`
399	`int cchUTF8, utf32;`
400	`int useChars = 1;`
401	`char c = zLine[index];`
402	`/* Flush the output buffer if there's no space left for at least one more`
403	`** (potentially 4-byte) UTF-8 sequence, one level of indentation spaces,`
404	`** a new line, and a terminating NULL. */`
405	`if( iBuf>(int)sizeof(zBuf)-origIndent-6 ){`
406	`zBuf[iBuf]=0;`
407	`iBuf=0;`
408	`fossil_print("%s", zBuf);`
409	`}`
410	`if( c==0 ){`
411	`break;`
412	`}else{`
413	`if( origBreak && index>0 ){`
414	`const char *zCurrent = &zLine[index];`
415	`if( comment_check_orig(zOrigText, zCurrent, &charCnt, &lineCnt) ){`
416	`zBuf[iBuf++] = '\n';`
417	`comment_calc_indent(zLine, origIndent, trimCrLf, trimSpace, &index);`
418	`for( i=0; i<origIndent; i++ ){`
419	`zBuf[iBuf++] = ' ';`
420	`}`
421	`maxChars = lineChars;`
422	`}`
423	`}`
424	`index++;`
425	`}`
426	`if( c=='\n' ){`
427	`lineCnt++;`
428	`charCnt = 0;`
429	`useChars = 0;`
430	`}else if( c=='\t' ){`
431	`int sumWidth;`
432	`int nextIndex = comment_next_space(zLine, index, maxChars, &sumWidth);`
433	`if( nextIndex<=0 \|\| sumWidth>maxChars ){`
434	`break;`
435	`}`
436	`charCnt++;`
437	`useChars = COMMENT_TAB_WIDTH;`
438	`if( maxChars<useChars ){`
439	`zBuf[iBuf++] = ' ';`
440	`break;`
441	`}`
442	`}else if( wordBreak && fossil_isspace(c) ){`
443	`int sumWidth;`
444	`int nextIndex = comment_next_space(zLine, index, maxChars, &sumWidth);`
445	`if( nextIndex<=0 \|\| sumWidth>=maxChars ){`
446	`break;`
447	`}`
448	`charCnt++;`
449	`}else{`
450	`charCnt++;`
451	`}`
452	`assert( c!='\n' \|\| charCnt==0 );`
453	`zBuf[iBuf++] = c;`
454	`char_info_utf8(&zLine[index-1],&cchUTF8,&utf32);`
455	`if( cchUTF8>1 ){`
456	`int wcwidth;`
457	`wcwidth = cli_wcwidth(utf32);`
458	`if( wcwidth>maxChars && lineChars>=wcwidth ){ /* rollback */`
459	`index--;`
460	`iBuf--;`
461	`zBuf[iBuf] = 0;`
462	`break;`
463	`}`
464	`for( ; cchUTF8>1; cchUTF8-- ){`
465	`zBuf[iBuf++] = zLine[index++];`
466	`}`
467	`useChars += wcwidth - 1;`
468	`}`
469	`maxChars -= useChars;`
470	`if( maxChars<=0 ) break;`
471	`if( c=='\n' ) break;`
472	`}`
473	`if( charCnt>0 ){`
474	`zBuf[iBuf++] = '\n';`
475	`lineCnt++;`
476	`}`
477	`/* Flush the remaining output buffer. */`
478	`if( iBuf>0 ){`
479	`zBuf[iBuf]=0;`
480	`iBuf=0;`
481	`fossil_print("%s", zBuf);`
482	`}`
483	`if( pLineCnt ){`
484	`*pLineCnt += lineCnt;`
485	`}`
486	`if( pzLine ){`
487	`*pzLine = zLine + index;`
488	`}`
489	`}`
490
491	`/*`
492	`** This is the canonical comment printing algorithm. This is the algorithm`
493	`** that is recommended and that is used unless the administrator has made`
494	`** special arrangements to use a customized algorithm.`
495	`**`
496	`** Given a comment string, format that string for printing on a TTY.`
497	`** Assume that the output cursor is indent spaces from the left margin`
498	`** and that a single line can contain no more than 'width' characters.`
499	`** Indent all subsequent lines by 'indent'.`
500	`**`
501	`** Formatting features:`
502	`**`
503	`** * Leading whitespace is removed.`
504	`** * Internal whitespace sequences are changed into a single space (0x20)`
505	`** character.`
506	`** * Lines are broken at a space, or at a hyphen ("-") whenever possible.`
507	`**`
508	`** Returns the number of new lines emitted.`
509	`*/`
static int comment_print_canonical(
  const char *zText, /* The comment text to be printed. */
  int indent,        /* Number of spaces to indent each non-initial line. */
  int width          /* Maximum number of characters per line. */
){
  int maxChars = width - indent;
  int si, sk, i, k, kc;
  int doIndent = 0;
  char *zBuf;
  char zBuffer[400];
  int lineCnt = 0;
  int nText;         /* Number of bytes in zText. */

  if( width<0 ){
    comment_set_maxchars(indent, &maxChars);
  }
  if( zText==0 ) zText = "(NULL)";
  nText = (int)strlen(zText);
  if( maxChars<=0 ){
    maxChars = nText;
  }
  /* Size the line buffer by the input length, not by the column limit.
  ** Zero-width characters and ANSI escape sequences add bytes to a line
  ** without advancing the column count, so bytes-per-line is not bounded
  ** by maxChars.  Each pass of the loop below writes at most as many
  ** bytes as it consumes, so nText+1 bytes is always enough. */
  if( nText>=(int)sizeof(zBuffer) ){
    zBuf = fossil_malloc(nText+1);
  }else{
    zBuf = zBuffer;
  }
  for(;;){
    while( fossil_isspace(zText[0]) ){ zText++; }
    if( zText[0]==0 ){
      if( doIndent==0 ){
        /* Nothing was printed at all: still terminate the current line. */
        fossil_print("\n");
        lineCnt = 1;
      }
      if( zBuf!=zBuffer) fossil_free(zBuf);
      return lineCnt;
    }
    /* i: input offset; k: output offset; kc: columns used so far.
    ** si/sk: input/output offsets of the most recent break opportunity. */
    for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){
      int cchUTF8, utf32;
      char c = zText[i];
      kc++; /* Count complete UTF-8 sequences. */
      char_info_utf8(&zText[i],&cchUTF8,&utf32);
      if( cchUTF8>1 ){
        int wcwidth;
        wcwidth = cli_wcwidth(utf32);
        if( kc+wcwidth-1>maxChars && maxChars>=wcwidth ){ /* rollback */
          kc--;
          break;
        }
        for( i--; cchUTF8>0; cchUTF8-- ){
          zBuf[k++] = zText[++i];
        }
        kc += wcwidth - 1;
      }
      else if( fossil_isspace(c) ){
        si = i;
        sk = k;
        /* Collapse a run of whitespace into a single space. */
        if( k==0 || zBuf[k-1]!=' ' ){
          zBuf[k++] = ' ';
        }
      }else{
        zBuf[k] = c;
        /* A hyphen following a letter is also a break opportunity; the
        ** hyphen stays at the end of the current line. */
        if( c=='-' && k>0 && fossil_isalpha(zBuf[k-1]) ){
          si = i+1;
          sk = k+1;
        }
        k++;
      }
    }
    if( doIndent ){
      fossil_print("%*s", indent, "");
    }
    doIndent = 1;
    if( sk>0 && zText[i] ){
      /* More text follows and a break opportunity exists: cut there. */
      zText += si;
      zBuf[sk] = 0;
    }else{
      zText += i;
      zBuf[k] = 0;
    }
    fossil_print("%s\n", zBuf);
    lineCnt++;
  }
}
592
593	`/*`
594	`** This is the comment printing function. The comment printing algorithm`
595	`** contained within it attempts to preserve the formatting present within`
596	`** the comment string itself while honoring line width limitations. There`
597	`** are several flags that modify the default behavior of this function:`
598	`**`
599	`** COMMENT_PRINT_CANONICAL: Use the canonical printing algorithm:`
600	`** * Omit leading and trailing whitespace`
601	`** * Collapse internal whitespace into a`
602	`** single space (0x20) character.`
603	`** * Attempt to break lines at whitespace`
604	`** or hyphens.`
605	`** This is the recommended algorithm and is`
606	`** used in most cases.`
607	`**`
608	`** COMMENT_PRINT_TRIM_CRLF: Trims leading and trailing carriage-returns`
609	`** and line-feeds where they do not materially`
610	`** impact pre-existing formatting (i.e. at the`
611	`** start of the comment string -AND- right`
612	`** before line indentation). This flag does`
613	`** not apply to the legacy comment printing`
614	`** algorithm. This flag may be combined with`
615	`** COMMENT_PRINT_TRIM_SPACE.`
616	`**`
617	`** COMMENT_PRINT_TRIM_SPACE: Trims leading and trailing spaces where they`
618	`** do not materially impact the pre-existing`
619	`** formatting (i.e. at the start of the comment`
620	`** string -AND- right before line indentation).`
621	`** This flag does not apply to the legacy`
622	`** comment printing algorithm. This flag may`
623	`** be combined with COMMENT_PRINT_TRIM_CRLF.`
624	`**`
625	`** COMMENT_PRINT_WORD_BREAK: Attempts to break lines on word boundaries`
626	`** while honoring the logical line length.`
627	`** If this flag is not specified, honoring the`
628	`** logical line length may result in breaking`
629	`** lines in the middle of words. This flag`
630	`** does not apply to the legacy comment`
631	`** printing algorithm.`
632	`**`
633	`** COMMENT_PRINT_ORIG_BREAK: Looks for the original comment text within`
634	`** the text being printed. Upon matching, a`
635	`** new line will be emitted, thus preserving`
636	`** more of the pre-existing formatting.`
637	`**`
638	`** Given a comment string, format that string for printing on a TTY.`
639	`** Assume that the output cursor is indent spaces from the left margin`
640	`** and that a single line can contain no more than 'width' characters.`
641	`** Indent all subsequent lines by 'indent'.`
642	`**`
643	`** Returns the number of new lines emitted.`
644	`*/`
645	`int comment_print(`
646	`const char zText, / The comment text to be printed. */`
647	`const char zOrigText, / Original comment text ONLY, may be NULL. */`
648	`int indent, /* Spaces to indent each non-initial line. */`
649	`int width, /* Maximum number of characters per line. */`
650	`int flags /* Zero or more "COMMENT_PRINT_" flags. /`
651	`){`
652	`int maxChars = width - indent;`
653
654	`if( flags & COMMENT_PRINT_CANONICAL ){`
655	`/* Use the canonical algorithm. This is what happens in almost`
656	`** all cases. */`
657	`return comment_print_canonical(zText, indent, width);`
658	`}else{`
659	`/* The remaining is a more complex formatting algorithm that is very`
660	`** seldom used and is considered deprecated.`
661	`*/`
662	`int trimCrLf = flags & COMMENT_PRINT_TRIM_CRLF;`
663	`int trimSpace = flags & COMMENT_PRINT_TRIM_SPACE;`
664	`int wordBreak = flags & COMMENT_PRINT_WORD_BREAK;`
665	`int origBreak = flags & COMMENT_PRINT_ORIG_BREAK;`
666	`int lineCnt = 0;`
667	`const char *zLine;`
668
669	`if( width<0 ){`
670	`comment_set_maxchars(indent, &maxChars);`
671	`}`
672	`if( zText==0 ) zText = "(NULL)";`
673	`if( maxChars<=0 ){`
674	`maxChars = strlen(zText);`
675	`}`
676	`if( trimSpace ){`
677	`while( fossil_isspace(zText[0]) ){ zText++; }`
678	`}`
679	`if( zText[0]==0 ){`
680	`fossil_print("\n");`
681	`lineCnt++;`
682	`return lineCnt;`
683	`}`
684	`zLine = zText;`
685	`for(;;){`
686	`comment_print_line(zOrigText, zLine, indent, zLine>zText ? indent : 0,`
687	`maxChars, trimCrLf, trimSpace, wordBreak, origBreak,`
688	`&lineCnt, &zLine);`
689	`if( zLine==0 ) break;`
690	`while( fossil_isspace(zLine[0]) ) zLine++;`
691	`if( zLine[0]==0 ) break;`
692	`}`
693	`return lineCnt;`
694	`}`
695	`}`
696
697	`/*`
698	`** Return the "COMMENT_PRINT_*" flags specified by the following sources,`
699	`** evaluated in the following cascading order:`
700	`**`
701	`** 1. The local (per-repository) "comment-format" setting.`
702	`** 2. The global (all-repositories) "comment-format" setting.`
703	`** 3. The default value COMMENT_PRINT_DEFAULT.`
704	`*/`
705	`int get_comment_format(){`
706	`int comFmtFlags;`
707
708	`/* We must cache this result, else running the timeline can end up`
709	`** querying the comment-format setting from the global db once per`
710	`** timeline entry, which brings it to a crawl if that db is`
711	`** network-mounted. Discussed in:`
712	`** https://fossil-scm.org/forum/forumpost/9aaefe4e536e01bf */`
713
714	`/* The global command-line option is present, or the value has been cached. */`
715	`if( g.comFmtFlags!=COMMENT_PRINT_UNSET ){`
716	`return g.comFmtFlags;`
717	`}`
718	`/* Load the local (per-repository) or global (all-repositories) value, and use`
719	`** g.comFmtFlags as a cache. */`
720	`comFmtFlags = db_get_int("comment-format", COMMENT_PRINT_UNSET);`
721	`if( comFmtFlags!=COMMENT_PRINT_UNSET ){`
722	`g.comFmtFlags = comFmtFlags;`
723	`return comFmtFlags;`
724	`}`
725	`/* Fallback to the default value. */`
726	`g.comFmtFlags = COMMENT_PRINT_DEFAULT;`
727	`return g.comFmtFlags;`
728	`}`
729
730	`/*`
731	`**`
732	`** COMMAND: test-comment-format`
733	`**`
734	`** Usage: %fossil test-comment-format [OPTIONS] TEXT [PREFIX] [ORIGTEXT]`
735	`**`
736	`** Test comment formatting and printing. Use for testing only.`
737	`**`
738	`** The default (canonical) formatting algorithm is:`
739	`**`
740	`** * Omit leading/trailing whitespace`
741	`** * Collapse internal whitespace into a single space character.`
742	`** * Attempt to break lines at whitespace or at a hyphen.`
743	`**`
744	`** Use --whitespace, --origbreak, --trimcrlf, --trimspace,`
745	`** and/or --wordbreak to disable the canonical processing and do`
746	`** the special processing specified by those other options.`
747	`**`
748	`** Options:`
749	`** --decode Decode the text using the same method used when`
750	`** handling the value of a C-card from a manifest.`
751	`** --file FILE Omit the TEXT argument and read the comment text`
752	`** from FILE.`
753	`** --indent Number of spaces to indent (default (-1) is to`
754	`** auto-detect). Zero means no indent.`
755	`** --orig FILE Take the value for the ORIGTEXT argument from FILE.`
756	`** --origbreak Attempt to break when the original comment text`
757	`** is detected.`
758	`** --trimcrlf Enable trimming of leading/trailing CR/LF.`
759	`** --trimspace Enable trimming of leading/trailing spaces.`
760	`** --whitespace Keep all internal whitespace.`
761	`** --wordbreak Attempt to break lines on word boundaries.`
762	`** -W\|--width NUM Width of lines (default (-1) is to auto-detect).`
763	`** Zero means no limit.`
764	`*/`
765	`void test_comment_format(void){`
766	`const char *zWidth;`
767	`const char *zIndent;`
768	`const char *zPrefix = 0;`
769	`char *zText = 0;`
770	`char *zOrigText = 0;`
771	`int indent, width;`
772	`int i;`
773	`const char *fromFile = find_option("file", 0, 1);`
774	`int decode = find_option("decode", 0, 0)!=0;`
775	`int flags = COMMENT_PRINT_CANONICAL;`
776	`const char *fromOrig = find_option("orig", 0, 1);`
777	`if( find_option("whitespace",0,0) ){`
778	`flags = 0;`
779	`}`
780	`if( find_option("trimcrlf", 0, 0) ){`
781	`flags = COMMENT_PRINT_TRIM_CRLF;`
782	`}`
783	`if( find_option("trimspace", 0, 0) ){`
784	`flags \|= COMMENT_PRINT_TRIM_SPACE;`
785	`flags &= COMMENT_PRINT_CANONICAL;`
786	`}`
787	`if( find_option("wordbreak", 0, 0) ){`
788	`flags \|= COMMENT_PRINT_WORD_BREAK;`
789	`flags &= COMMENT_PRINT_CANONICAL;`
790	`}`
791	`if( find_option("origbreak", 0, 0) ){`
792	`flags \|= COMMENT_PRINT_ORIG_BREAK;`
793	`flags &= COMMENT_PRINT_CANONICAL;`
794	`}`
795	`zWidth = find_option("width","W",1);`
796	`if( zWidth ){`
797	`width = atoi(zWidth);`
798	`}else{`
799	`width = -1; /* automatic */`
800	`}`
801	`zIndent = find_option("indent",0,1);`
802	`if( zIndent ){`
803	`indent = atoi(zIndent);`
804	`}else{`
805	`indent = -1; /* automatic */`
806	`}`
807	`verify_all_options();`
808	`zPrefix = zText = zOrigText = 0;`
809	`if( fromFile ){`
810	`Blob fileData;`
811	`blob_read_from_file(&fileData, fromFile, ExtFILE);`
812	`zText = fossil_strdup(blob_str(&fileData));`
813	`blob_reset(&fileData);`
814	`}`
815	`if( fromOrig ){`
816	`Blob fileData;`
817	`blob_read_from_file(&fileData, fromOrig, ExtFILE);`
818	`zOrigText = fossil_strdup(blob_str(&fileData));`
819	`blob_reset(&fileData);`
820	`}`
821	`for(i=2; i<g.argc; i++){`
822	`if( zText==0 ){`
823	`zText = g.argv[i];`
824	`continue;`
825	`}`
826	`if( zPrefix==0 ){`
827	`zPrefix = g.argv[i];`
828	`continue;`
829	`}`
830	`if( zOrigText==0 ){`
831	`zOrigText = g.argv[i];`
832	`continue;`
833	`}`
834	`usage("[OPTIONS] TEXT [PREFIX] [ORIGTEXT]");`
835	`}`
836	`if( decode ){`
837	`zText = mprintf(fromFile?"%z":"%s" /works-like:"%s"/, zText);`
838	`defossilize(zText);`
839	`if( zOrigText ){`
840	`zOrigText = mprintf(fromFile?"%z":"%s" /works-like:"%s"/, zOrigText);`
841	`defossilize(zOrigText);`
842	`}`
843	`}`
844	`if( zPrefix==0 ) zPrefix = "00:00:00 ";`
845	`if( indent<0 ){`
846	`indent = strlen(zPrefix);`
847	`}`
848	`if( zPrefix && *zPrefix ){`
849	`fossil_print("%s", zPrefix);`
850	`}`
851	`fossil_print("(%d lines output)\n",`
852	`comment_print(zText, zOrigText, indent, width, flags));`
853	`}`
854

Fossil SCM

Keyboard Shortcuts