Fossil SCM

Show the formatting rules that apply under the current repository settings in the check-in comment prompt. Rework the html_to_plaintext() routine for better handling of whitespace.

drh 2025-03-22 13:08 trunk
Commit d62ca2b85f1eb7d98f58860736dfe52a2992f00828524bb91b850a7071f68e8c
+40 -6
--- src/checkin.c
+++ src/checkin.c
@@ -1467,10 +1467,11 @@
14671467
CheckinInfo *p,
14681468
int parent_rid,
14691469
int dryRunFlag
14701470
){
14711471
Blob prompt;
1472
+ int wikiFlags;
14721473
#if defined(_WIN32) || defined(__CYGWIN__)
14731474
int bomSize;
14741475
const unsigned char *bom = get_utf8_bom(&bomSize);
14751476
blob_init(&prompt, (const char *) bom, bomSize);
14761477
if( zInit && zInit[0]){
@@ -1479,14 +1480,37 @@
14791480
#else
14801481
blob_init(&prompt, zInit, -1);
14811482
#endif
14821483
blob_append(&prompt,
14831484
"\n"
1484
- "# Enter a commit message for this check-in."
1485
- " Lines beginning with # are ignored.\n"
1486
- "#\n", -1
1485
+ "# Enter the commit message. Formatting rules:\n"
1486
+ "# * Lines beginning with # are ignored.\n",
1487
+ -1
14871488
);
1489
+ wikiFlags = wiki_convert_flags(1);
1490
+ if( wikiFlags & WIKI_LINKSONLY ){
1491
+ blob_append(&prompt,"# * Hyperlinks inside of [...]\n", -1);
1492
+ if( wikiFlags & WIKI_NEWLINE ){
1493
+ blob_append(&prompt,
1494
+ "# * Newlines are significant and are displayed as written\n", -1);
1495
+ }else{
1496
+ blob_append(&prompt,
1497
+ "# * Newlines are interpreted as ordinary spaces\n",
1498
+ -1
1499
+ );
1500
+ }
1501
+ blob_append(&prompt,
1502
+ "# * All other text will be displayed as written\n", -1);
1503
+ }else{
1504
+ blob_append(&prompt,
1505
+ "# * Hyperlinks: [target] or [target|display-text]\n"
1506
+ "# * Blank lines cause a paragraph break\n"
1507
+ "# * Other text rendered as if it where HTML\n", -1
1508
+ );
1509
+ }
1510
+ blob_append(&prompt, "#\n", 2);
1511
+
14881512
if( dryRunFlag ){
14891513
blob_appendf(&prompt, "# DRY-RUN: This is a test commit. No changes "
14901514
"will be made to the repository\n#\n");
14911515
}
14921516
blob_appendf(&prompt, "# user: %s\n",
@@ -2327,26 +2351,36 @@
23272351
23282352
if( mFlags==0 ) return 0;
23292353
blob_init(&in, blob_str(pComment), -1);
23302354
blob_init(&html, 0, 0);
23312355
wFlags = wiki_convert_flags(0);
2332
- wFlags &= WIKI_NOBADLINKS;
2356
+ wFlags &= ~WIKI_NOBADLINKS;
23332357
wFlags |= WIKI_MARK;
23342358
mResult = wiki_convert(&in, &html, wFlags);
23352359
if( mResult & RENDER_ANYERROR ) rc |= COMCK_MARKUP;
23362360
if( rc ){
2337
- int htot = HTOT_NO_WS;
2361
+ int htot = ((wFlags & WIKI_NEWLINE)!=0 ? 0 : HTOT_FLOW)|HTOT_TRIM;
23382362
Blob txt;
23392363
if( terminal_is_vt100() ) htot |= HTOT_VT100;
23402364
blob_init(&txt, 0, 0);
23412365
html_to_plaintext(blob_str(&html), &txt, htot);
23422366
if( rc & COMCK_MARKUP ){
23432367
fossil_print("Possible format errors in the check-in comment:\n\n ");
23442368
}else{
23452369
fossil_print("Preview of the check-in comment:\n\n ");
23462370
}
2347
- comment_print(blob_str(&txt), 0, 3, -1, get_comment_format());
2371
+ if( wFlags & WIKI_NEWLINE ){
2372
+ Blob line;
2373
+ char *zIndent = "";
2374
+ while( blob_line(&txt, &line) ){
2375
+ fossil_print("%s%b", zIndent, &line);
2376
+ zIndent = " ";
2377
+ }
2378
+ fossil_print("\n");
2379
+ }else{
2380
+ comment_print(blob_str(&txt), 0, 3, -1, get_comment_format());
2381
+ }
23482382
fossil_print("\n");
23492383
fflush(stdout);
23502384
blob_reset(&txt);
23512385
}
23522386
blob_reset(&html);
23532387
--- src/checkin.c
+++ src/checkin.c
@@ -1467,10 +1467,11 @@
1467 CheckinInfo *p,
1468 int parent_rid,
1469 int dryRunFlag
1470 ){
1471 Blob prompt;
 
1472 #if defined(_WIN32) || defined(__CYGWIN__)
1473 int bomSize;
1474 const unsigned char *bom = get_utf8_bom(&bomSize);
1475 blob_init(&prompt, (const char *) bom, bomSize);
1476 if( zInit && zInit[0]){
@@ -1479,14 +1480,37 @@
1479 #else
1480 blob_init(&prompt, zInit, -1);
1481 #endif
1482 blob_append(&prompt,
1483 "\n"
1484 "# Enter a commit message for this check-in."
1485 " Lines beginning with # are ignored.\n"
1486 "#\n", -1
1487 );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1488 if( dryRunFlag ){
1489 blob_appendf(&prompt, "# DRY-RUN: This is a test commit. No changes "
1490 "will be made to the repository\n#\n");
1491 }
1492 blob_appendf(&prompt, "# user: %s\n",
@@ -2327,26 +2351,36 @@
2327
2328 if( mFlags==0 ) return 0;
2329 blob_init(&in, blob_str(pComment), -1);
2330 blob_init(&html, 0, 0);
2331 wFlags = wiki_convert_flags(0);
2332 wFlags &= WIKI_NOBADLINKS;
2333 wFlags |= WIKI_MARK;
2334 mResult = wiki_convert(&in, &html, wFlags);
2335 if( mResult & RENDER_ANYERROR ) rc |= COMCK_MARKUP;
2336 if( rc ){
2337 int htot = HTOT_NO_WS;
2338 Blob txt;
2339 if( terminal_is_vt100() ) htot |= HTOT_VT100;
2340 blob_init(&txt, 0, 0);
2341 html_to_plaintext(blob_str(&html), &txt, htot);
2342 if( rc & COMCK_MARKUP ){
2343 fossil_print("Possible format errors in the check-in comment:\n\n ");
2344 }else{
2345 fossil_print("Preview of the check-in comment:\n\n ");
2346 }
2347 comment_print(blob_str(&txt), 0, 3, -1, get_comment_format());
 
 
 
 
 
 
 
 
 
 
2348 fossil_print("\n");
2349 fflush(stdout);
2350 blob_reset(&txt);
2351 }
2352 blob_reset(&html);
2353
--- src/checkin.c
+++ src/checkin.c
@@ -1467,10 +1467,11 @@
1467 CheckinInfo *p,
1468 int parent_rid,
1469 int dryRunFlag
1470 ){
1471 Blob prompt;
1472 int wikiFlags;
1473 #if defined(_WIN32) || defined(__CYGWIN__)
1474 int bomSize;
1475 const unsigned char *bom = get_utf8_bom(&bomSize);
1476 blob_init(&prompt, (const char *) bom, bomSize);
1477 if( zInit && zInit[0]){
@@ -1479,14 +1480,37 @@
1480 #else
1481 blob_init(&prompt, zInit, -1);
1482 #endif
1483 blob_append(&prompt,
1484 "\n"
1485 "# Enter the commit message. Formatting rules:\n"
1486 "# * Lines beginning with # are ignored.\n",
1487 -1
1488 );
1489 wikiFlags = wiki_convert_flags(1);
1490 if( wikiFlags & WIKI_LINKSONLY ){
1491 blob_append(&prompt,"# * Hyperlinks inside of [...]\n", -1);
1492 if( wikiFlags & WIKI_NEWLINE ){
1493 blob_append(&prompt,
1494 "# * Newlines are significant and are displayed as written\n", -1);
1495 }else{
1496 blob_append(&prompt,
1497 "# * Newlines are interpreted as ordinary spaces\n",
1498 -1
1499 );
1500 }
1501 blob_append(&prompt,
1502 "# * All other text will be displayed as written\n", -1);
1503 }else{
1504 blob_append(&prompt,
1505 "# * Hyperlinks: [target] or [target|display-text]\n"
1506 "# * Blank lines cause a paragraph break\n"
1507 "# * Other text rendered as if it where HTML\n", -1
1508 );
1509 }
1510 blob_append(&prompt, "#\n", 2);
1511
1512 if( dryRunFlag ){
1513 blob_appendf(&prompt, "# DRY-RUN: This is a test commit. No changes "
1514 "will be made to the repository\n#\n");
1515 }
1516 blob_appendf(&prompt, "# user: %s\n",
@@ -2327,26 +2351,36 @@
2351
2352 if( mFlags==0 ) return 0;
2353 blob_init(&in, blob_str(pComment), -1);
2354 blob_init(&html, 0, 0);
2355 wFlags = wiki_convert_flags(0);
2356 wFlags &= ~WIKI_NOBADLINKS;
2357 wFlags |= WIKI_MARK;
2358 mResult = wiki_convert(&in, &html, wFlags);
2359 if( mResult & RENDER_ANYERROR ) rc |= COMCK_MARKUP;
2360 if( rc ){
2361 int htot = ((wFlags & WIKI_NEWLINE)!=0 ? 0 : HTOT_FLOW)|HTOT_TRIM;
2362 Blob txt;
2363 if( terminal_is_vt100() ) htot |= HTOT_VT100;
2364 blob_init(&txt, 0, 0);
2365 html_to_plaintext(blob_str(&html), &txt, htot);
2366 if( rc & COMCK_MARKUP ){
2367 fossil_print("Possible format errors in the check-in comment:\n\n ");
2368 }else{
2369 fossil_print("Preview of the check-in comment:\n\n ");
2370 }
2371 if( wFlags & WIKI_NEWLINE ){
2372 Blob line;
2373 char *zIndent = "";
2374 while( blob_line(&txt, &line) ){
2375 fossil_print("%s%b", zIndent, &line);
2376 zIndent = " ";
2377 }
2378 fossil_print("\n");
2379 }else{
2380 comment_print(blob_str(&txt), 0, 3, -1, get_comment_format());
2381 }
2382 fossil_print("\n");
2383 fflush(stdout);
2384 blob_reset(&txt);
2385 }
2386 blob_reset(&html);
2387
+1 -1
--- src/search.c
+++ src/search.c
@@ -726,11 +726,11 @@
726726
const char *zId = db_column_text(&q, 3);
727727
char *zOrig;
728728
blob_appendf(&snip, "%s", zSnippet);
729729
zOrig = blob_materialize(&snip);
730730
blob_init(&snip, 0, 0);
731
- html_to_plaintext(zOrig, &snip, (nTty ? HTOT_VT100 : 0)|HTOT_NO_WS);
731
+ html_to_plaintext(zOrig, &snip, (nTty?HTOT_VT100:0)|HTOT_FLOW|HTOT_TRIM);
732732
fossil_free(zOrig);
733733
blob_appendf(&com, "%s\n%s\n%s", zLabel, blob_str(&snip), zDate);
734734
if( bDebug ){
735735
blob_appendf(&com," score: %s id: %s", zScore, zId);
736736
}
737737
--- src/search.c
+++ src/search.c
@@ -726,11 +726,11 @@
726 const char *zId = db_column_text(&q, 3);
727 char *zOrig;
728 blob_appendf(&snip, "%s", zSnippet);
729 zOrig = blob_materialize(&snip);
730 blob_init(&snip, 0, 0);
731 html_to_plaintext(zOrig, &snip, (nTty ? HTOT_VT100 : 0)|HTOT_NO_WS);
732 fossil_free(zOrig);
733 blob_appendf(&com, "%s\n%s\n%s", zLabel, blob_str(&snip), zDate);
734 if( bDebug ){
735 blob_appendf(&com," score: %s id: %s", zScore, zId);
736 }
737
--- src/search.c
+++ src/search.c
@@ -726,11 +726,11 @@
726 const char *zId = db_column_text(&q, 3);
727 char *zOrig;
728 blob_appendf(&snip, "%s", zSnippet);
729 zOrig = blob_materialize(&snip);
730 blob_init(&snip, 0, 0);
731 html_to_plaintext(zOrig, &snip, (nTty?HTOT_VT100:0)|HTOT_FLOW|HTOT_TRIM);
732 fossil_free(zOrig);
733 blob_appendf(&com, "%s\n%s\n%s", zLabel, blob_str(&snip), zDate);
734 if( bDebug ){
735 blob_appendf(&com," score: %s id: %s", zScore, zId);
736 }
737
+55 -65
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1998,13 +1998,13 @@
19981998
blob_read_from_file(&in, g.argv[2], ExtFILE);
19991999
}
20002000
mType = wiki_convert(&in, &out, flags);
20012001
if( bText ){
20022002
Blob txt;
2003
- int htot = 0;
2003
+ int htot = HTOT_TRIM;
20042004
if( terminal_is_vt100() ) htot |= HTOT_VT100;
2005
- if( bFlow ) htot |= HTOT_NO_WS;
2005
+ if( bFlow ) htot |= HTOT_FLOW;
20062006
blob_init(&txt, 0, 0);
20072007
html_to_plaintext(blob_str(&out),&txt, htot);
20082008
blob_reset(&out);
20092009
out = txt;
20102010
}
@@ -2532,12 +2532,13 @@
25322532
25332533
#if INTERFACE
25342534
/*
25352535
** Allowed flag options for html_to_plaintext().
25362536
*/
2537
-#define HTOT_VT100 0x0001 /* <mark> becomes ^[[91m */
2538
-#define HTOT_NO_WS 0x0002 /* Collapse whitespace to a single space */
2537
+#define HTOT_VT100 0x01 /* <mark> becomes ^[[91m */
2538
+#define HTOT_FLOW 0x02 /* Collapse internal whitespace to a single space */
2539
+#define HTOT_TRIM 0x04 /* Trim off leading and trailing whitespace */
25392540
25402541
#endif /* INTERFACE */
25412542
25422543
/*
25432544
** Add <MARK> or </MARK> to the output, or similar VT-100 escape
@@ -2559,25 +2560,28 @@
25592560
** If there is no <title>, then create a blank first line.
25602561
*/
25612562
void html_to_plaintext(const char *zIn, Blob *pOut, int mFlags){
25622563
int n;
25632564
int i, j;
2564
- int bNoWS = 0; /* Transform WS into a single space */
2565
- int seenText = 0; /* True after first non-whitespace seen */
2566
- int nNL = 0; /* Number of \n characters at the end of pOut */
2567
- int nWS = 0; /* True if pOut ends with whitespace */
2568
- int nMark = 0; /* True if inside of <mark>..</mark> */
2569
-
2570
- while( fossil_isspace(zIn[0]) ) zIn++; /* Skip leading whitespace */
2571
- if( mFlags & HTOT_NO_WS ) bNoWS = 1;
2565
+ int bFlow = 0; /* Transform internal WS into a single space */
2566
+ int prevWS = 1; /* Previous output was whitespace or start of msg */
2567
+ int nMark = 0; /* True if inside of <mark>..</mark> */
2568
+
2569
+ for(i=0; fossil_isspace(zIn[i]); i++){}
2570
+ if( i>0 && (mFlags & HTOT_TRIM)==0 ){
2571
+ blob_append(pOut, zIn, i);
2572
+ }
2573
+ zIn += i;
2574
+ if( mFlags & HTOT_FLOW ) bFlow = 1;
25722575
while( zIn[0] ){
25732576
n = html_token_length(zIn);
25742577
if( zIn[0]=='<' && n>1 ){
25752578
int isCloseTag;
25762579
int eTag;
25772580
int eType;
25782581
char zTag[32];
2582
+ prevWS = 0;
25792583
isCloseTag = zIn[1]=='/';
25802584
for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
25812585
zTag[i] = fossil_tolower(zIn[j]);
25822586
}
25832587
zTag[i] = 0;
@@ -2603,36 +2607,32 @@
26032607
}
26042608
zIn += n;
26052609
continue;
26062610
}
26072611
if( eTag==MARKUP_TITLE ){
2608
- if( isCloseTag && (mFlags & HTOT_NO_WS)==0 ){
2609
- bNoWS = 0;
2610
- }else{
2611
- bNoWS = 1;
2612
- }
2613
- }
2614
- if( !isCloseTag && seenText && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){
2615
- if( nNL==0 ){
2616
- blob_append_char(pOut, '\n');
2617
- nNL++;
2618
- }
2619
- nWS = 1;
2620
- }
2621
- }else if( fossil_isspace(zIn[0]) ){
2622
- if( seenText ){
2623
- nNL = 0;
2624
- if( !bNoWS ){ /* '\n' -> ' ' within <title> */
2625
- for(i=0; i<n; i++) if( zIn[i]=='\n' ) nNL++;
2626
- }
2627
- if( !nWS ){
2628
- blob_append_char(pOut, nNL ? '\n' : ' ');
2629
- nWS = 1;
2630
- }
2612
+ if( isCloseTag && (mFlags & HTOT_FLOW)==0 ){
2613
+ bFlow = 0;
2614
+ }else{
2615
+ bFlow = 1;
2616
+ }
2617
+ }
2618
+ if( !isCloseTag && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){
2619
+ blob_append_char(pOut, '\n');
2620
+ }
2621
+ }else if( fossil_isspace(zIn[0]) ){
2622
+ if( bFlow==0 ){
2623
+ if( zIn[n]==0 && (mFlags & HTOT_TRIM) ) break;
2624
+ blob_append(pOut, zIn, n);
2625
+ }else if( !prevWS ){
2626
+ prevWS = 1;
2627
+ blob_append_char(pOut, ' ');
2628
+ zIn += n;
2629
+ n = 0;
26312630
}
26322631
}else if( zIn[0]=='&' ){
26332632
u32 c = '?';
2633
+ prevWS = 0;
26342634
if( zIn[1]=='#' ){
26352635
c = atoi(&zIn[2]);
26362636
if( c==0 ) c = '?';
26372637
}else{
26382638
static const struct { int n; u32 c; char *z; } aEntity[] = {
@@ -2648,44 +2648,34 @@
26482648
c = aEntity[jj].c;
26492649
break;
26502650
}
26512651
}
26522652
}
2653
- if( fossil_isspace(c) ){
2654
- if( nWS==0 && seenText ) blob_append_char(pOut, c);
2655
- nWS = 1;
2656
- nNL = c=='\n';
2657
- }else{
2658
- if( !seenText && !bNoWS ) blob_append_char(pOut, '\n');
2659
- seenText = 1;
2660
- nNL = nWS = 0;
2661
- if( c<0x00080 ){
2662
- blob_append_char(pOut, c & 0xff);
2663
- }else if( c<0x00800 ){
2664
- blob_append_char(pOut, 0xc0 + (u8)((c>>6)&0x1f));
2665
- blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2666
- }else if( c<0x10000 ){
2667
- blob_append_char(pOut, 0xe0 + (u8)((c>>12)&0x0f));
2668
- blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2669
- blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2670
- }else{
2671
- blob_append_char(pOut, 0xf0 + (u8)((c>>18)&0x07));
2672
- blob_append_char(pOut, 0x80 + (u8)((c>>12)&0x3f));
2673
- blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2674
- blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2675
- }
2676
- }
2677
- }else{
2678
- if( !seenText && !bNoWS ) blob_append_char(pOut, '\n');
2679
- seenText = 1;
2680
- nNL = nWS = 0;
2653
+ if( c<0x00080 ){
2654
+ blob_append_char(pOut, c & 0xff);
2655
+ }else if( c<0x00800 ){
2656
+ blob_append_char(pOut, 0xc0 + (u8)((c>>6)&0x1f));
2657
+ blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2658
+ }else if( c<0x10000 ){
2659
+ blob_append_char(pOut, 0xe0 + (u8)((c>>12)&0x0f));
2660
+ blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2661
+ blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2662
+ }else{
2663
+ blob_append_char(pOut, 0xf0 + (u8)((c>>18)&0x07));
2664
+ blob_append_char(pOut, 0x80 + (u8)((c>>12)&0x3f));
2665
+ blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2666
+ blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2667
+ }
2668
+ }else{
2669
+ prevWS = 0;
26812670
blob_append(pOut, zIn, n);
26822671
}
26832672
zIn += n;
26842673
}
2685
- if( nMark ) addMark(pOut, mFlags, 1);
2686
- if( nNL==0 ) blob_append_char(pOut, '\n');
2674
+ if( nMark ){
2675
+ addMark(pOut, mFlags, 1);
2676
+ }
26872677
}
26882678
26892679
/*
26902680
** COMMAND: test-html-to-text
26912681
**
26922682
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1998,13 +1998,13 @@
1998 blob_read_from_file(&in, g.argv[2], ExtFILE);
1999 }
2000 mType = wiki_convert(&in, &out, flags);
2001 if( bText ){
2002 Blob txt;
2003 int htot = 0;
2004 if( terminal_is_vt100() ) htot |= HTOT_VT100;
2005 if( bFlow ) htot |= HTOT_NO_WS;
2006 blob_init(&txt, 0, 0);
2007 html_to_plaintext(blob_str(&out),&txt, htot);
2008 blob_reset(&out);
2009 out = txt;
2010 }
@@ -2532,12 +2532,13 @@
2532
2533 #if INTERFACE
2534 /*
2535 ** Allowed flag options for html_to_plaintext().
2536 */
2537 #define HTOT_VT100 0x0001 /* <mark> becomes ^[[91m */
2538 #define HTOT_NO_WS 0x0002 /* Collapse whitespace to a single space */
 
2539
2540 #endif /* INTERFACE */
2541
2542 /*
2543 ** Add <MARK> or </MARK> to the output, or similar VT-100 escape
@@ -2559,25 +2560,28 @@
2559 ** If there is no <title>, then create a blank first line.
2560 */
2561 void html_to_plaintext(const char *zIn, Blob *pOut, int mFlags){
2562 int n;
2563 int i, j;
2564 int bNoWS = 0; /* Transform WS into a single space */
2565 int seenText = 0; /* True after first non-whitespace seen */
2566 int nNL = 0; /* Number of \n characters at the end of pOut */
2567 int nWS = 0; /* True if pOut ends with whitespace */
2568 int nMark = 0; /* True if inside of <mark>..</mark> */
2569
2570 while( fossil_isspace(zIn[0]) ) zIn++; /* Skip leading whitespace */
2571 if( mFlags & HTOT_NO_WS ) bNoWS = 1;
 
 
2572 while( zIn[0] ){
2573 n = html_token_length(zIn);
2574 if( zIn[0]=='<' && n>1 ){
2575 int isCloseTag;
2576 int eTag;
2577 int eType;
2578 char zTag[32];
 
2579 isCloseTag = zIn[1]=='/';
2580 for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
2581 zTag[i] = fossil_tolower(zIn[j]);
2582 }
2583 zTag[i] = 0;
@@ -2603,36 +2607,32 @@
2603 }
2604 zIn += n;
2605 continue;
2606 }
2607 if( eTag==MARKUP_TITLE ){
2608 if( isCloseTag && (mFlags & HTOT_NO_WS)==0 ){
2609 bNoWS = 0;
2610 }else{
2611 bNoWS = 1;
2612 }
2613 }
2614 if( !isCloseTag && seenText && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){
2615 if( nNL==0 ){
2616 blob_append_char(pOut, '\n');
2617 nNL++;
2618 }
2619 nWS = 1;
2620 }
2621 }else if( fossil_isspace(zIn[0]) ){
2622 if( seenText ){
2623 nNL = 0;
2624 if( !bNoWS ){ /* '\n' -> ' ' within <title> */
2625 for(i=0; i<n; i++) if( zIn[i]=='\n' ) nNL++;
2626 }
2627 if( !nWS ){
2628 blob_append_char(pOut, nNL ? '\n' : ' ');
2629 nWS = 1;
2630 }
2631 }
2632 }else if( zIn[0]=='&' ){
2633 u32 c = '?';
 
2634 if( zIn[1]=='#' ){
2635 c = atoi(&zIn[2]);
2636 if( c==0 ) c = '?';
2637 }else{
2638 static const struct { int n; u32 c; char *z; } aEntity[] = {
@@ -2648,44 +2648,34 @@
2648 c = aEntity[jj].c;
2649 break;
2650 }
2651 }
2652 }
2653 if( fossil_isspace(c) ){
2654 if( nWS==0 && seenText ) blob_append_char(pOut, c);
2655 nWS = 1;
2656 nNL = c=='\n';
2657 }else{
2658 if( !seenText && !bNoWS ) blob_append_char(pOut, '\n');
2659 seenText = 1;
2660 nNL = nWS = 0;
2661 if( c<0x00080 ){
2662 blob_append_char(pOut, c & 0xff);
2663 }else if( c<0x00800 ){
2664 blob_append_char(pOut, 0xc0 + (u8)((c>>6)&0x1f));
2665 blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2666 }else if( c<0x10000 ){
2667 blob_append_char(pOut, 0xe0 + (u8)((c>>12)&0x0f));
2668 blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2669 blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2670 }else{
2671 blob_append_char(pOut, 0xf0 + (u8)((c>>18)&0x07));
2672 blob_append_char(pOut, 0x80 + (u8)((c>>12)&0x3f));
2673 blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2674 blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2675 }
2676 }
2677 }else{
2678 if( !seenText && !bNoWS ) blob_append_char(pOut, '\n');
2679 seenText = 1;
2680 nNL = nWS = 0;
2681 blob_append(pOut, zIn, n);
2682 }
2683 zIn += n;
2684 }
2685 if( nMark ) addMark(pOut, mFlags, 1);
2686 if( nNL==0 ) blob_append_char(pOut, '\n');
 
2687 }
2688
2689 /*
2690 ** COMMAND: test-html-to-text
2691 **
2692
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1998,13 +1998,13 @@
1998 blob_read_from_file(&in, g.argv[2], ExtFILE);
1999 }
2000 mType = wiki_convert(&in, &out, flags);
2001 if( bText ){
2002 Blob txt;
2003 int htot = HTOT_TRIM;
2004 if( terminal_is_vt100() ) htot |= HTOT_VT100;
2005 if( bFlow ) htot |= HTOT_FLOW;
2006 blob_init(&txt, 0, 0);
2007 html_to_plaintext(blob_str(&out),&txt, htot);
2008 blob_reset(&out);
2009 out = txt;
2010 }
@@ -2532,12 +2532,13 @@
2532
2533 #if INTERFACE
2534 /*
2535 ** Allowed flag options for html_to_plaintext().
2536 */
2537 #define HTOT_VT100 0x01 /* <mark> becomes ^[[91m */
2538 #define HTOT_FLOW 0x02 /* Collapse internal whitespace to a single space */
2539 #define HTOT_TRIM 0x04 /* Trim off leading and trailing whitespace */
2540
2541 #endif /* INTERFACE */
2542
2543 /*
2544 ** Add <MARK> or </MARK> to the output, or similar VT-100 escape
@@ -2559,25 +2560,28 @@
2560 ** If there is no <title>, then create a blank first line.
2561 */
2562 void html_to_plaintext(const char *zIn, Blob *pOut, int mFlags){
2563 int n;
2564 int i, j;
2565 int bFlow = 0; /* Transform internal WS into a single space */
2566 int prevWS = 1; /* Previous output was whitespace or start of msg */
2567 int nMark = 0; /* True if inside of <mark>..</mark> */
2568
2569 for(i=0; fossil_isspace(zIn[i]); i++){}
2570 if( i>0 && (mFlags & HTOT_TRIM)==0 ){
2571 blob_append(pOut, zIn, i);
2572 }
2573 zIn += i;
2574 if( mFlags & HTOT_FLOW ) bFlow = 1;
2575 while( zIn[0] ){
2576 n = html_token_length(zIn);
2577 if( zIn[0]=='<' && n>1 ){
2578 int isCloseTag;
2579 int eTag;
2580 int eType;
2581 char zTag[32];
2582 prevWS = 0;
2583 isCloseTag = zIn[1]=='/';
2584 for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
2585 zTag[i] = fossil_tolower(zIn[j]);
2586 }
2587 zTag[i] = 0;
@@ -2603,36 +2607,32 @@
2607 }
2608 zIn += n;
2609 continue;
2610 }
2611 if( eTag==MARKUP_TITLE ){
2612 if( isCloseTag && (mFlags & HTOT_FLOW)==0 ){
2613 bFlow = 0;
2614 }else{
2615 bFlow = 1;
2616 }
2617 }
2618 if( !isCloseTag && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){
2619 blob_append_char(pOut, '\n');
2620 }
2621 }else if( fossil_isspace(zIn[0]) ){
2622 if( bFlow==0 ){
2623 if( zIn[n]==0 && (mFlags & HTOT_TRIM) ) break;
2624 blob_append(pOut, zIn, n);
2625 }else if( !prevWS ){
2626 prevWS = 1;
2627 blob_append_char(pOut, ' ');
2628 zIn += n;
2629 n = 0;
 
 
 
 
 
2630 }
2631 }else if( zIn[0]=='&' ){
2632 u32 c = '?';
2633 prevWS = 0;
2634 if( zIn[1]=='#' ){
2635 c = atoi(&zIn[2]);
2636 if( c==0 ) c = '?';
2637 }else{
2638 static const struct { int n; u32 c; char *z; } aEntity[] = {
@@ -2648,44 +2648,34 @@
2648 c = aEntity[jj].c;
2649 break;
2650 }
2651 }
2652 }
2653 if( c<0x00080 ){
2654 blob_append_char(pOut, c & 0xff);
2655 }else if( c<0x00800 ){
2656 blob_append_char(pOut, 0xc0 + (u8)((c>>6)&0x1f));
2657 blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2658 }else if( c<0x10000 ){
2659 blob_append_char(pOut, 0xe0 + (u8)((c>>12)&0x0f));
2660 blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2661 blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2662 }else{
2663 blob_append_char(pOut, 0xf0 + (u8)((c>>18)&0x07));
2664 blob_append_char(pOut, 0x80 + (u8)((c>>12)&0x3f));
2665 blob_append_char(pOut, 0x80 + (u8)((c>>6)&0x3f));
2666 blob_append_char(pOut, 0x80 + (u8)(c&0x3f));
2667 }
2668 }else{
2669 prevWS = 0;
 
 
 
 
 
 
 
 
 
 
 
2670 blob_append(pOut, zIn, n);
2671 }
2672 zIn += n;
2673 }
2674 if( nMark ){
2675 addMark(pOut, mFlags, 1);
2676 }
2677 }
2678
2679 /*
2680 ** COMMAND: test-html-to-text
2681 **
2682

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button