Fossil SCM

Revamp the "fossil search" command to always use full-text search (unless the undocumented --legacy option is added). Improve the output. Lots of new options to "fossil search".

drh 2025-02-26 14:54 help-search
Commit ef5292645d55d5f55dced21952afcb093293a94c4be2d7669af8a9cf033010bf
1 file changed +172 -65
+172 -65
--- src/search.c
+++ src/search.c
@@ -558,97 +558,208 @@
558558
sqlite3_create_function(db, "body", 3, enc, 0,
559559
search_body_sqlfunc, 0, 0);
560560
sqlite3_create_function(db, "urlencode", 1, enc, 0,
561561
search_urlencode_sqlfunc, 0, 0);
562562
}
563
+
564
+/*
565
+** The pSnip input contains snippet text from a search formatted
566
+** as HTML. Attempt to make that text more readable on a TTY.
567
+**
568
+** If nTty is positive, use ANSI escape codes "\e[Nm" where N is nTty
569
+** to highly marked text.
570
+*/
571
+void search_snippet_to_plaintext(Blob *pSnip, int nTty){
572
+ char *zSnip;
573
+ unsigned int j, k;
574
+
575
+ zSnip = pSnip->aData;
576
+ for(j=k=0; j<pSnip->nUsed; j++){
577
+ char c = zSnip[j];
578
+ if( c=='<' ){
579
+ if( memcmp(&zSnip[j],"<mark>",6)==0 ){
580
+ if( nTty ){
581
+ zSnip[k++] = 0x1b;
582
+ zSnip[k++] = '[';
583
+ if( nTty>=10 ) zSnip[k++] = (nTty/10)%10 + '0';
584
+ zSnip[k++] = nTty%10 + '0';
585
+ zSnip[k++] = 'm';
586
+ }
587
+ j += 5;
588
+ }else if( memcmp(&zSnip[j],"</mark>",7)==0 ){
589
+ if( nTty ){
590
+ zSnip[k++] = 0x1b;
591
+ zSnip[k++] = '[';
592
+ zSnip[k++] = '0';
593
+ zSnip[k++] = 'm';
594
+ }
595
+ j += 6;
596
+ }else{
597
+ zSnip[k++] = zSnip[j];
598
+ }
599
+ }else if( fossil_isspace(c) ){
600
+ zSnip[k++] = ' ';
601
+ while( fossil_isspace(zSnip[j+1]) ) j++;
602
+ }else if( c=='&' ){
603
+ if( zSnip[j+1]=='#' && fossil_isdigit(zSnip[j+2]) ){
604
+ int n = 3;
605
+ int x = zSnip[j+2] - '0';
606
+ if( fossil_isdigit(zSnip[j+3]) ){
607
+ x = x*10 + zSnip[j+3] - '0';
608
+ n++;
609
+ if( fossil_isdigit(zSnip[j+4]) ){
610
+ x = x*10 + zSnip[j+4] - '0';
611
+ n++;
612
+ }
613
+ }
614
+ if( zSnip[j+n]==';' ){
615
+ zSnip[k++] = (char)x;
616
+ j += n;
617
+ }else{
618
+ zSnip[k++] = c;
619
+ }
620
+ }else if( memcmp(&zSnip[j],"&lt;",4)==0 ){
621
+ zSnip[k++] = '<';
622
+ j += 3;
623
+ }else if( memcmp(&zSnip[j],"&gt;",4)==0 ){
624
+ zSnip[k++] = '>';
625
+ j += 3;
626
+ }else if( memcmp(&zSnip[j],"&quot;",6)==0 ){
627
+ zSnip[k++] = '<';
628
+ j += 5;
629
+ }else if( memcmp(&zSnip[j],"&amp;",5)==0 ){
630
+ zSnip[k++] = '<';
631
+ j += 4;
632
+ }else{
633
+ zSnip[k++] = c;
634
+ }
635
+ }else{
636
+ zSnip[k++] = c;
637
+ }
638
+ }
639
+ zSnip[k] = 0;
640
+ pSnip->nUsed = k;
641
+}
563642
564643
/*
565644
** Testing the search function.
566645
**
567646
** COMMAND: search*
568647
**
569
-** Usage: %fossil search [-a|-all] [-n|-limit #] [-W|-width #] pattern...
570
-**
571
-** Search for timeline entries matching all words provided on the
572
-** command line. Whole-word matches scope more highly than partial
573
-** matches.
574
-**
575
-** Note: This command only searches the EVENT table. So it will only
576
-** display check-in comments or other comments that appear on an
577
-** unaugmented timeline. It does not search document text or forum
578
-** messages.
579
-**
580
-** Outputs, by default, some top-N fraction of the results. The -all
581
-** option can be used to output all matches, regardless of their search
582
-** score. The -limit option can be used to limit the number of entries
583
-** returned. The -width option can be used to set the output width used
584
-** when printing matches.
648
+** Usage: %fossil search [OPTIONS] PATTERN...
649
+**
650
+** Search the repository database for PATTERN and show matches.
651
+** The following elements of the repository can be searched:
652
+**
653
+** * check-in comments
654
+** * embedded documentation
655
+** * forum posts
656
+** * tickets
657
+** * tech notes
658
+** * wiki pages
659
+** * built-in fossil help text
660
+**
661
+** Use options (listed below) to select the scope of the search. The
662
+** default is check-in comments only.
585663
**
586664
** Options:
587
-** -a|--all Output all matches, not just best matches
588
-** --debug Show additional debug content on --fts search
589
-** --fts Use the full-text search mechanism (testing only)
665
+** -a|--all Search everything
666
+** -c|--checkins Search checkin comments
667
+** --docs Search embedded documentation
668
+** --forum Search forum posts
669
+** -h|--bi-help Search built-in help
590670
** -n|--limit N Limit output to N matches
591
-** --scope SCOPE Scope of search. Valid for --fts only. One or
592
-** more of: all, c, d, e, f, t, w. Defaults to all.
671
+** --technotes Search tech notes
672
+** --tickets Search tickets
593673
** -W|--width WIDTH Set display width to WIDTH columns, 0 for
594
-** unlimited. Defaults the terminal's width.
674
+** unlimited. Defaults to the terminal's width.
675
+** --wiki Search wiki
595676
*/
596677
void search_cmd(void){
597678
Blob pattern;
598679
int i;
599680
Blob sql = empty_blob;
600681
Stmt q;
601682
int iBest;
683
+ int srchFlags = 0;
684
+ int bFts = 1; /* Use FTS search by default now */
602685
char fAll = NULL != find_option("all", "a", 0);
603686
const char *zLimit = find_option("limit","n",1);
687
+ const char *zScope = 0;
604688
const char *zWidth = find_option("width","W",1);
605
- const char *zScope = find_option("scope",0,1);
606
- int bDebug = find_option("debug",0,0)!=0;
689
+ int bDebug = find_option("debug",0,0)!=0; /* Undocumented */
607690
int nLimit = zLimit ? atoi(zLimit) : -1000;
608691
int width;
609
- int bFts = find_option("fts",0,0)!=0;
692
+ int nTty = fossil_isatty(1) ? 91 : 0;
693
+
694
+ /* Undocumented option to change highlight color */
695
+ const char *zHighlight = find_option("highlight",0,1);
696
+ if( zHighlight ) nTty = atoi(zHighlight);
697
+
698
+ /* Undocumented option (legacy) */
699
+ zScope = find_option("scope",0,1);
700
+
701
+ if( find_option("fts",0,0)!=0 ) bFts = 1; /* Undocumented legacy */
702
+ if( find_option("legacy",0,0)!=0 ) bFts = 0; /* Undocumented */
610703
611704
if( zWidth ){
612705
width = atoi(zWidth);
613706
if( (width!=0) && (width<=20) ){
614707
fossil_fatal("-W|--width value must be >20 or 0");
615708
}
616709
}else{
617710
width = -1;
618711
}
712
+ if( zScope ){
713
+ for(i=0; zScope[i]; i++){
714
+ switch( zScope[i] ){
715
+ case 'a': srchFlags = SRCH_ALL; break;
716
+ case 'c': srchFlags |= SRCH_CKIN; break;
717
+ case 'd': srchFlags |= SRCH_DOC; break;
718
+ case 'e': srchFlags |= SRCH_TECHNOTE; break;
719
+ case 'f': srchFlags |= SRCH_FORUM; break;
720
+ case 'h': srchFlags |= SRCH_HELP; break;
721
+ case 't': srchFlags |= SRCH_TKT; break;
722
+ case 'w': srchFlags |= SRCH_WIKI; break;
723
+ }
724
+ }
725
+ bFts = 1;
726
+ }
727
+ if( find_option("all","a",0) ){ srchFlags |= SRCH_ALL; bFts = 1; }
728
+ if( find_option("bi-help","h",0) ){ srchFlags |= SRCH_HELP; bFts = 1; }
729
+ if( find_option("checkins","c",0) ){ srchFlags |= SRCH_CKIN; bFts = 1; }
730
+ if( find_option("docs",0,0) ){ srchFlags |= SRCH_DOC; bFts = 1; }
731
+ if( find_option("forum",0,0) ){ srchFlags |= SRCH_FORUM; bFts = 1; }
732
+ if( find_option("technotes",0,0) ){ srchFlags |= SRCH_TECHNOTE; bFts = 1; }
733
+ if( find_option("tickets",0,0) ){ srchFlags |= SRCH_TKT; bFts = 1; }
734
+ if( find_option("wiki",0,0) ){ srchFlags |= SRCH_WIKI; bFts = 1; }
735
+
736
+ /* If no search objects are specified, default to "check-in comments" */
737
+ if( srchFlags==0 ) srchFlags = SRCH_CKIN;
738
+
619739
620740
db_find_and_open_repository(0, 0);
741
+ verify_all_options();
621742
if( g.argc<3 ) return;
743
+ login_set_capabilities("s", 0);
744
+ if( search_restrict(srchFlags)==0 ){
745
+ fossil_print(
746
+ "Search is disabled on this repository.\n"
747
+ "Use the \"fossil fts-config\" command to enable.\n"
748
+ );
749
+ return;
750
+ }
751
+
622752
blob_init(&pattern, g.argv[2], -1);
623753
for(i=3; i<g.argc; i++){
624754
blob_appendf(&pattern, " %s", g.argv[i]);
625755
}
626756
if( bFts ){
627757
/* Search using FTS */
628758
Blob com;
629759
Blob snip;
630760
const char *zPattern = blob_str(&pattern);
631
- int srchFlags;
632
- unsigned int j;
633
- if( zScope==0 ){
634
- srchFlags = SRCH_ALL;
635
- }else{
636
- srchFlags = 0;
637
- for(i=0; zScope[i]; i++){
638
- switch( zScope[i] ){
639
- case 'a': srchFlags = SRCH_ALL; break;
640
- case 'c': srchFlags |= SRCH_CKIN; break;
641
- case 'd': srchFlags |= SRCH_DOC; break;
642
- case 'e': srchFlags |= SRCH_TECHNOTE; break;
643
- case 'f': srchFlags |= SRCH_FORUM; break;
644
- case 'h': srchFlags |= SRCH_HELP; break;
645
- case 't': srchFlags |= SRCH_TKT; break;
646
- case 'w': srchFlags |= SRCH_WIKI; break;
647
- }
648
- }
649
- }
650761
search_sql_setup(g.db);
651762
add_content_sql_commands(g.db);
652763
db_multi_exec(
653764
"CREATE TEMP TABLE x(label,url,score,id,date,snip);"
654765
);
@@ -672,16 +783,11 @@
672783
const char *zLabel = db_column_text(&q, 1);
673784
const char *zDate = db_column_text(&q, 4);
674785
const char *zScore = db_column_text(&q, 2);
675786
const char *zId = db_column_text(&q, 3);
676787
blob_appendf(&snip, "%s", zSnippet);
677
- for(j=0; j<snip.nUsed; j++){
678
- if( snip.aData[j]=='\n' ){
679
- if( j>0 && snip.aData[j-1]=='\r' ) snip.aData[j-1] = ' ';
680
- snip.aData[j] = ' ';
681
- }
682
- }
788
+ search_snippet_to_plaintext(&snip, nTty);
683789
blob_appendf(&com, "%s\n%s\n%s", zLabel, blob_str(&snip), zDate);
684790
if( bDebug ){
685791
blob_appendf(&com," score: %s id: %s", zScore, zId);
686792
}
687793
comment_print(blob_str(&com), 0, 5, width,
@@ -2146,15 +2252,15 @@
21462252
** reindex Rebuild the search index. This is a no-op if
21472253
** index search is disabled
21482254
**
21492255
** index (on|off) Turn the search index on or off
21502256
**
2151
-** enable cdtwef Enable various kinds of search. c=Check-ins,
2257
+** enable cdtwefh Enable various kinds of search. c=Check-ins,
21522258
** d=Documents, t=Tickets, w=Wiki, e=Tech Notes,
2153
-** f=Forum.
2259
+** f=Forum, h=built-in-help.
21542260
**
2155
-** disable cdtwef Disable various kinds of search
2261
+** disable cdtwefh Disable various kinds of search
21562262
**
21572263
** tokenizer VALUE Select a tokenizer for indexed search. VALUE
21582264
** may be one of (porter, on, off, trigram, unicode61),
21592265
** and "on" is equivalent to "porter". Unindexed
21602266
** search never uses tokenization or stemming.
@@ -2176,16 +2282,17 @@
21762282
static const struct {
21772283
const char *zSetting;
21782284
const char *zName;
21792285
const char *zSw;
21802286
} aSetng[] = {
2181
- { "search-ci", "check-in search:", "c" },
2182
- { "search-doc", "document search:", "d" },
2183
- { "search-tkt", "ticket search:", "t" },
2184
- { "search-wiki", "wiki search:", "w" },
2185
- { "search-technote", "tech note search:", "e" },
2186
- { "search-forum", "forum search:", "f" },
2287
+ { "search-ci", "check-in search:", "c" },
2288
+ { "search-doc", "document search:", "d" },
2289
+ { "search-tkt", "ticket search:", "t" },
2290
+ { "search-wiki", "wiki search:", "w" },
2291
+ { "search-technote", "tech note search:", "e" },
2292
+ { "search-forum", "forum search:", "f" },
2293
+ { "search-help", "built-in help search:", "h" },
21872294
};
21882295
char *zSubCmd = 0;
21892296
int i, j, n;
21902297
int iCmd = 0;
21912298
int iAction = 0;
@@ -2250,30 +2357,30 @@
22502357
search_rebuild_index();
22512358
}
22522359
22532360
/* Always show the status before ending */
22542361
for(i=0; i<count(aSetng); i++){
2255
- fossil_print("%-17s %s\n", aSetng[i].zName,
2362
+ fossil_print("%-21s %s\n", aSetng[i].zName,
22562363
db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
22572364
}
2258
- fossil_print("%-17s %s\n", "tokenizer:",
2365
+ fossil_print("%-21s %s\n", "tokenizer:",
22592366
search_tokenizer_for_string(0));
22602367
if( search_index_exists() ){
22612368
int pgsz = db_int64(0, "PRAGMA repository.page_size;");
22622369
i64 nTotal = db_int64(0, "PRAGMA repository.page_count;")*pgsz;
22632370
i64 nFts = db_int64(0, "SELECT count(*) FROM dbstat"
22642371
" WHERE schema='repository'"
22652372
" AND name LIKE 'fts%%'")*pgsz;
22662373
char zSize[50];
2267
- fossil_print("%-17s FTS%d\n", "full-text index:", search_index_type(1));
2268
- fossil_print("%-17s %d\n", "documents:",
2374
+ fossil_print("%-21s FTS%d\n", "full-text index:", search_index_type(1));
2375
+ fossil_print("%-21s %d\n", "documents:",
22692376
db_int(0, "SELECT count(*) FROM ftsdocs"));
22702377
approxSizeName(sizeof(zSize), zSize, nFts);
2271
- fossil_print("%-17s %s (%.1f%% of repository)\n", "space used",
2378
+ fossil_print("%-21s %s (%.1f%% of repository)\n", "space used",
22722379
zSize, 100.0*((double)nFts/(double)nTotal));
22732380
}else{
2274
- fossil_print("%-17s disabled\n", "full-text index:");
2381
+ fossil_print("%-21s disabled\n", "full-text index:");
22752382
}
22762383
db_end_transaction(0);
22772384
}
22782385
22792386
/*
22802387
--- src/search.c
+++ src/search.c
@@ -558,97 +558,208 @@
558 sqlite3_create_function(db, "body", 3, enc, 0,
559 search_body_sqlfunc, 0, 0);
560 sqlite3_create_function(db, "urlencode", 1, enc, 0,
561 search_urlencode_sqlfunc, 0, 0);
562 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
564 /*
565 ** Testing the search function.
566 **
567 ** COMMAND: search*
568 **
569 ** Usage: %fossil search [-a|-all] [-n|-limit #] [-W|-width #] pattern...
570 **
571 ** Search for timeline entries matching all words provided on the
572 ** command line. Whole-word matches scope more highly than partial
573 ** matches.
574 **
575 ** Note: This command only searches the EVENT table. So it will only
576 ** display check-in comments or other comments that appear on an
577 ** unaugmented timeline. It does not search document text or forum
578 ** messages.
579 **
580 ** Outputs, by default, some top-N fraction of the results. The -all
581 ** option can be used to output all matches, regardless of their search
582 ** score. The -limit option can be used to limit the number of entries
583 ** returned. The -width option can be used to set the output width used
584 ** when printing matches.
585 **
586 ** Options:
587 ** -a|--all Output all matches, not just best matches
588 ** --debug Show additional debug content on --fts search
589 ** --fts Use the full-text search mechanism (testing only)
 
 
590 ** -n|--limit N Limit output to N matches
591 ** --scope SCOPE Scope of search. Valid for --fts only. One or
592 ** more of: all, c, d, e, f, t, w. Defaults to all.
593 ** -W|--width WIDTH Set display width to WIDTH columns, 0 for
594 ** unlimited. Defaults the terminal's width.
 
595 */
596 void search_cmd(void){
597 Blob pattern;
598 int i;
599 Blob sql = empty_blob;
600 Stmt q;
601 int iBest;
 
 
602 char fAll = NULL != find_option("all", "a", 0);
603 const char *zLimit = find_option("limit","n",1);
 
604 const char *zWidth = find_option("width","W",1);
605 const char *zScope = find_option("scope",0,1);
606 int bDebug = find_option("debug",0,0)!=0;
607 int nLimit = zLimit ? atoi(zLimit) : -1000;
608 int width;
609 int bFts = find_option("fts",0,0)!=0;
 
 
 
 
 
 
 
 
 
 
610
611 if( zWidth ){
612 width = atoi(zWidth);
613 if( (width!=0) && (width<=20) ){
614 fossil_fatal("-W|--width value must be >20 or 0");
615 }
616 }else{
617 width = -1;
618 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
620 db_find_and_open_repository(0, 0);
 
621 if( g.argc<3 ) return;
 
 
 
 
 
 
 
 
 
622 blob_init(&pattern, g.argv[2], -1);
623 for(i=3; i<g.argc; i++){
624 blob_appendf(&pattern, " %s", g.argv[i]);
625 }
626 if( bFts ){
627 /* Search using FTS */
628 Blob com;
629 Blob snip;
630 const char *zPattern = blob_str(&pattern);
631 int srchFlags;
632 unsigned int j;
633 if( zScope==0 ){
634 srchFlags = SRCH_ALL;
635 }else{
636 srchFlags = 0;
637 for(i=0; zScope[i]; i++){
638 switch( zScope[i] ){
639 case 'a': srchFlags = SRCH_ALL; break;
640 case 'c': srchFlags |= SRCH_CKIN; break;
641 case 'd': srchFlags |= SRCH_DOC; break;
642 case 'e': srchFlags |= SRCH_TECHNOTE; break;
643 case 'f': srchFlags |= SRCH_FORUM; break;
644 case 'h': srchFlags |= SRCH_HELP; break;
645 case 't': srchFlags |= SRCH_TKT; break;
646 case 'w': srchFlags |= SRCH_WIKI; break;
647 }
648 }
649 }
650 search_sql_setup(g.db);
651 add_content_sql_commands(g.db);
652 db_multi_exec(
653 "CREATE TEMP TABLE x(label,url,score,id,date,snip);"
654 );
@@ -672,16 +783,11 @@
672 const char *zLabel = db_column_text(&q, 1);
673 const char *zDate = db_column_text(&q, 4);
674 const char *zScore = db_column_text(&q, 2);
675 const char *zId = db_column_text(&q, 3);
676 blob_appendf(&snip, "%s", zSnippet);
677 for(j=0; j<snip.nUsed; j++){
678 if( snip.aData[j]=='\n' ){
679 if( j>0 && snip.aData[j-1]=='\r' ) snip.aData[j-1] = ' ';
680 snip.aData[j] = ' ';
681 }
682 }
683 blob_appendf(&com, "%s\n%s\n%s", zLabel, blob_str(&snip), zDate);
684 if( bDebug ){
685 blob_appendf(&com," score: %s id: %s", zScore, zId);
686 }
687 comment_print(blob_str(&com), 0, 5, width,
@@ -2146,15 +2252,15 @@
2146 ** reindex Rebuild the search index. This is a no-op if
2147 ** index search is disabled
2148 **
2149 ** index (on|off) Turn the search index on or off
2150 **
2151 ** enable cdtwef Enable various kinds of search. c=Check-ins,
2152 ** d=Documents, t=Tickets, w=Wiki, e=Tech Notes,
2153 ** f=Forum.
2154 **
2155 ** disable cdtwef Disable various kinds of search
2156 **
2157 ** tokenizer VALUE Select a tokenizer for indexed search. VALUE
2158 ** may be one of (porter, on, off, trigram, unicode61),
2159 ** and "on" is equivalent to "porter". Unindexed
2160 ** search never uses tokenization or stemming.
@@ -2176,16 +2282,17 @@
2176 static const struct {
2177 const char *zSetting;
2178 const char *zName;
2179 const char *zSw;
2180 } aSetng[] = {
2181 { "search-ci", "check-in search:", "c" },
2182 { "search-doc", "document search:", "d" },
2183 { "search-tkt", "ticket search:", "t" },
2184 { "search-wiki", "wiki search:", "w" },
2185 { "search-technote", "tech note search:", "e" },
2186 { "search-forum", "forum search:", "f" },
 
2187 };
2188 char *zSubCmd = 0;
2189 int i, j, n;
2190 int iCmd = 0;
2191 int iAction = 0;
@@ -2250,30 +2357,30 @@
2250 search_rebuild_index();
2251 }
2252
2253 /* Always show the status before ending */
2254 for(i=0; i<count(aSetng); i++){
2255 fossil_print("%-17s %s\n", aSetng[i].zName,
2256 db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
2257 }
2258 fossil_print("%-17s %s\n", "tokenizer:",
2259 search_tokenizer_for_string(0));
2260 if( search_index_exists() ){
2261 int pgsz = db_int64(0, "PRAGMA repository.page_size;");
2262 i64 nTotal = db_int64(0, "PRAGMA repository.page_count;")*pgsz;
2263 i64 nFts = db_int64(0, "SELECT count(*) FROM dbstat"
2264 " WHERE schema='repository'"
2265 " AND name LIKE 'fts%%'")*pgsz;
2266 char zSize[50];
2267 fossil_print("%-17s FTS%d\n", "full-text index:", search_index_type(1));
2268 fossil_print("%-17s %d\n", "documents:",
2269 db_int(0, "SELECT count(*) FROM ftsdocs"));
2270 approxSizeName(sizeof(zSize), zSize, nFts);
2271 fossil_print("%-17s %s (%.1f%% of repository)\n", "space used",
2272 zSize, 100.0*((double)nFts/(double)nTotal));
2273 }else{
2274 fossil_print("%-17s disabled\n", "full-text index:");
2275 }
2276 db_end_transaction(0);
2277 }
2278
2279 /*
2280
--- src/search.c
+++ src/search.c
@@ -558,97 +558,208 @@
558 sqlite3_create_function(db, "body", 3, enc, 0,
559 search_body_sqlfunc, 0, 0);
560 sqlite3_create_function(db, "urlencode", 1, enc, 0,
561 search_urlencode_sqlfunc, 0, 0);
562 }
563
564 /*
565 ** The pSnip input contains snippet text from a search formatted
566 ** as HTML. Attempt to make that text more readable on a TTY.
567 **
568 ** If nTty is positive, use ANSI escape codes "\e[Nm" where N is nTty
569 ** to highly marked text.
570 */
571 void search_snippet_to_plaintext(Blob *pSnip, int nTty){
572 char *zSnip;
573 unsigned int j, k;
574
575 zSnip = pSnip->aData;
576 for(j=k=0; j<pSnip->nUsed; j++){
577 char c = zSnip[j];
578 if( c=='<' ){
579 if( memcmp(&zSnip[j],"<mark>",6)==0 ){
580 if( nTty ){
581 zSnip[k++] = 0x1b;
582 zSnip[k++] = '[';
583 if( nTty>=10 ) zSnip[k++] = (nTty/10)%10 + '0';
584 zSnip[k++] = nTty%10 + '0';
585 zSnip[k++] = 'm';
586 }
587 j += 5;
588 }else if( memcmp(&zSnip[j],"</mark>",7)==0 ){
589 if( nTty ){
590 zSnip[k++] = 0x1b;
591 zSnip[k++] = '[';
592 zSnip[k++] = '0';
593 zSnip[k++] = 'm';
594 }
595 j += 6;
596 }else{
597 zSnip[k++] = zSnip[j];
598 }
599 }else if( fossil_isspace(c) ){
600 zSnip[k++] = ' ';
601 while( fossil_isspace(zSnip[j+1]) ) j++;
602 }else if( c=='&' ){
603 if( zSnip[j+1]=='#' && fossil_isdigit(zSnip[j+2]) ){
604 int n = 3;
605 int x = zSnip[j+2] - '0';
606 if( fossil_isdigit(zSnip[j+3]) ){
607 x = x*10 + zSnip[j+3] - '0';
608 n++;
609 if( fossil_isdigit(zSnip[j+4]) ){
610 x = x*10 + zSnip[j+4] - '0';
611 n++;
612 }
613 }
614 if( zSnip[j+n]==';' ){
615 zSnip[k++] = (char)x;
616 j += n;
617 }else{
618 zSnip[k++] = c;
619 }
620 }else if( memcmp(&zSnip[j],"&lt;",4)==0 ){
621 zSnip[k++] = '<';
622 j += 3;
623 }else if( memcmp(&zSnip[j],"&gt;",4)==0 ){
624 zSnip[k++] = '>';
625 j += 3;
626 }else if( memcmp(&zSnip[j],"&quot;",6)==0 ){
627 zSnip[k++] = '<';
628 j += 5;
629 }else if( memcmp(&zSnip[j],"&amp;",5)==0 ){
630 zSnip[k++] = '<';
631 j += 4;
632 }else{
633 zSnip[k++] = c;
634 }
635 }else{
636 zSnip[k++] = c;
637 }
638 }
639 zSnip[k] = 0;
640 pSnip->nUsed = k;
641 }
642
643 /*
644 ** Testing the search function.
645 **
646 ** COMMAND: search*
647 **
648 ** Usage: %fossil search [OPTIONS] PATTERN...
649 **
650 ** Search the repository database for PATTERN and show matches.
651 ** The following elements of the repository can be searched:
652 **
653 ** * check-in comments
654 ** * embedded documentation
655 ** * forum posts
656 ** * tickets
657 ** * tech notes
658 ** * wiki pages
659 ** * built-in fossil help text
660 **
661 ** Use options (listed below) to select the scope of the search. The
662 ** default is check-in comments only.
 
663 **
664 ** Options:
665 ** -a|--all Search everything
666 ** -c|--checkins Search checkin comments
667 ** --docs Search embedded documentation
668 ** --forum Search forum posts
669 ** -h|--bi-help Search built-in help
670 ** -n|--limit N Limit output to N matches
671 ** --technotes Search tech notes
672 ** --tickets Search tickets
673 ** -W|--width WIDTH Set display width to WIDTH columns, 0 for
674 ** unlimited. Defaults to the terminal's width.
675 ** --wiki Search wiki
676 */
677 void search_cmd(void){
678 Blob pattern;
679 int i;
680 Blob sql = empty_blob;
681 Stmt q;
682 int iBest;
683 int srchFlags = 0;
684 int bFts = 1; /* Use FTS search by default now */
685 char fAll = NULL != find_option("all", "a", 0);
686 const char *zLimit = find_option("limit","n",1);
687 const char *zScope = 0;
688 const char *zWidth = find_option("width","W",1);
689 int bDebug = find_option("debug",0,0)!=0; /* Undocumented */
 
690 int nLimit = zLimit ? atoi(zLimit) : -1000;
691 int width;
692 int nTty = fossil_isatty(1) ? 91 : 0;
693
694 /* Undocumented option to change highlight color */
695 const char *zHighlight = find_option("highlight",0,1);
696 if( zHighlight ) nTty = atoi(zHighlight);
697
698 /* Undocumented option (legacy) */
699 zScope = find_option("scope",0,1);
700
701 if( find_option("fts",0,0)!=0 ) bFts = 1; /* Undocumented legacy */
702 if( find_option("legacy",0,0)!=0 ) bFts = 0; /* Undocumented */
703
704 if( zWidth ){
705 width = atoi(zWidth);
706 if( (width!=0) && (width<=20) ){
707 fossil_fatal("-W|--width value must be >20 or 0");
708 }
709 }else{
710 width = -1;
711 }
712 if( zScope ){
713 for(i=0; zScope[i]; i++){
714 switch( zScope[i] ){
715 case 'a': srchFlags = SRCH_ALL; break;
716 case 'c': srchFlags |= SRCH_CKIN; break;
717 case 'd': srchFlags |= SRCH_DOC; break;
718 case 'e': srchFlags |= SRCH_TECHNOTE; break;
719 case 'f': srchFlags |= SRCH_FORUM; break;
720 case 'h': srchFlags |= SRCH_HELP; break;
721 case 't': srchFlags |= SRCH_TKT; break;
722 case 'w': srchFlags |= SRCH_WIKI; break;
723 }
724 }
725 bFts = 1;
726 }
727 if( find_option("all","a",0) ){ srchFlags |= SRCH_ALL; bFts = 1; }
728 if( find_option("bi-help","h",0) ){ srchFlags |= SRCH_HELP; bFts = 1; }
729 if( find_option("checkins","c",0) ){ srchFlags |= SRCH_CKIN; bFts = 1; }
730 if( find_option("docs",0,0) ){ srchFlags |= SRCH_DOC; bFts = 1; }
731 if( find_option("forum",0,0) ){ srchFlags |= SRCH_FORUM; bFts = 1; }
732 if( find_option("technotes",0,0) ){ srchFlags |= SRCH_TECHNOTE; bFts = 1; }
733 if( find_option("tickets",0,0) ){ srchFlags |= SRCH_TKT; bFts = 1; }
734 if( find_option("wiki",0,0) ){ srchFlags |= SRCH_WIKI; bFts = 1; }
735
736 /* If no search objects are specified, default to "check-in comments" */
737 if( srchFlags==0 ) srchFlags = SRCH_CKIN;
738
739
740 db_find_and_open_repository(0, 0);
741 verify_all_options();
742 if( g.argc<3 ) return;
743 login_set_capabilities("s", 0);
744 if( search_restrict(srchFlags)==0 ){
745 fossil_print(
746 "Search is disabled on this repository.\n"
747 "Use the \"fossil fts-config\" command to enable.\n"
748 );
749 return;
750 }
751
752 blob_init(&pattern, g.argv[2], -1);
753 for(i=3; i<g.argc; i++){
754 blob_appendf(&pattern, " %s", g.argv[i]);
755 }
756 if( bFts ){
757 /* Search using FTS */
758 Blob com;
759 Blob snip;
760 const char *zPattern = blob_str(&pattern);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
761 search_sql_setup(g.db);
762 add_content_sql_commands(g.db);
763 db_multi_exec(
764 "CREATE TEMP TABLE x(label,url,score,id,date,snip);"
765 );
@@ -672,16 +783,11 @@
783 const char *zLabel = db_column_text(&q, 1);
784 const char *zDate = db_column_text(&q, 4);
785 const char *zScore = db_column_text(&q, 2);
786 const char *zId = db_column_text(&q, 3);
787 blob_appendf(&snip, "%s", zSnippet);
788 search_snippet_to_plaintext(&snip, nTty);
 
 
 
 
 
789 blob_appendf(&com, "%s\n%s\n%s", zLabel, blob_str(&snip), zDate);
790 if( bDebug ){
791 blob_appendf(&com," score: %s id: %s", zScore, zId);
792 }
793 comment_print(blob_str(&com), 0, 5, width,
@@ -2146,15 +2252,15 @@
2252 ** reindex Rebuild the search index. This is a no-op if
2253 ** index search is disabled
2254 **
2255 ** index (on|off) Turn the search index on or off
2256 **
2257 ** enable cdtwefh Enable various kinds of search. c=Check-ins,
2258 ** d=Documents, t=Tickets, w=Wiki, e=Tech Notes,
2259 ** f=Forum, h=built-in-help.
2260 **
2261 ** disable cdtwefh Disable various kinds of search
2262 **
2263 ** tokenizer VALUE Select a tokenizer for indexed search. VALUE
2264 ** may be one of (porter, on, off, trigram, unicode61),
2265 ** and "on" is equivalent to "porter". Unindexed
2266 ** search never uses tokenization or stemming.
@@ -2176,16 +2282,17 @@
2282 static const struct {
2283 const char *zSetting;
2284 const char *zName;
2285 const char *zSw;
2286 } aSetng[] = {
2287 { "search-ci", "check-in search:", "c" },
2288 { "search-doc", "document search:", "d" },
2289 { "search-tkt", "ticket search:", "t" },
2290 { "search-wiki", "wiki search:", "w" },
2291 { "search-technote", "tech note search:", "e" },
2292 { "search-forum", "forum search:", "f" },
2293 { "search-help", "built-in help search:", "h" },
2294 };
2295 char *zSubCmd = 0;
2296 int i, j, n;
2297 int iCmd = 0;
2298 int iAction = 0;
@@ -2250,30 +2357,30 @@
2357 search_rebuild_index();
2358 }
2359
2360 /* Always show the status before ending */
2361 for(i=0; i<count(aSetng); i++){
2362 fossil_print("%-21s %s\n", aSetng[i].zName,
2363 db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
2364 }
2365 fossil_print("%-21s %s\n", "tokenizer:",
2366 search_tokenizer_for_string(0));
2367 if( search_index_exists() ){
2368 int pgsz = db_int64(0, "PRAGMA repository.page_size;");
2369 i64 nTotal = db_int64(0, "PRAGMA repository.page_count;")*pgsz;
2370 i64 nFts = db_int64(0, "SELECT count(*) FROM dbstat"
2371 " WHERE schema='repository'"
2372 " AND name LIKE 'fts%%'")*pgsz;
2373 char zSize[50];
2374 fossil_print("%-21s FTS%d\n", "full-text index:", search_index_type(1));
2375 fossil_print("%-21s %d\n", "documents:",
2376 db_int(0, "SELECT count(*) FROM ftsdocs"));
2377 approxSizeName(sizeof(zSize), zSize, nFts);
2378 fossil_print("%-21s %s (%.1f%% of repository)\n", "space used",
2379 zSize, 100.0*((double)nFts/(double)nTotal));
2380 }else{
2381 fossil_print("%-21s disabled\n", "full-text index:");
2382 }
2383 db_end_transaction(0);
2384 }
2385
2386 /*
2387

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button