Fossil SCM
Add an experimental /search page for embedded documentation. Need to enhance with (1) wiki search, (2) ticket search, (3) configuration options, and (4) CSS, and maybe other things as well. But it is a start.
Commit
046d7430bfec4bbb3ddb0ba6fac4c2474095f7b7
Parent
16a4f60b24fc7eb…
2 files changed
+182
-33
+1
+182
-33
| --- src/search.c | ||
| +++ src/search.c | ||
| @@ -40,20 +40,28 @@ | ||
| 40 | 40 | struct srchTerm { /* For each search term */ |
| 41 | 41 | char *z; /* Text */ |
| 42 | 42 | int n; /* length */ |
| 43 | 43 | } a[SEARCH_MAX_TERM]; |
| 44 | 44 | /* Snippet controls */ |
| 45 | + char *zPattern; /* The search pattern */ | |
| 45 | 46 | char *zMarkBegin; /* Start of a match */ |
| 46 | 47 | char *zMarkEnd; /* End of a match */ |
| 47 | 48 | char *zMarkGap; /* A gap between two matches */ |
| 48 | 49 | unsigned fSrchFlg; /* Flags */ |
| 49 | 50 | }; |
| 50 | 51 | |
| 51 | -#define SRCHFLG_HTML 0x0001 /* Escape snippet text for HTML */ | |
| 52 | +#define SRCHFLG_HTML 0x01 /* Escape snippet text for HTML */ | |
| 53 | +#define SRCHFLG_SCORE 0x02 /* Prepend the score to each snippet */ | |
| 54 | +#define SRCHFLG_STATIC 0x04 /* The static gSearch object */ | |
| 52 | 55 | |
| 53 | 56 | #endif |
| 54 | 57 | |
| 58 | +/* | |
| 59 | +** There is a single global Search object: | |
| 60 | +*/ | |
| 61 | +static Search gSearch; | |
| 62 | + | |
| 55 | 63 | |
| 56 | 64 | /* |
| 57 | 65 | ** These characters constitute a word boundary |
| 58 | 66 | */ |
| 59 | 67 | static const char isBoundary[] = { |
| @@ -74,23 +82,51 @@ | ||
| 74 | 82 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 75 | 83 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 76 | 84 | }; |
| 77 | 85 | #define ISALNUM(x) (!isBoundary[(x)&0xff]) |
| 78 | 86 | |
| 87 | + | |
| 88 | +/* | |
| 89 | +** Destroy a search context. | |
| 90 | +*/ | |
| 91 | +void search_end(Search *p){ | |
| 92 | + if( p ){ | |
| 93 | + fossil_free(p->zPattern); | |
| 94 | + fossil_free(p->zMarkBegin); | |
| 95 | + fossil_free(p->zMarkEnd); | |
| 96 | + fossil_free(p->zMarkGap); | |
| 97 | + memset(p, 0, sizeof(*p)); | |
| 98 | + if( p!=&gSearch ) fossil_free(p); | |
| 99 | + } | |
| 100 | +} | |
| 101 | + | |
| 79 | 102 | /* |
| 80 | 103 | ** Compile a search pattern |
| 81 | 104 | */ |
| 82 | -Search *search_init(const char *zPattern){ | |
| 83 | - int nPattern = strlen(zPattern); | |
| 105 | +Search *search_init( | |
| 106 | + const char *zPattern, /* The search pattern */ | |
| 107 | + const char *zMarkBegin, /* Start of a match */ | |
| 108 | + const char *zMarkEnd, /* End of a match */ | |
| 109 | + const char *zMarkGap, /* A gap between two matches */ | |
| 110 | + unsigned fSrchFlg /* Flags */ | |
| 111 | +){ | |
| 84 | 112 | Search *p; |
| 85 | 113 | char *z; |
| 86 | 114 | int i; |
| 87 | 115 | |
| 88 | - p = fossil_malloc( nPattern + sizeof(*p) + 1); | |
| 89 | - z = (char*)&p[1]; | |
| 90 | - memcpy(z, zPattern, nPattern+1); | |
| 91 | - memset(p, 0, sizeof(*p)); | |
| 116 | + if( fSrchFlg & SRCHFLG_STATIC ){ | |
| 117 | + p = &gSearch; | |
| 118 | + search_end(p); | |
| 119 | + }else{ | |
| 120 | + p = fossil_malloc(sizeof(*p)); | |
| 121 | + memset(p, 0, sizeof(*p)); | |
| 122 | + } | |
| 123 | + p->zPattern = z = mprintf("%s", zPattern); | |
| 124 | + p->zMarkBegin = mprintf("%s", zMarkBegin); | |
| 125 | + p->zMarkEnd = mprintf("%s", zMarkEnd); | |
| 126 | + p->zMarkGap = mprintf("%s", zMarkGap); | |
| 127 | + p->fSrchFlg = fSrchFlg; | |
| 92 | 128 | while( *z && p->nTerm<SEARCH_MAX_TERM ){ |
| 93 | 129 | while( *z && !ISALNUM(*z) ){ z++; } |
| 94 | 130 | if( *z==0 ) break; |
| 95 | 131 | p->a[p->nTerm].z = z; |
| 96 | 132 | for(i=1; ISALNUM(z[i]); i++){} |
| @@ -100,30 +136,25 @@ | ||
| 100 | 136 | } |
| 101 | 137 | return p; |
| 102 | 138 | } |
| 103 | 139 | |
| 104 | 140 | |
| 105 | -/* | |
| 106 | -** Destroy a search context. | |
| 107 | -*/ | |
| 108 | -void search_end(Search *p){ | |
| 109 | - free(p); | |
| 110 | -} | |
| 111 | - | |
| 112 | 141 | /* |
| 113 | 142 | ** Append n bytes of text zTxt to snippet pSnip. Encode the text appropriately. |
| 114 | 143 | */ |
| 115 | 144 | static void snippet_text_append( |
| 116 | 145 | Search *p, /* The search context */ |
| 117 | 146 | Blob *pSnip, /* Append to this snippet */ |
| 118 | 147 | const char *zTxt, /* Text to append */ |
| 119 | 148 | int n /* How many bytes to append */ |
| 120 | 149 | ){ |
| 121 | - if( p->fSrchFlg & SRCHFLG_HTML ){ | |
| 122 | - blob_appendf(pSnip, "%.*h", n, zTxt); | |
| 123 | - }else{ | |
| 124 | - blob_append(pSnip, zTxt, n); | |
| 150 | + if( n>0 ){ | |
| 151 | + if( p->fSrchFlg & SRCHFLG_HTML ){ | |
| 152 | + blob_appendf(pSnip, "%#h", n, zTxt); | |
| 153 | + }else{ | |
| 154 | + blob_append(pSnip, zTxt, n); | |
| 155 | + } | |
| 125 | 156 | } |
| 126 | 157 | } |
| 127 | 158 | |
| 128 | 159 | /* |
| 129 | 160 | ** Compare a search pattern against one or more input strings which |
| @@ -200,10 +231,11 @@ | ||
| 200 | 231 | |
| 201 | 232 | |
| 202 | 233 | /* Prepare a snippet that describes the matching text. |
| 203 | 234 | */ |
| 204 | 235 | blob_init(pSnip, 0, 0); |
| 236 | + if( p->fSrchFlg & SRCHFLG_SCORE ) blob_appendf(pSnip, "%08x", score); | |
| 205 | 237 | |
| 206 | 238 | while(1){ |
| 207 | 239 | int iOfst; |
| 208 | 240 | int iTail; |
| 209 | 241 | int iBest; |
| @@ -271,11 +303,11 @@ | ||
| 271 | 303 | } /* end for(j) */ |
| 272 | 304 | if( j<p->nTerm ){ |
| 273 | 305 | while( ISALNUM(zDoc[i]) && i<iTail ){ i++; } |
| 274 | 306 | } |
| 275 | 307 | } /* end for(i) */ |
| 276 | - if( iTail>0 ) snippet_text_append(p, pSnip, zDoc, iTail); | |
| 308 | + snippet_text_append(p, pSnip, zDoc, iTail); | |
| 277 | 309 | } |
| 278 | 310 | if( wantGap ) blob_append(pSnip, p->zMarkGap, -1); |
| 279 | 311 | return score; |
| 280 | 312 | } |
| 281 | 313 | |
| @@ -289,15 +321,23 @@ | ||
| 289 | 321 | int i; |
| 290 | 322 | Blob x; |
| 291 | 323 | Blob snip; |
| 292 | 324 | int score; |
| 293 | 325 | char *zDoc; |
| 326 | + int flg = 0; | |
| 327 | + char *zBegin = (char*)find_option("begin",0,1); | |
| 328 | + char *zEnd = (char*)find_option("end",0,1); | |
| 329 | + char *zGap = (char*)find_option("gap",0,1); | |
| 330 | + if( find_option("html",0,0)!=0 ) flg |= SRCHFLG_HTML; | |
| 331 | + if( find_option("score",0,0)!=0 ) flg |= SRCHFLG_SCORE; | |
| 332 | + if( find_option("static",0,0)!=0 ) flg |= SRCHFLG_STATIC; | |
| 333 | + verify_all_options(); | |
| 294 | 334 | if( g.argc<4 ) usage("SEARCHSTRING FILE1..."); |
| 295 | - p = search_init(g.argv[2]); | |
| 296 | - p->zMarkBegin = "[["; | |
| 297 | - p->zMarkEnd = "]]"; | |
| 298 | - p->zMarkGap = " ... "; | |
| 335 | + if( zBegin==0 ) zBegin = "[["; | |
| 336 | + if( zEnd==0 ) zEnd = "]]"; | |
| 337 | + if( zGap==0 ) zGap = " ... "; | |
| 338 | + p = search_init(g.argv[2], zBegin, zEnd, zGap, flg); | |
| 299 | 339 | for(i=3; i<g.argc; i++){ |
| 300 | 340 | blob_read_from_file(&x, g.argv[i]); |
| 301 | 341 | zDoc = blob_str(&x); |
| 302 | 342 | score = search_score(p, 1, (const char**)&zDoc, &snip); |
| 303 | 343 | fossil_print("%s: %d\n", g.argv[i], score); |
| @@ -304,42 +344,90 @@ | ||
| 304 | 344 | blob_reset(&x); |
| 305 | 345 | if( score ){ |
| 306 | 346 | fossil_print("%.78c\n%s\n%.78c\n\n", '=', blob_str(&snip), '='); |
| 307 | 347 | blob_reset(&snip); |
| 308 | 348 | } |
| 309 | - } | |
| 349 | + } | |
| 350 | +} | |
| 351 | + | |
| 352 | +/* | |
| 353 | +** An SQL function to initialize the global search pattern: | |
| 354 | +** | |
| 355 | +** search_init(PATTERN,BEGIN,END,GAP,FLAGS) | |
| 356 | +** | |
| 357 | +** All arguments are optional. | |
| 358 | +*/ | |
| 359 | +static void search_init_sqlfunc( | |
| 360 | + sqlite3_context *context, | |
| 361 | + int argc, | |
| 362 | + sqlite3_value **argv | |
| 363 | +){ | |
| 364 | + const char *zPattern = 0; | |
| 365 | + const char *zBegin = "<b>"; | |
| 366 | + const char *zEnd = "</b>"; | |
| 367 | + const char *zGap = " ... "; | |
| 368 | + unsigned int flg = SRCHFLG_HTML; | |
| 369 | + switch( argc ){ | |
| 370 | + default: | |
| 371 | + flg = (unsigned int)sqlite3_value_int(argv[4]); | |
| 372 | + case 4: | |
| 373 | + zGap = (const char*)sqlite3_value_text(argv[3]); | |
| 374 | + case 3: | |
| 375 | + zEnd = (const char*)sqlite3_value_text(argv[2]); | |
| 376 | + case 2: | |
| 377 | + zBegin = (const char*)sqlite3_value_text(argv[1]); | |
| 378 | + case 1: | |
| 379 | + zPattern = (const char*)sqlite3_value_text(argv[0]); | |
| 380 | + } | |
| 381 | + if( zPattern && zPattern[0] ){ | |
| 382 | + search_init(zPattern, zBegin, zEnd, zGap, flg | SRCHFLG_STATIC); | |
| 383 | + }else{ | |
| 384 | + search_end(&gSearch); | |
| 385 | + } | |
| 310 | 386 | } |
| 311 | 387 | |
| 312 | 388 | /* |
| 313 | 389 | ** This is an SQLite function that scores its input using |
| 314 | -** a pre-computed pattern. | |
| 390 | +** the pattern from the previous call to search_init(). | |
| 315 | 391 | */ |
| 316 | 392 | static void search_score_sqlfunc( |
| 317 | 393 | sqlite3_context *context, |
| 318 | 394 | int argc, |
| 319 | 395 | sqlite3_value **argv |
| 320 | 396 | ){ |
| 321 | - Search *p = (Search*)sqlite3_user_data(context); | |
| 397 | + int isSnippet = sqlite3_user_data(context)!=0; | |
| 322 | 398 | const char **azDoc; |
| 323 | 399 | int score; |
| 324 | 400 | int i; |
| 401 | + Blob snip; | |
| 325 | 402 | |
| 403 | + if( gSearch.nTerm==0 ) return; | |
| 326 | 404 | azDoc = fossil_malloc( sizeof(const char*)*(argc+1) ); |
| 327 | 405 | for(i=0; i<argc; i++) azDoc[i] = (const char*)sqlite3_value_text(argv[i]); |
| 328 | - score = search_score(p, argc, azDoc, 0); | |
| 406 | + score = search_score(&gSearch, argc, azDoc, isSnippet ? &snip : 0); | |
| 329 | 407 | fossil_free((void *)azDoc); |
| 330 | - sqlite3_result_int(context, score); | |
| 408 | + if( isSnippet ){ | |
| 409 | + if( score ){ | |
| 410 | + sqlite3_result_text(context, blob_materialize(&snip), -1, fossil_free); | |
| 411 | + } | |
| 412 | + }else{ | |
| 413 | + sqlite3_result_int(context, score); | |
| 414 | + } | |
| 331 | 415 | } |
| 332 | 416 | |
| 333 | 417 | /* |
| 334 | 418 | ** Register the "score()" SQL function to score its input text |
| 335 | 419 | ** using the given Search object. Once this function is registered, |
| 336 | 420 | ** do not delete the Search object. |
| 337 | 421 | */ |
| 338 | -void search_sql_setup(Search *p){ | |
| 339 | - sqlite3_create_function(g.db, "score", -1, SQLITE_UTF8, p, | |
| 422 | +void search_sql_setup(sqlite3 *db){ | |
| 423 | + sqlite3_create_function(db, "score", -1, SQLITE_UTF8, 0, | |
| 424 | + search_score_sqlfunc, 0, 0); | |
| 425 | + sqlite3_create_function(db, "snippet", -1, SQLITE_UTF8, &gSearch, | |
| 340 | 426 | search_score_sqlfunc, 0, 0); |
| 427 | + sqlite3_create_function(db, "search_init", -1, SQLITE_UTF8, 0, | |
| 428 | + search_init_sqlfunc, 0, 0); | |
| 341 | 429 | } |
| 342 | 430 | |
| 343 | 431 | /* |
| 344 | 432 | ** Testing the search function. |
| 345 | 433 | ** |
| @@ -357,11 +445,10 @@ | ||
| 357 | 445 | ** of entries returned. The -width option can be |
| 358 | 446 | ** used to set the output width used when printing |
| 359 | 447 | ** matches. |
| 360 | 448 | */ |
| 361 | 449 | void search_cmd(void){ |
| 362 | - Search *p; | |
| 363 | 450 | Blob pattern; |
| 364 | 451 | int i; |
| 365 | 452 | Blob sql = empty_blob; |
| 366 | 453 | Stmt q; |
| 367 | 454 | int iBest; |
| @@ -386,13 +473,13 @@ | ||
| 386 | 473 | if( g.argc<2 ) return; |
| 387 | 474 | blob_init(&pattern, g.argv[2], -1); |
| 388 | 475 | for(i=3; i<g.argc; i++){ |
| 389 | 476 | blob_appendf(&pattern, " %s", g.argv[i]); |
| 390 | 477 | } |
| 391 | - p = search_init(blob_str(&pattern)); | |
| 478 | + (void)search_init(blob_str(&pattern),"*","*","...",SRCHFLG_STATIC); | |
| 392 | 479 | blob_reset(&pattern); |
| 393 | - search_sql_setup(p); | |
| 480 | + search_sql_setup(g.db); | |
| 394 | 481 | |
| 395 | 482 | db_multi_exec( |
| 396 | 483 | "CREATE TEMP TABLE srch(rid,uuid,date,comment,x);" |
| 397 | 484 | "CREATE INDEX srch_idx1 ON srch(x);" |
| 398 | 485 | "INSERT INTO srch(rid,uuid,date,comment,x)" |
| @@ -414,5 +501,67 @@ | ||
| 414 | 501 | db_prepare(&q, "%s", blob_sql_text(&sql)); |
| 415 | 502 | blob_reset(&sql); |
| 416 | 503 | print_timeline(&q, nLimit, width, 0); |
| 417 | 504 | db_finalize(&q); |
| 418 | 505 | } |
| 506 | + | |
| 507 | +/* | |
| 508 | +** WEBPAGE: /search | |
| 509 | +** | |
| 510 | +** This is an EXPERIMENTAL page for doing search across a repository. | |
| 511 | +** | |
| 512 | +** The current implementation does a full text search over embedded | |
| 513 | +** documentation files on the tip of the "trunk" branch. Only files | |
| 514 | +** ending in ".wiki", ".md", ".html", and ".txt" are searched. | |
| 515 | +** | |
| 516 | +** The entire text is scanned. There is no full-text index. This is | |
| 517 | +** experimental. We may change to using a full-text index depending | |
| 518 | +** on performance. | |
| 519 | +** | |
| 520 | +** Other pending enhancements: | |
| 521 | +** * Search tickets | |
| 522 | +** * Search wiki | |
| 523 | +*/ | |
| 524 | +void search_page(void){ | |
| 525 | + const char *zPattern = PD("s",""); | |
| 526 | + Stmt q; | |
| 527 | + | |
| 528 | + login_check_credentials(); | |
| 529 | + if( !g.perm.Read ){ login_needed(); return; } | |
| 530 | + style_header("Search"); | |
| 531 | + @ <form method="GET" action="search"><center> | |
| 532 | + @ <input type="text" name="s" size="40" value="%h(zPattern)"> | |
| 533 | + @ <input type="submit" value="Search"> | |
| 534 | + @ </center></form> | |
| 535 | + while( fossil_isspace(zPattern[0]) ) zPattern++; | |
| 536 | + if( zPattern[0] ){ | |
| 537 | + search_sql_setup(g.db); | |
| 538 | + add_content_sql_commands(g.db); | |
| 539 | + search_init(zPattern, "<b>", "</b>", " ... ", | |
| 540 | + SRCHFLG_STATIC|SRCHFLG_HTML|SRCHFLG_SCORE); | |
| 541 | + db_multi_exec( | |
| 542 | + "CREATE VIRTUAL TABLE temp.foci USING files_of_checkin;" | |
| 543 | + "CREATE TEMP TABLE x(fn TEXT,url TEXT,snip TEXT);" | |
| 544 | + "INSERT INTO x(fn,url,snip)" | |
| 545 | + " SELECT filename, printf('%R/doc/trunk/%%s',filename)," | |
| 546 | + " snippet(content(uuid))" | |
| 547 | + " FROM foci" | |
| 548 | + " WHERE checkinID=symbolic_name_to_rid('trunk')" | |
| 549 | + " AND (filename GLOB '*.wiki' OR" | |
| 550 | + " filename GLOB '*.md' OR" | |
| 551 | + " filename GLOB '*.txt' OR" | |
| 552 | + " filename GLOB '*.html');" | |
| 553 | + ); | |
| 554 | + db_prepare(&q, "SELECT url, substr(snip,8)" | |
| 555 | + " FROM x WHERE snip IS NOT NULL" | |
| 556 | + " ORDER BY substr(snip,1,8) DESC, fn;"); | |
| 557 | + @ <ol> | |
| 558 | + while( db_step(&q)==SQLITE_ROW ){ | |
| 559 | + const char *zUrl = db_column_text(&q, 0); | |
| 560 | + const char *zSnippet = db_column_text(&q, 1); | |
| 561 | + @ <li><p>%s(href("%s",zUrl))%h(zUrl)</a><br>%s(zSnippet)</li> | |
| 562 | + } | |
| 563 | + db_finalize(&q); | |
| 564 | + @ </ol> | |
| 565 | + } | |
| 566 | + style_footer(); | |
| 567 | +} | |
| 419 | 568 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -40,20 +40,28 @@ | |
| 40 | struct srchTerm { /* For each search term */ |
| 41 | char *z; /* Text */ |
| 42 | int n; /* length */ |
| 43 | } a[SEARCH_MAX_TERM]; |
| 44 | /* Snippet controls */ |
| 45 | char *zMarkBegin; /* Start of a match */ |
| 46 | char *zMarkEnd; /* End of a match */ |
| 47 | char *zMarkGap; /* A gap between two matches */ |
| 48 | unsigned fSrchFlg; /* Flags */ |
| 49 | }; |
| 50 | |
| 51 | #define SRCHFLG_HTML 0x0001 /* Escape snippet text for HTML */ |
| 52 | |
| 53 | #endif |
| 54 | |
| 55 | |
| 56 | /* |
| 57 | ** These characters constitute a word boundary |
| 58 | */ |
| 59 | static const char isBoundary[] = { |
| @@ -74,23 +82,51 @@ | |
| 74 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 75 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 76 | }; |
| 77 | #define ISALNUM(x) (!isBoundary[(x)&0xff]) |
| 78 | |
| 79 | /* |
| 80 | ** Compile a search pattern |
| 81 | */ |
| 82 | Search *search_init(const char *zPattern){ |
| 83 | int nPattern = strlen(zPattern); |
| 84 | Search *p; |
| 85 | char *z; |
| 86 | int i; |
| 87 | |
| 88 | p = fossil_malloc( nPattern + sizeof(*p) + 1); |
| 89 | z = (char*)&p[1]; |
| 90 | memcpy(z, zPattern, nPattern+1); |
| 91 | memset(p, 0, sizeof(*p)); |
| 92 | while( *z && p->nTerm<SEARCH_MAX_TERM ){ |
| 93 | while( *z && !ISALNUM(*z) ){ z++; } |
| 94 | if( *z==0 ) break; |
| 95 | p->a[p->nTerm].z = z; |
| 96 | for(i=1; ISALNUM(z[i]); i++){} |
| @@ -100,30 +136,25 @@ | |
| 100 | } |
| 101 | return p; |
| 102 | } |
| 103 | |
| 104 | |
| 105 | /* |
| 106 | ** Destroy a search context. |
| 107 | */ |
| 108 | void search_end(Search *p){ |
| 109 | free(p); |
| 110 | } |
| 111 | |
| 112 | /* |
| 113 | ** Append n bytes of text zTxt to snippet pSnip. Encode the text appropriately. |
| 114 | */ |
| 115 | static void snippet_text_append( |
| 116 | Search *p, /* The search context */ |
| 117 | Blob *pSnip, /* Append to this snippet */ |
| 118 | const char *zTxt, /* Text to append */ |
| 119 | int n /* How many bytes to append */ |
| 120 | ){ |
| 121 | if( p->fSrchFlg & SRCHFLG_HTML ){ |
| 122 | blob_appendf(pSnip, "%.*h", n, zTxt); |
| 123 | }else{ |
| 124 | blob_append(pSnip, zTxt, n); |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | /* |
| 129 | ** Compare a search pattern against one or more input strings which |
| @@ -200,10 +231,11 @@ | |
| 200 | |
| 201 | |
| 202 | /* Prepare a snippet that describes the matching text. |
| 203 | */ |
| 204 | blob_init(pSnip, 0, 0); |
| 205 | |
| 206 | while(1){ |
| 207 | int iOfst; |
| 208 | int iTail; |
| 209 | int iBest; |
| @@ -271,11 +303,11 @@ | |
| 271 | } /* end for(j) */ |
| 272 | if( j<p->nTerm ){ |
| 273 | while( ISALNUM(zDoc[i]) && i<iTail ){ i++; } |
| 274 | } |
| 275 | } /* end for(i) */ |
| 276 | if( iTail>0 ) snippet_text_append(p, pSnip, zDoc, iTail); |
| 277 | } |
| 278 | if( wantGap ) blob_append(pSnip, p->zMarkGap, -1); |
| 279 | return score; |
| 280 | } |
| 281 | |
| @@ -289,15 +321,23 @@ | |
| 289 | int i; |
| 290 | Blob x; |
| 291 | Blob snip; |
| 292 | int score; |
| 293 | char *zDoc; |
| 294 | if( g.argc<4 ) usage("SEARCHSTRING FILE1..."); |
| 295 | p = search_init(g.argv[2]); |
| 296 | p->zMarkBegin = "[["; |
| 297 | p->zMarkEnd = "]]"; |
| 298 | p->zMarkGap = " ... "; |
| 299 | for(i=3; i<g.argc; i++){ |
| 300 | blob_read_from_file(&x, g.argv[i]); |
| 301 | zDoc = blob_str(&x); |
| 302 | score = search_score(p, 1, (const char**)&zDoc, &snip); |
| 303 | fossil_print("%s: %d\n", g.argv[i], score); |
| @@ -304,42 +344,90 @@ | |
| 304 | blob_reset(&x); |
| 305 | if( score ){ |
| 306 | fossil_print("%.78c\n%s\n%.78c\n\n", '=', blob_str(&snip), '='); |
| 307 | blob_reset(&snip); |
| 308 | } |
| 309 | } |
| 310 | } |
| 311 | |
| 312 | /* |
| 313 | ** This is an SQLite function that scores its input using |
| 314 | ** a pre-computed pattern. |
| 315 | */ |
| 316 | static void search_score_sqlfunc( |
| 317 | sqlite3_context *context, |
| 318 | int argc, |
| 319 | sqlite3_value **argv |
| 320 | ){ |
| 321 | Search *p = (Search*)sqlite3_user_data(context); |
| 322 | const char **azDoc; |
| 323 | int score; |
| 324 | int i; |
| 325 | |
| 326 | azDoc = fossil_malloc( sizeof(const char*)*(argc+1) ); |
| 327 | for(i=0; i<argc; i++) azDoc[i] = (const char*)sqlite3_value_text(argv[i]); |
| 328 | score = search_score(p, argc, azDoc, 0); |
| 329 | fossil_free((void *)azDoc); |
| 330 | sqlite3_result_int(context, score); |
| 331 | } |
| 332 | |
| 333 | /* |
| 334 | ** Register the "score()" SQL function to score its input text |
| 335 | ** using the given Search object. Once this function is registered, |
| 336 | ** do not delete the Search object. |
| 337 | */ |
| 338 | void search_sql_setup(Search *p){ |
| 339 | sqlite3_create_function(g.db, "score", -1, SQLITE_UTF8, p, |
| 340 | search_score_sqlfunc, 0, 0); |
| 341 | } |
| 342 | |
| 343 | /* |
| 344 | ** Testing the search function. |
| 345 | ** |
| @@ -357,11 +445,10 @@ | |
| 357 | ** of entries returned. The -width option can be |
| 358 | ** used to set the output width used when printing |
| 359 | ** matches. |
| 360 | */ |
| 361 | void search_cmd(void){ |
| 362 | Search *p; |
| 363 | Blob pattern; |
| 364 | int i; |
| 365 | Blob sql = empty_blob; |
| 366 | Stmt q; |
| 367 | int iBest; |
| @@ -386,13 +473,13 @@ | |
| 386 | if( g.argc<2 ) return; |
| 387 | blob_init(&pattern, g.argv[2], -1); |
| 388 | for(i=3; i<g.argc; i++){ |
| 389 | blob_appendf(&pattern, " %s", g.argv[i]); |
| 390 | } |
| 391 | p = search_init(blob_str(&pattern)); |
| 392 | blob_reset(&pattern); |
| 393 | search_sql_setup(p); |
| 394 | |
| 395 | db_multi_exec( |
| 396 | "CREATE TEMP TABLE srch(rid,uuid,date,comment,x);" |
| 397 | "CREATE INDEX srch_idx1 ON srch(x);" |
| 398 | "INSERT INTO srch(rid,uuid,date,comment,x)" |
| @@ -414,5 +501,67 @@ | |
| 414 | db_prepare(&q, "%s", blob_sql_text(&sql)); |
| 415 | blob_reset(&sql); |
| 416 | print_timeline(&q, nLimit, width, 0); |
| 417 | db_finalize(&q); |
| 418 | } |
| 419 |
| --- src/search.c | |
| +++ src/search.c | |
| @@ -40,20 +40,28 @@ | |
| 40 | struct srchTerm { /* For each search term */ |
| 41 | char *z; /* Text */ |
| 42 | int n; /* length */ |
| 43 | } a[SEARCH_MAX_TERM]; |
| 44 | /* Snippet controls */ |
| 45 | char *zPattern; /* The search pattern */ |
| 46 | char *zMarkBegin; /* Start of a match */ |
| 47 | char *zMarkEnd; /* End of a match */ |
| 48 | char *zMarkGap; /* A gap between two matches */ |
| 49 | unsigned fSrchFlg; /* Flags */ |
| 50 | }; |
| 51 | |
| 52 | #define SRCHFLG_HTML 0x01 /* Escape snippet text for HTML */ |
| 53 | #define SRCHFLG_SCORE 0x02 /* Prepend the score to each snippet */ |
| 54 | #define SRCHFLG_STATIC 0x04 /* The static gSearch object */ |
| 55 | |
| 56 | #endif |
| 57 | |
| 58 | /* |
| 59 | ** There is a single global Search object: |
| 60 | */ |
| 61 | static Search gSearch; |
| 62 | |
| 63 | |
| 64 | /* |
| 65 | ** These characters constitute a word boundary |
| 66 | */ |
| 67 | static const char isBoundary[] = { |
| @@ -74,23 +82,51 @@ | |
| 82 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 83 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 84 | }; |
| 85 | #define ISALNUM(x) (!isBoundary[(x)&0xff]) |
| 86 | |
| 87 | |
| 88 | /* |
| 89 | ** Destroy a search context. |
| 90 | */ |
| 91 | void search_end(Search *p){ |
| 92 | if( p ){ |
| 93 | fossil_free(p->zPattern); |
| 94 | fossil_free(p->zMarkBegin); |
| 95 | fossil_free(p->zMarkEnd); |
| 96 | fossil_free(p->zMarkGap); |
| 97 | memset(p, 0, sizeof(*p)); |
| 98 | if( p!=&gSearch ) fossil_free(p); |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | /* |
| 103 | ** Compile a search pattern |
| 104 | */ |
| 105 | Search *search_init( |
| 106 | const char *zPattern, /* The search pattern */ |
| 107 | const char *zMarkBegin, /* Start of a match */ |
| 108 | const char *zMarkEnd, /* End of a match */ |
| 109 | const char *zMarkGap, /* A gap between two matches */ |
| 110 | unsigned fSrchFlg /* Flags */ |
| 111 | ){ |
| 112 | Search *p; |
| 113 | char *z; |
| 114 | int i; |
| 115 | |
| 116 | if( fSrchFlg & SRCHFLG_STATIC ){ |
| 117 | p = &gSearch; |
| 118 | search_end(p); |
| 119 | }else{ |
| 120 | p = fossil_malloc(sizeof(*p)); |
| 121 | memset(p, 0, sizeof(*p)); |
| 122 | } |
| 123 | p->zPattern = z = mprintf("%s", zPattern); |
| 124 | p->zMarkBegin = mprintf("%s", zMarkBegin); |
| 125 | p->zMarkEnd = mprintf("%s", zMarkEnd); |
| 126 | p->zMarkGap = mprintf("%s", zMarkGap); |
| 127 | p->fSrchFlg = fSrchFlg; |
| 128 | while( *z && p->nTerm<SEARCH_MAX_TERM ){ |
| 129 | while( *z && !ISALNUM(*z) ){ z++; } |
| 130 | if( *z==0 ) break; |
| 131 | p->a[p->nTerm].z = z; |
| 132 | for(i=1; ISALNUM(z[i]); i++){} |
| @@ -100,30 +136,25 @@ | |
| 136 | } |
| 137 | return p; |
| 138 | } |
| 139 | |
| 140 | |
| 141 | /* |
| 142 | ** Append n bytes of text zTxt to snippet pSnip. Encode the text appropriately. |
| 143 | */ |
| 144 | static void snippet_text_append( |
| 145 | Search *p, /* The search context */ |
| 146 | Blob *pSnip, /* Append to this snippet */ |
| 147 | const char *zTxt, /* Text to append */ |
| 148 | int n /* How many bytes to append */ |
| 149 | ){ |
| 150 | if( n>0 ){ |
| 151 | if( p->fSrchFlg & SRCHFLG_HTML ){ |
| 152 | blob_appendf(pSnip, "%#h", n, zTxt); |
| 153 | }else{ |
| 154 | blob_append(pSnip, zTxt, n); |
| 155 | } |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | /* |
| 160 | ** Compare a search pattern against one or more input strings which |
| @@ -200,10 +231,11 @@ | |
| 231 | |
| 232 | |
| 233 | /* Prepare a snippet that describes the matching text. |
| 234 | */ |
| 235 | blob_init(pSnip, 0, 0); |
| 236 | if( p->fSrchFlg & SRCHFLG_SCORE ) blob_appendf(pSnip, "%08x", score); |
| 237 | |
| 238 | while(1){ |
| 239 | int iOfst; |
| 240 | int iTail; |
| 241 | int iBest; |
| @@ -271,11 +303,11 @@ | |
| 303 | } /* end for(j) */ |
| 304 | if( j<p->nTerm ){ |
| 305 | while( ISALNUM(zDoc[i]) && i<iTail ){ i++; } |
| 306 | } |
| 307 | } /* end for(i) */ |
| 308 | snippet_text_append(p, pSnip, zDoc, iTail); |
| 309 | } |
| 310 | if( wantGap ) blob_append(pSnip, p->zMarkGap, -1); |
| 311 | return score; |
| 312 | } |
| 313 | |
| @@ -289,15 +321,23 @@ | |
| 321 | int i; |
| 322 | Blob x; |
| 323 | Blob snip; |
| 324 | int score; |
| 325 | char *zDoc; |
| 326 | int flg = 0; |
| 327 | char *zBegin = (char*)find_option("begin",0,1); |
| 328 | char *zEnd = (char*)find_option("end",0,1); |
| 329 | char *zGap = (char*)find_option("gap",0,1); |
| 330 | if( find_option("html",0,0)!=0 ) flg |= SRCHFLG_HTML; |
| 331 | if( find_option("score",0,0)!=0 ) flg |= SRCHFLG_SCORE; |
| 332 | if( find_option("static",0,0)!=0 ) flg |= SRCHFLG_STATIC; |
| 333 | verify_all_options(); |
| 334 | if( g.argc<4 ) usage("SEARCHSTRING FILE1..."); |
| 335 | if( zBegin==0 ) zBegin = "[["; |
| 336 | if( zEnd==0 ) zEnd = "]]"; |
| 337 | if( zGap==0 ) zGap = " ... "; |
| 338 | p = search_init(g.argv[2], zBegin, zEnd, zGap, flg); |
| 339 | for(i=3; i<g.argc; i++){ |
| 340 | blob_read_from_file(&x, g.argv[i]); |
| 341 | zDoc = blob_str(&x); |
| 342 | score = search_score(p, 1, (const char**)&zDoc, &snip); |
| 343 | fossil_print("%s: %d\n", g.argv[i], score); |
| @@ -304,42 +344,90 @@ | |
| 344 | blob_reset(&x); |
| 345 | if( score ){ |
| 346 | fossil_print("%.78c\n%s\n%.78c\n\n", '=', blob_str(&snip), '='); |
| 347 | blob_reset(&snip); |
| 348 | } |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | /* |
| 353 | ** An SQL function to initialize the global search pattern: |
| 354 | ** |
| 355 | ** search_init(PATTERN,BEGIN,END,GAP,FLAGS) |
| 356 | ** |
| 357 | ** All arguments are optional. |
| 358 | */ |
| 359 | static void search_init_sqlfunc( |
| 360 | sqlite3_context *context, |
| 361 | int argc, |
| 362 | sqlite3_value **argv |
| 363 | ){ |
| 364 | const char *zPattern = 0; |
| 365 | const char *zBegin = "<b>"; |
| 366 | const char *zEnd = "</b>"; |
| 367 | const char *zGap = " ... "; |
| 368 | unsigned int flg = SRCHFLG_HTML; |
| 369 | switch( argc ){ |
| 370 | default: |
| 371 | flg = (unsigned int)sqlite3_value_int(argv[4]); |
| 372 | case 4: |
| 373 | zGap = (const char*)sqlite3_value_text(argv[3]); |
| 374 | case 3: |
| 375 | zEnd = (const char*)sqlite3_value_text(argv[2]); |
| 376 | case 2: |
| 377 | zBegin = (const char*)sqlite3_value_text(argv[1]); |
| 378 | case 1: |
| 379 | zPattern = (const char*)sqlite3_value_text(argv[0]); |
| 380 | } |
| 381 | if( zPattern && zPattern[0] ){ |
| 382 | search_init(zPattern, zBegin, zEnd, zGap, flg | SRCHFLG_STATIC); |
| 383 | }else{ |
| 384 | search_end(&gSearch); |
| 385 | } |
| 386 | } |
| 387 | |
| 388 | /* |
| 389 | ** This is an SQLite function that scores its input using |
| 390 | ** the pattern from the previous call to search_init(). |
| 391 | */ |
| 392 | static void search_score_sqlfunc( |
| 393 | sqlite3_context *context, |
| 394 | int argc, |
| 395 | sqlite3_value **argv |
| 396 | ){ |
| 397 | int isSnippet = sqlite3_user_data(context)!=0; |
| 398 | const char **azDoc; |
| 399 | int score; |
| 400 | int i; |
| 401 | Blob snip; |
| 402 | |
| 403 | if( gSearch.nTerm==0 ) return; |
| 404 | azDoc = fossil_malloc( sizeof(const char*)*(argc+1) ); |
| 405 | for(i=0; i<argc; i++) azDoc[i] = (const char*)sqlite3_value_text(argv[i]); |
| 406 | score = search_score(&gSearch, argc, azDoc, isSnippet ? &snip : 0); |
| 407 | fossil_free((void *)azDoc); |
| 408 | if( isSnippet ){ |
| 409 | if( score ){ |
| 410 | sqlite3_result_text(context, blob_materialize(&snip), -1, fossil_free); |
| 411 | } |
| 412 | }else{ |
| 413 | sqlite3_result_int(context, score); |
| 414 | } |
| 415 | } |
| 416 | |
| 417 | /* |
| 418 | ** Register the "score()", "snippet()", and "search_init()" SQL functions |
| 419 | ** on database connection db. All three operate on the single global |
| 420 | ** Search object gSearch rather than on a caller-supplied Search object. |
| 421 | */ |
| 422 | void search_sql_setup(sqlite3 *db){ |
| 423 | sqlite3_create_function(db, "score", -1, SQLITE_UTF8, 0, |
| 424 | search_score_sqlfunc, 0, 0); |
| 425 | sqlite3_create_function(db, "snippet", -1, SQLITE_UTF8, &gSearch, |
| 426 | search_score_sqlfunc, 0, 0); |
| 427 | sqlite3_create_function(db, "search_init", -1, SQLITE_UTF8, 0, |
| 428 | search_init_sqlfunc, 0, 0); |
| 429 | } |
| 430 | |
| 431 | /* |
| 432 | ** Testing the search function. |
| 433 | ** |
| @@ -357,11 +445,10 @@ | |
| 445 | ** of entries returned. The -width option can be |
| 446 | ** used to set the output width used when printing |
| 447 | ** matches. |
| 448 | */ |
| 449 | void search_cmd(void){ |
| 450 | Blob pattern; |
| 451 | int i; |
| 452 | Blob sql = empty_blob; |
| 453 | Stmt q; |
| 454 | int iBest; |
| @@ -386,13 +473,13 @@ | |
| 473 | if( g.argc<2 ) return; |
| 474 | blob_init(&pattern, g.argv[2], -1); |
| 475 | for(i=3; i<g.argc; i++){ |
| 476 | blob_appendf(&pattern, " %s", g.argv[i]); |
| 477 | } |
| 478 | (void)search_init(blob_str(&pattern),"*","*","...",SRCHFLG_STATIC); |
| 479 | blob_reset(&pattern); |
| 480 | search_sql_setup(g.db); |
| 481 | |
| 482 | db_multi_exec( |
| 483 | "CREATE TEMP TABLE srch(rid,uuid,date,comment,x);" |
| 484 | "CREATE INDEX srch_idx1 ON srch(x);" |
| 485 | "INSERT INTO srch(rid,uuid,date,comment,x)" |
| @@ -414,5 +501,67 @@ | |
| 501 | db_prepare(&q, "%s", blob_sql_text(&sql)); |
| 502 | blob_reset(&sql); |
| 503 | print_timeline(&q, nLimit, width, 0); |
| 504 | db_finalize(&q); |
| 505 | } |
| 506 | |
| 507 | /* |
| 508 | ** WEBPAGE: /search |
| 509 | ** |
| 510 | ** This is an EXPERIMENTAL page for doing search across a repository. |
| 511 | ** |
| 512 | ** The current implementation does a full text search over embedded |
| 513 | ** documentation files on the tip of the "trunk" branch. Only files |
| 514 | ** ending in ".wiki", ".md", ".html", and ".txt" are searched. |
| 515 | ** |
| 516 | ** The entire text is scanned. There is no full-text index. This is |
| 517 | ** experimental. We may change to using a full-text index depending |
| 518 | ** on performance. |
| 519 | ** |
| 520 | ** Other pending enhancements: |
| 521 | ** * Search tickets |
| 522 | ** * Search wiki |
| 523 | */ |
| 524 | void search_page(void){ |
| 525 | const char *zPattern = PD("s",""); |
| 526 | Stmt q; |
| 527 | |
| 528 | login_check_credentials(); |
| 529 | if( !g.perm.Read ){ login_needed(); return; } |
| 530 | style_header("Search"); |
| 531 | @ <form method="GET" action="search"><center> |
| 532 | @ <input type="text" name="s" size="40" value="%h(zPattern)"> |
| 533 | @ <input type="submit" value="Search"> |
| 534 | @ </center></form> |
| 535 | while( fossil_isspace(zPattern[0]) ) zPattern++; |
| 536 | if( zPattern[0] ){ |
| 537 | search_sql_setup(g.db); |
| 538 | add_content_sql_commands(g.db); |
| 539 | search_init(zPattern, "<b>", "</b>", " ... ", |
| 540 | SRCHFLG_STATIC|SRCHFLG_HTML|SRCHFLG_SCORE); |
| 541 | db_multi_exec( |
| 542 | "CREATE VIRTUAL TABLE temp.foci USING files_of_checkin;" |
| 543 | "CREATE TEMP TABLE x(fn TEXT,url TEXT,snip TEXT);" |
| 544 | "INSERT INTO x(fn,url,snip)" |
| 545 | " SELECT filename, printf('%R/doc/trunk/%%s',filename)," |
| 546 | " snippet(content(uuid))" |
| 547 | " FROM foci" |
| 548 | " WHERE checkinID=symbolic_name_to_rid('trunk')" |
| 549 | " AND (filename GLOB '*.wiki' OR" |
| 550 | " filename GLOB '*.md' OR" |
| 551 | " filename GLOB '*.txt' OR" |
| 552 | " filename GLOB '*.html');" |
| 553 | ); |
| 554 | db_prepare(&q, "SELECT url, substr(snip,8)" |
| 555 | " FROM x WHERE snip IS NOT NULL" |
| 556 | " ORDER BY substr(snip,1,8) DESC, fn;"); |
| 557 | @ <ol> |
| 558 | while( db_step(&q)==SQLITE_ROW ){ |
| 559 | const char *zUrl = db_column_text(&q, 0); |
| 560 | const char *zSnippet = db_column_text(&q, 1); |
| 561 | @ <li><p>%s(href("%s",zUrl))%h(zUrl)</a><br>%s(zSnippet)</li> |
| 562 | } |
| 563 | db_finalize(&q); |
| 564 | @ </ol> |
| 565 | } |
| 566 | style_footer(); |
| 567 | } |
| 568 |
+1
| --- src/sqlcmd.c | ||
| +++ src/sqlcmd.c | ||
| @@ -134,10 +134,11 @@ | ||
| 134 | 134 | const void *notUsed |
| 135 | 135 | ){ |
| 136 | 136 | add_content_sql_commands(db); |
| 137 | 137 | db_add_aux_functions(db); |
| 138 | 138 | re_add_sql_func(db); |
| 139 | + search_sql_setup(db); | |
| 139 | 140 | g.zMainDbType = "repository"; |
| 140 | 141 | foci_register(db); |
| 141 | 142 | g.repositoryOpen = 1; |
| 142 | 143 | g.db = db; |
| 143 | 144 | return SQLITE_OK; |
| 144 | 145 |
| --- src/sqlcmd.c | |
| +++ src/sqlcmd.c | |
| @@ -134,10 +134,11 @@ | |
| 134 | const void *notUsed |
| 135 | ){ |
| 136 | add_content_sql_commands(db); |
| 137 | db_add_aux_functions(db); |
| 138 | re_add_sql_func(db); |
| 139 | g.zMainDbType = "repository"; |
| 140 | foci_register(db); |
| 141 | g.repositoryOpen = 1; |
| 142 | g.db = db; |
| 143 | return SQLITE_OK; |
| 144 |
| --- src/sqlcmd.c | |
| +++ src/sqlcmd.c | |
| @@ -134,10 +134,11 @@ | |
| 134 | const void *notUsed |
| 135 | ){ |
| 136 | add_content_sql_commands(db); |
| 137 | db_add_aux_functions(db); |
| 138 | re_add_sql_func(db); |
| 139 | search_sql_setup(db); |
| 140 | g.zMainDbType = "repository"; |
| 141 | foci_register(db); |
| 142 | g.repositoryOpen = 1; |
| 143 | g.db = db; |
| 144 | return SQLITE_OK; |
| 145 |