Fossil SCM

Initial work on the search_stext() function used to extract searchable text from formatted files.

drh 2015-01-31 22:13 trunk
Commit 48e1e18304a2ec18173e4985977a28f7b0a87c5f
2 files changed +40 -26 +92
+40 -26
--- src/doc.c
+++ src/doc.c
@@ -350,10 +350,46 @@
350350
int i;
351351
for(i=2; i<g.argc; i++){
352352
fossil_print("%-20s -> %s\n", g.argv[i], mimetype_from_name(g.argv[i]));
353353
}
354354
}
355
+
356
+/*
357
+** Look for a file named zName in the checkin with RID=vid. Load the content
358
+** of that file into pContent and return the RID for the file. Or return 0
359
+** if the file is not found or could not be loaded.
360
+*/
361
+int doc_load_content(int vid, const char *zName, Blob *pContent){
362
+ int rid; /* The RID of the file being loaded */
363
+ if( !db_table_exists("repository","vcache") ){
364
+ db_multi_exec(
365
+ "CREATE TABLE IF NOT EXISTS vcache(\n"
366
+ " vid INTEGER, -- checkin ID\n"
367
+ " fname TEXT, -- filename\n"
368
+ " rid INTEGER, -- artifact ID\n"
369
+ " PRIMARY KEY(vid,fname)\n"
370
+ ") WITHOUT ROWID"
371
+ );
372
+ }
373
+ if( !db_exists("SELECT 1 FROM vcache WHERE vid=%d", vid) ){
374
+ db_multi_exec(
375
+ "DELETE FROM vcache;\n"
376
+ "CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;\n"
377
+ "INSERT INTO vcache(vid,fname,rid)"
378
+ " SELECT checkinID, filename, blob.rid FROM foci, blob"
379
+ " WHERE blob.uuid=foci.uuid"
380
+ " AND foci.checkinID=%d;",
381
+ vid
382
+ );
383
+ }
384
+ rid = db_int(0, "SELECT rid FROM vcache"
385
+ " WHERE vid=%d AND fname=%Q", vid, zName);
386
+ if( rid && content_get(rid, pContent)==0 ){
387
+ rid = 0;
388
+ }
389
+ return rid;
390
+}
355391
356392
/*
357393
** WEBPAGE: doc
358394
** URL: /doc?name=CHECKIN/FILE
359395
** URL: /doc/CHECKIN/FILE
@@ -397,10 +433,11 @@
397433
"index.html", "index.wiki", "index.md"
398434
};
399435
400436
login_check_credentials();
401437
if( !g.perm.Read ){ login_needed(); return; }
438
+ db_begin_transaction();
402439
while( rid==0 && (++nMiss)<=ArraySize(azSuffix) ){
403440
zName = PD("name", "tip/index.wiki");
404441
for(i=0; zName[i] && zName[i]!='/'; i++){}
405442
zCheckin = mprintf("%.*s", i, zName);
406443
if( fossil_strcmp(zCheckin,"ckout")==0 && db_open_local(0)==0 ){
@@ -437,37 +474,12 @@
437474
&& blob_read_from_file(&filebody, zFullpath)>0 ){
438475
rid = 1; /* Fake RID just to get the loop to end */
439476
}
440477
fossil_free(zFullpath);
441478
}else{
442
- db_begin_transaction();
443479
vid = name_to_typed_rid(zCheckin, "ci");
444
- db_multi_exec(
445
- "CREATE TABLE IF NOT EXISTS vcache(\n"
446
- " vid INTEGER, -- checkin ID\n"
447
- " fname TEXT, -- filename\n"
448
- " rid INTEGER, -- artifact ID\n"
449
- " PRIMARY KEY(vid,fname)\n"
450
- ") WITHOUT ROWID"
451
- );
452
- if( !db_exists("SELECT 1 FROM vcache WHERE vid=%d", vid) ){
453
- db_multi_exec(
454
- "DELETE FROM vcache;\n"
455
- "CREATE VIRTUAL TABLE temp.foci USING files_of_checkin;\n"
456
- "INSERT INTO vcache(vid,fname,rid)"
457
- " SELECT checkinID, filename, blob.rid FROM foci, blob"
458
- " WHERE blob.uuid=foci.uuid"
459
- " AND foci.checkinID=%d;",
460
- vid
461
- );
462
- }
463
- rid = db_int(0, "SELECT rid FROM vcache"
464
- " WHERE vid=%d AND fname=%Q", vid, zName);
465
- if( rid==0 || content_get(rid, &filebody)==0 ){
466
- goto doc_not_found;
467
- }
468
- db_end_transaction(0);
480
+ rid = doc_load_content(vid, zName, &filebody);
469481
}
470482
}
471483
if( rid==0 ) goto doc_not_found;
472484
blob_to_utf8_no_bom(&filebody, 0);
473485
@@ -522,10 +534,11 @@
522534
}else{
523535
cgi_set_content_type(zMime);
524536
cgi_set_content(&filebody);
525537
}
526538
if( nMiss>=ArraySize(azSuffix) ) cgi_set_status(404, "Not Found");
539
+ db_end_transaction(0);
527540
return;
528541
529542
/* Jump here when unable to locate the document */
530543
doc_not_found:
531544
db_end_transaction(0);
@@ -534,10 +547,11 @@
534547
@ <p>Document %h(zOrigName) not found
535548
if( fossil_strcmp(zCheckin,"ckout")!=0 ){
536549
@ in %z(href("%R/tree?ci=%T",zCheckin))%h(zCheckin)</a>
537550
}
538551
style_footer();
552
+ db_end_transaction(0);
539553
return;
540554
}
541555
542556
/*
543557
** The default logo.
544558
--- src/doc.c
+++ src/doc.c
@@ -350,10 +350,46 @@
350 int i;
351 for(i=2; i<g.argc; i++){
352 fossil_print("%-20s -> %s\n", g.argv[i], mimetype_from_name(g.argv[i]));
353 }
354 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
356 /*
357 ** WEBPAGE: doc
358 ** URL: /doc?name=CHECKIN/FILE
359 ** URL: /doc/CHECKIN/FILE
@@ -397,10 +433,11 @@
397 "index.html", "index.wiki", "index.md"
398 };
399
400 login_check_credentials();
401 if( !g.perm.Read ){ login_needed(); return; }
 
402 while( rid==0 && (++nMiss)<=ArraySize(azSuffix) ){
403 zName = PD("name", "tip/index.wiki");
404 for(i=0; zName[i] && zName[i]!='/'; i++){}
405 zCheckin = mprintf("%.*s", i, zName);
406 if( fossil_strcmp(zCheckin,"ckout")==0 && db_open_local(0)==0 ){
@@ -437,37 +474,12 @@
437 && blob_read_from_file(&filebody, zFullpath)>0 ){
438 rid = 1; /* Fake RID just to get the loop to end */
439 }
440 fossil_free(zFullpath);
441 }else{
442 db_begin_transaction();
443 vid = name_to_typed_rid(zCheckin, "ci");
444 db_multi_exec(
445 "CREATE TABLE IF NOT EXISTS vcache(\n"
446 " vid INTEGER, -- checkin ID\n"
447 " fname TEXT, -- filename\n"
448 " rid INTEGER, -- artifact ID\n"
449 " PRIMARY KEY(vid,fname)\n"
450 ") WITHOUT ROWID"
451 );
452 if( !db_exists("SELECT 1 FROM vcache WHERE vid=%d", vid) ){
453 db_multi_exec(
454 "DELETE FROM vcache;\n"
455 "CREATE VIRTUAL TABLE temp.foci USING files_of_checkin;\n"
456 "INSERT INTO vcache(vid,fname,rid)"
457 " SELECT checkinID, filename, blob.rid FROM foci, blob"
458 " WHERE blob.uuid=foci.uuid"
459 " AND foci.checkinID=%d;",
460 vid
461 );
462 }
463 rid = db_int(0, "SELECT rid FROM vcache"
464 " WHERE vid=%d AND fname=%Q", vid, zName);
465 if( rid==0 || content_get(rid, &filebody)==0 ){
466 goto doc_not_found;
467 }
468 db_end_transaction(0);
469 }
470 }
471 if( rid==0 ) goto doc_not_found;
472 blob_to_utf8_no_bom(&filebody, 0);
473
@@ -522,10 +534,11 @@
522 }else{
523 cgi_set_content_type(zMime);
524 cgi_set_content(&filebody);
525 }
526 if( nMiss>=ArraySize(azSuffix) ) cgi_set_status(404, "Not Found");
 
527 return;
528
529 /* Jump here when unable to locate the document */
530 doc_not_found:
531 db_end_transaction(0);
@@ -534,10 +547,11 @@
534 @ <p>Document %h(zOrigName) not found
535 if( fossil_strcmp(zCheckin,"ckout")!=0 ){
536 @ in %z(href("%R/tree?ci=%T",zCheckin))%h(zCheckin)</a>
537 }
538 style_footer();
 
539 return;
540 }
541
542 /*
543 ** The default logo.
544
--- src/doc.c
+++ src/doc.c
@@ -350,10 +350,46 @@
350 int i;
351 for(i=2; i<g.argc; i++){
352 fossil_print("%-20s -> %s\n", g.argv[i], mimetype_from_name(g.argv[i]));
353 }
354 }
355
356 /*
357 ** Look for a file named zName in the checkin with RID=vid. Load the content
358 ** of that file into pContent and return the RID for the file. Or return 0
359 ** if the file is not found or could not be loaded.
360 */
361 int doc_load_content(int vid, const char *zName, Blob *pContent){
362 int rid; /* The RID of the file being loaded */
363 if( !db_table_exists("repository","vcache") ){
364 db_multi_exec(
365 "CREATE TABLE IF NOT EXISTS vcache(\n"
366 " vid INTEGER, -- checkin ID\n"
367 " fname TEXT, -- filename\n"
368 " rid INTEGER, -- artifact ID\n"
369 " PRIMARY KEY(vid,fname)\n"
370 ") WITHOUT ROWID"
371 );
372 }
373 if( !db_exists("SELECT 1 FROM vcache WHERE vid=%d", vid) ){
374 db_multi_exec(
375 "DELETE FROM vcache;\n"
376 "CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;\n"
377 "INSERT INTO vcache(vid,fname,rid)"
378 " SELECT checkinID, filename, blob.rid FROM foci, blob"
379 " WHERE blob.uuid=foci.uuid"
380 " AND foci.checkinID=%d;",
381 vid
382 );
383 }
384 rid = db_int(0, "SELECT rid FROM vcache"
385 " WHERE vid=%d AND fname=%Q", vid, zName);
386 if( rid && content_get(rid, pContent)==0 ){
387 rid = 0;
388 }
389 return rid;
390 }
391
392 /*
393 ** WEBPAGE: doc
394 ** URL: /doc?name=CHECKIN/FILE
395 ** URL: /doc/CHECKIN/FILE
@@ -397,10 +433,11 @@
433 "index.html", "index.wiki", "index.md"
434 };
435
436 login_check_credentials();
437 if( !g.perm.Read ){ login_needed(); return; }
438 db_begin_transaction();
439 while( rid==0 && (++nMiss)<=ArraySize(azSuffix) ){
440 zName = PD("name", "tip/index.wiki");
441 for(i=0; zName[i] && zName[i]!='/'; i++){}
442 zCheckin = mprintf("%.*s", i, zName);
443 if( fossil_strcmp(zCheckin,"ckout")==0 && db_open_local(0)==0 ){
@@ -437,37 +474,12 @@
474 && blob_read_from_file(&filebody, zFullpath)>0 ){
475 rid = 1; /* Fake RID just to get the loop to end */
476 }
477 fossil_free(zFullpath);
478 }else{
 
479 vid = name_to_typed_rid(zCheckin, "ci");
480 rid = doc_load_content(vid, zName, &filebody);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481 }
482 }
483 if( rid==0 ) goto doc_not_found;
484 blob_to_utf8_no_bom(&filebody, 0);
485
@@ -522,10 +534,11 @@
534 }else{
535 cgi_set_content_type(zMime);
536 cgi_set_content(&filebody);
537 }
538 if( nMiss>=ArraySize(azSuffix) ) cgi_set_status(404, "Not Found");
539 db_end_transaction(0);
540 return;
541
542 /* Jump here when unable to locate the document */
543 doc_not_found:
544 db_end_transaction(0);
@@ -534,10 +547,11 @@
547 @ <p>Document %h(zOrigName) not found
548 if( fossil_strcmp(zCheckin,"ckout")!=0 ){
549 @ in %z(href("%R/tree?ci=%T",zCheckin))%h(zCheckin)</a>
550 }
551 style_footer();
552 db_end_transaction(0);
553 return;
554 }
555
556 /*
557 ** The default logo.
558
+92
--- src/search.c
+++ src/search.c
@@ -563,5 +563,97 @@
563563
db_finalize(&q);
564564
@ </ol>
565565
}
566566
style_footer();
567567
}
568
+
569
+
570
+/*
571
+** This is a helper function for search_stext(). Writing into pOut
572
+** the search text obtained from pIn according to zMimetype.
573
+*/
574
+static void get_stext_by_mimetype(
575
+ Blob *pIn,
576
+ const char *zMimetype,
577
+ Blob *pOut
578
+){
579
+ Blob html, title;
580
+ blob_init(&html, 0, 0);
581
+ blob_init(&title, 0, 0);
582
+ if( zMimetype==0 ) zMimetype = "text/plain";
583
+ if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){
584
+ wiki_convert(pIn, &html, 0);
585
+ html_to_plaintext(blob_str(&html), pOut);
586
+ }else if( fossil_strcmp(zMimetype,"text/x-markdown")==0 ){
587
+ markdown_to_html(pIn, &title, &html);
588
+ html_to_plaintext(blob_str(&html), pOut);
589
+ }else if( fossil_strcmp(zMimetype,"text/html")==0 ){
590
+ html_to_plaintext(blob_str(pIn), pOut);
591
+ }else{
592
+ *pOut = *pIn;
593
+ blob_init(pIn, 0, 0);
594
+ }
595
+ blob_reset(&html);
596
+ blob_reset(&title);
597
+}
598
+
599
+/*
600
+** Return "search text" - a reduced version of a document appropriate for
601
+** full text search and/or for constructing a search result snippet.
602
+**
603
+** cType: d Embedded documentation
604
+** s Source code listing
605
+** w Wiki page
606
+** c Check-in comment
607
+** t Ticket text
608
+** e Event/Blog text
609
+** k Diff of a wiki
610
+** f Diff of a checkin
611
+**
612
+** zArg1, zArg2: Description of the document, depending on cType.
613
+*/
614
+void search_stext(
615
+ char cType, /* Type of document */
616
+ const char *zArg1, /* First parameter */
617
+ const char *zArg2, /* Second parameter */
618
+ Blob *pOut /* OUT: Initialize to the search text */
619
+){
620
+ blob_init(pOut, 0, 0);
621
+ switch( cType ){
622
+ case 'd': /* Doc. zArg1: RID of the file. zArg2: Filename */
623
+ case 's': { /* Source. zArg1: RID of the file. zArg2: Filename */
624
+ int rid = atoi(zArg1);
625
+ Blob doc;
626
+ content_get(rid, &doc);
627
+ blob_to_utf8_no_bom(&doc, 0);
628
+ get_stext_by_mimetype(&doc, mimetype_from_name(zArg2), pOut);
629
+ blob_reset(&doc);
630
+ break;
631
+ }
632
+ case 'w': { /* Wiki. zArg1: RID of the page. zArg2: Page name */
633
+ int rid = atoi(zArg1);
634
+ Manifest *pWiki = manifest_get(rid, CFTYPE_WIKI,0);
635
+ Blob wiki;
636
+ if( pWiki==0 ) break;
637
+ blob_init(&wiki, pWiki->zWiki, -1);
638
+ get_stext_by_mimetype(&wiki, wiki_filter_mimetypes(pWiki->zMimetype),
639
+ pOut);
640
+ blob_reset(&wiki);
641
+ manifest_destroy(pWiki);
642
+ break;
643
+ }
644
+ }
645
+}
646
+
647
+/*
648
+** COMMAND: test-search-stext
649
+**
650
+** Usage: fossil test-search-stext TYPE ARG1 ARG2
651
+*/
652
+void test_search_stext(void){
653
+ Blob out;
654
+ db_find_and_open_repository(0,0);
655
+ if( g.argc!=5 ) usage("TYPE ARG1 ARG2");
656
+ search_stext(g.argv[2][0], g.argv[3], g.argv[4], &out);
657
+ fossil_print("%s",blob_str(&out));
658
+ blob_reset(&out);
659
+}
568660
--- src/search.c
+++ src/search.c
@@ -563,5 +563,97 @@
563 db_finalize(&q);
564 @ </ol>
565 }
566 style_footer();
567 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
--- src/search.c
+++ src/search.c
@@ -563,5 +563,97 @@
563 db_finalize(&q);
564 @ </ol>
565 }
566 style_footer();
567 }
568
569
570 /*
571 ** This is a helper function for search_stext(). Writing into pOut
572 ** the search text obtained from pIn according to zMimetype.
573 */
574 static void get_stext_by_mimetype(
575 Blob *pIn,
576 const char *zMimetype,
577 Blob *pOut
578 ){
579 Blob html, title;
580 blob_init(&html, 0, 0);
581 blob_init(&title, 0, 0);
582 if( zMimetype==0 ) zMimetype = "text/plain";
583 if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){
584 wiki_convert(pIn, &html, 0);
585 html_to_plaintext(blob_str(&html), pOut);
586 }else if( fossil_strcmp(zMimetype,"text/x-markdown")==0 ){
587 markdown_to_html(pIn, &title, &html);
588 html_to_plaintext(blob_str(&html), pOut);
589 }else if( fossil_strcmp(zMimetype,"text/html")==0 ){
590 html_to_plaintext(blob_str(pIn), pOut);
591 }else{
592 *pOut = *pIn;
593 blob_init(pIn, 0, 0);
594 }
595 blob_reset(&html);
596 blob_reset(&title);
597 }
598
599 /*
600 ** Return "search text" - a reduced version of a document appropriate for
601 ** full text search and/or for constructing a search result snippet.
602 **
603 ** cType: d Embedded documentation
604 ** s Source code listing
605 ** w Wiki page
606 ** c Check-in comment
607 ** t Ticket text
608 ** e Event/Blog text
609 ** k Diff of a wiki
610 ** f Diff of a checkin
611 **
612 ** zArg1, zArg2: Description of the document, depending on cType.
613 */
614 void search_stext(
615 char cType, /* Type of document */
616 const char *zArg1, /* First parameter */
617 const char *zArg2, /* Second parameter */
618 Blob *pOut /* OUT: Initialize to the search text */
619 ){
620 blob_init(pOut, 0, 0);
621 switch( cType ){
622 case 'd': /* Doc. zArg1: RID of the file. zArg2: Filename */
623 case 's': { /* Source. zArg1: RID of the file. zArg2: Filename */
624 int rid = atoi(zArg1);
625 Blob doc;
626 content_get(rid, &doc);
627 blob_to_utf8_no_bom(&doc, 0);
628 get_stext_by_mimetype(&doc, mimetype_from_name(zArg2), pOut);
629 blob_reset(&doc);
630 break;
631 }
632 case 'w': { /* Wiki. zArg1: RID of the page. zArg2: Page name */
633 int rid = atoi(zArg1);
634 Manifest *pWiki = manifest_get(rid, CFTYPE_WIKI,0);
635 Blob wiki;
636 if( pWiki==0 ) break;
637 blob_init(&wiki, pWiki->zWiki, -1);
638 get_stext_by_mimetype(&wiki, wiki_filter_mimetypes(pWiki->zMimetype),
639 pOut);
640 blob_reset(&wiki);
641 manifest_destroy(pWiki);
642 break;
643 }
644 }
645 }
646
647 /*
648 ** COMMAND: test-search-stext
649 **
650 ** Usage: fossil test-search-stext TYPE ARG1 ARG2
651 */
652 void test_search_stext(void){
653 Blob out;
654 db_find_and_open_repository(0,0);
655 if( g.argc!=5 ) usage("TYPE ARG1 ARG2");
656 search_stext(g.argv[2][0], g.argv[3], g.argv[4], &out);
657 fossil_print("%s",blob_str(&out));
658 blob_reset(&out);
659 }
660

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button