Fossil SCM

Enhance the diff logic so that it can suppress differences that do not match a regular expression. This capability is so far only exposed on the test-diff command.

drh 2013-01-02 00:58 trunk
Commit 97a7b924c0e101782831fe7a011ad30a8380e800
+128 -35
--- src/diff.c
+++ src/diff.c
@@ -412,24 +412,45 @@
412412
** Return true if two DLine elements are identical.
413413
*/
414414
static int same_dline(DLine *pA, DLine *pB){
415415
return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
416416
}
417
+
418
+/*
419
+** Return 1 if the regular expression *pRe matches at least one of the
420
+** N DLines starting at aDLine. Return 0 if none match or if N is 0.
421
+*/
422
+static int re_dline_match(
423
+ ReCompiled *pRe, /* The regular expression to be matched */
424
+ DLine *aDLine, /* First of N consecutive DLines to test, in order */
425
+ int N /* Number of DLines to check; 0 checks nothing */
426
+){
427
+ while( N-- ){
428
+ if( re_exec(pRe, aDLine->z, LENGTH(aDLine)) ){
429
+ return 1; /* Stop at the first matching line */
430
+ }
431
+ aDLine++;
432
+ }
433
+ return 0;
434
+}
417435
418436
/*
419437
** Append a single line of context-diff output to pOut.
420438
*/
421439
static void appendDiffLine(
422440
Blob *pOut, /* Where to write the line of output */
423441
char cPrefix, /* One of " ", "+", or "-" */
424442
DLine *pLine, /* The line to be output */
425
- int html /* True if generating HTML. False for plain text */
443
+ int html, /* True if generating HTML. False for plain text */
444
+ ReCompiled *pRe /* Colorize only if line matches this Regex */
426445
){
427446
blob_append(pOut, &cPrefix, 1);
428447
if( html ){
429448
char *zHtml;
430
- if( cPrefix=='+' ){
449
+ if( pRe && re_dline_match(pRe, pLine, 1)==0 ){
450
+ cPrefix = ' ';
451
+ }else if( cPrefix=='+' ){
431452
blob_append(pOut, "<span class=\"diffadd\">", -1);
432453
}else if( cPrefix=='-' ){
433454
blob_append(pOut, "<span class=\"diffrm\">", -1);
434455
}
435456
zHtml = htmlize(pLine->z, (pLine->h & LENGTH_MASK));
@@ -463,21 +484,19 @@
463484
blob_append(pOut, " ", 8);
464485
}
465486
if( html ) blob_append(pOut, "</span>", -1);
466487
}
467488
468
-
469489
/*
470490
** Given a raw diff p[] in which the p->aEdit[] array has been filled
471491
** in, compute a context diff into pOut.
472492
*/
473493
static void contextDiff(
474494
DContext *p, /* The difference */
475495
Blob *pOut, /* Output a context diff to here */
476
- int nContext, /* Number of lines of context */
477
- int showLn, /* Show line numbers */
478
- int html /* Render as HTML */
496
+ ReCompiled *pRe, /* Only show changes that match this regex */
497
+ u64 diffFlags /* Flags controlling the diff format */
479498
){
480499
DLine *A; /* Left side of the diff */
481500
DLine *B; /* Right side of the diff */
482501
int a = 0; /* Index of next line in A[] */
483502
int b = 0; /* Index of next line in B[] */
@@ -488,11 +507,18 @@
488507
int na, nb; /* Number of lines shown from A and B */
489508
int i, j; /* Loop counters */
490509
int m; /* Number of lines to output */
491510
int skip; /* Number of lines to skip */
492511
int nChunk = 0; /* Number of diff chunks seen so far */
512
+ int nContext; /* Number of lines of context */
513
+ int showLn; /* Show line numbers */
514
+ int html; /* Render as HTML */
515
+ int showDivider = 0; /* True to show the divider between diff blocks */
493516
517
+ nContext = diff_context_lines(diffFlags);
518
+ showLn = (diffFlags & DIFF_LINENO)!=0;
519
+ html = (diffFlags & DIFF_HTML)!=0;
494520
A = p->aFrom;
495521
B = p->aTo;
496522
R = p->aEdit;
497523
mxr = p->nEdit;
498524
while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
@@ -499,10 +525,35 @@
499525
for(r=0; r<mxr; r += 3*nr){
500526
/* Figure out how many triples to show in a single block */
501527
for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
502528
/* printf("r=%d nr=%d\n", r, nr); */
503529
530
+ /* If there is a regex, skip this block (generate no diff output)
531
+ ** when, for every triple in the block, the regex matches on both the
532
+ ** delete and insert sides or on neither. Only display the block if
533
+ ** some triple has a match on exactly one side.
534
+ */
535
+ if( pRe ){
536
+ int hideBlock = 1;
537
+ int xa = a, xb = b;
538
+ for(i=0; hideBlock && i<nr; i++){
539
+ int c1, c2;
540
+ xa += R[r+i*3];
541
+ xb += R[r+i*3];
542
+ c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]);
543
+ c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]);
544
+ hideBlock = c1==c2;
545
+ xa += R[r+i*3+1];
546
+ xb += R[r+i*3+2];
547
+ }
548
+ if( hideBlock ){
549
+ a = xa;
550
+ b = xb;
551
+ continue;
552
+ }
553
+ }
554
+
504555
/* For the current block comprising nr triples, figure out
505556
** how many lines of A and B are to be displayed
506557
*/
507558
if( R[r]>nContext ){
508559
na = nb = nContext;
@@ -531,12 +582,13 @@
531582
** context diff that contains line numbers, show the separator from
532583
** the previous block.
533584
*/
534585
nChunk++;
535586
if( showLn ){
536
- if( r==0 ){
587
+ if( !showDivider ){
537588
/* Do not show a top divider */
589
+ showDivider = 1;
538590
}else if( html ){
539591
blob_appendf(pOut, "<span class=\"diffhr\">%.80c</span>\n", '.');
540592
blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
541593
}else{
542594
blob_appendf(pOut, "%.80c\n", '.');
@@ -559,34 +611,36 @@
559611
a += skip;
560612
b += skip;
561613
m = R[r] - skip;
562614
for(j=0; j<m; j++){
563615
if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
564
- appendDiffLine(pOut, ' ', &A[a+j], html);
616
+ appendDiffLine(pOut, ' ', &A[a+j], html, 0);
565617
}
566618
a += m;
567619
b += m;
568620
569621
/* Show the differences */
570622
for(i=0; i<nr; i++){
571623
m = R[r+i*3+1];
572624
for(j=0; j<m; j++){
625
+ char cMark = '-';
573626
if( showLn ) appendDiffLineno(pOut, a+j+1, 0, html);
574
- appendDiffLine(pOut, '-', &A[a+j], html);
627
+ if( pRe && re_dline_match(pRe, &A[a+j], 1)==0 ) cMark = ' ';
628
+ appendDiffLine(pOut, '-', &A[a+j], html, pRe);
575629
}
576630
a += m;
577631
m = R[r+i*3+2];
578632
for(j=0; j<m; j++){
579633
if( showLn ) appendDiffLineno(pOut, 0, b+j+1, html);
580
- appendDiffLine(pOut, '+', &B[b+j], html);
634
+ appendDiffLine(pOut, '+', &B[b+j], html, pRe);
581635
}
582636
b += m;
583637
if( i<nr-1 ){
584638
m = R[r+i*3+3];
585639
for(j=0; j<m; j++){
586640
if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
587
- appendDiffLine(pOut, ' ', &B[b+j], html);
641
+ appendDiffLine(pOut, ' ', &B[b+j], html, 0);
588642
}
589643
b += m;
590644
a += m;
591645
}
592646
}
@@ -595,11 +649,11 @@
595649
assert( nr==i );
596650
m = R[r+nr*3];
597651
if( m>nContext ) m = nContext;
598652
for(j=0; j<m; j++){
599653
if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
600
- appendDiffLine(pOut, ' ', &B[b+j], html);
654
+ appendDiffLine(pOut, ' ', &B[b+j], html, 0);
601655
}
602656
}
603657
}
604658
605659
/*
@@ -615,10 +669,11 @@
615669
const char *zStart; /* A <span> tag */
616670
int iEnd; /* Write </span> prior to character iEnd */
617671
int iStart2; /* Write zStart2 prior to character iStart2 */
618672
const char *zStart2; /* A <span> tag */
619673
int iEnd2; /* Write </span> prior to character iEnd2 */
674
+ ReCompiled *pRe; /* Only colorize matching lines, if not NULL */
620675
};
621676
622677
/*
623678
** Flags for sbsWriteText()
624679
*/
@@ -640,13 +695,17 @@
640695
int k; /* Cursor position */
641696
int needEndSpan = 0;
642697
const char *zIn = pLine->z;
643698
char *z = &p->zLine[p->n];
644699
int w = p->width;
700
+ int colorize = p->escHtml;
701
+ if( colorize && p->pRe && re_dline_match(p->pRe, pLine, 1)==0 ){
702
+ colorize = 0;
703
+ }
645704
for(i=j=k=0; k<w && i<n; i++, k++){
646705
char c = zIn[i];
647
- if( p->escHtml ){
706
+ if( colorize ){
648707
if( i==p->iStart ){
649708
int x = strlen(p->zStart);
650709
memcpy(z+j, p->zStart, x);
651710
j += x;
652711
needEndSpan = 1;
@@ -1196,13 +1255,12 @@
11961255
** in, compute a side-by-side diff into pOut.
11971256
*/
11981257
static void sbsDiff(
11991258
DContext *p, /* The computed diff */
12001259
Blob *pOut, /* Write the results here */
1201
- int nContext, /* Number of lines of context around each change */
1202
- int width, /* Width of each column of output */
1203
- int escHtml /* True to generate HTML output */
1260
+ ReCompiled *pRe, /* Only show changes that match this regex */
1261
+ u64 diffFlags /* Flags controlling the diff */
12041262
){
12051263
DLine *A; /* Left side of the diff */
12061264
DLine *B; /* Right side of the diff */
12071265
int a = 0; /* Index of next line in A[] */
12081266
int b = 0; /* Index of next line in B[] */
@@ -1214,16 +1272,20 @@
12141272
int i, j; /* Loop counters */
12151273
int m, ma, mb;/* Number of lines to output */
12161274
int skip; /* Number of lines to skip */
12171275
int nChunk = 0; /* Number of chunks of diff output seen so far */
12181276
SbsLine s; /* Output line buffer */
1277
+ int nContext; /* Lines of context above and below each change */
1278
+ int showDivider = 0; /* True to show the divider */
12191279
12201280
memset(&s, 0, sizeof(s));
1221
- s.zLine = fossil_malloc( 15*width + 200 );
1281
+ s.width = diff_width(diffFlags);
1282
+ s.zLine = fossil_malloc( 15*s.width + 200 );
12221283
if( s.zLine==0 ) return;
1223
- s.width = width;
1224
- s.escHtml = escHtml;
1284
+ nContext = diff_context_lines(diffFlags);
1285
+ s.escHtml = (diffFlags & DIFF_HTML)!=0;
1286
+ s.pRe = pRe;
12251287
s.iStart = -1;
12261288
s.iStart2 = 0;
12271289
s.iEnd = -1;
12281290
A = p->aFrom;
12291291
B = p->aTo;
@@ -1232,10 +1294,35 @@
12321294
while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
12331295
for(r=0; r<mxr; r += 3*nr){
12341296
/* Figure out how many triples to show in a single block */
12351297
for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
12361298
/* printf("r=%d nr=%d\n", r, nr); */
1299
+
1300
+ /* If there is a regex, skip this block (generate no diff output)
1301
+ ** when, for every triple in the block, the regex matches on both the
1302
+ ** delete and insert sides or on neither. Only display the block if
1303
+ ** some triple has a match on exactly one side.
1304
+ */
1305
+ if( pRe ){
1306
+ int hideBlock = 1;
1307
+ int xa = a, xb = b;
1308
+ for(i=0; hideBlock && i<nr; i++){
1309
+ int c1, c2;
1310
+ xa += R[r+i*3];
1311
+ xb += R[r+i*3];
1312
+ c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]);
1313
+ c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]);
1314
+ hideBlock = c1==c2;
1315
+ xa += R[r+i*3+1];
1316
+ xb += R[r+i*3+2];
1317
+ }
1318
+ if( hideBlock ){
1319
+ a = xa;
1320
+ b = xb;
1321
+ continue;
1322
+ }
1323
+ }
12371324
12381325
/* For the current block comprising nr triples, figure out
12391326
** how many lines of A and B are to be displayed
12401327
*/
12411328
if( R[r]>nContext ){
@@ -1260,20 +1347,21 @@
12601347
na += R[r+i*3];
12611348
nb += R[r+i*3];
12621349
}
12631350
12641351
/* Draw the separator between blocks */
1265
- if( r>0 ){
1266
- if( escHtml ){
1352
+ if( showDivider ){
1353
+ if( s.escHtml ){
12671354
blob_appendf(pOut, "<span class=\"diffhr\">%.*c</span>\n",
1268
- width*2+16, '.');
1355
+ s.width*2+16, '.');
12691356
}else{
1270
- blob_appendf(pOut, "%.*c\n", width*2+16, '.');
1357
+ blob_appendf(pOut, "%.*c\n", s.width*2+16, '.');
12711358
}
12721359
}
1360
+ showDivider = 1;
12731361
nChunk++;
1274
- if( escHtml ){
1362
+ if( s.escHtml ){
12751363
blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
12761364
}
12771365
12781366
/* Show the initial common area */
12791367
a += skip;
@@ -1316,11 +1404,11 @@
13161404
sbsWriteLineno(&s, a);
13171405
s.iStart = 0;
13181406
s.zStart = "<span class=\"diffrm\">";
13191407
s.iEnd = s.width;
13201408
sbsWriteText(&s, &A[a], SBS_PAD);
1321
- if( escHtml ){
1409
+ if( s.escHtml ){
13221410
sbsWrite(&s, " &lt;\n", 6);
13231411
}else{
13241412
sbsWrite(&s, " <\n", 3);
13251413
}
13261414
blob_append(pOut, s.zLine, s.n);
@@ -1338,12 +1426,12 @@
13381426
a++;
13391427
b++;
13401428
}else if( alignment[j]==2 ){
13411429
/* Insert one line on the right */
13421430
s.n = 0;
1343
- sbsWriteSpace(&s, width + 7);
1344
- if( escHtml ){
1431
+ sbsWriteSpace(&s, s.width + 7);
1432
+ if( s.escHtml ){
13451433
sbsWrite(&s, " &gt; ", 6);
13461434
}else{
13471435
sbsWrite(&s, " > ", 3);
13481436
}
13491437
sbsWriteLineno(&s, b);
@@ -1833,10 +1921,11 @@
18331921
*/
18341922
int *text_diff(
18351923
Blob *pA_Blob, /* FROM file */
18361924
Blob *pB_Blob, /* TO file */
18371925
Blob *pOut, /* Write diff here if not NULL */
1926
+ ReCompiled *pRe, /* Only output changes where this Regexp matches */
18381927
u64 diffFlags /* DIFF_* flags defined above */
18391928
){
18401929
int ignoreEolWs; /* Ignore whitespace at the end of lines */
18411930
int nContext; /* Amount of context to display */
18421931
DContext c;
@@ -1844,11 +1933,10 @@
18441933
if( diffFlags & DIFF_INVERT ){
18451934
Blob *pTemp = pA_Blob;
18461935
pA_Blob = pB_Blob;
18471936
pB_Blob = pTemp;
18481937
}
1849
- nContext = diff_context_lines(diffFlags);
18501938
ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
18511939
18521940
/* Prepare the input files */
18531941
memset(&c, 0, sizeof(c));
18541942
c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
@@ -1868,17 +1956,14 @@
18681956
diff_all(&c);
18691957
if( (diffFlags & DIFF_NOOPT)==0 ) diff_optimize(&c);
18701958
18711959
if( pOut ){
18721960
/* Compute a context or side-by-side diff into pOut */
1873
- int escHtml = (diffFlags & DIFF_HTML)!=0;
18741961
if( diffFlags & DIFF_SIDEBYSIDE ){
1875
- int width = diff_width(diffFlags);
1876
- sbsDiff(&c, pOut, nContext, width, escHtml);
1962
+ sbsDiff(&c, pOut, pRe, diffFlags);
18771963
}else{
1878
- int showLn = (diffFlags & DIFF_LINENO)!=0;
1879
- contextDiff(&c, pOut, nContext, showLn, escHtml);
1964
+ contextDiff(&c, pOut, pRe, diffFlags);
18801965
}
18811966
fossil_free(c.aFrom);
18821967
fossil_free(c.aTo);
18831968
fossil_free(c.aEdit);
18841969
return 0;
@@ -1941,11 +2026,11 @@
19412026
if( g.argc<4 ) usage("FILE1 FILE2 ...");
19422027
blob_read_from_file(&a, g.argv[2]);
19432028
for(i=3; i<g.argc; i++){
19442029
if( i>3 ) fossil_print("-------------------------------\n");
19452030
blob_read_from_file(&b, g.argv[i]);
1946
- R = text_diff(&a, &b, 0, diffFlags);
2031
+ R = text_diff(&a, &b, 0, 0, diffFlags);
19472032
for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
19482033
fossil_print(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
19492034
}
19502035
/* free(R); */
19512036
blob_reset(&b);
@@ -1960,25 +2045,33 @@
19602045
** Print the difference between two files. The usual diff options apply.
19612046
*/
19622047
void test_diff_cmd(void){
19632048
Blob a, b, out;
19642049
u64 diffFlag;
2050
+ const char *zRe; /* Regex filter for diff output */
2051
+ ReCompiled *pRe = 0; /* Regex filter for diff output */
19652052
19662053
if( find_option("tk",0,0)!=0 ){
19672054
diff_tk("test-diff", 2);
19682055
return;
19692056
}
19702057
find_option("i",0,0);
2058
+ zRe = find_option("regexp","e",1);
2059
+ if( zRe ){
2060
+ const char *zErr = re_compile(&pRe, zRe, 0);
2061
+ if( zErr ) fossil_fatal("regex error: %s", zErr);
2062
+ }
19712063
diffFlag = diff_options();
19722064
verify_all_options();
19732065
if( g.argc!=4 ) usage("FILE1 FILE2");
19742066
diff_print_filenames(g.argv[2], g.argv[3], diffFlag);
19752067
blob_read_from_file(&a, g.argv[2]);
19762068
blob_read_from_file(&b, g.argv[3]);
19772069
blob_zero(&out);
1978
- text_diff(&a, &b, &out, diffFlag);
2070
+ text_diff(&a, &b, &out, pRe, diffFlag);
19792071
blob_write_to_file(&out, "-");
2072
+ re_free(pRe);
19802073
}
19812074
19822075
/**************************************************************************
19832076
** The basic difference engine is above. What follows is the annotation
19842077
** engine. Both are in the same file since they share many components.
19852078
--- src/diff.c
+++ src/diff.c
@@ -412,24 +412,45 @@
412 ** Return true if two DLine elements are identical.
413 */
414 static int same_dline(DLine *pA, DLine *pB){
415 return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
416 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
418 /*
419 ** Append a single line of context-diff output to pOut.
420 */
421 static void appendDiffLine(
422 Blob *pOut, /* Where to write the line of output */
423 char cPrefix, /* One of " ", "+", or "-" */
424 DLine *pLine, /* The line to be output */
425 int html /* True if generating HTML. False for plain text */
 
426 ){
427 blob_append(pOut, &cPrefix, 1);
428 if( html ){
429 char *zHtml;
430 if( cPrefix=='+' ){
 
 
431 blob_append(pOut, "<span class=\"diffadd\">", -1);
432 }else if( cPrefix=='-' ){
433 blob_append(pOut, "<span class=\"diffrm\">", -1);
434 }
435 zHtml = htmlize(pLine->z, (pLine->h & LENGTH_MASK));
@@ -463,21 +484,19 @@
463 blob_append(pOut, " ", 8);
464 }
465 if( html ) blob_append(pOut, "</span>", -1);
466 }
467
468
469 /*
470 ** Given a raw diff p[] in which the p->aEdit[] array has been filled
471 ** in, compute a context diff into pOut.
472 */
473 static void contextDiff(
474 DContext *p, /* The difference */
475 Blob *pOut, /* Output a context diff to here */
476 int nContext, /* Number of lines of context */
477 int showLn, /* Show line numbers */
478 int html /* Render as HTML */
479 ){
480 DLine *A; /* Left side of the diff */
481 DLine *B; /* Right side of the diff */
482 int a = 0; /* Index of next line in A[] */
483 int b = 0; /* Index of next line in B[] */
@@ -488,11 +507,18 @@
488 int na, nb; /* Number of lines shown from A and B */
489 int i, j; /* Loop counters */
490 int m; /* Number of lines to output */
491 int skip; /* Number of lines to skip */
492 int nChunk = 0; /* Number of diff chunks seen so far */
 
 
 
 
493
 
 
 
494 A = p->aFrom;
495 B = p->aTo;
496 R = p->aEdit;
497 mxr = p->nEdit;
498 while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
@@ -499,10 +525,35 @@
499 for(r=0; r<mxr; r += 3*nr){
500 /* Figure out how many triples to show in a single block */
501 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
502 /* printf("r=%d nr=%d\n", r, nr); */
503
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504 /* For the current block comprising nr triples, figure out
505 ** how many lines of A and B are to be displayed
506 */
507 if( R[r]>nContext ){
508 na = nb = nContext;
@@ -531,12 +582,13 @@
531 ** context diff that contains line numbers, show the separator from
532 ** the previous block.
533 */
534 nChunk++;
535 if( showLn ){
536 if( r==0 ){
537 /* Do not show a top divider */
 
538 }else if( html ){
539 blob_appendf(pOut, "<span class=\"diffhr\">%.80c</span>\n", '.');
540 blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
541 }else{
542 blob_appendf(pOut, "%.80c\n", '.');
@@ -559,34 +611,36 @@
559 a += skip;
560 b += skip;
561 m = R[r] - skip;
562 for(j=0; j<m; j++){
563 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
564 appendDiffLine(pOut, ' ', &A[a+j], html);
565 }
566 a += m;
567 b += m;
568
569 /* Show the differences */
570 for(i=0; i<nr; i++){
571 m = R[r+i*3+1];
572 for(j=0; j<m; j++){
 
573 if( showLn ) appendDiffLineno(pOut, a+j+1, 0, html);
574 appendDiffLine(pOut, '-', &A[a+j], html);
 
575 }
576 a += m;
577 m = R[r+i*3+2];
578 for(j=0; j<m; j++){
579 if( showLn ) appendDiffLineno(pOut, 0, b+j+1, html);
580 appendDiffLine(pOut, '+', &B[b+j], html);
581 }
582 b += m;
583 if( i<nr-1 ){
584 m = R[r+i*3+3];
585 for(j=0; j<m; j++){
586 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
587 appendDiffLine(pOut, ' ', &B[b+j], html);
588 }
589 b += m;
590 a += m;
591 }
592 }
@@ -595,11 +649,11 @@
595 assert( nr==i );
596 m = R[r+nr*3];
597 if( m>nContext ) m = nContext;
598 for(j=0; j<m; j++){
599 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
600 appendDiffLine(pOut, ' ', &B[b+j], html);
601 }
602 }
603 }
604
605 /*
@@ -615,10 +669,11 @@
615 const char *zStart; /* A <span> tag */
616 int iEnd; /* Write </span> prior to character iEnd */
617 int iStart2; /* Write zStart2 prior to character iStart2 */
618 const char *zStart2; /* A <span> tag */
619 int iEnd2; /* Write </span> prior to character iEnd2 */
 
620 };
621
622 /*
623 ** Flags for sbsWriteText()
624 */
@@ -640,13 +695,17 @@
640 int k; /* Cursor position */
641 int needEndSpan = 0;
642 const char *zIn = pLine->z;
643 char *z = &p->zLine[p->n];
644 int w = p->width;
 
 
 
 
645 for(i=j=k=0; k<w && i<n; i++, k++){
646 char c = zIn[i];
647 if( p->escHtml ){
648 if( i==p->iStart ){
649 int x = strlen(p->zStart);
650 memcpy(z+j, p->zStart, x);
651 j += x;
652 needEndSpan = 1;
@@ -1196,13 +1255,12 @@
1196 ** in, compute a side-by-side diff into pOut.
1197 */
1198 static void sbsDiff(
1199 DContext *p, /* The computed diff */
1200 Blob *pOut, /* Write the results here */
1201 int nContext, /* Number of lines of context around each change */
1202 int width, /* Width of each column of output */
1203 int escHtml /* True to generate HTML output */
1204 ){
1205 DLine *A; /* Left side of the diff */
1206 DLine *B; /* Right side of the diff */
1207 int a = 0; /* Index of next line in A[] */
1208 int b = 0; /* Index of next line in B[] */
@@ -1214,16 +1272,20 @@
1214 int i, j; /* Loop counters */
1215 int m, ma, mb;/* Number of lines to output */
1216 int skip; /* Number of lines to skip */
1217 int nChunk = 0; /* Number of chunks of diff output seen so far */
1218 SbsLine s; /* Output line buffer */
 
 
1219
1220 memset(&s, 0, sizeof(s));
1221 s.zLine = fossil_malloc( 15*width + 200 );
 
1222 if( s.zLine==0 ) return;
1223 s.width = width;
1224 s.escHtml = escHtml;
 
1225 s.iStart = -1;
1226 s.iStart2 = 0;
1227 s.iEnd = -1;
1228 A = p->aFrom;
1229 B = p->aTo;
@@ -1232,10 +1294,35 @@
1232 while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
1233 for(r=0; r<mxr; r += 3*nr){
1234 /* Figure out how many triples to show in a single block */
1235 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
1236 /* printf("r=%d nr=%d\n", r, nr); */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1237
1238 /* For the current block comprising nr triples, figure out
1239 ** how many lines of A and B are to be displayed
1240 */
1241 if( R[r]>nContext ){
@@ -1260,20 +1347,21 @@
1260 na += R[r+i*3];
1261 nb += R[r+i*3];
1262 }
1263
1264 /* Draw the separator between blocks */
1265 if( r>0 ){
1266 if( escHtml ){
1267 blob_appendf(pOut, "<span class=\"diffhr\">%.*c</span>\n",
1268 width*2+16, '.');
1269 }else{
1270 blob_appendf(pOut, "%.*c\n", width*2+16, '.');
1271 }
1272 }
 
1273 nChunk++;
1274 if( escHtml ){
1275 blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
1276 }
1277
1278 /* Show the initial common area */
1279 a += skip;
@@ -1316,11 +1404,11 @@
1316 sbsWriteLineno(&s, a);
1317 s.iStart = 0;
1318 s.zStart = "<span class=\"diffrm\">";
1319 s.iEnd = s.width;
1320 sbsWriteText(&s, &A[a], SBS_PAD);
1321 if( escHtml ){
1322 sbsWrite(&s, " &lt;\n", 6);
1323 }else{
1324 sbsWrite(&s, " <\n", 3);
1325 }
1326 blob_append(pOut, s.zLine, s.n);
@@ -1338,12 +1426,12 @@
1338 a++;
1339 b++;
1340 }else if( alignment[j]==2 ){
1341 /* Insert one line on the right */
1342 s.n = 0;
1343 sbsWriteSpace(&s, width + 7);
1344 if( escHtml ){
1345 sbsWrite(&s, " &gt; ", 6);
1346 }else{
1347 sbsWrite(&s, " > ", 3);
1348 }
1349 sbsWriteLineno(&s, b);
@@ -1833,10 +1921,11 @@
1833 */
1834 int *text_diff(
1835 Blob *pA_Blob, /* FROM file */
1836 Blob *pB_Blob, /* TO file */
1837 Blob *pOut, /* Write diff here if not NULL */
 
1838 u64 diffFlags /* DIFF_* flags defined above */
1839 ){
1840 int ignoreEolWs; /* Ignore whitespace at the end of lines */
1841 int nContext; /* Amount of context to display */
1842 DContext c;
@@ -1844,11 +1933,10 @@
1844 if( diffFlags & DIFF_INVERT ){
1845 Blob *pTemp = pA_Blob;
1846 pA_Blob = pB_Blob;
1847 pB_Blob = pTemp;
1848 }
1849 nContext = diff_context_lines(diffFlags);
1850 ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
1851
1852 /* Prepare the input files */
1853 memset(&c, 0, sizeof(c));
1854 c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
@@ -1868,17 +1956,14 @@
1868 diff_all(&c);
1869 if( (diffFlags & DIFF_NOOPT)==0 ) diff_optimize(&c);
1870
1871 if( pOut ){
1872 /* Compute a context or side-by-side diff into pOut */
1873 int escHtml = (diffFlags & DIFF_HTML)!=0;
1874 if( diffFlags & DIFF_SIDEBYSIDE ){
1875 int width = diff_width(diffFlags);
1876 sbsDiff(&c, pOut, nContext, width, escHtml);
1877 }else{
1878 int showLn = (diffFlags & DIFF_LINENO)!=0;
1879 contextDiff(&c, pOut, nContext, showLn, escHtml);
1880 }
1881 fossil_free(c.aFrom);
1882 fossil_free(c.aTo);
1883 fossil_free(c.aEdit);
1884 return 0;
@@ -1941,11 +2026,11 @@
1941 if( g.argc<4 ) usage("FILE1 FILE2 ...");
1942 blob_read_from_file(&a, g.argv[2]);
1943 for(i=3; i<g.argc; i++){
1944 if( i>3 ) fossil_print("-------------------------------\n");
1945 blob_read_from_file(&b, g.argv[i]);
1946 R = text_diff(&a, &b, 0, diffFlags);
1947 for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
1948 fossil_print(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
1949 }
1950 /* free(R); */
1951 blob_reset(&b);
@@ -1960,25 +2045,33 @@
1960 ** Print the difference between two files. The usual diff options apply.
1961 */
1962 void test_diff_cmd(void){
1963 Blob a, b, out;
1964 u64 diffFlag;
 
 
1965
1966 if( find_option("tk",0,0)!=0 ){
1967 diff_tk("test-diff", 2);
1968 return;
1969 }
1970 find_option("i",0,0);
 
 
 
 
 
1971 diffFlag = diff_options();
1972 verify_all_options();
1973 if( g.argc!=4 ) usage("FILE1 FILE2");
1974 diff_print_filenames(g.argv[2], g.argv[3], diffFlag);
1975 blob_read_from_file(&a, g.argv[2]);
1976 blob_read_from_file(&b, g.argv[3]);
1977 blob_zero(&out);
1978 text_diff(&a, &b, &out, diffFlag);
1979 blob_write_to_file(&out, "-");
 
1980 }
1981
1982 /**************************************************************************
1983 ** The basic difference engine is above. What follows is the annotation
1984 ** engine. Both are in the same file since they share many components.
1985
--- src/diff.c
+++ src/diff.c
@@ -412,24 +412,45 @@
412 ** Return true if two DLine elements are identical.
413 */
414 static int same_dline(DLine *pA, DLine *pB){
415 return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
416 }
417
418 /*
419 ** Return 1 if the regular expression *pRe matches at least one of the
420 ** N DLines starting at aDLine. Return 0 if none match or if N is 0.
421 */
422 static int re_dline_match(
423 ReCompiled *pRe, /* The regular expression to be matched */
424 DLine *aDLine, /* First of N consecutive DLines to test, in order */
425 int N /* Number of DLines to check; 0 checks nothing */
426 ){
427 while( N-- ){
428 if( re_exec(pRe, aDLine->z, LENGTH(aDLine)) ){
429 return 1; /* Stop at the first matching line */
430 }
431 aDLine++;
432 }
433 return 0;
434 }
435
436 /*
437 ** Append a single line of context-diff output to pOut.
438 */
439 static void appendDiffLine(
440 Blob *pOut, /* Where to write the line of output */
441 char cPrefix, /* One of " ", "+", or "-" */
442 DLine *pLine, /* The line to be output */
443 int html, /* True if generating HTML. False for plain text */
444 ReCompiled *pRe /* Colorize only if line matches this Regex */
445 ){
446 blob_append(pOut, &cPrefix, 1);
447 if( html ){
448 char *zHtml;
449 if( pRe && re_dline_match(pRe, pLine, 1)==0 ){
450 cPrefix = ' ';
451 }else if( cPrefix=='+' ){
452 blob_append(pOut, "<span class=\"diffadd\">", -1);
453 }else if( cPrefix=='-' ){
454 blob_append(pOut, "<span class=\"diffrm\">", -1);
455 }
456 zHtml = htmlize(pLine->z, (pLine->h & LENGTH_MASK));
@@ -463,21 +484,19 @@
484 blob_append(pOut, " ", 8);
485 }
486 if( html ) blob_append(pOut, "</span>", -1);
487 }
488
 
489 /*
490 ** Given a raw diff p[] in which the p->aEdit[] array has been filled
491 ** in, compute a context diff into pOut.
492 */
493 static void contextDiff(
494 DContext *p, /* The difference */
495 Blob *pOut, /* Output a context diff to here */
496 ReCompiled *pRe, /* Only show changes that match this regex */
497 u64 diffFlags /* Flags controlling the diff format */
 
498 ){
499 DLine *A; /* Left side of the diff */
500 DLine *B; /* Right side of the diff */
501 int a = 0; /* Index of next line in A[] */
502 int b = 0; /* Index of next line in B[] */
@@ -488,11 +507,18 @@
507 int na, nb; /* Number of lines shown from A and B */
508 int i, j; /* Loop counters */
509 int m; /* Number of lines to output */
510 int skip; /* Number of lines to skip */
511 int nChunk = 0; /* Number of diff chunks seen so far */
512 int nContext; /* Number of lines of context */
513 int showLn; /* Show line numbers */
514 int html; /* Render as HTML */
515 int showDivider = 0; /* True to show the divider between diff blocks */
516
517 nContext = diff_context_lines(diffFlags);
518 showLn = (diffFlags & DIFF_LINENO)!=0;
519 html = (diffFlags & DIFF_HTML)!=0;
520 A = p->aFrom;
521 B = p->aTo;
522 R = p->aEdit;
523 mxr = p->nEdit;
524 while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
@@ -499,10 +525,35 @@
525 for(r=0; r<mxr; r += 3*nr){
526 /* Figure out how many triples to show in a single block */
527 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
528 /* printf("r=%d nr=%d\n", r, nr); */
529
530 /* If there is a regex, skip this block (generate no diff output)
531 ** when, for every triple in the block, the regex matches on both the
532 ** delete and insert sides or on neither. Only display the block if
533 ** some triple has a match on exactly one side.
534 */
535 if( pRe ){
536 int hideBlock = 1;
537 int xa = a, xb = b;
538 for(i=0; hideBlock && i<nr; i++){
539 int c1, c2;
540 xa += R[r+i*3];
541 xb += R[r+i*3];
542 c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]);
543 c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]);
544 hideBlock = c1==c2;
545 xa += R[r+i*3+1];
546 xb += R[r+i*3+2];
547 }
548 if( hideBlock ){
549 a = xa;
550 b = xb;
551 continue;
552 }
553 }
554
555 /* For the current block comprising nr triples, figure out
556 ** how many lines of A and B are to be displayed
557 */
558 if( R[r]>nContext ){
559 na = nb = nContext;
@@ -531,12 +582,13 @@
582 ** context diff that contains line numbers, show the separator from
583 ** the previous block.
584 */
585 nChunk++;
586 if( showLn ){
587 if( !showDivider ){
588 /* Do not show a top divider */
589 showDivider = 1;
590 }else if( html ){
591 blob_appendf(pOut, "<span class=\"diffhr\">%.80c</span>\n", '.');
592 blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
593 }else{
594 blob_appendf(pOut, "%.80c\n", '.');
@@ -559,34 +611,36 @@
611 a += skip;
612 b += skip;
613 m = R[r] - skip;
614 for(j=0; j<m; j++){
615 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
616 appendDiffLine(pOut, ' ', &A[a+j], html, 0);
617 }
618 a += m;
619 b += m;
620
621 /* Show the differences */
622 for(i=0; i<nr; i++){
623 m = R[r+i*3+1];
624 for(j=0; j<m; j++){
625 char cMark = '-';
626 if( showLn ) appendDiffLineno(pOut, a+j+1, 0, html);
627 if( pRe && re_dline_match(pRe, &A[a+j], 1)==0 ) cMark = ' ';
628 appendDiffLine(pOut, '-', &A[a+j], html, pRe);
629 }
630 a += m;
631 m = R[r+i*3+2];
632 for(j=0; j<m; j++){
633 if( showLn ) appendDiffLineno(pOut, 0, b+j+1, html);
634 appendDiffLine(pOut, '+', &B[b+j], html, pRe);
635 }
636 b += m;
637 if( i<nr-1 ){
638 m = R[r+i*3+3];
639 for(j=0; j<m; j++){
640 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
641 appendDiffLine(pOut, ' ', &B[b+j], html, 0);
642 }
643 b += m;
644 a += m;
645 }
646 }
@@ -595,11 +649,11 @@
649 assert( nr==i );
650 m = R[r+nr*3];
651 if( m>nContext ) m = nContext;
652 for(j=0; j<m; j++){
653 if( showLn ) appendDiffLineno(pOut, a+j+1, b+j+1, html);
654 appendDiffLine(pOut, ' ', &B[b+j], html, 0);
655 }
656 }
657 }
658
659 /*
@@ -615,10 +669,11 @@
669 const char *zStart; /* A <span> tag */
670 int iEnd; /* Write </span> prior to character iEnd */
671 int iStart2; /* Write zStart2 prior to character iStart2 */
672 const char *zStart2; /* A <span> tag */
673 int iEnd2; /* Write </span> prior to character iEnd2 */
674 ReCompiled *pRe; /* Only colorize matching lines, if not NULL */
675 };
676
677 /*
678 ** Flags for sbsWriteText()
679 */
@@ -640,13 +695,17 @@
695 int k; /* Cursor position */
696 int needEndSpan = 0;
697 const char *zIn = pLine->z;
698 char *z = &p->zLine[p->n];
699 int w = p->width;
700 int colorize = p->escHtml;
701 if( colorize && p->pRe && re_dline_match(p->pRe, pLine, 1)==0 ){
702 colorize = 0;
703 }
704 for(i=j=k=0; k<w && i<n; i++, k++){
705 char c = zIn[i];
706 if( colorize ){
707 if( i==p->iStart ){
708 int x = strlen(p->zStart);
709 memcpy(z+j, p->zStart, x);
710 j += x;
711 needEndSpan = 1;
@@ -1196,13 +1255,12 @@
1255 ** in, compute a side-by-side diff into pOut.
1256 */
1257 static void sbsDiff(
1258 DContext *p, /* The computed diff */
1259 Blob *pOut, /* Write the results here */
1260 ReCompiled *pRe, /* Only show changes that match this regex */
1261 u64 diffFlags /* Flags controlling the diff */
 
1262 ){
1263 DLine *A; /* Left side of the diff */
1264 DLine *B; /* Right side of the diff */
1265 int a = 0; /* Index of next line in A[] */
1266 int b = 0; /* Index of next line in B[] */
@@ -1214,16 +1272,20 @@
1272 int i, j; /* Loop counters */
1273 int m, ma, mb;/* Number of lines to output */
1274 int skip; /* Number of lines to skip */
1275 int nChunk = 0; /* Number of chunks of diff output seen so far */
1276 SbsLine s; /* Output line buffer */
1277 int nContext; /* Lines of context above and below each change */
1278 int showDivider = 0; /* True to show the divider */
1279
1280 memset(&s, 0, sizeof(s));
1281 s.width = diff_width(diffFlags);
1282 s.zLine = fossil_malloc( 15*s.width + 200 );
1283 if( s.zLine==0 ) return;
1284 nContext = diff_context_lines(diffFlags);
1285 s.escHtml = (diffFlags & DIFF_HTML)!=0;
1286 s.pRe = pRe;
1287 s.iStart = -1;
1288 s.iStart2 = 0;
1289 s.iEnd = -1;
1290 A = p->aFrom;
1291 B = p->aTo;
@@ -1232,10 +1294,35 @@
1294 while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
1295 for(r=0; r<mxr; r += 3*nr){
1296 /* Figure out how many triples to show in a single block */
1297 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
1298 /* printf("r=%d nr=%d\n", r, nr); */
1299
1300 /* If there is a regex, skip this block (generate no diff output)
1301 ** if the regex matches or does not match both insert and delete.
1302 ** Only display the block if one side matches but the other side does
1303 ** not.
1304 */
1305 if( pRe ){
1306 int hideBlock = 1;
1307 int xa = a, xb = b;
1308 for(i=0; hideBlock && i<nr; i++){
1309 int c1, c2;
1310 xa += R[r+i*3];
1311 xb += R[r+i*3];
1312 c1 = re_dline_match(pRe, &A[xa], R[r+i*3+1]);
1313 c2 = re_dline_match(pRe, &B[xb], R[r+i*3+2]);
1314 hideBlock = c1==c2;
1315 xa += R[r+i*3+1];
1316 xb += R[r+i*3+2];
1317 }
1318 if( hideBlock ){
1319 a = xa;
1320 b = xb;
1321 continue;
1322 }
1323 }
1324
1325 /* For the current block comprising nr triples, figure out
1326 ** how many lines of A and B are to be displayed
1327 */
1328 if( R[r]>nContext ){
@@ -1260,20 +1347,21 @@
1347 na += R[r+i*3];
1348 nb += R[r+i*3];
1349 }
1350
1351 /* Draw the separator between blocks */
1352 if( showDivider ){
1353 if( s.escHtml ){
1354 blob_appendf(pOut, "<span class=\"diffhr\">%.*c</span>\n",
1355 s.width*2+16, '.');
1356 }else{
1357 blob_appendf(pOut, "%.*c\n", s.width*2+16, '.');
1358 }
1359 }
1360 showDivider = 1;
1361 nChunk++;
1362 if( s.escHtml ){
1363 blob_appendf(pOut, "<a name=\"chunk%d\"></a>\n", nChunk);
1364 }
1365
1366 /* Show the initial common area */
1367 a += skip;
@@ -1316,11 +1404,11 @@
1404 sbsWriteLineno(&s, a);
1405 s.iStart = 0;
1406 s.zStart = "<span class=\"diffrm\">";
1407 s.iEnd = s.width;
1408 sbsWriteText(&s, &A[a], SBS_PAD);
1409 if( s.escHtml ){
1410 sbsWrite(&s, " &lt;\n", 6);
1411 }else{
1412 sbsWrite(&s, " <\n", 3);
1413 }
1414 blob_append(pOut, s.zLine, s.n);
@@ -1338,12 +1426,12 @@
1426 a++;
1427 b++;
1428 }else if( alignment[j]==2 ){
1429 /* Insert one line on the right */
1430 s.n = 0;
1431 sbsWriteSpace(&s, s.width + 7);
1432 if( s.escHtml ){
1433 sbsWrite(&s, " &gt; ", 6);
1434 }else{
1435 sbsWrite(&s, " > ", 3);
1436 }
1437 sbsWriteLineno(&s, b);
@@ -1833,10 +1921,11 @@
1921 */
1922 int *text_diff(
1923 Blob *pA_Blob, /* FROM file */
1924 Blob *pB_Blob, /* TO file */
1925 Blob *pOut, /* Write diff here if not NULL */
1926 ReCompiled *pRe, /* Only output changes where this Regexp matches */
1927 u64 diffFlags /* DIFF_* flags defined above */
1928 ){
1929 int ignoreEolWs; /* Ignore whitespace at the end of lines */
1930 int nContext; /* Amount of context to display */
1931 DContext c;
@@ -1844,11 +1933,10 @@
1933 if( diffFlags & DIFF_INVERT ){
1934 Blob *pTemp = pA_Blob;
1935 pA_Blob = pB_Blob;
1936 pB_Blob = pTemp;
1937 }
 
1938 ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0;
1939
1940 /* Prepare the input files */
1941 memset(&c, 0, sizeof(c));
1942 c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
@@ -1868,17 +1956,14 @@
1956 diff_all(&c);
1957 if( (diffFlags & DIFF_NOOPT)==0 ) diff_optimize(&c);
1958
1959 if( pOut ){
1960 /* Compute a context or side-by-side diff into pOut */
 
1961 if( diffFlags & DIFF_SIDEBYSIDE ){
1962 sbsDiff(&c, pOut, pRe, diffFlags);
 
1963 }else{
1964 contextDiff(&c, pOut, pRe, diffFlags);
 
1965 }
1966 fossil_free(c.aFrom);
1967 fossil_free(c.aTo);
1968 fossil_free(c.aEdit);
1969 return 0;
@@ -1941,11 +2026,11 @@
2026 if( g.argc<4 ) usage("FILE1 FILE2 ...");
2027 blob_read_from_file(&a, g.argv[2]);
2028 for(i=3; i<g.argc; i++){
2029 if( i>3 ) fossil_print("-------------------------------\n");
2030 blob_read_from_file(&b, g.argv[i]);
2031 R = text_diff(&a, &b, 0, 0, diffFlags);
2032 for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
2033 fossil_print(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
2034 }
2035 /* free(R); */
2036 blob_reset(&b);
@@ -1960,25 +2045,33 @@
2045 ** Print the difference between two files. The usual diff options apply.
2046 */
2047 void test_diff_cmd(void){
2048 Blob a, b, out;
2049 u64 diffFlag;
2050 const char *zRe; /* Regex filter for diff output */
2051 ReCompiled *pRe = 0; /* Regex filter for diff output */
2052
2053 if( find_option("tk",0,0)!=0 ){
2054 diff_tk("test-diff", 2);
2055 return;
2056 }
2057 find_option("i",0,0);
2058 zRe = find_option("regexp","e",1);
2059 if( zRe ){
2060 const char *zErr = re_compile(&pRe, zRe, 0);
2061 if( zErr ) fossil_fatal("regex error: %s", zErr);
2062 }
2063 diffFlag = diff_options();
2064 verify_all_options();
2065 if( g.argc!=4 ) usage("FILE1 FILE2");
2066 diff_print_filenames(g.argv[2], g.argv[3], diffFlag);
2067 blob_read_from_file(&a, g.argv[2]);
2068 blob_read_from_file(&b, g.argv[3]);
2069 blob_zero(&out);
2070 text_diff(&a, &b, &out, pRe, diffFlag);
2071 blob_write_to_file(&out, "-");
2072 re_free(pRe);
2073 }
2074
2075 /**************************************************************************
2076 ** The basic difference engine is above. What follows is the annotation
2077 ** engine. Both are in the same file since they share many components.
2078
+2 -2
--- src/diffcmd.c
+++ src/diffcmd.c
@@ -109,11 +109,11 @@
109109
if( blob_compare(pFile1, &file2) ){
110110
fossil_print("CHANGED %s\n", zName);
111111
}
112112
}else{
113113
blob_zero(&out);
114
- text_diff(pFile1, &file2, &out, diffFlags);
114
+ text_diff(pFile1, &file2, &out, 0, diffFlags);
115115
if( blob_size(&out) ){
116116
diff_print_filenames(zName, zName2, diffFlags);
117117
fossil_print("%s\n", blob_str(&out));
118118
}
119119
blob_reset(&out);
@@ -210,11 +210,11 @@
210210
if( diffFlags & DIFF_BRIEF ) return;
211211
if( zDiffCmd==0 ){
212212
Blob out; /* Diff output text */
213213
214214
blob_zero(&out);
215
- text_diff(pFile1, pFile2, &out, diffFlags);
215
+ text_diff(pFile1, pFile2, &out, 0, diffFlags);
216216
diff_print_filenames(zName, zName, diffFlags);
217217
fossil_print("%s\n", blob_str(&out));
218218
219219
/* Release memory resources */
220220
blob_reset(&out);
221221
--- src/diffcmd.c
+++ src/diffcmd.c
@@ -109,11 +109,11 @@
109 if( blob_compare(pFile1, &file2) ){
110 fossil_print("CHANGED %s\n", zName);
111 }
112 }else{
113 blob_zero(&out);
114 text_diff(pFile1, &file2, &out, diffFlags);
115 if( blob_size(&out) ){
116 diff_print_filenames(zName, zName2, diffFlags);
117 fossil_print("%s\n", blob_str(&out));
118 }
119 blob_reset(&out);
@@ -210,11 +210,11 @@
210 if( diffFlags & DIFF_BRIEF ) return;
211 if( zDiffCmd==0 ){
212 Blob out; /* Diff output text */
213
214 blob_zero(&out);
215 text_diff(pFile1, pFile2, &out, diffFlags);
216 diff_print_filenames(zName, zName, diffFlags);
217 fossil_print("%s\n", blob_str(&out));
218
219 /* Release memory resources */
220 blob_reset(&out);
221
--- src/diffcmd.c
+++ src/diffcmd.c
@@ -109,11 +109,11 @@
109 if( blob_compare(pFile1, &file2) ){
110 fossil_print("CHANGED %s\n", zName);
111 }
112 }else{
113 blob_zero(&out);
114 text_diff(pFile1, &file2, &out, 0, diffFlags);
115 if( blob_size(&out) ){
116 diff_print_filenames(zName, zName2, diffFlags);
117 fossil_print("%s\n", blob_str(&out));
118 }
119 blob_reset(&out);
@@ -210,11 +210,11 @@
210 if( diffFlags & DIFF_BRIEF ) return;
211 if( zDiffCmd==0 ){
212 Blob out; /* Diff output text */
213
214 blob_zero(&out);
215 text_diff(pFile1, pFile2, &out, 0, diffFlags);
216 diff_print_filenames(zName, zName, diffFlags);
217 fossil_print("%s\n", blob_str(&out));
218
219 /* Release memory resources */
220 blob_reset(&out);
221
+3 -3
--- src/info.c
+++ src/info.c
@@ -307,16 +307,16 @@
307307
}else{
308308
blob_zero(&to);
309309
}
310310
blob_zero(&out);
311311
if( diffFlags & DIFF_SIDEBYSIDE ){
312
- text_diff(&from, &to, &out, diffFlags | DIFF_HTML);
312
+ text_diff(&from, &to, &out, 0, diffFlags | DIFF_HTML);
313313
@ <div class="sbsdiff">
314314
@ %s(blob_str(&out))
315315
@ </div>
316316
}else{
317
- text_diff(&from, &to, &out, diffFlags | DIFF_LINENO | DIFF_HTML);
317
+ text_diff(&from, &to, &out, 0, diffFlags | DIFF_LINENO | DIFF_HTML);
318318
@ <div class="udiff">
319319
@ %s(blob_str(&out))
320320
@ </div>
321321
}
322322
blob_reset(&from);
@@ -1298,11 +1298,11 @@
12981298
zStyle = "udiff";
12991299
}
13001300
}
13011301
content_get(v1, &c1);
13021302
content_get(v2, &c2);
1303
- text_diff(&c1, &c2, pOut, diffFlags);
1303
+ text_diff(&c1, &c2, pOut, 0, diffFlags);
13041304
blob_reset(&c1);
13051305
blob_reset(&c2);
13061306
if( !isPatch ){
13071307
style_header("Diff");
13081308
style_submenu_element("Patch", "Patch", "%s/fdiff?v1=%T&v2=%T&patch",
13091309
--- src/info.c
+++ src/info.c
@@ -307,16 +307,16 @@
307 }else{
308 blob_zero(&to);
309 }
310 blob_zero(&out);
311 if( diffFlags & DIFF_SIDEBYSIDE ){
312 text_diff(&from, &to, &out, diffFlags | DIFF_HTML);
313 @ <div class="sbsdiff">
314 @ %s(blob_str(&out))
315 @ </div>
316 }else{
317 text_diff(&from, &to, &out, diffFlags | DIFF_LINENO | DIFF_HTML);
318 @ <div class="udiff">
319 @ %s(blob_str(&out))
320 @ </div>
321 }
322 blob_reset(&from);
@@ -1298,11 +1298,11 @@
1298 zStyle = "udiff";
1299 }
1300 }
1301 content_get(v1, &c1);
1302 content_get(v2, &c2);
1303 text_diff(&c1, &c2, pOut, diffFlags);
1304 blob_reset(&c1);
1305 blob_reset(&c2);
1306 if( !isPatch ){
1307 style_header("Diff");
1308 style_submenu_element("Patch", "Patch", "%s/fdiff?v1=%T&v2=%T&patch",
1309
--- src/info.c
+++ src/info.c
@@ -307,16 +307,16 @@
307 }else{
308 blob_zero(&to);
309 }
310 blob_zero(&out);
311 if( diffFlags & DIFF_SIDEBYSIDE ){
312 text_diff(&from, &to, &out, 0, diffFlags | DIFF_HTML);
313 @ <div class="sbsdiff">
314 @ %s(blob_str(&out))
315 @ </div>
316 }else{
317 text_diff(&from, &to, &out, 0, diffFlags | DIFF_LINENO | DIFF_HTML);
318 @ <div class="udiff">
319 @ %s(blob_str(&out))
320 @ </div>
321 }
322 blob_reset(&from);
@@ -1298,11 +1298,11 @@
1298 zStyle = "udiff";
1299 }
1300 }
1301 content_get(v1, &c1);
1302 content_get(v2, &c2);
1303 text_diff(&c1, &c2, pOut, 0, diffFlags);
1304 blob_reset(&c1);
1305 blob_reset(&c2);
1306 if( !isPatch ){
1307 style_header("Diff");
1308 style_submenu_element("Patch", "Patch", "%s/fdiff?v1=%T&v2=%T&patch",
1309
+1 -1
--- src/json_diff.c
+++ src/json_diff.c
@@ -58,11 +58,11 @@
5858
return NULL;
5959
}
6060
content_get(fromid, &from);
6161
content_get(toid, &to);
6262
blob_zero(&out);
63
- text_diff(&from, &to, &out, flags);
63
+ text_diff(&from, &to, &out, 0, flags);
6464
blob_reset(&from);
6565
blob_reset(&to);
6666
outLen = blob_size(&out);
6767
if(outLen>=0){
6868
rc = cson_value_new_string(blob_buffer(&out),
6969
--- src/json_diff.c
+++ src/json_diff.c
@@ -58,11 +58,11 @@
58 return NULL;
59 }
60 content_get(fromid, &from);
61 content_get(toid, &to);
62 blob_zero(&out);
63 text_diff(&from, &to, &out, flags);
64 blob_reset(&from);
65 blob_reset(&to);
66 outLen = blob_size(&out);
67 if(outLen>=0){
68 rc = cson_value_new_string(blob_buffer(&out),
69
--- src/json_diff.c
+++ src/json_diff.c
@@ -58,11 +58,11 @@
58 return NULL;
59 }
60 content_get(fromid, &from);
61 content_get(toid, &to);
62 blob_zero(&out);
63 text_diff(&from, &to, &out, 0, flags);
64 blob_reset(&from);
65 blob_reset(&to);
66 outLen = blob_size(&out);
67 if(outLen>=0){
68 rc = cson_value_new_string(blob_buffer(&out),
69
+1 -1
--- src/json_wiki.c
+++ src/json_wiki.c
@@ -543,11 +543,11 @@
543543
blob_init(&w1, pW1->zWiki, -1);
544544
blob_zero(&w2);
545545
blob_init(&w2, pW2->zWiki, -1);
546546
blob_zero(&d);
547547
diffFlags = DIFF_IGNORE_EOLWS | DIFF_INLINE;
548
- text_diff(&w2, &w1, &d, diffFlags);
548
+ text_diff(&w2, &w1, &d, 0, diffFlags);
549549
blob_reset(&w1);
550550
blob_reset(&w2);
551551
552552
pay = cson_new_object();
553553
554554
--- src/json_wiki.c
+++ src/json_wiki.c
@@ -543,11 +543,11 @@
543 blob_init(&w1, pW1->zWiki, -1);
544 blob_zero(&w2);
545 blob_init(&w2, pW2->zWiki, -1);
546 blob_zero(&d);
547 diffFlags = DIFF_IGNORE_EOLWS | DIFF_INLINE;
548 text_diff(&w2, &w1, &d, diffFlags);
549 blob_reset(&w1);
550 blob_reset(&w2);
551
552 pay = cson_new_object();
553
554
--- src/json_wiki.c
+++ src/json_wiki.c
@@ -543,11 +543,11 @@
543 blob_init(&w1, pW1->zWiki, -1);
544 blob_zero(&w2);
545 blob_init(&w2, pW2->zWiki, -1);
546 blob_zero(&d);
547 diffFlags = DIFF_IGNORE_EOLWS | DIFF_INLINE;
548 text_diff(&w2, &w1, &d, 0, diffFlags);
549 blob_reset(&w1);
550 blob_reset(&w2);
551
552 pay = cson_new_object();
553
554
+2 -2
--- src/merge3.c
+++ src/merge3.c
@@ -175,12 +175,12 @@
175175
** is the number of lines of text to copy directly from the pivot,
176176
** the second integer is the number of lines of text to omit from the
177177
** pivot, and the third integer is the number of lines of text that are
178178
** inserted. The edit array ends with a triple of 0,0,0.
179179
*/
180
- aC1 = text_diff(pPivot, pV1, 0, 0);
181
- aC2 = text_diff(pPivot, pV2, 0, 0);
180
+ aC1 = text_diff(pPivot, pV1, 0, 0, 0);
181
+ aC2 = text_diff(pPivot, pV2, 0, 0, 0);
182182
if( aC1==0 || aC2==0 ){
183183
free(aC1);
184184
free(aC2);
185185
return -1;
186186
}
187187
--- src/merge3.c
+++ src/merge3.c
@@ -175,12 +175,12 @@
175 ** is the number of lines of text to copy directly from the pivot,
176 ** the second integer is the number of lines of text to omit from the
177 ** pivot, and the third integer is the number of lines of text that are
178 ** inserted. The edit array ends with a triple of 0,0,0.
179 */
180 aC1 = text_diff(pPivot, pV1, 0, 0);
181 aC2 = text_diff(pPivot, pV2, 0, 0);
182 if( aC1==0 || aC2==0 ){
183 free(aC1);
184 free(aC2);
185 return -1;
186 }
187
--- src/merge3.c
+++ src/merge3.c
@@ -175,12 +175,12 @@
175 ** is the number of lines of text to copy directly from the pivot,
176 ** the second integer is the number of lines of text to omit from the
177 ** pivot, and the third integer is the number of lines of text that are
178 ** inserted. The edit array ends with a triple of 0,0,0.
179 */
180 aC1 = text_diff(pPivot, pV1, 0, 0, 0);
181 aC2 = text_diff(pPivot, pV2, 0, 0, 0);
182 if( aC1==0 || aC2==0 ){
183 free(aC1);
184 free(aC2);
185 return -1;
186 }
187
+87 -64
--- src/regexp.c
+++ src/regexp.c
@@ -94,19 +94,27 @@
9494
unsigned nState; /* Number of current states */
9595
ReStateNumber *aState; /* Current states */
9696
} ReStateSet;
9797
9898
#if INTERFACE
99
+/* An input string read one character at a time.
100
+*/
101
+struct ReInput {
102
+ const unsigned char *z; /* All text */
103
+ int i; /* Next byte to read */
104
+ int mx; /* EOF when i>=mx */
105
+};
106
+
99107
/* A compiled NFA (or an NFA that is in the process of being compiled) is
100108
** an instance of the following object.
101109
*/
102110
struct ReCompiled {
103
- const unsigned char *zIn; /* Regular expression text */
111
+ ReInput sIn; /* Regular expression text */
104112
const char *zErr; /* Error message to return */
105113
char *aOp; /* Operators for the virtual machine */
106114
int *aArg; /* Arguments to each operator */
107
- unsigned (*xNextChar)(const unsigned char**); /* Next character function */
115
+ unsigned (*xNextChar)(ReInput*); /* Next character function */
108116
char zInit[12]; /* Initial text to match */
109117
int nInit; /* Number of characters in zInit */
110118
unsigned nState; /* Number of entries in aOp[] and aArg[] */
111119
unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */
112120
};
@@ -122,37 +130,37 @@
122130
/* Extract the next unicode character from *pzIn and return it. Advance
123131
** *pzIn to the first byte past the end of the character returned. To
124132
** be clear: this routine converts utf8 to unicode. This routine is
125133
** optimized for the common case where the next character is a single byte.
126134
*/
127
-static unsigned re_next_char(const unsigned char **pzIn){
128
- unsigned c = **pzIn;
129
- if( c>0 ) (*pzIn)++;
135
+static unsigned re_next_char(ReInput *p){
136
+ unsigned c;
137
+ if( p->i>=p->mx ) return 0;
138
+ c = p->z[p->i++];
130139
if( c>0x80 ){
131
- if( (c&0xe0)==0xc0 && ((*pzIn)[0]&0xc0)==0x80 ){
132
- c = (c&0x1f)<<6 | ((*pzIn)[0]&0x3f);
133
- (*pzIn)++;
140
+ if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){
141
+ c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f);
134142
if( c<0x80 ) c = 0xfffd;
135
- }else if( (c&0xf0)==0xe0 && ((*pzIn)[0]&0xc0)==0x80
136
- && ((*pzIn)[1]&0xc0)==0x80 ){
137
- c = (c&0x0f)<<12 | (((*pzIn)[0]&0x3f)<<6) | ((*pzIn)[1]&0x3f);
138
- *pzIn += 2;
143
+ }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
144
+ && (p->z[p->i+1]&0xc0)==0x80 ){
145
+ c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
146
+ p->i += 2;
139147
if( c<0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
140
- }else if( (c&0xf8)==0xf0 && ((*pzIn)[0]&0xc0)==0x80
141
- && ((*pzIn)[1]&0xc0)==0x80 && ((*pzIn)[2]&0xc0)==0x80 ){
142
- c = (c&0x07)<<18 | (((*pzIn)[0]&0x3f)<<12) | (((*pzIn)[1]&0x3f)<<6)
143
- | ((*pzIn)[2]&0x3f);
144
- *pzIn += 3;
148
+ }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
149
+ && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
150
+ c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
151
+ | (p->z[p->i+2]&0x3f);
152
+ p->i += 3;
145153
if( c<0xffff ) c = 0xfffd;
146154
}else{
147155
c = 0xfffd;
148156
}
149157
}
150158
return c;
151159
}
152
-static unsigned re_next_char_nocase(const unsigned char **pzIn){
153
- unsigned c = re_next_char(pzIn);
160
+static unsigned re_next_char_nocase(ReInput *p){
161
+ unsigned c = re_next_char(p);
154162
return unicode_fold(c,1);
155163
}
156164
157165
/* Return true if c is a perl "word" character: [A-Za-z0-9_] */
158166
static int re_word_char(int c){
@@ -170,26 +178,32 @@
170178
}
171179
172180
/* Run a compiled regular expression on the zero-terminated input
173181
** string zIn[]. Return true on a match and false if there is no match.
174182
*/
175
-int re_exec(ReCompiled *pRe, const unsigned char *zIn){
183
+int re_exec(ReCompiled *pRe, const unsigned char *zIn, int nIn){
176184
ReStateSet aStateSet[2], *pThis, *pNext;
177185
ReStateNumber aSpace[100];
178186
ReStateNumber *pToFree;
179187
unsigned int i = 0;
180188
unsigned int iSwap = 0;
181189
int c = RE_EOF+1;
182190
int cPrev = 0;
183191
int rc = 0;
184
-
192
+ ReInput in;
193
+
194
+ in.z = zIn;
195
+ in.i = 0;
196
+ in.mx = nIn>=0 ? nIn : strlen(zIn);
185197
if( pRe->nInit ){
186198
unsigned char x = pRe->zInit[0];
187
- while( zIn[0] && (zIn[0]!=x || memcmp(zIn, pRe->zInit, pRe->nInit)!=0) ){
188
- zIn++;
199
+ while( in.i+pRe->nInit<in.mx
200
+ && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0)
201
+ ){
202
+ in.i++;
189203
}
190
- if( zIn[0]==0 ) return 0;
204
+ if( in.i+pRe->nInit>=in.mx ) return 0;
191205
}
192206
if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
193207
pToFree = 0;
194208
aStateSet[0].aState = aSpace;
195209
}else{
@@ -201,11 +215,11 @@
201215
pNext = &aStateSet[1];
202216
pNext->nState = 0;
203217
re_add_state(pNext, 0);
204218
while( c!=RE_EOF && pNext->nState>0 ){
205219
cPrev = c;
206
- c = pRe->xNextChar(&zIn);
220
+ c = pRe->xNextChar(&in);
207221
pThis = pNext;
208222
pNext = &aStateSet[iSwap];
209223
iSwap = 1 - iSwap;
210224
pNext->nState = 0;
211225
for(i=0; i<pThis->nState; i++){
@@ -370,42 +384,50 @@
370384
*/
371385
static unsigned re_esc_char(ReCompiled *p){
372386
static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
373387
static const char zTrans[] = "\a\f\n\r\t\v";
374388
int i, v = 0;
375
- char c = p->zIn[0];
376
- if( c=='u' ){
389
+ char c;
390
+ if( p->sIn.i>=p->sIn.mx ) return 0;
391
+ c = p->sIn.z[0];
392
+ if( c=='u' && p->sIn.i+5<p->sIn.mx ){
377393
v = 0;
378
- if( re_hex(p->zIn[1],&v)
379
- && re_hex(p->zIn[2],&v)
380
- && re_hex(p->zIn[3],&v)
381
- && re_hex(p->zIn[4],&v)
394
+ const unsigned char *zIn = p->sIn.z + p->sIn.i;
395
+ if( re_hex(zIn[1],&v)
396
+ && re_hex(zIn[2],&v)
397
+ && re_hex(zIn[3],&v)
398
+ && re_hex(zIn[4],&v)
382399
){
383
- p->zIn += 5;
400
+ p->sIn.i += 5;
384401
return v;
385402
}
386403
}
387404
if( c=='x' ){
388405
v = 0;
389
- for(i=1; re_hex(p->zIn[i], &v); i++){}
406
+ for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){}
390407
if( i>1 ){
391
- p->zIn += i;
408
+ p->sIn.i += i;
392409
return v;
393410
}
394411
}
395412
for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
396413
if( zEsc[i] ){
397414
if( i<6 ) c = zTrans[i];
398
- p->zIn++;
415
+ p->sIn.i++;
399416
}else{
400417
p->zErr = "unknown \\ escape";
401418
}
402419
return c;
403420
}
404421
405422
/* Forward declaration */
406423
static const char *re_subcompile_string(ReCompiled*);
424
+
425
+/* Peek at the next byte of input */
426
+static unsigned char rePeek(ReCompiled *p){
427
+ return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0;
428
+}
407429
408430
/* Compile RE text into a sequence of opcodes. Continue up to the
409431
** first unmatched ")" character, then return. If an error is found,
410432
** return a pointer to the error message string.
411433
*/
@@ -413,15 +435,15 @@
413435
const char *zErr;
414436
int iStart, iEnd, iGoto;
415437
iStart = p->nState;
416438
zErr = re_subcompile_string(p);
417439
if( zErr ) return zErr;
418
- while( p->zIn[0]=='|' ){
440
+ while( rePeek(p)=='|' ){
419441
iEnd = p->nState;
420442
re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart);
421443
iGoto = re_append(p, RE_OP_GOTO, 0);
422
- p->zIn++;
444
+ p->sIn.i++;
423445
zErr = re_subcompile_string(p);
424446
if( zErr ) return zErr;
425447
p->aArg[iGoto] = p->nState - iGoto;
426448
}
427449
return 0;
@@ -434,30 +456,30 @@
434456
static const char *re_subcompile_string(ReCompiled *p){
435457
int iPrev = -1;
436458
int iStart;
437459
unsigned c;
438460
const char *zErr;
439
- while( (c = p->xNextChar(&p->zIn))!=0 ){
461
+ while( (c = p->xNextChar(&p->sIn))!=0 ){
440462
iStart = p->nState;
441463
switch( c ){
442464
case '|':
443465
case '$':
444466
case ')': {
445
- p->zIn--;
467
+ p->sIn.i--;
446468
return 0;
447469
}
448470
case '(': {
449471
zErr = re_subcompile_re(p);
450472
if( zErr ) return zErr;
451
- if( p->zIn[0]!=')' ) return "unmatched '('";
452
- p->zIn++;
473
+ if( rePeek(p)!=')' ) return "unmatched '('";
474
+ p->sIn.i++;
453475
break;
454476
}
455477
case '.': {
456
- if( p->zIn[0]=='*' ){
478
+ if( rePeek(p)=='*' ){
457479
re_append(p, RE_OP_ANYSTAR, 0);
458
- p->zIn++;
480
+ p->sIn.i++;
459481
}else{
460482
re_append(p, RE_OP_ANY, 0);
461483
}
462484
break;
463485
}
@@ -479,20 +501,20 @@
479501
}
480502
case '{': {
481503
int m = 0, n = 0;
482504
int sz, j;
483505
if( iPrev<0 ) return "'{m,n}' without operand";
484
- while( (c=p->zIn[0])>='0' && c<='9' ){ m = m*10 + c - '0'; p->zIn++; }
506
+ while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; }
485507
n = m;
486508
if( c==',' ){
487
- p->zIn++;
509
+ p->sIn.i++;
488510
n = 0;
489
- while( (c=p->zIn[0])>='0' && c<='9' ){ n = n*10 + c - '0'; p->zIn++; }
511
+ while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; }
490512
}
491513
if( c!='}' ) return "unmatched '{'";
492514
if( n>0 && n<m ) return "n less than m in '{m,n}'";
493
- p->zIn++;
515
+ p->sIn.i++;
494516
sz = p->nState - iPrev;
495517
if( m==0 ){
496518
if( n==0 ) return "both m and n are zero in '{m,n}'";
497519
re_insert(p, iPrev, RE_OP_FORK, sz+1);
498520
n--;
@@ -508,49 +530,49 @@
508530
}
509531
break;
510532
}
511533
case '[': {
512534
int iFirst = p->nState;
513
- if( p->zIn[0]=='^' ){
535
+ if( rePeek(p)=='^' ){
514536
re_append(p, RE_OP_CC_EXC, 0);
515
- p->zIn++;
537
+ p->sIn.i++;
516538
}else{
517539
re_append(p, RE_OP_CC_INC, 0);
518540
}
519
- while( (c = p->xNextChar(&p->zIn))!=0 ){
520
- if( c=='[' && p->zIn[0]==':' ){
541
+ while( (c = p->xNextChar(&p->sIn))!=0 ){
542
+ if( c=='[' && rePeek(p)==':' ){
521543
return "POSIX character classes not supported";
522544
}
523545
if( c=='\\' ) c = re_esc_char(p);
524
- if( p->zIn[0]=='-' && p->zIn[1] ){
546
+ if( rePeek(p)=='-' ){
525547
re_append(p, RE_OP_CC_RANGE, c);
526
- p->zIn++;
527
- c = p->xNextChar(&p->zIn);
548
+ p->sIn.i++;
549
+ c = p->xNextChar(&p->sIn);
528550
if( c=='\\' ) c = re_esc_char(p);
529551
re_append(p, RE_OP_CC_RANGE, c);
530552
}else{
531553
re_append(p, RE_OP_CC_VALUE, c);
532554
}
533
- if( p->zIn[0]==']' ){ p->zIn++; break; }
555
+ if( rePeek(p)==']' ){ p->sIn.i++; break; }
534556
}
535557
if( c==0 ) return "unclosed '['";
536558
p->aArg[iFirst] = p->nState - iFirst;
537559
break;
538560
}
539561
case '\\': {
540562
int specialOp = 0;
541
- switch( p->zIn[0] ){
563
+ switch( rePeek(p) ){
542564
case 'b': specialOp = RE_OP_BOUNDARY; break;
543565
case 'd': specialOp = RE_OP_DIGIT; break;
544566
case 'D': specialOp = RE_OP_NOTDIGIT; break;
545567
case 's': specialOp = RE_OP_SPACE; break;
546568
case 'S': specialOp = RE_OP_NOTSPACE; break;
547569
case 'w': specialOp = RE_OP_WORD; break;
548570
case 'W': specialOp = RE_OP_NOTWORD; break;
549571
}
550572
if( specialOp ){
551
- p->zIn++;
573
+ p->sIn.i++;
552574
re_append(p, specialOp, 0);
553575
}else{
554576
c = re_esc_char(p);
555577
re_append(p, RE_OP_MATCH, c);
556578
}
@@ -602,21 +624,23 @@
602624
if( zIn[0]=='^' ){
603625
zIn++;
604626
}else{
605627
re_append(pRe, RE_OP_ANYSTAR, 0);
606628
}
607
- pRe->zIn = (unsigned char*)zIn;
629
+ pRe->sIn.z = (unsigned char*)zIn;
630
+ pRe->sIn.i = 0;
631
+ pRe->sIn.mx = strlen(pRe->sIn.z);
608632
zErr = re_subcompile_re(pRe);
609633
if( zErr ){
610634
re_free(pRe);
611635
return zErr;
612636
}
613
- if( pRe->zIn[0]=='$' && pRe->zIn[1]==0 ){
637
+ if( rePeek(pRe)=='$' && pRe->sIn.i+1==pRe->sIn.mx ){
614638
re_append(pRe, RE_OP_MATCH, RE_EOF);
615639
re_append(pRe, RE_OP_ACCEPT, 0);
616640
*ppRe = pRe;
617
- }else if( pRe->zIn[0]==0 ){
641
+ }else if( pRe->sIn.i>=pRe->sIn.mx ){
618642
re_append(pRe, RE_OP_ACCEPT, 0);
619643
*ppRe = pRe;
620644
}else{
621645
re_free(pRe);
622646
return "unrecognized character";
@@ -676,11 +700,11 @@
676700
}
677701
sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
678702
}
679703
zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
680704
if( zStr!=0 ){
681
- sqlite3_result_int(context, re_exec(pRe, zStr));
705
+ sqlite3_result_int(context, re_exec(pRe, zStr, -1));
682706
}
683707
}
684708
685709
/*
686710
** Invoke this routine in order to install the REGEXP function in an
@@ -707,13 +731,12 @@
707731
char zLine[2000];
708732
while( fgets(zLine, sizeof(zLine), in) ){
709733
ln++;
710734
n = (int)strlen(zLine);
711735
while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--;
712
- zLine[n] = 0;
713
- if( re_exec(pRe, (const unsigned char*)zLine) ){
714
- printf("%s:%d:%s\n", zFile, ln, zLine);
736
+ if( re_exec(pRe, (const unsigned char*)zLine, n) ){
737
+ printf("%s:%d:%.*s\n", zFile, ln, n, zLine);
715738
}
716739
}
717740
}
718741
719742
/*
720743
--- src/regexp.c
+++ src/regexp.c
@@ -94,19 +94,27 @@
94 unsigned nState; /* Number of current states */
95 ReStateNumber *aState; /* Current states */
96 } ReStateSet;
97
98 #if INTERFACE
 
 
 
 
 
 
 
 
99 /* A compiled NFA (or an NFA that is in the process of being compiled) is
100 ** an instance of the following object.
101 */
102 struct ReCompiled {
103 const unsigned char *zIn; /* Regular expression text */
104 const char *zErr; /* Error message to return */
105 char *aOp; /* Operators for the virtual machine */
106 int *aArg; /* Arguments to each operator */
107 unsigned (*xNextChar)(const unsigned char**); /* Next character function */
108 char zInit[12]; /* Initial text to match */
109 int nInit; /* Number of characters in zInit */
110 unsigned nState; /* Number of entries in aOp[] and aArg[] */
111 unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */
112 };
@@ -122,37 +130,37 @@
122 /* Extract the next unicode character from *pzIn and return it. Advance
123 ** *pzIn to the first byte past the end of the character returned. To
124 ** be clear: this routine converts utf8 to unicode. This routine is
125 ** optimized for the common case where the next character is a single byte.
126 */
127 static unsigned re_next_char(const unsigned char **pzIn){
128 unsigned c = **pzIn;
129 if( c>0 ) (*pzIn)++;
 
130 if( c>0x80 ){
131 if( (c&0xe0)==0xc0 && ((*pzIn)[0]&0xc0)==0x80 ){
132 c = (c&0x1f)<<6 | ((*pzIn)[0]&0x3f);
133 (*pzIn)++;
134 if( c<0x80 ) c = 0xfffd;
135 }else if( (c&0xf0)==0xe0 && ((*pzIn)[0]&0xc0)==0x80
136 && ((*pzIn)[1]&0xc0)==0x80 ){
137 c = (c&0x0f)<<12 | (((*pzIn)[0]&0x3f)<<6) | ((*pzIn)[1]&0x3f);
138 *pzIn += 2;
139 if( c<0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
140 }else if( (c&0xf8)==0xf0 && ((*pzIn)[0]&0xc0)==0x80
141 && ((*pzIn)[1]&0xc0)==0x80 && ((*pzIn)[2]&0xc0)==0x80 ){
142 c = (c&0x07)<<18 | (((*pzIn)[0]&0x3f)<<12) | (((*pzIn)[1]&0x3f)<<6)
143 | ((*pzIn)[2]&0x3f);
144 *pzIn += 3;
145 if( c<0xffff ) c = 0xfffd;
146 }else{
147 c = 0xfffd;
148 }
149 }
150 return c;
151 }
152 static unsigned re_next_char_nocase(const unsigned char **pzIn){
153 unsigned c = re_next_char(pzIn);
154 return unicode_fold(c,1);
155 }
156
157 /* Return true if c is a perl "word" character: [A-Za-z0-9_] */
158 static int re_word_char(int c){
@@ -170,26 +178,32 @@
170 }
171
172 /* Run a compiled regular expression on the zero-terminated input
173 ** string zIn[]. Return true on a match and false if there is no match.
174 */
175 int re_exec(ReCompiled *pRe, const unsigned char *zIn){
176 ReStateSet aStateSet[2], *pThis, *pNext;
177 ReStateNumber aSpace[100];
178 ReStateNumber *pToFree;
179 unsigned int i = 0;
180 unsigned int iSwap = 0;
181 int c = RE_EOF+1;
182 int cPrev = 0;
183 int rc = 0;
184
 
 
 
 
185 if( pRe->nInit ){
186 unsigned char x = pRe->zInit[0];
187 while( zIn[0] && (zIn[0]!=x || memcmp(zIn, pRe->zInit, pRe->nInit)!=0) ){
188 zIn++;
 
 
189 }
190 if( zIn[0]==0 ) return 0;
191 }
192 if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
193 pToFree = 0;
194 aStateSet[0].aState = aSpace;
195 }else{
@@ -201,11 +215,11 @@
201 pNext = &aStateSet[1];
202 pNext->nState = 0;
203 re_add_state(pNext, 0);
204 while( c!=RE_EOF && pNext->nState>0 ){
205 cPrev = c;
206 c = pRe->xNextChar(&zIn);
207 pThis = pNext;
208 pNext = &aStateSet[iSwap];
209 iSwap = 1 - iSwap;
210 pNext->nState = 0;
211 for(i=0; i<pThis->nState; i++){
@@ -370,42 +384,50 @@
370 */
371 static unsigned re_esc_char(ReCompiled *p){
372 static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
373 static const char zTrans[] = "\a\f\n\r\t\v";
374 int i, v = 0;
375 char c = p->zIn[0];
376 if( c=='u' ){
 
 
377 v = 0;
378 if( re_hex(p->zIn[1],&v)
379 && re_hex(p->zIn[2],&v)
380 && re_hex(p->zIn[3],&v)
381 && re_hex(p->zIn[4],&v)
 
382 ){
383 p->zIn += 5;
384 return v;
385 }
386 }
387 if( c=='x' ){
388 v = 0;
389 for(i=1; re_hex(p->zIn[i], &v); i++){}
390 if( i>1 ){
391 p->zIn += i;
392 return v;
393 }
394 }
395 for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
396 if( zEsc[i] ){
397 if( i<6 ) c = zTrans[i];
398 p->zIn++;
399 }else{
400 p->zErr = "unknown \\ escape";
401 }
402 return c;
403 }
404
405 /* Forward declaration */
406 static const char *re_subcompile_string(ReCompiled*);
 
 
 
 
 
407
408 /* Compile RE text into a sequence of opcodes. Continue up to the
409 ** first unmatched ")" character, then return. If an error is found,
410 ** return a pointer to the error message string.
411 */
@@ -413,15 +435,15 @@
413 const char *zErr;
414 int iStart, iEnd, iGoto;
415 iStart = p->nState;
416 zErr = re_subcompile_string(p);
417 if( zErr ) return zErr;
418 while( p->zIn[0]=='|' ){
419 iEnd = p->nState;
420 re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart);
421 iGoto = re_append(p, RE_OP_GOTO, 0);
422 p->zIn++;
423 zErr = re_subcompile_string(p);
424 if( zErr ) return zErr;
425 p->aArg[iGoto] = p->nState - iGoto;
426 }
427 return 0;
@@ -434,30 +456,30 @@
434 static const char *re_subcompile_string(ReCompiled *p){
435 int iPrev = -1;
436 int iStart;
437 unsigned c;
438 const char *zErr;
439 while( (c = p->xNextChar(&p->zIn))!=0 ){
440 iStart = p->nState;
441 switch( c ){
442 case '|':
443 case '$':
444 case ')': {
445 p->zIn--;
446 return 0;
447 }
448 case '(': {
449 zErr = re_subcompile_re(p);
450 if( zErr ) return zErr;
451 if( p->zIn[0]!=')' ) return "unmatched '('";
452 p->zIn++;
453 break;
454 }
455 case '.': {
456 if( p->zIn[0]=='*' ){
457 re_append(p, RE_OP_ANYSTAR, 0);
458 p->zIn++;
459 }else{
460 re_append(p, RE_OP_ANY, 0);
461 }
462 break;
463 }
@@ -479,20 +501,20 @@
479 }
480 case '{': {
481 int m = 0, n = 0;
482 int sz, j;
483 if( iPrev<0 ) return "'{m,n}' without operand";
484 while( (c=p->zIn[0])>='0' && c<='9' ){ m = m*10 + c - '0'; p->zIn++; }
485 n = m;
486 if( c==',' ){
487 p->zIn++;
488 n = 0;
489 while( (c=p->zIn[0])>='0' && c<='9' ){ n = n*10 + c - '0'; p->zIn++; }
490 }
491 if( c!='}' ) return "unmatched '{'";
492 if( n>0 && n<m ) return "n less than m in '{m,n}'";
493 p->zIn++;
494 sz = p->nState - iPrev;
495 if( m==0 ){
496 if( n==0 ) return "both m and n are zero in '{m,n}'";
497 re_insert(p, iPrev, RE_OP_FORK, sz+1);
498 n--;
@@ -508,49 +530,49 @@
508 }
509 break;
510 }
511 case '[': {
512 int iFirst = p->nState;
513 if( p->zIn[0]=='^' ){
514 re_append(p, RE_OP_CC_EXC, 0);
515 p->zIn++;
516 }else{
517 re_append(p, RE_OP_CC_INC, 0);
518 }
519 while( (c = p->xNextChar(&p->zIn))!=0 ){
520 if( c=='[' && p->zIn[0]==':' ){
521 return "POSIX character classes not supported";
522 }
523 if( c=='\\' ) c = re_esc_char(p);
524 if( p->zIn[0]=='-' && p->zIn[1] ){
525 re_append(p, RE_OP_CC_RANGE, c);
526 p->zIn++;
527 c = p->xNextChar(&p->zIn);
528 if( c=='\\' ) c = re_esc_char(p);
529 re_append(p, RE_OP_CC_RANGE, c);
530 }else{
531 re_append(p, RE_OP_CC_VALUE, c);
532 }
533 if( p->zIn[0]==']' ){ p->zIn++; break; }
534 }
535 if( c==0 ) return "unclosed '['";
536 p->aArg[iFirst] = p->nState - iFirst;
537 break;
538 }
539 case '\\': {
540 int specialOp = 0;
541 switch( p->zIn[0] ){
542 case 'b': specialOp = RE_OP_BOUNDARY; break;
543 case 'd': specialOp = RE_OP_DIGIT; break;
544 case 'D': specialOp = RE_OP_NOTDIGIT; break;
545 case 's': specialOp = RE_OP_SPACE; break;
546 case 'S': specialOp = RE_OP_NOTSPACE; break;
547 case 'w': specialOp = RE_OP_WORD; break;
548 case 'W': specialOp = RE_OP_NOTWORD; break;
549 }
550 if( specialOp ){
551 p->zIn++;
552 re_append(p, specialOp, 0);
553 }else{
554 c = re_esc_char(p);
555 re_append(p, RE_OP_MATCH, c);
556 }
@@ -602,21 +624,23 @@
602 if( zIn[0]=='^' ){
603 zIn++;
604 }else{
605 re_append(pRe, RE_OP_ANYSTAR, 0);
606 }
607 pRe->zIn = (unsigned char*)zIn;
 
 
608 zErr = re_subcompile_re(pRe);
609 if( zErr ){
610 re_free(pRe);
611 return zErr;
612 }
613 if( pRe->zIn[0]=='$' && pRe->zIn[1]==0 ){
614 re_append(pRe, RE_OP_MATCH, RE_EOF);
615 re_append(pRe, RE_OP_ACCEPT, 0);
616 *ppRe = pRe;
617 }else if( pRe->zIn[0]==0 ){
618 re_append(pRe, RE_OP_ACCEPT, 0);
619 *ppRe = pRe;
620 }else{
621 re_free(pRe);
622 return "unrecognized character";
@@ -676,11 +700,11 @@
676 }
677 sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
678 }
679 zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
680 if( zStr!=0 ){
681 sqlite3_result_int(context, re_exec(pRe, zStr));
682 }
683 }
684
685 /*
686 ** Invoke this routine in order to install the REGEXP function in an
@@ -707,13 +731,12 @@
707 char zLine[2000];
708 while( fgets(zLine, sizeof(zLine), in) ){
709 ln++;
710 n = (int)strlen(zLine);
711 while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--;
712 zLine[n] = 0;
713 if( re_exec(pRe, (const unsigned char*)zLine) ){
714 printf("%s:%d:%s\n", zFile, ln, zLine);
715 }
716 }
717 }
718
719 /*
720
--- src/regexp.c
+++ src/regexp.c
@@ -94,19 +94,27 @@
94 unsigned nState; /* Number of current states */
95 ReStateNumber *aState; /* Current states */
96 } ReStateSet;
97
98 #if INTERFACE
/* A cursor over a byte string that is consumed one character at a time.
** Reading stops at offset mx rather than at a terminator byte.
*/
struct ReInput {
  const unsigned char *z;  /* Start of the text */
  int i;                   /* Offset of the next byte to read */
  int mx;                  /* End of input is reached when i>=mx */
};
106
107 /* A compiled NFA (or an NFA that is in the process of being compiled) is
108 ** an instance of the following object.
109 */
110 struct ReCompiled {
111 ReInput sIn; /* Regular expression text */
112 const char *zErr; /* Error message to return */
113 char *aOp; /* Operators for the virtual machine */
114 int *aArg; /* Arguments to each operator */
115 unsigned (*xNextChar)(ReInput*); /* Next character function */
116 char zInit[12]; /* Initial text to match */
117 int nInit; /* Number of characters in zInit */
118 unsigned nState; /* Number of entries in aOp[] and aArg[] */
119 unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */
120 };
@@ -122,37 +130,37 @@
130 /* Extract the next unicode character from *pzIn and return it. Advance
131 ** *pzIn to the first byte past the end of the character returned. To
132 ** be clear: this routine converts utf8 to unicode. This routine is
133 ** optimized for the common case where the next character is a single byte.
134 */
135 static unsigned re_next_char(ReInput *p){
136 unsigned c;
137 if( p->i>=p->mx ) return 0;
138 c = p->z[p->i++];
139 if( c>0x80 ){
140 if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){
141 c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f);
 
142 if( c<0x80 ) c = 0xfffd;
143 }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80
144 && (p->z[p->i+1]&0xc0)==0x80 ){
145 c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
146 p->i += 2;
147 if( c<0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
148 }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
149 && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
150 c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
151 | (p->z[p->i+2]&0x3f);
152 p->i += 3;
153 if( c<0xffff ) c = 0xfffd;
154 }else{
155 c = 0xfffd;
156 }
157 }
158 return c;
159 }
160 static unsigned re_next_char_nocase(ReInput *p){
161 unsigned c = re_next_char(p);
162 return unicode_fold(c,1);
163 }
164
165 /* Return true if c is a perl "word" character: [A-Za-z0-9_] */
166 static int re_word_char(int c){
@@ -170,26 +178,32 @@
178 }
179
180 /* Run a compiled regular expression on the first nIn bytes of zIn[], or on
181 ** all of zero-terminated zIn[] if nIn<0. Return true on a match, else false.
182 */
183 int re_exec(ReCompiled *pRe, const unsigned char *zIn, int nIn){
184 ReStateSet aStateSet[2], *pThis, *pNext;
185 ReStateNumber aSpace[100];
186 ReStateNumber *pToFree;
187 unsigned int i = 0;
188 unsigned int iSwap = 0;
189 int c = RE_EOF+1;
190 int cPrev = 0;
191 int rc = 0;
192 ReInput in;
193
194 in.z = zIn;
195 in.i = 0;
196 in.mx = nIn>=0 ? nIn : strlen(zIn);
197 if( pRe->nInit ){
198 unsigned char x = pRe->zInit[0];
199 while( in.i+pRe->nInit<in.mx
200 && (zIn[in.i]!=x || memcmp(zIn+in.i, pRe->zInit, pRe->nInit)!=0)
201 ){
202 in.i++;
203 }
204 if( in.i+pRe->nInit>=in.mx ) return 0;
205 }
206 if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
207 pToFree = 0;
208 aStateSet[0].aState = aSpace;
209 }else{
@@ -201,11 +215,11 @@
215 pNext = &aStateSet[1];
216 pNext->nState = 0;
217 re_add_state(pNext, 0);
218 while( c!=RE_EOF && pNext->nState>0 ){
219 cPrev = c;
220 c = pRe->xNextChar(&in);
221 pThis = pNext;
222 pNext = &aStateSet[iSwap];
223 iSwap = 1 - iSwap;
224 pNext->nState = 0;
225 for(i=0; i<pThis->nState; i++){
@@ -370,42 +384,50 @@
384 */
385 static unsigned re_esc_char(ReCompiled *p){
386 static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]";
387 static const char zTrans[] = "\a\f\n\r\t\v";
388 int i, v = 0;
389 char c;
390 if( p->sIn.i>=p->sIn.mx ) return 0;
391 c = p->sIn.z[0];
392 if( c=='u' && p->sIn.i+5<p->sIn.mx ){
393 v = 0;
394 const unsigned char *zIn = p->sIn.z + p->sIn.i;
395 if( re_hex(zIn[1],&v)
396 && re_hex(zIn[2],&v)
397 && re_hex(zIn[3],&v)
398 && re_hex(zIn[4],&v)
399 ){
400 p->sIn.i += 5;
401 return v;
402 }
403 }
404 if( c=='x' ){
405 v = 0;
406 for(i=1; p->sIn.i<p->sIn.mx && re_hex(p->sIn.z[p->sIn.i+i], &v); i++){}
407 if( i>1 ){
408 p->sIn.i += i;
409 return v;
410 }
411 }
412 for(i=0; zEsc[i] && zEsc[i]!=c; i++){}
413 if( zEsc[i] ){
414 if( i<6 ) c = zTrans[i];
415 p->sIn.i++;
416 }else{
417 p->zErr = "unknown \\ escape";
418 }
419 return c;
420 }
421
422 /* Forward declaration */
423 static const char *re_subcompile_string(ReCompiled*);
424
425 /* Peek at the next byte of input */
426 static unsigned char rePeek(ReCompiled *p){
427 return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0;
428 }
429
430 /* Compile RE text into a sequence of opcodes. Continue up to the
431 ** first unmatched ")" character, then return. If an error is found,
432 ** return a pointer to the error message string.
433 */
@@ -413,15 +435,15 @@
435 const char *zErr;
436 int iStart, iEnd, iGoto;
437 iStart = p->nState;
438 zErr = re_subcompile_string(p);
439 if( zErr ) return zErr;
440 while( rePeek(p)=='|' ){
441 iEnd = p->nState;
442 re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart);
443 iGoto = re_append(p, RE_OP_GOTO, 0);
444 p->sIn.i++;
445 zErr = re_subcompile_string(p);
446 if( zErr ) return zErr;
447 p->aArg[iGoto] = p->nState - iGoto;
448 }
449 return 0;
@@ -434,30 +456,30 @@
456 static const char *re_subcompile_string(ReCompiled *p){
457 int iPrev = -1;
458 int iStart;
459 unsigned c;
460 const char *zErr;
461 while( (c = p->xNextChar(&p->sIn))!=0 ){
462 iStart = p->nState;
463 switch( c ){
464 case '|':
465 case '$':
466 case ')': {
467 p->sIn.i--;
468 return 0;
469 }
470 case '(': {
471 zErr = re_subcompile_re(p);
472 if( zErr ) return zErr;
473 if( rePeek(p)!=')' ) return "unmatched '('";
474 p->sIn.i++;
475 break;
476 }
477 case '.': {
478 if( rePeek(p)=='*' ){
479 re_append(p, RE_OP_ANYSTAR, 0);
480 p->sIn.i++;
481 }else{
482 re_append(p, RE_OP_ANY, 0);
483 }
484 break;
485 }
@@ -479,20 +501,20 @@
501 }
502 case '{': {
503 int m = 0, n = 0;
504 int sz, j;
505 if( iPrev<0 ) return "'{m,n}' without operand";
506 while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; }
507 n = m;
508 if( c==',' ){
509 p->sIn.i++;
510 n = 0;
511 while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; }
512 }
513 if( c!='}' ) return "unmatched '{'";
514 if( n>0 && n<m ) return "n less than m in '{m,n}'";
515 p->sIn.i++;
516 sz = p->nState - iPrev;
517 if( m==0 ){
518 if( n==0 ) return "both m and n are zero in '{m,n}'";
519 re_insert(p, iPrev, RE_OP_FORK, sz+1);
520 n--;
@@ -508,49 +530,49 @@
530 }
531 break;
532 }
533 case '[': {
534 int iFirst = p->nState;
535 if( rePeek(p)=='^' ){
536 re_append(p, RE_OP_CC_EXC, 0);
537 p->sIn.i++;
538 }else{
539 re_append(p, RE_OP_CC_INC, 0);
540 }
541 while( (c = p->xNextChar(&p->sIn))!=0 ){
542 if( c=='[' && rePeek(p)==':' ){
543 return "POSIX character classes not supported";
544 }
545 if( c=='\\' ) c = re_esc_char(p);
546 if( rePeek(p)=='-' ){
547 re_append(p, RE_OP_CC_RANGE, c);
548 p->sIn.i++;
549 c = p->xNextChar(&p->sIn);
550 if( c=='\\' ) c = re_esc_char(p);
551 re_append(p, RE_OP_CC_RANGE, c);
552 }else{
553 re_append(p, RE_OP_CC_VALUE, c);
554 }
555 if( rePeek(p)==']' ){ p->sIn.i++; break; }
556 }
557 if( c==0 ) return "unclosed '['";
558 p->aArg[iFirst] = p->nState - iFirst;
559 break;
560 }
561 case '\\': {
562 int specialOp = 0;
563 switch( rePeek(p) ){
564 case 'b': specialOp = RE_OP_BOUNDARY; break;
565 case 'd': specialOp = RE_OP_DIGIT; break;
566 case 'D': specialOp = RE_OP_NOTDIGIT; break;
567 case 's': specialOp = RE_OP_SPACE; break;
568 case 'S': specialOp = RE_OP_NOTSPACE; break;
569 case 'w': specialOp = RE_OP_WORD; break;
570 case 'W': specialOp = RE_OP_NOTWORD; break;
571 }
572 if( specialOp ){
573 p->sIn.i++;
574 re_append(p, specialOp, 0);
575 }else{
576 c = re_esc_char(p);
577 re_append(p, RE_OP_MATCH, c);
578 }
@@ -602,21 +624,23 @@
624 if( zIn[0]=='^' ){
625 zIn++;
626 }else{
627 re_append(pRe, RE_OP_ANYSTAR, 0);
628 }
629 pRe->sIn.z = (unsigned char*)zIn;
630 pRe->sIn.i = 0;
631 pRe->sIn.mx = strlen(pRe->sIn.z);
632 zErr = re_subcompile_re(pRe);
633 if( zErr ){
634 re_free(pRe);
635 return zErr;
636 }
637 if( rePeek(pRe)=='$' && pRe->sIn.i+1==pRe->sIn.mx ){
638 re_append(pRe, RE_OP_MATCH, RE_EOF);
639 re_append(pRe, RE_OP_ACCEPT, 0);
640 *ppRe = pRe;
641 }else if( pRe->sIn.i>=pRe->sIn.mx ){
642 re_append(pRe, RE_OP_ACCEPT, 0);
643 *ppRe = pRe;
644 }else{
645 re_free(pRe);
646 return "unrecognized character";
@@ -676,11 +700,11 @@
700 }
701 sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free);
702 }
703 zStr = (const unsigned char*)sqlite3_value_text(argv[1]);
704 if( zStr!=0 ){
705 sqlite3_result_int(context, re_exec(pRe, zStr, -1));
706 }
707 }
708
709 /*
710 ** Invoke this routine in order to install the REGEXP function in an
@@ -707,13 +731,12 @@
731 char zLine[2000];
732 while( fgets(zLine, sizeof(zLine), in) ){
733 ln++;
734 n = (int)strlen(zLine);
735 while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--;
736 if( re_exec(pRe, (const unsigned char*)zLine, n) ){
737 printf("%s:%d:%.*s\n", zFile, ln, n, zLine);
 
738 }
739 }
740 }
741
742 /*
743
+1 -1
--- src/wiki.c
+++ src/wiki.c
@@ -667,11 +667,11 @@
667667
if( rid2 && (pW2 = manifest_get(rid2, CFTYPE_WIKI))!=0 ){
668668
blob_init(&w2, pW2->zWiki, -1);
669669
}
670670
blob_zero(&d);
671671
diffFlags = construct_diff_flags(1,0);
672
- text_diff(&w2, &w1, &d, diffFlags | DIFF_HTML | DIFF_LINENO);
672
+ text_diff(&w2, &w1, &d, 0, diffFlags | DIFF_HTML | DIFF_LINENO);
673673
@ <div class="udiff">
674674
@ %s(blob_str(&d))
675675
@ </div>
676676
manifest_destroy(pW1);
677677
manifest_destroy(pW2);
678678
--- src/wiki.c
+++ src/wiki.c
@@ -667,11 +667,11 @@
667 if( rid2 && (pW2 = manifest_get(rid2, CFTYPE_WIKI))!=0 ){
668 blob_init(&w2, pW2->zWiki, -1);
669 }
670 blob_zero(&d);
671 diffFlags = construct_diff_flags(1,0);
672 text_diff(&w2, &w1, &d, diffFlags | DIFF_HTML | DIFF_LINENO);
673 @ <div class="udiff">
674 @ %s(blob_str(&d))
675 @ </div>
676 manifest_destroy(pW1);
677 manifest_destroy(pW2);
678
--- src/wiki.c
+++ src/wiki.c
@@ -667,11 +667,11 @@
667 if( rid2 && (pW2 = manifest_get(rid2, CFTYPE_WIKI))!=0 ){
668 blob_init(&w2, pW2->zWiki, -1);
669 }
670 blob_zero(&d);
671 diffFlags = construct_diff_flags(1,0);
672 text_diff(&w2, &w1, &d, 0, diffFlags | DIFF_HTML | DIFF_LINENO);
673 @ <div class="udiff">
674 @ %s(blob_str(&d))
675 @ </div>
676 manifest_destroy(pW1);
677 manifest_destroy(pW2);
678

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button