Fossil SCM

Use the new multi-recursive-term capability of CTEs in SQLite to fix the /finfo clade calculation, and to make /finfo run about 5x faster.

drh 2020-10-19 14:37 trunk
Commit 47bfea074b1226161c9cbe6489b742b7b8cdf3d3c2cd985718d6d504b1a5a37c
1 file changed +17 -39
+17 -39
--- src/finfo.c
+++ src/finfo.c
@@ -386,56 +386,34 @@
386386
blob_zero(&sql);
387387
if( ridCi ){
388388
/* If we will be tracking changes across renames, some extra temp
389389
** tables (implemented as CTEs) are required */
390390
blob_append_sql(&sql,
391
- /* The fns(fnid) table holds the list of all filename-IDs that
392
- ** might possibly exist in the output. This is an optimization
393
- ** used to reduce the size and computation efforts for subsequent
394
- ** CTEs.
395
- */
396
- "WITH RECURSIVE fns(fnid) AS (\n"
397
- " SELECT %d\n" /* <---- fnid */
398
- " UNION\n"
399
- " SELECT pfnid FROM mlink, fns\n"
400
- " WHERE mlink.fnid=fns.fnid\n"
401
- " AND pfnid>0\n"
402
- "),\n"
403
-
404
- /* The flink(fid,fnid,pfid,pfnid) table indicates that there
405
- ** is an edit and/or rename arc connecting two files (fid,fnid)
406
- ** and (pfid,pfnid). This is similar to the built-in mlink
407
- ** table except that flink() is bidirectional. Also the pfnid
408
- ** column is always set even no rename occurs.
409
- */
410
- "flink(fid,fnid,pfid,pfnid) AS (\n"
411
- " SELECT fid, fnid, pid,\n"
412
- " CASE WHEN pfnid>0 THEN pfnid ELSE fnid END\n"
413
- " FROM mlink\n"
414
- " WHERE NOT isaux AND fid>0 AND pid>0 AND fnid IN fns\n"
415
- " UNION\n"
416
- " SELECT pid,\n"
417
- " CASE WHEN pfnid>0 THEN pfnid ELSE fnid END,\n"
418
- " fid, fnid\n"
419
- " FROM mlink\n"
420
- " WHERE NOT isaux AND pid>0 AND fid>0 AND fnid IN fns\n"
421
- "),\n"
422
-
423391
/* The clade(fid,fnid) table is the set of all (fid,fnid) pairs
424392
** that should participate in the output. Clade is computed by
425
- ** walking the graph formed by the flink table.
393
+ ** walking the graph of mlink edges.
426394
*/
427
- "clade(fid,fnid) AS (\n"
395
+ "WITH RECURSIVE clade(fid,fnid) AS (\n"
428396
" SELECT blob.rid, %d FROM blob\n" /* %d is fnid */
429397
" WHERE blob.uuid=(SELECT uuid FROM files_of_checkin(%Q)\n"
430398
" WHERE filename=%Q)\n" /* %Q is the filename */
431399
" UNION\n"
432
- " SELECT flink.fid, flink.fnid\n"
433
- " FROM clade, flink\n"
434
- " WHERE clade.fid=flink.pfid AND clade.fnid=flink.pfnid\n"
400
+ " SELECT mlink.fid, mlink.fnid\n"
401
+ " FROM clade, mlink\n"
402
+ " WHERE clade.fid=mlink.pid\n"
403
+ " AND ((mlink.pfnid=0 AND mlink.fnid=clade.fnid)\n"
404
+ " OR mlink.pfnid=clade.fnid)\n"
405
+ " AND mlink.fid>0"
406
+ " UNION\n"
407
+ " SELECT mlink.pid,"
408
+ " CASE WHEN mlink.pfnid>0 THEN mlink.pfnid ELSE mlink.fnid END\n"
409
+ " FROM clade, mlink\n"
410
+ " WHERE mlink.pid>0\n"
411
+ " AND mlink.fid=clade.fid\n"
412
+ " AND mlink.fnid=clade.fnid\n"
435413
")\n",
436
- fnid, fnid, zCI, zFilename
414
+ fnid, zCI, zFilename
437415
);
438416
}else{
439417
/* This is the case for all files with a given name. We will still
440418
** create a "clade(fid,fnid)" table that identifies all participates
441419
** in the output graph, so that subsequent queries can all be the same,
@@ -466,11 +444,11 @@
466444
" mlink.mid,\n" /* check-in ID */
467445
" mlink.pfnid,\n" /* Previous filename */
468446
" blob.size,\n" /* File size */
469447
" mlink.fnid,\n" /* Current filename */
470448
" filename.name\n" /* Current filename */
471
- "FROM clade, mlink, event, blob, filename\n"
449
+ "FROM clade CROSS JOIN mlink, event, blob, filename\n"
472450
"WHERE mlink.fnid=clade.fnid AND mlink.fid=clade.fid\n"
473451
" AND event.objid=mlink.mid\n"
474452
" AND blob.rid=clade.fid\n"
475453
" AND filename.fnid=clade.fnid\n",
476454
TAG_BRANCH
477455
--- src/finfo.c
+++ src/finfo.c
@@ -386,56 +386,34 @@
386 blob_zero(&sql);
387 if( ridCi ){
388 /* If we will be tracking changes across renames, some extra temp
389 ** tables (implemented as CTEs) are required */
390 blob_append_sql(&sql,
391 /* The fns(fnid) table holds the list of all filename-IDs that
392 ** might possibly exist in the output. This is an optimization
393 ** used to reduce the size and computation efforts for subsequent
394 ** CTEs.
395 */
396 "WITH RECURSIVE fns(fnid) AS (\n"
397 " SELECT %d\n" /* <---- fnid */
398 " UNION\n"
399 " SELECT pfnid FROM mlink, fns\n"
400 " WHERE mlink.fnid=fns.fnid\n"
401 " AND pfnid>0\n"
402 "),\n"
403
404 /* The flink(fid,fnid,pfid,pfnid) table indicates that there
405 ** is an edit and/or rename arc connecting two files (fid,fnid)
406 ** and (pfid,pfnid). This is similar to the built-in mlink
407 ** table except that flink() is bidirectional. Also the pfnid
408 ** column is always set even no rename occurs.
409 */
410 "flink(fid,fnid,pfid,pfnid) AS (\n"
411 " SELECT fid, fnid, pid,\n"
412 " CASE WHEN pfnid>0 THEN pfnid ELSE fnid END\n"
413 " FROM mlink\n"
414 " WHERE NOT isaux AND fid>0 AND pid>0 AND fnid IN fns\n"
415 " UNION\n"
416 " SELECT pid,\n"
417 " CASE WHEN pfnid>0 THEN pfnid ELSE fnid END,\n"
418 " fid, fnid\n"
419 " FROM mlink\n"
420 " WHERE NOT isaux AND pid>0 AND fid>0 AND fnid IN fns\n"
421 "),\n"
422
423 /* The clade(fid,fnid) table is the set of all (fid,fnid) pairs
424 ** that should participate in the output. Clade is computed by
425 ** walking the graph formed by the flink table.
426 */
427 "clade(fid,fnid) AS (\n"
428 " SELECT blob.rid, %d FROM blob\n" /* %d is fnid */
429 " WHERE blob.uuid=(SELECT uuid FROM files_of_checkin(%Q)\n"
430 " WHERE filename=%Q)\n" /* %Q is the filename */
431 " UNION\n"
432 " SELECT flink.fid, flink.fnid\n"
433 " FROM clade, flink\n"
434 " WHERE clade.fid=flink.pfid AND clade.fnid=flink.pfnid\n"
 
 
 
 
 
 
 
 
 
 
435 ")\n",
436 fnid, fnid, zCI, zFilename
437 );
438 }else{
439 /* This is the case for all files with a given name. We will still
440 ** create a "clade(fid,fnid)" table that identifies all participates
441 ** in the output graph, so that subsequent queries can all be the same,
@@ -466,11 +444,11 @@
466 " mlink.mid,\n" /* check-in ID */
467 " mlink.pfnid,\n" /* Previous filename */
468 " blob.size,\n" /* File size */
469 " mlink.fnid,\n" /* Current filename */
470 " filename.name\n" /* Current filename */
471 "FROM clade, mlink, event, blob, filename\n"
472 "WHERE mlink.fnid=clade.fnid AND mlink.fid=clade.fid\n"
473 " AND event.objid=mlink.mid\n"
474 " AND blob.rid=clade.fid\n"
475 " AND filename.fnid=clade.fnid\n",
476 TAG_BRANCH
477
--- src/finfo.c
+++ src/finfo.c
@@ -386,56 +386,34 @@
386 blob_zero(&sql);
387 if( ridCi ){
388 /* If we will be tracking changes across renames, some extra temp
389 ** tables (implemented as CTEs) are required */
390 blob_append_sql(&sql,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391 /* The clade(fid,fnid) table is the set of all (fid,fnid) pairs
392 ** that should participate in the output. Clade is computed by
393 ** walking the graph of mlink edges.
394 */
395 "WITH RECURSIVE clade(fid,fnid) AS (\n"
396 " SELECT blob.rid, %d FROM blob\n" /* %d is fnid */
397 " WHERE blob.uuid=(SELECT uuid FROM files_of_checkin(%Q)\n"
398 " WHERE filename=%Q)\n" /* %Q is the filename */
399 " UNION\n"
400 " SELECT mlink.fid, mlink.fnid\n"
401 " FROM clade, mlink\n"
402 " WHERE clade.fid=mlink.pid\n"
403 " AND ((mlink.pfnid=0 AND mlink.fnid=clade.fnid)\n"
404 " OR mlink.pfnid=clade.fnid)\n"
405 " AND mlink.fid>0"
406 " UNION\n"
407 " SELECT mlink.pid,"
408 " CASE WHEN mlink.pfnid>0 THEN mlink.pfnid ELSE mlink.fnid END\n"
409 " FROM clade, mlink\n"
410 " WHERE mlink.pid>0\n"
411 " AND mlink.fid=clade.fid\n"
412 " AND mlink.fnid=clade.fnid\n"
413 ")\n",
414 fnid, zCI, zFilename
415 );
416 }else{
417 /* This is the case for all files with a given name. We will still
418 ** create a "clade(fid,fnid)" table that identifies all participates
419 ** in the output graph, so that subsequent queries can all be the same,
@@ -466,11 +444,11 @@
444 " mlink.mid,\n" /* check-in ID */
445 " mlink.pfnid,\n" /* Previous filename */
446 " blob.size,\n" /* File size */
447 " mlink.fnid,\n" /* Current filename */
448 " filename.name\n" /* Current filename */
449 "FROM clade CROSS JOIN mlink, event, blob, filename\n"
450 "WHERE mlink.fnid=clade.fnid AND mlink.fid=clade.fid\n"
451 " AND event.objid=mlink.mid\n"
452 " AND blob.rid=clade.fid\n"
453 " AND filename.fnid=clade.fnid\n",
454 TAG_BRANCH
455

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button