Fossil SCM
Use the new multi-recursive-term capability of CTEs in SQLite to fix the /finfo clade calculation, and to make /finfo run about 5x faster.
Commit
47bfea074b1226161c9cbe6489b742b7b8cdf3d3c2cd985718d6d504b1a5a37c
Parent
5328f8216004895…
1 file changed
+17
-39
+17
-39
| --- src/finfo.c | ||
| +++ src/finfo.c | ||
| @@ -386,56 +386,34 @@ | ||
| 386 | 386 | blob_zero(&sql); |
| 387 | 387 | if( ridCi ){ |
| 388 | 388 | /* If we will be tracking changes across renames, some extra temp |
| 389 | 389 | ** tables (implemented as CTEs) are required */ |
| 390 | 390 | blob_append_sql(&sql, |
| 391 | - /* The fns(fnid) table holds the list of all filename-IDs that | |
| 392 | - ** might possibly exist in the output. This is an optimization | |
| 393 | - ** used to reduce the size and computation efforts for subsequent | |
| 394 | - ** CTEs. | |
| 395 | - */ | |
| 396 | - "WITH RECURSIVE fns(fnid) AS (\n" | |
| 397 | - " SELECT %d\n" /* <---- fnid */ | |
| 398 | - " UNION\n" | |
| 399 | - " SELECT pfnid FROM mlink, fns\n" | |
| 400 | - " WHERE mlink.fnid=fns.fnid\n" | |
| 401 | - " AND pfnid>0\n" | |
| 402 | - "),\n" | |
| 403 | - | |
| 404 | - /* The flink(fid,fnid,pfid,pfnid) table indicates that there | |
| 405 | - ** is an edit and/or rename arc connecting two files (fid,fnid) | |
| 406 | - ** and (pfid,pfnid). This is similar to the built-in mlink | |
| 407 | - ** table except that flink() is bidirectional. Also the pfnid | |
| 408 | - ** column is always set even no rename occurs. | |
| 409 | - */ | |
| 410 | - "flink(fid,fnid,pfid,pfnid) AS (\n" | |
| 411 | - " SELECT fid, fnid, pid,\n" | |
| 412 | - " CASE WHEN pfnid>0 THEN pfnid ELSE fnid END\n" | |
| 413 | - " FROM mlink\n" | |
| 414 | - " WHERE NOT isaux AND fid>0 AND pid>0 AND fnid IN fns\n" | |
| 415 | - " UNION\n" | |
| 416 | - " SELECT pid,\n" | |
| 417 | - " CASE WHEN pfnid>0 THEN pfnid ELSE fnid END,\n" | |
| 418 | - " fid, fnid\n" | |
| 419 | - " FROM mlink\n" | |
| 420 | - " WHERE NOT isaux AND pid>0 AND fid>0 AND fnid IN fns\n" | |
| 421 | - "),\n" | |
| 422 | - | |
| 423 | 391 | /* The clade(fid,fnid) table is the set of all (fid,fnid) pairs |
| 424 | 392 | ** that should participate in the output. Clade is computed by |
| 425 | - ** walking the graph formed by the flink table. | |
| 393 | + ** walking the graph of mlink edges. | |
| 426 | 394 | */ |
| 427 | - "clade(fid,fnid) AS (\n" | |
| 395 | + "WITH RECURSIVE clade(fid,fnid) AS (\n" | |
| 428 | 396 | " SELECT blob.rid, %d FROM blob\n" /* %d is fnid */ |
| 429 | 397 | " WHERE blob.uuid=(SELECT uuid FROM files_of_checkin(%Q)\n" |
| 430 | 398 | " WHERE filename=%Q)\n" /* %Q is the filename */ |
| 431 | 399 | " UNION\n" |
| 432 | - " SELECT flink.fid, flink.fnid\n" | |
| 433 | - " FROM clade, flink\n" | |
| 434 | - " WHERE clade.fid=flink.pfid AND clade.fnid=flink.pfnid\n" | |
| 400 | + " SELECT mlink.fid, mlink.fnid\n" | |
| 401 | + " FROM clade, mlink\n" | |
| 402 | + " WHERE clade.fid=mlink.pid\n" | |
| 403 | + " AND ((mlink.pfnid=0 AND mlink.fnid=clade.fnid)\n" | |
| 404 | + " OR mlink.pfnid=clade.fnid)\n" | |
| 405 | + " AND mlink.fid>0" | |
| 406 | + " UNION\n" | |
| 407 | + " SELECT mlink.pid," | |
| 408 | + " CASE WHEN mlink.pfnid>0 THEN mlink.pfnid ELSE mlink.fnid END\n" | |
| 409 | + " FROM clade, mlink\n" | |
| 410 | + " WHERE mlink.pid>0\n" | |
| 411 | + " AND mlink.fid=clade.fid\n" | |
| 412 | + " AND mlink.fnid=clade.fnid\n" | |
| 435 | 413 | ")\n", |
| 436 | - fnid, fnid, zCI, zFilename | |
| 414 | + fnid, zCI, zFilename | |
| 437 | 415 | ); |
| 438 | 416 | }else{ |
| 439 | 417 | /* This is the case for all files with a given name. We will still |
| 440 | 418 | ** create a "clade(fid,fnid)" table that identifies all participates |
| 441 | 419 | ** in the output graph, so that subsequent queries can all be the same, |
| @@ -466,11 +444,11 @@ | ||
| 466 | 444 | " mlink.mid,\n" /* check-in ID */ |
| 467 | 445 | " mlink.pfnid,\n" /* Previous filename */ |
| 468 | 446 | " blob.size,\n" /* File size */ |
| 469 | 447 | " mlink.fnid,\n" /* Current filename */ |
| 470 | 448 | " filename.name\n" /* Current filename */ |
| 471 | - "FROM clade, mlink, event, blob, filename\n" | |
| 449 | + "FROM clade CROSS JOIN mlink, event, blob, filename\n" | |
| 472 | 450 | "WHERE mlink.fnid=clade.fnid AND mlink.fid=clade.fid\n" |
| 473 | 451 | " AND event.objid=mlink.mid\n" |
| 474 | 452 | " AND blob.rid=clade.fid\n" |
| 475 | 453 | " AND filename.fnid=clade.fnid\n", |
| 476 | 454 | TAG_BRANCH |
| 477 | 455 |
| --- src/finfo.c | |
| +++ src/finfo.c | |
| @@ -386,56 +386,34 @@ | |
| 386 | blob_zero(&sql); |
| 387 | if( ridCi ){ |
| 388 | /* If we will be tracking changes across renames, some extra temp |
| 389 | ** tables (implemented as CTEs) are required */ |
| 390 | blob_append_sql(&sql, |
| 391 | /* The fns(fnid) table holds the list of all filename-IDs that |
| 392 | ** might possibly exist in the output. This is an optimization |
| 393 | ** used to reduce the size and computation efforts for subsequent |
| 394 | ** CTEs. |
| 395 | */ |
| 396 | "WITH RECURSIVE fns(fnid) AS (\n" |
| 397 | " SELECT %d\n" /* <---- fnid */ |
| 398 | " UNION\n" |
| 399 | " SELECT pfnid FROM mlink, fns\n" |
| 400 | " WHERE mlink.fnid=fns.fnid\n" |
| 401 | " AND pfnid>0\n" |
| 402 | "),\n" |
| 403 | |
| 404 | /* The flink(fid,fnid,pfid,pfnid) table indicates that there |
| 405 | ** is an edit and/or rename arc connecting two files (fid,fnid) |
| 406 | ** and (pfid,pfnid). This is similar to the built-in mlink |
| 407 | ** table except that flink() is bidirectional. Also the pfnid |
| 408 | ** column is always set even no rename occurs. |
| 409 | */ |
| 410 | "flink(fid,fnid,pfid,pfnid) AS (\n" |
| 411 | " SELECT fid, fnid, pid,\n" |
| 412 | " CASE WHEN pfnid>0 THEN pfnid ELSE fnid END\n" |
| 413 | " FROM mlink\n" |
| 414 | " WHERE NOT isaux AND fid>0 AND pid>0 AND fnid IN fns\n" |
| 415 | " UNION\n" |
| 416 | " SELECT pid,\n" |
| 417 | " CASE WHEN pfnid>0 THEN pfnid ELSE fnid END,\n" |
| 418 | " fid, fnid\n" |
| 419 | " FROM mlink\n" |
| 420 | " WHERE NOT isaux AND pid>0 AND fid>0 AND fnid IN fns\n" |
| 421 | "),\n" |
| 422 | |
| 423 | /* The clade(fid,fnid) table is the set of all (fid,fnid) pairs |
| 424 | ** that should participate in the output. Clade is computed by |
| 425 | ** walking the graph formed by the flink table. |
| 426 | */ |
| 427 | "clade(fid,fnid) AS (\n" |
| 428 | " SELECT blob.rid, %d FROM blob\n" /* %d is fnid */ |
| 429 | " WHERE blob.uuid=(SELECT uuid FROM files_of_checkin(%Q)\n" |
| 430 | " WHERE filename=%Q)\n" /* %Q is the filename */ |
| 431 | " UNION\n" |
| 432 | " SELECT flink.fid, flink.fnid\n" |
| 433 | " FROM clade, flink\n" |
| 434 | " WHERE clade.fid=flink.pfid AND clade.fnid=flink.pfnid\n" |
| 435 | ")\n", |
| 436 | fnid, fnid, zCI, zFilename |
| 437 | ); |
| 438 | }else{ |
| 439 | /* This is the case for all files with a given name. We will still |
| 440 | ** create a "clade(fid,fnid)" table that identifies all participates |
| 441 | ** in the output graph, so that subsequent queries can all be the same, |
| @@ -466,11 +444,11 @@ | |
| 466 | " mlink.mid,\n" /* check-in ID */ |
| 467 | " mlink.pfnid,\n" /* Previous filename */ |
| 468 | " blob.size,\n" /* File size */ |
| 469 | " mlink.fnid,\n" /* Current filename */ |
| 470 | " filename.name\n" /* Current filename */ |
| 471 | "FROM clade, mlink, event, blob, filename\n" |
| 472 | "WHERE mlink.fnid=clade.fnid AND mlink.fid=clade.fid\n" |
| 473 | " AND event.objid=mlink.mid\n" |
| 474 | " AND blob.rid=clade.fid\n" |
| 475 | " AND filename.fnid=clade.fnid\n", |
| 476 | TAG_BRANCH |
| 477 |
| --- src/finfo.c | |
| +++ src/finfo.c | |
| @@ -386,56 +386,34 @@ | |
| 386 | blob_zero(&sql); |
| 387 | if( ridCi ){ |
| 388 | /* If we will be tracking changes across renames, some extra temp |
| 389 | ** tables (implemented as CTEs) are required */ |
| 390 | blob_append_sql(&sql, |
| 391 | /* The clade(fid,fnid) table is the set of all (fid,fnid) pairs |
| 392 | ** that should participate in the output. Clade is computed by |
| 393 | ** walking the graph of mlink edges. |
| 394 | */ |
| 395 | "WITH RECURSIVE clade(fid,fnid) AS (\n" |
| 396 | " SELECT blob.rid, %d FROM blob\n" /* %d is fnid */ |
| 397 | " WHERE blob.uuid=(SELECT uuid FROM files_of_checkin(%Q)\n" |
| 398 | " WHERE filename=%Q)\n" /* %Q is the filename */ |
| 399 | " UNION\n" |
| 400 | " SELECT mlink.fid, mlink.fnid\n" |
| 401 | " FROM clade, mlink\n" |
| 402 | " WHERE clade.fid=mlink.pid\n" |
| 403 | " AND ((mlink.pfnid=0 AND mlink.fnid=clade.fnid)\n" |
| 404 | " OR mlink.pfnid=clade.fnid)\n" |
| 405 | " AND mlink.fid>0" |
| 406 | " UNION\n" |
| 407 | " SELECT mlink.pid," |
| 408 | " CASE WHEN mlink.pfnid>0 THEN mlink.pfnid ELSE mlink.fnid END\n" |
| 409 | " FROM clade, mlink\n" |
| 410 | " WHERE mlink.pid>0\n" |
| 411 | " AND mlink.fid=clade.fid\n" |
| 412 | " AND mlink.fnid=clade.fnid\n" |
| 413 | ")\n", |
| 414 | fnid, zCI, zFilename |
| 415 | ); |
| 416 | }else{ |
| 417 | /* This is the case for all files with a given name. We will still |
| 418 | ** create a "clade(fid,fnid)" table that identifies all participates |
| 419 | ** in the output graph, so that subsequent queries can all be the same, |
| @@ -466,11 +444,11 @@ | |
| 444 | " mlink.mid,\n" /* check-in ID */ |
| 445 | " mlink.pfnid,\n" /* Previous filename */ |
| 446 | " blob.size,\n" /* File size */ |
| 447 | " mlink.fnid,\n" /* Current filename */ |
| 448 | " filename.name\n" /* Current filename */ |
| 449 | "FROM clade CROSS JOIN mlink, event, blob, filename\n" |
| 450 | "WHERE mlink.fnid=clade.fnid AND mlink.fid=clade.fid\n" |
| 451 | " AND event.objid=mlink.mid\n" |
| 452 | " AND blob.rid=clade.fid\n" |
| 453 | " AND filename.fnid=clade.fnid\n", |
| 454 | TAG_BRANCH |
| 455 |