Fossil SCM

fossil-scm / tools / cvs2fossil / lib / c2f_integrity.tcl
Blame History Raw 965 lines
1
## -*- tcl -*-
2
# # ## ### ##### ######## ############# #####################
3
## Copyright (c) 2007-2008 Andreas Kupries.
4
#
5
# This software is licensed as described in the file LICENSE, which
6
# you should have received as part of this distribution.
7
#
8
# This software consists of voluntary contributions made by many
9
# individuals. For exact contribution history, see the revision
10
# history and logs, available at http://fossil-scm.hwaci.com/fossil
11
# # ## ### ##### ######## ############# #####################
12
13
## This package holds a number of integrity checks done on the
14
## persistent state. This is used by the passes II and IV.
15
16
# # ## ### ##### ######## ############# #####################
17
## Requirements
18
19
package require Tcl 8.4 ; # Required runtime.
20
package require snit ; # OO system.
21
package require vc::tools::trouble ; # Error reporting.
22
package require vc::tools::log ; # User feedback.
23
package require vc::fossil::import::cvs::state ; # State storage.
24
25
# # ## ### ##### ######## ############# #####################
26
##
27
28
snit::type ::vc::fossil::import::cvs::integrity {
29
# # ## ### ##### ######## #############
30
## Public API
31
32
typemethod assert {expression failmessage} {
33
set ok [uplevel 1 [list ::expr $expression]]
34
if {$ok} return
35
trouble internal [uplevel 1 [list ::subst $failmessage]]
36
return
37
}
38
39
typemethod strict {} {
40
log write 4 integrity {Check database consistency}
41
42
set n 0
43
AllButMeta
44
Meta
45
return
46
}
47
48
typemethod metarelaxed {} {
49
log write 4 integrity {Check database consistency}
50
51
set n 0
52
AllButMeta
53
return
54
}
55
56
typemethod changesets {} {
57
log write 4 integrity {Check database consistency}
58
59
set n 0
60
RevisionChangesets
61
TagChangesets
62
BranchChangesets
63
return
64
}
65
66
# # ## ### ##### ######## #############
67
## Internal methods
68
69
proc AllButMeta {} {
70
# This code performs a number of paranoid checks of the
71
# database, searching for inconsistent cross-references.
72
73
upvar 1 n n ; # Counter for the checks (we print an id before
74
# the main label).
75
76
# Find all revisions which disagree with their line of
77
# development about the project they are owned by.
78
CheckRev \
79
{Revisions and their LODs have to be in the same project} \
80
{disagrees with its LOD about owning project} {
81
SELECT F.name, R.rev
82
FROM revision R, file F, symbol S
83
WHERE R.fid = F.fid -- get file of rev
84
AND R.lod = S.sid -- get symbol of its lod
85
AND F.pid != S.pid -- disagreement about the owning project
86
;
87
}
88
# Find all revisions which disgree with their meta data about
89
# the project they are owned by.
90
CheckRev \
91
{Revisions and their meta data have to be in the same project} \
92
{disagrees with its meta data about owning project} {
93
SELECT F.name, R.rev
94
FROM revision R, file F, meta M
95
WHERE R.fid = F.fid -- get file of rev
96
AND R.mid = M.mid -- get meta of rev
97
AND F.pid != M.pid -- disagreement about owning project
98
;
99
}
100
# Find all revisions with a primary child which disagrees
101
# about the file they belong to.
102
CheckRev \
103
{Revisions and their primary children have to be in the same file} \
104
{disagrees with its primary child about the owning file} {
105
SELECT F.name, R.rev
106
FROM revision R, revision C, file F
107
WHERE R.fid = F.fid -- get file of rev
108
AND R.child IS NOT NULL -- get all with primary children
109
AND R.child = C.rid -- get primary child
110
AND C.fid != R.fid -- wrongly in different file
111
;
112
}
113
114
# Find all revisions with a branch parent symbol whose parent
115
# disagrees about the file they belong to.
116
CheckRev \
117
{Revisions and their branch children have to be in the same file} \
118
{at the beginning of its branch and its parent disagree about the owning file} {
119
SELECT F.name, R.rev
120
FROM revision R, revision P, file F
121
WHERE R.fid = F.fid -- get file of rev
122
AND R.bparent IS NOT NULL -- get first-of-branch revisions
123
AND R.parent = P.rid -- get out-of-branch parent
124
AND R.fid != P.fid -- wrongly in different file
125
;
126
}
127
# Find all revisions with a non-NTDB child which disagrees
128
# about the file they belong to.
129
CheckRev \
130
{Revisions and their non-NTDB children have to be in the same file} \
131
{disagrees with its non-NTDB child about the owning file} {
132
SELECT F.name, R.rev
133
FROM revision R, revision C, file F
134
WHERE R.fid = F.fid -- get file of rev
135
AND R.dbchild IS NOT NULL -- get last NTDB revisions
136
AND R.dbchild = C.rid -- get their child
137
AND C.fid != R.fid -- wrongly in different file
138
;
139
}
140
# Find all revisions which have a primary child, but the child
141
# does not have them as parent.
142
CheckRev \
143
{Revisions have to be parents of their primary children} \
144
{is not the parent of its primary child} {
145
SELECT F.name, R.rev
146
FROM revision R, revision C, file F
147
WHERE R.fid = F.fid -- get file of rev
148
AND R.child IS NOT NULL -- get all with primary children
149
AND R.child = C.rid -- get primary child
150
AND C.parent != R.rid -- child's parent wrongly not us
151
;
152
}
153
# Find all revisions which have a primrary child, but the
154
# child has a branch parent symbol making them brach starters.
155
CheckRev \
156
{Primary children of revisions must not start branches} \
157
{is parent of a primary child which is the beginning of a branch} {
158
SELECT F.name, R.rev
159
FROM revision R, revision C, file F
160
WHERE R.fid = F.fid -- get file of rev
161
AND R.child IS NOT NULL -- get all with primary children
162
AND R.child = C.rid -- get primary child
163
AND C.bparent IS NOT NULL -- but indicates to be on branch
164
;
165
}
166
# Find all revisions without branch parent symbol which have a
167
# parent, but the parent does not have them as primary child.
168
CheckRev \
169
{Revisions have to be primary children of their parents, if any} \
170
{is not the child of its parent} {
171
SELECT F.name, R.rev
172
FROM revision R, revision P, file F
173
WHERE R.fid = F.fid -- get file of revision
174
AND R.bparent IS NULL -- exclude all first-on-branch revisions
175
AND R.parent IS NOT NULL -- which are not root of their line
176
AND R.parent = P.rid -- get in-lod parent
177
AND P.child != R.rid -- but does not have rev as primary child
178
;
179
}
180
# Find all revisions with a branch parent symbol which do not
181
# have a parent.
182
CheckRev \
183
{Branch starting revisions have to have a parent, if not detached} \
184
{at the beginning of its branch has no parent, but its branch has} {
185
SELECT F.name, R.rev
186
FROM revision R, file F, branch B
187
WHERE R.fid = F.fid -- get file of revision
188
AND R.bparent IS NOT NULL -- limit to first-on-branch revisions
189
AND R.parent IS NULL -- which are detached
190
AND B.sid = R.bparent -- get branch governing the rev
191
AND B.fid = R.fid -- in the revision's file
192
AND B.root IS NOT NULL -- but says that branch is attached
193
;
194
}
195
# Find all revisions with a branch parent symbol whose parent
196
# has them as primary child.
197
CheckRev \
198
{Branch starting revisions must not be primary children of their parents} \
199
{at the beginning of its branch is the primary child of its parent} {
200
SELECT F.name, R.rev
201
FROM revision R, revision P, file F
202
WHERE R.fid = F.fid -- get file of revision
203
AND R.bparent IS NOT NULL -- limit to first-on-branch revisions
204
AND R.parent IS NOT NULL -- which are attached
205
AND R.parent = P.rid -- get out-of-branch parent
206
AND P.child = R.rid -- wrongly has rev as primary child
207
;
208
}
209
# Find all revisions with a non-NTDB child which are not on
210
# the NTDB.
211
CheckRev \
212
{NTDB to trunk transition has to begin on NTDB} \
213
{has a non-NTDB child, yet is not on the NTDB} {
214
SELECT F.name, R.rev
215
FROM revision R, file F
216
WHERE R.fid = F.fid -- get file of revision
217
AND R.dbchild IS NOT NULL -- limit to last NTDB revision
218
AND NOT R.isdefault -- but signals not-NTDB
219
;
220
}
221
# Find all revisions with a NTDB parent which are on the NTDB.
222
CheckRev \
223
{NTDB to trunk transition has to end on non-NTDB} \
224
{has a NTDB parent, yet is on the NTDB} {
225
SELECT F.name, R.rev
226
FROM revision R, file F
227
WHERE R.fid = F.fid -- get file of revision
228
AND R.dbparent IS NOT NULL -- limit to roots of non-NTDB
229
AND R.isdefault -- but signals to be NTDB
230
;
231
}
232
# Find all revisions with a child which disagrees about the
233
# line of development they belong to.
234
CheckRev \
235
{Revisions and their primary children have to be in the same LOD} \
236
{and its primary child disagree about their LOD} {
237
SELECT F.name, R.rev
238
FROM revision R, revision C, file F
239
WHERE R.fid = F.fid -- get file of revision
240
AND R.child IS NOT NULL -- revision has a primary child
241
AND R.child = C.rid -- get that child
242
AND C.lod != R.lod -- child wrongly disagrees with lod
243
;
244
}
245
# Find all revisions with a non-NTDB child which agrees about
246
# the line of development they belong to.
247
CheckRev \
248
{NTDB and trunk revisions have to be in different LODs} \
249
{on NTDB and its non-NTDB child wrongly agree about their LOD} {
250
SELECT F.name, R.rev
251
FROM revision R, revision C, file F
252
WHERE R.fid = F.fid -- get file of revision
253
AND R.dbchild IS NOT NULL -- limit to last NTDB revision
254
AND R.dbchild = C.rid -- get non-NTDB child
255
AND C.lod = R.lod -- child wrongly has same lod
256
;
257
}
258
# Find all revisions with a branch parent symbol which is not
259
# their LOD.
260
CheckRev \
261
{Branch starting revisions have to have their LOD as branch parent symbol} \
262
{at the beginning of its branch does not have the branch symbol as its LOD} {
263
SELECT F.name, R.rev
264
FROM revision R, file F
265
WHERE R.fid = F.fid -- get file of revision
266
AND R.bparent IS NOT NULL -- limit to branch-first revisions
267
AND R.lod != R.bparent -- out-of-branch parent wrongly is not the lod
268
;
269
}
270
# Find all revisions with a branch parent symbol whose parent
271
# is in the same line of development.
272
CheckRev \
273
{Revisions and their branch children have to be in different LODs} \
274
{at the beginning of its branch and its parent wrongly agree about their LOD} {
275
SELECT F.name, R.rev
276
FROM revision R, revision P, file F
277
WHERE R.fid = F.fid -- get file of revision
278
AND R.bparent IS NOT NULL -- limit to branch-first revisions
279
AND R.parent = P.rid -- get out-of-branch parent of revision
280
AND R.lod = P.lod -- rev and parent wrongly agree on lod
281
;
282
}
283
return
284
}
285
286
proc Meta {} {
287
# This code performs a number of paranoid checks of the
288
# database, searching for inconsistent cross-references.
289
290
upvar 1 n n ; # Counter for the checks (we print an id before
291
# the main label).
292
293
# Find all revisions which disgree with their meta data about
294
# the branch/line of development they belong to.
295
CheckRev \
296
{Revisions and their meta data have to be in the same LOD} \
297
{disagrees with its meta data about owning LOD} {
298
SELECT F.name, R.rev
299
FROM revision R, meta M, file F
300
WHERE R.mid = M.mid -- get meta data of revision
301
AND R.lod != M.bid -- rev wrongly disagrees with meta about lod
302
AND R.fid = F.fid -- get file of revision
303
;
304
}
305
return
306
}
307
308
proc RevisionChangesets {} {
309
# This code performs a number of paranoid checks of the
310
# database, searching for inconsistent changeset/revision
311
# information.
312
313
upvar 1 n n ; # Counter for the checks (we print an id before
314
# the main label).
315
316
# Find all revisions which are not used by at least one
317
# changeset.
318
CheckRev \
319
{All revisions have to be used by least one changeset} \
320
{is not used by a revision changeset} {
321
-- Unused revisions = All revisions
322
-- - revisions used by revision changesets.
323
--
324
-- Both sets can be computed easily, and subtracted
325
-- from each other. Then we can get the associated
326
-- file (name) for display.
327
328
SELECT F.name, R.rev
329
FROM revision R, file F
330
WHERE R.rid IN (SELECT rid
331
FROM revision -- All revisions
332
EXCEPT -- subtract
333
SELECT CI.iid
334
FROM csitem CI, changeset C -- revisions used
335
WHERE C.cid = CI.cid -- by any revision
336
AND C.type = 0) -- changeset
337
AND R.fid = F.fid -- get file of unused revision
338
}
339
# Find all revisions which are used by more than one
340
# changeset.
341
CheckRev \
342
{All revisions have to be used by at most one changeset} \
343
{is used by multiple changesets} {
344
-- Principle of operation: Get all revision/changeset
345
-- pairs for all revision changesets, group by
346
-- revision to aggregate the changeset, counting
347
-- them. From the resulting revision/count table
348
-- select those with more than one user, and get their
349
-- associated file (name) for display.
350
351
SELECT F.name, R.rev
352
FROM revision R, file F,
353
(SELECT CI.iid AS rid, -- revision item
354
count(CI.cid) AS count -- number of csets using item
355
FROM csitem CI, changeset C
356
WHERE C.type = 0 -- limit to revision csets
357
AND C.cid = CI.cid -- get item in changeset
358
GROUP BY CI.iid -- aggregate by item, count csets/item
359
) AS U
360
WHERE U.count > 1 -- limit to item with multiple users
361
AND R.rid = U.rid -- get revision of item
362
AND R.fid = F.fid -- get file of revision
363
}
364
# All revisions have to refer to the same meta information as
365
# their changeset.
366
CheckRevCS \
367
{All revisions have to agree with their changeset about the used meta information} \
368
{disagrees with its changeset @ about the meta information} {
369
SELECT CT.name, C.cid, F.name, R.rev
370
FROM changeset C, cstype CT, revision R, file F, csitem CI
371
WHERE C.type = 0 -- revision changesets only
372
AND C.cid = CI.cid -- changeset --> its revisions
373
AND R.rid = CI.iid -- look at them
374
AND R.mid != C.src -- Only those which disagree with changeset about the meta
375
AND R.fid = F.fid -- get file of the revision
376
AND CT.tid = C.type -- get changeset type, for labeling
377
}
378
# All revisions have to agree on the LOD their changeset
379
# belongs to. In other words, all revisions in a changeset
380
# have to refer to the same line of development.
381
#
382
# Instead of looking at all pairs of revisions in all
383
# changesets we generate the distinct set of all LODs
384
# referenced by the revisions of a changeset, look for those
385
# with cardinality > 1, and get the identifying information
386
# for the changesets found thusly.
387
CheckCS \
388
{All revisions in a changeset have to belong to the same LOD} \
389
{: Its revisions disagree about the LOD they belong to} {
390
SELECT T.name, C.cid
391
FROM changeset C, cstype T
392
WHERE C.cid IN (SELECT U.cid
393
FROM (SELECT DISTINCT -- unique cset/lod pairs
394
CI.cid AS cid, -- revision cset
395
R.lod AS lod -- lod of item in cset
396
FROM csitem CI, changeset C, revision R
397
WHERE CI.iid = R.rid -- get rev of item in cset
398
AND C.cid = CI.cid -- get changeset of item
399
AND C.type = 0 -- limit to rev csets
400
) AS U
401
GROUP BY U.cid -- aggregate by cset, count lods/cset
402
HAVING COUNT(U.lod) > 1 -- find csets with multiple lods
403
)
404
AND T.tid = C.type
405
}
406
# All revisions have to agree on the project their changeset
407
# belongs to. In other words, all revisions in a changeset
408
# have to refer to the same project.
409
#
410
# Instead of looking at all pairs of revisions in all
411
# changesets we generate the distinct set of all projects
412
# referenced by the revisions of a changeset, look for those
413
# with cardinality > 1, and get the identifying information
414
# for the changesets found thusly.
415
CheckCS \
416
{All revisions in a changeset have to belong to the same project} \
417
{: Its revisions disagree about the project they belong to} {
418
SELECT T.name, C.cid
419
FROM changeset C, cstype T
420
WHERE C.cid IN (SELECT U.cid
421
FROM (SELECT DISTINCT -- unique cset/proj pairs
422
CI.cid AS cid, -- rev cset
423
F.pid AS pid -- project of item in cset
424
FROM csitem CI, changeset C, revision R, file F
425
WHERE CI.iid = R.rid -- get rev of item in cset
426
AND C.cid = CI.cid -- get changeset of item
427
AND C.type = 0 -- limit to rev changesets
428
AND F.fid = R.fid -- get file of revision
429
) AS U
430
GROUP BY U.cid -- aggregate by csets, count proj/cset
431
HAVING COUNT(U.pid) > 1 -- find csets with multiple projects
432
)
433
AND T.tid = C.type -- get readable changeset type
434
}
435
# All revisions in a single changeset have to belong to
436
# different files. Conversely: No two revisions of a single
437
# file are allowed to be in the same changeset.
438
#
439
# Instead of looking at all pairs of revisions in all
440
# changesets we generate the distinct set of all files
441
# referenced by the revisions of a changeset, and look for
442
# those with cardinality < the cardinality of the set of
443
# revisions, and get the identifying information for the
444
# changesets found thusly.
445
CheckCS \
446
{All revisions in a changeset have to belong to different files} \
447
{: Its revisions share files} {
448
SELECT T.name, C.cid
449
FROM changeset C, cstype T
450
WHERE C.cid IN (SELECT VV.cid
451
FROM (SELECT U.cid AS cid, -- rev changeset
452
COUNT (U.fid) AS fcount -- number of files by items
453
FROM (SELECT DISTINCT -- unique cset/file pairs
454
CI.cid AS cid, -- rev changeset
455
R.fid AS fid -- file of item in changeset
456
FROM csitem CI, changeset C, revision R
457
WHERE CI.iid = R.rid -- get rev of item in changeset
458
AND C.cid = CI.cid -- get changeset of item
459
AND C.type = 0 -- limit to rev csets
460
) AS U
461
GROUP BY U.cid -- aggregate by csets, count files/cset
462
) AS UU,
463
(SELECT V.cid AS cid, -- rev changeset
464
COUNT (V.iid) AS rcount -- number of items
465
FROM csitem V, changeset X
466
WHERE X.cid = V.cid -- get changeset of item
467
AND X.type = 0 -- limit to rev csets
468
GROUP BY V.cid -- aggregate by csets, count items/cset
469
) AS VV
470
WHERE VV.cid = UU.cid -- sync #items/cset with #files/cset
471
AND UU.fcount < VV.rcount -- less files than items
472
-- => items belong to the same file.
473
)
474
AND T.tid = C.type -- get readable changeset type
475
}
476
return
477
}
478
479
proc TagChangesets {} {
480
# This code performs a number of paranoid checks of the
481
# database, searching for inconsistent changeset/revision
482
# information.
483
484
upvar 1 n n ; # Counter for the checks (we print an id before
485
# the main label).
486
487
# Find all tags which are not used by at least one changeset.
488
CheckTag \
489
{All tags have to be used by least one changeset} \
490
{is not used by a tag symbol changeset} {
491
-- Unused tags = All tags
492
-- - revisions used by tag changesets.
493
--
494
-- Both sets can be computed easily, and subtracted
495
-- from each other. Then we can get the associated
496
-- file (name) for display.
497
498
SELECT P.name, S.name
499
FROM project P, tag T, symbol S
500
WHERE T.tid IN (SELECT tid -- All tags
501
FROM tag
502
EXCEPT -- subtract
503
SELECT CI.iid -- tags used
504
FROM csitem CI, changeset C
505
WHERE C.cid = CI.cid -- by any tag
506
AND C.type = 1) -- changeset
507
AND S.sid = T.sid -- get symbol of tag
508
AND P.pid = S.pid -- get project of symbol
509
}
510
# Find all tags which are used by more than one changeset.
511
CheckRev \
512
{All tags have to be used by at most one changeset} \
513
{is used by multiple changesets} {
514
-- Principle of operation: Get all tag/changeset pairs
515
-- for all tag changesets, group by tag to aggregate
516
-- the changeset, counting them. From the resulting
517
-- tag/count table select those with more than one
518
-- user, and get their associated file (name) for
519
-- display.
520
521
SELECT P.name, S.name
522
FROM tag T, project P, symbol S,
523
(SELECT CI.iid AS iid, -- item
524
count(CI.cid) AS count -- number of csets using item
525
FROM csitem CI, changeset C
526
WHERE C.type = 1 -- limit to tag csets
527
AND C.cid = CI.cid -- get items of cset
528
GROUP BY CI.iid -- aggregate by item, count csets/item
529
) AS U
530
WHERE U.count > 1 -- find tag item used multiple times
531
AND T.tid = U.iid -- get tag of item
532
AND S.sid = T.sid -- get symbol of tag
533
AND P.pid = S.pid -- get project of symbol
534
}
535
if 0 {
536
# This check is disabled for the moment. Apparently tags
537
# can cross lines of development, at least if the involved
538
# LODs are the trunk, and the NTDB. That makes sense, as
539
# the NTDB revisions are initially logically a part of the
540
# trunk. The standard check below however does not capture
541
# this. When I manage to rephrase it to accept this type
542
# of cross-over it will be re-activated.
543
544
# All tags have to agree on the LOD their changeset
545
# belongs to. In other words, all tags in a changeset have
546
# to refer to the same line of development.
547
#
548
# Instead of looking at all pairs of tags in all
549
# changesets we generate the distinct set of all LODs
550
# referenced by the tags of a changeset, look for those
551
# with cardinality > 1, and get the identifying
552
# information for the changesets found thusly.
553
CheckCS \
554
{All tags in a changeset have to belong to the same LOD} \
555
{: Its tags disagree about the LOD they belong to} {
556
SELECT T.name, C.cid
557
FROM changeset C, cstype T
558
WHERE C.cid IN (SELECT U.cid
559
FROM (SELECT DISTINCT CI.cid AS cid, T.lod AS lod
560
FROM csitem CI, changeset C, tag T
561
WHERE CI.iid = T.tid
562
AND C.cid = CI.cid
563
AND C.type = 1) AS U
564
GROUP BY U.cid HAVING COUNT(U.lod) > 1)
565
AND T.tid = C.type
566
}
567
}
568
# All tags have to agree on the project their changeset
569
# belongs to. In other words, all tags in a changeset have to
570
# refer to the same project.
571
#
572
# Instead of looking at all pairs of tags in all changesets we
573
# generate the distinct set of all projects referenced by the
574
# tags of a changeset, look for those with cardinality > 1,
575
# and get the identifying information for the changesets found
576
# thusly.
577
CheckCS \
578
{All tags in a changeset have to belong to the same project} \
579
{: Its tags disagree about the project they belong to} {
580
SELECT T.name, C.cid
581
FROM changeset C, cstype T
582
WHERE C.cid IN (SELECT U.cid
583
FROM (SELECT DISTINCT -- unique cset/proj pairs
584
CI.cid AS cid, -- tag cset
585
F.pid AS pid -- project of item in cset
586
FROM csitem CI, changeset C, tag T, file F
587
WHERE CI.iid = T.tid -- get tag of item in cset
588
AND C.cid = CI.cid -- get changeset of item
589
AND C.type = 1 -- limit to tag changesets
590
AND F.fid = T.fid -- get file of tag
591
) AS U
592
GROUP BY U.cid -- aggregate by csets, count proj/cset
593
HAVING COUNT(U.pid) > 1 -- find csets with multiple projects
594
)
595
AND T.tid = C.type -- get readable changeset type
596
}
597
# All tags in a single changeset have to belong to different
598
# files. Conversely: No two tags of a single file are allowed
599
# to be in the same changeset.
600
#
601
# Instead of looking at all pairs of tags in all changesets we
602
# generate the distinct set of all files referenced by the
603
# tags of a changeset, and look for those with cardinality <
604
# the cardinality of the set of tags, and get the identifying
605
# information for the changesets found thusly.
606
CheckCS \
607
{All tags in a changeset have to belong to different files} \
608
{: Its tags share files} {
609
SELECT T.name, C.cid
610
FROM changeset C, cstype T
611
WHERE C.cid IN (SELECT VV.cid
612
FROM (SELECT U.cid AS cid, -- changeset
613
COUNT (U.fid) AS fcount -- number of files by items
614
FROM (SELECT DISTINCT -- unique cset/file pairs
615
CI.cid AS cid, -- tag changeset
616
T.fid AS fid -- file of item in changeset
617
FROM csitem CI, changeset C, tag T
618
WHERE CI.iid = T.tid -- get tag of item in changeset
619
AND C.cid = CI.cid -- get changeset of item
620
AND C.type = 1 -- limit to tag changesets
621
) AS U
622
GROUP BY U.cid -- aggregate by csets, count files/cset
623
) AS UU,
624
(SELECT V.cid AS cid, -- changeset
625
COUNT (V.iid) AS rcount -- number of items in cset
626
FROM csitem V, changeset X
627
WHERE X.cid = V.cid -- get changeset of item
628
AND X.type = 1 -- limit to tag changesets
629
GROUP BY V.cid -- aggregate by csets, count items/cset
630
) AS VV
631
WHERE VV.cid = UU.cid -- sync #items/cset with #files/cset
632
AND UU.fcount < VV.rcount -- less files than items
633
-- => items belong to the same file.
634
)
635
AND T.tid = C.type -- get readable changeset type
636
}
637
return
638
}
639
640
proc BranchChangesets {} {
641
# This code performs a number of paranoid checks of the
642
# database, searching for inconsistent changeset/revision
643
# information.
644
645
upvar 1 n n ; # Counter for the checks (we print an id before
646
# the main label).
647
648
# Find all branches which are not used by at least one
649
# changeset.
650
CheckBranch \
651
{All branches have to be used by least one changeset} \
652
{is not used by a branch symbol changeset} {
653
-- Unused branches = All branches
654
-- - branches used by branch changesets.
655
--
656
-- Both sets can be computed easily, and subtracted
657
-- from each other. Then we can get the associated
658
-- file (name) for display.
659
660
SELECT P.name, S.name
661
FROM project P, branch B, symbol S
662
WHERE B.bid IN (SELECT bid -- All branches
663
FROM branch
664
EXCEPT -- subtract
665
SELECT CI.iid -- branches used
666
FROM csitem CI, changeset C
667
WHERE C.cid = CI.cid -- by any branch
668
AND C.type = 2 -- changeset
669
)
670
AND S.sid = B.sid -- get symbol of branch
671
AND P.pid = S.pid -- get project of symbol
672
}
673
# Find all branches which are used by more than one changeset.
674
CheckRev \
675
{All branches have to be used by at most one changeset} \
676
{is used by multiple changesets} {
677
-- Principle of operation: Get all branch/changeset
678
-- pairs for all branch changesets, group by tag to
679
-- aggregate the changeset, counting them. From the
680
-- resulting branch/count table select those with more
681
-- than one user, and get their associated file (name)
682
-- for display.
683
684
SELECT P.name, S.name
685
FROM branch B, project P, symbol S,
686
(SELECT CI.iid AS iid, -- item
687
count(CI.cid) AS count -- number of csets for item
688
FROM csitem CI, changeset C
689
WHERE C.type = 2 -- limit to branch changesets,
690
AND C.cid = CI.cid -- get the items they contain,
691
GROUP BY CI.iid -- aggregate by items, count csets/item (x)
692
) AS U
693
WHERE U.count > 1 -- find items used multiple times
694
AND B.bid = U.iid -- get the users (branch changesets)
695
AND S.sid = B.sid -- get symbol of branch
696
AND P.pid = S.pid -- get project of symbol
697
}
698
if 0 {
699
# This check has been disabled. When the converter was run
700
# on the Tcl CVS several branches tripped this
701
# constraint. One of them was a free-floating branch, and
702
# its handling has been fixed by now. The others however
703
# seem semi-legitimate, in the sense that they show
704
# inconsistencies in the CVS history the user is not
705
# really able to solve, but it might be possible to simply
706
# ignore them.
707
708
# For example in Tcl we have a branch X with a prefered
709
# parent Y, except for a single file where the prefered
710
# parent seems to be created after its current parent,
711
# making re-parenting impossible. However we may be able
712
# to ignore this, it should only cause the branch to have
713
# more than one predecessor, and shifting it around in the
714
# commit order. The backend would still use the prefered
715
# parent for the attachment point in fossil.
716
717
# So, for now I have decided to disable this and press
718
# forward. Of course, if we run into actual trouble we
719
# will have to go back here see what can be done to fix
720
# this. Even if only giving the user the instruction how
721
# to edit the CVS repository to remove the inconsistency.
722
723
# All branches have to agree on the LOD their changeset
724
# belongs to. In other words, all branches in a changeset
725
# have to refer to the same line of development.
726
#
727
# Instead of looking at all pairs of branches in all
728
# changesets we generate the distinct set of all LODs
729
# referenced by the branches of a changeset, look for
730
# those with cardinality > 1, and get the identifying
731
# information for the changesets found thusly.
732
CheckCS \
733
{All branches in a changeset have to belong to the same LOD} \
734
{: Its branches disagree about the LOD they belong to} {
735
SELECT T.name, C.cid
736
FROM changeset C, cstype T
737
WHERE C.cid IN (SELECT U.cid
738
FROM (SELECT DISTINCT CI.cid AS cid, B.lod AS lod
739
FROM csitem CI, changeset C, branch B
740
WHERE CI.iid = B.bid
741
AND C.cid = CI.cid
742
AND C.type = 2) AS U
743
GROUP BY U.cid HAVING COUNT(U.lod) > 1)
744
AND T.tid = C.type
745
}
746
}
747
# All branches have to agree on the project their changeset
748
# belongs to. In other words, all branches in a changeset have
749
# to refer to the same project.
750
#
751
# Instead of looking at all pairs of branches in all
752
# changesets we generate the distinct set of all projects
753
# referenced by the branches of a changeset, look for those
754
# with cardinality > 1, and get the identifying information
755
# for the changesets found thusly.
756
CheckCS \
757
{All branches in a changeset have to belong to the same project} \
758
{: Its branches disagree about the project they belong to} {
759
SELECT T.name, C.cid
760
FROM changeset C, cstype T
761
WHERE C.cid IN (SELECT U.cid
762
FROM (SELECT DISTINCT -- Unique cset/proj pairs
763
CI.cid AS cid, -- Branch cset
764
F.pid AS pid -- Project of item in cset
765
FROM csitem CI, changeset C, branch B, file F
766
WHERE CI.iid = B.bid -- get branch of item in cset
767
AND C.cid = CI.cid -- get changeset of item
768
AND C.type = 2 -- limit to branch changesets
769
AND F.fid = B.fid -- get file of branch
770
) AS U
771
GROUP BY U.cid -- aggregate by csets, count proj/cset
772
HAVING COUNT(U.pid) > 1 -- find cset with multiple projects
773
)
774
AND T.tid = C.type -- get readable changeset type
775
}
776
# All branches in a single changeset have to belong to
777
# different files. Conversely: No two branches of a single
778
# file are allowed to be in the same changeset.
779
#
780
# Instead of looking at all pairs of branches in all
781
# changesets we generate the distinct set of all files
782
# referenced by the branches of a changeset, and look for
783
# those with cardinality < the cardinality of the set of
784
# branches, and get the identifying information for the
785
# changesets found thusly.
786
CheckCS \
787
{All branches in a changeset have to belong to different files} \
788
{: Its branches share files} {
789
SELECT T.name, C.cid
790
FROM changeset C, cstype T
791
WHERE C.cid IN (SELECT VV.cid
792
FROM (SELECT U.cid AS cid, -- changeset
793
COUNT (U.fid) AS fcount -- number of files by items
794
FROM (SELECT DISTINCT -- unique cset/file pairs
795
CI.cid AS cid, -- Branch changeset
796
B.fid AS fid -- File of item in changeset
797
FROM csitem CI, changeset C, branch B
798
WHERE CI.iid = B.bid -- get tag of item in changeset
799
AND C.cid = CI.cid -- get changeset of item
800
AND C.type = 2 -- limit to branch changesets
801
) AS U
802
GROUP BY U.cid -- aggregate by csets, count files/cset
803
) AS UU,
804
(SELECT V.cid AS cid, -- changeset
805
COUNT (V.iid) AS rcount -- number of items in cset
806
FROM csitem V, changeset X
807
WHERE X.cid = V.cid -- get changeset of item
808
AND X.type = 2 -- limit to branch changesets
809
GROUP BY V.cid -- aggregate by csets, count items/cset
810
) AS VV
811
WHERE VV.cid = UU.cid -- sync #items/cset with #files/cset
812
AND UU.fcount < VV.rcount -- less files than items
813
-- => items belong to the same file.
814
)
815
AND T.tid = C.type -- get readable changeset type
816
}
817
return
818
}
819
820
proc ___UnusedChangesetChecks___ {} {
821
# This code performs a number of paranoid checks of the
822
# database, searching for inconsistent changeset/revision
823
# information.
824
825
return ; # Disabled for now, bottlenecks ...
826
827
upvar 1 n n ; # Counter for the checks (we print an id before
828
# the main label).
829
830
# The next two checks are BOTTLENECKS. In essence we are
831
# checking each symbol changeset one by one.
832
833
# TODO: Try to rephrase the checks to make more use of
834
# indices, set and stream operations.
835
836
# All revisions used by tag symbol changesets have to have the
837
# changeset's tag associated with them.
838
CheckRevCS \
839
{All revisions used by tag symbol changesets have to have the changeset's tag attached to them} \
840
{does not have the tag of its symbol changeset @ attached to it} {
841
SELECT CT.name, C.cid, F.name, R.rev
842
FROM changeset C, cstype CT, revision R, file F, csitem CI, tag T
843
WHERE C.type = 1 -- symbol changesets only
844
AND C.src = T.sid -- tag only, linked by symbol id
845
AND C.cid = CI.cid -- changeset --> its revisions
846
AND R.rid = CI.iid -- look at the revisions
847
-- and look for the tag among the attached ones.
848
AND T.sid NOT IN (SELECT TB.sid
849
FROM tag TB
850
WHERE TB.rev = R.rid)
851
AND R.fid = F.fid -- get file of revision
852
}
853
854
# All revisions used by branch symbol changesets have to have
855
# the changeset's branch associated with them.
856
857
CheckRevCS \
858
{All revisions used by branch symbol changesets have to have the changeset's branch attached to them} \
859
{does not have the branch of its symbol changeset @ attached to it} {
860
SELECT CT.name, C.cid, F.name, R.rev, C.cid
861
FROM changeset C, cstype CT, revision R, file F, csitem CI, branch B
862
WHERE C.type = 1 -- symbol changesets only
863
AND C.src = B.sid -- branches only
864
AND C.cid = CI.cid -- changeset --> its revisions
865
AND R.rid = CI.iid -- look at the revisions
866
-- and look for the branch among the attached ones.
867
AND B.sid NOT IN (SELECT BB.sid
868
FROM branch BB
869
WHERE BB.root = R.rid)
870
AND R.fid = F.fid -- get file of revision
871
}
872
873
# TODO
874
# The state has to contain at least one tag symbol changeset
875
# for all known tags.
876
877
# TODO
878
# The state has to contain at least one branch symbol changeset
879
# for all known branches.
880
return
881
}
882
883
884
proc CheckRev {header label sql} {
885
upvar 1 n n
886
set ok 1
887
foreach {fname revnr} [state run $sql] {
888
set ok 0
889
trouble fatal "${revnr}::$fname $label"
890
}
891
log write 5 integrity {\[[format %02d [incr n]]\] [expr {$ok ? "Ok " : "Failed"}] ... $header}
892
return
893
}
894
895
proc CheckTag {header label sql} {
896
upvar 1 n n
897
set ok 1
898
foreach {pname sname} [state run $sql] {
899
set ok 0
900
trouble fatal "<$pname tag '$sname'> $label"
901
}
902
log write 5 integrity {\[[format %02d [incr n]]\] [expr {$ok ? "Ok " : "Failed"}] ... $header}
903
return
904
}
905
906
proc CheckBranch {header label sql} {
907
upvar 1 n n
908
set ok 1
909
foreach {pname sname} [state run $sql] {
910
set ok 0
911
trouble fatal "<$pname branch '$sname'> $label"
912
}
913
log write 5 integrity {\[[format %02d [incr n]]\] [expr {$ok ? "Ok " : "Failed"}] ... $header}
914
return
915
}
916
917
proc CheckCS {header label sql} {
918
upvar 1 n n
919
set ok 1
920
foreach {ctype cid} [state run $sql] {
921
set ok 0
922
trouble fatal "<$ctype $cid> $label"
923
}
924
log write 5 integrity {\[[format %02d [incr n]]\] [expr {$ok ? "Ok " : "Failed"}] ... $header}
925
return
926
}
927
928
proc CheckRevCS {header label sql} {
929
upvar 1 n n
930
set ok 1
931
foreach {cstype csid fname revnr} [state run $sql] {
932
set ok 0
933
set b "<$cstype $csid>"
934
trouble fatal "$fname <$revnr> [string map [list @ $b] $label]"
935
}
936
log write 5 integrity {\[[format %02d [incr n]]\] [expr {$ok ? "Ok " : "Failed"}] ... $header}
937
return
938
}
939
940
# # ## ### ##### ######## #############
941
## Configuration
942
943
pragma -hasinstances no ; # singleton
944
pragma -hastypeinfo no ; # no introspection
945
pragma -hastypedestroy no ; # immortal
946
947
# # ## ### ##### ######## #############
948
}
949
950
namespace eval ::vc::fossil::import::cvs {
951
namespace export integrity
952
namespace eval integrity {
953
namespace import ::vc::fossil::import::cvs::state
954
namespace import ::vc::tools::trouble
955
namespace import ::vc::tools::log
956
log register integrity
957
}
958
}
959
960
# # ## ### ##### ######## ############# #####################
961
## Ready
962
963
package provide vc::fossil::import::cvs::integrity 1.0
964
return
965

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button