Fossil SCM

Moved the integrity checks for split fragments into separate command. Reworked breaking of internal dependencies to contrain the length of the pending list. That part of the system is still a memory hog, especially for large changesets. Added notes about this and the successor retrieval being a bottleneck.

aku 2007-11-30 06:50 trunk
Commit c14e8f84cd33db73c2eeee82e2781f964d650752
--- tools/cvs2fossil/lib/c2f_prev.tcl
+++ tools/cvs2fossil/lib/c2f_prev.tcl
@@ -142,10 +142,20 @@
142142
set mypremap [array get tmp]
143143
return $mypremap
144144
}
145145
146146
method breakinternaldependencies {} {
147
+
148
+ ##
149
+ ## NOTE: This method, maybe in conjunction with its caller
150
+ ## seems to be a memory hog, especially for large
151
+ ## changesets, with 'large' meaning to have a 'long list
152
+ ## of items, several thousand'. Investigate where the
153
+ ## memory is spent and then look for ways of rectifying
154
+ ## the problem.
155
+ ##
156
+
147157
# This method inspects the changesets for internal
148158
# dependencies. Nothing is done if there are no
149159
# such. Otherwise the changeset is split into a set of
150160
# fragments without internal dependencies, transforming the
151161
# internal dependencies into external ones. The new changesets
@@ -187,58 +197,73 @@
187197
# A dependency is a single-element map parent -> child
188198
189199
InitializeBreakState $myitems
190200
191201
set fragments {}
192
- set pending [list $range]
193
- set at 0
202
+ set new [list $range]
194203
array set breaks {}
195204
196
- while {$at < [llength $pending]} {
197
- set current [lindex $pending $at]
198
-
199
- log write 6 csets {. . .. ... ..... ........ .............}
200
- log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]}
201
- log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
202
-
203
- set best [FindBestBreak $current]
204
-
205
- if {$best < 0} {
206
- # The inspected range has no internal
207
- # dependencies. This is a complete fragment.
208
- lappend fragments $current
209
-
210
- log write 6 csets "No breaks, final"
211
- } else {
212
- # Split the range and schedule the resulting fragments
213
- # for further inspection. Remember the number of
214
- # dependencies cut before we remove them from
215
- # consideration, for documentation later.
216
-
217
- set breaks($best) $cross($best)
218
-
219
- log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
220
-
221
- # Note: The value of best is an abolute location in
222
- # myitems. Use the start of current to make it an
223
- # index absolute to current.
224
-
225
- set brel [expr {$best - [lindex $current 0]}]
226
- set bnext $brel ; incr bnext
227
- set fragbefore [lrange $current 0 $brel]
228
- set fragafter [lrange $current $bnext end]
229
-
230
- log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
231
-
232
- integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
233
- integrity assert {[llength $fragafter]} {Found zero-length fragment at the end}
234
-
235
- lappend pending $fragbefore $fragafter
236
- CutAt $best
237
- }
238
-
239
- incr at
205
+ # Instead of one list holding both processed and pending
206
+ # fragments we use two, one for the framents to process, one
207
+ # to hold the new fragments, and the latter is copied to the
208
+ # former when they run out. This keeps the list of pending
209
+ # fragments short without sacrificing speed by shifting stuff
210
+ # down. We especially drop the memory of fragments broken
211
+ # during processing after a short time, instead of letting it
212
+ # consume memory.
213
+
214
+ while {[llength $new]} {
215
+
216
+ set pending $new
217
+ set new {}
218
+ set at 0
219
+
220
+ while {$at < [llength $pending]} {
221
+ set current [lindex $pending $at]
222
+
223
+ log write 6 csets {. . .. ... ..... ........ .............}
224
+ log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]}
225
+ log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
226
+
227
+ set best [FindBestBreak $current]
228
+
229
+ if {$best < 0} {
230
+ # The inspected range has no internal
231
+ # dependencies. This is a complete fragment.
232
+ lappend fragments $current
233
+
234
+ log write 6 csets "No breaks, final"
235
+ } else {
236
+ # Split the range and schedule the resulting
237
+ # fragments for further inspection. Remember the
238
+ # number of dependencies cut before we remove them
239
+ # from consideration, for documentation later.
240
+
241
+ set breaks($best) $cross($best)
242
+
243
+ log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
244
+
245
+ # Note: The value of best is an abolute location
246
+ # in myitems. Use the start of current to make it
247
+ # an index absolute to current.
248
+
249
+ set brel [expr {$best - [lindex $current 0]}]
250
+ set bnext $brel ; incr bnext
251
+ set fragbefore [lrange $current 0 $brel]
252
+ set fragafter [lrange $current $bnext end]
253
+
254
+ log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
255
+
256
+ integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
257
+ integrity assert {[llength $fragafter]} {Found zero-length fragment at the end}
258
+
259
+ lappend new $fragbefore $fragafter
260
+ CutAt $best
261
+ }
262
+
263
+ incr at
264
+ }
240265
}
241266
242267
log write 6 csets ". . .. ... ..... ........ ............."
243268
244269
# (*) We clear out the associated part of the myitemmap
@@ -339,11 +364,11 @@
339364
set mychangesets [lreplace $mychangesets $pos $pos]
340365
return
341366
}
342367
343368
method selfreferential {} {
344
- log write 9 csets {Checking [$self str] /[llength $myitems]}
369
+ log write 7 csets {Checking [$self str] /[llength $myitems]}
345370
346371
if {![struct::set contains [$self successors] $self]} {
347372
return 0
348373
}
349374
if {[log verbosity?] < 8} { return 1 }
@@ -378,40 +403,12 @@
378403
#
379404
# Note: The item lists found in args are tagged items. They
380405
# have to have the same type as the changeset, being subsets
381406
# of its items. This is checked in Untag1.
382407
383
- # Constraints: No fragment must be empty. All fragments have
384
- # to be subsets of the cset. The union has to cover the
385
- # original. All pairwise intersections have to be empty.
386
-
387408
log write 8 csets {OLD: [lsort [$cset items]]}
388
-
389
- set cover {}
390
- foreach fragmentitems $args {
391
- log write 8 csets {NEW: [lsort $fragmentitems]}
392
-
393
- integrity assert {
394
- ![struct::set empty $fragmentitems]
395
- } {changeset fragment is empty}
396
- integrity assert {
397
- [struct::set subsetof $fragmentitems [$cset items]]
398
- } {changeset fragment is not a subset}
399
- struct::set add cover $fragmentitems
400
- }
401
- integrity assert {
402
- [struct::set equal $cover [$cset items]]
403
- } {The fragments do not cover the original changeset}
404
- set i 1
405
- foreach fia $args {
406
- foreach fib [lrange $args $i end] {
407
- integrity assert {
408
- [struct::set empty [struct::set intersect $fia $fib]]
409
- } {The fragments <$fia> and <$fib> overlap}
410
- }
411
- incr i
412
- }
409
+ ValidateFragments $cset $args
413410
414411
# All checks pass, actually perform the split.
415412
416413
struct::list assign [$cset data] project cstype cssrc
417414
@@ -433,10 +430,15 @@
433430
}
434431
435432
trouble abort?
436433
return $newcsets
437434
}
435
+
436
+ typemethod itemstr {item} {
437
+ struct::list assign $item itype iid
438
+ return [$itype str $iid]
439
+ }
438440
439441
typemethod strlist {changesets} {
440442
return [join [struct::list map $changesets [myproc ID]]]
441443
}
442444
@@ -450,13 +452,55 @@
450452
struct::list assign $theitem t i
451453
integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'}
452454
return $i
453455
}
454456
455
- typemethod itemstr {item} {
456
- struct::list assign $item itype iid
457
- return [$itype str $iid]
457
+ proc ValidateFragments {cset fragments} {
458
+ # Check the various integrity constraints for the fragments
459
+ # specifying how to split the changeset:
460
+ #
461
+ # * We must have two or more fragments, as splitting a
462
+ # changeset into one makes no sense.
463
+ # * No fragment may be empty.
464
+ # * All fragments have to be true subsets of the items in the
465
+ # changeset to split. The 'true' is implied because none are
466
+ # allowed to be empty, so each has to be smaller than the
467
+ # total.
468
+ # * The union of the fragments has to be the item set of the
469
+ # changeset.
470
+ # * The fragment must not overlap, i.e. their pairwise
471
+ # intersections have to be empty.
472
+
473
+ set cover {}
474
+ foreach fragmentitems $args {
475
+ log write 8 csets {NEW: [lsort $fragmentitems]}
476
+
477
+ integrity assert {
478
+ ![struct::set empty $fragmentitems]
479
+ } {changeset fragment is empty}
480
+
481
+ integrity assert {
482
+ [struct::set subsetof $fragmentitems [$cset items]]
483
+ } {changeset fragment is not a subset}
484
+ struct::set add cover $fragmentitems
485
+ }
486
+
487
+ integrity assert {
488
+ [struct::set equal $cover [$cset items]]
489
+ } {The fragments do not cover the original changeset}
490
+
491
+ set i 1
492
+ foreach fia $args {
493
+ foreach fib [lrange $args $i end] {
494
+ integrity assert {
495
+ [struct::set empty [struct::set intersect $fia $fib]]
496
+ } {The fragments <$fia> and <$fib> overlap}
497
+ }
498
+ incr i
499
+ }
500
+
501
+ return
458502
}
459503
460504
# # ## ### ##### ######## #############
461505
## State
462506
@@ -747,10 +791,16 @@
747791
pragma -hastypeinfo no ; # no type introspection
748792
pragma -hasinfo no ; # no object introspection
749793
750794
# # ## ### ##### ######## #############
751795
}
796
+
797
+##
798
+## NOTE: The successor and predecessor methods defined by the classes
799
+## below are -- bottle necks --. Look for ways to make the SQL
800
+## faster.
801
+##
752802
753803
# # ## ### ##### ######## ############# #####################
754804
## Helper singleton. Commands for revision changesets.
755805
756806
snit::type ::vc::fossil::import::cvs::project::rev::rev {
757807
--- tools/cvs2fossil/lib/c2f_prev.tcl
+++ tools/cvs2fossil/lib/c2f_prev.tcl
@@ -142,10 +142,20 @@
142 set mypremap [array get tmp]
143 return $mypremap
144 }
145
146 method breakinternaldependencies {} {
 
 
 
 
 
 
 
 
 
 
147 # This method inspects the changesets for internal
148 # dependencies. Nothing is done if there are no
149 # such. Otherwise the changeset is split into a set of
150 # fragments without internal dependencies, transforming the
151 # internal dependencies into external ones. The new changesets
@@ -187,58 +197,73 @@
187 # A dependency is a single-element map parent -> child
188
189 InitializeBreakState $myitems
190
191 set fragments {}
192 set pending [list $range]
193 set at 0
194 array set breaks {}
195
196 while {$at < [llength $pending]} {
197 set current [lindex $pending $at]
198
199 log write 6 csets {. . .. ... ..... ........ .............}
200 log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]}
201 log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
202
203 set best [FindBestBreak $current]
204
205 if {$best < 0} {
206 # The inspected range has no internal
207 # dependencies. This is a complete fragment.
208 lappend fragments $current
209
210 log write 6 csets "No breaks, final"
211 } else {
212 # Split the range and schedule the resulting fragments
213 # for further inspection. Remember the number of
214 # dependencies cut before we remove them from
215 # consideration, for documentation later.
216
217 set breaks($best) $cross($best)
218
219 log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
220
221 # Note: The value of best is an abolute location in
222 # myitems. Use the start of current to make it an
223 # index absolute to current.
224
225 set brel [expr {$best - [lindex $current 0]}]
226 set bnext $brel ; incr bnext
227 set fragbefore [lrange $current 0 $brel]
228 set fragafter [lrange $current $bnext end]
229
230 log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
231
232 integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
233 integrity assert {[llength $fragafter]} {Found zero-length fragment at the end}
234
235 lappend pending $fragbefore $fragafter
236 CutAt $best
237 }
238
239 incr at
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240 }
241
242 log write 6 csets ". . .. ... ..... ........ ............."
243
244 # (*) We clear out the associated part of the myitemmap
@@ -339,11 +364,11 @@
339 set mychangesets [lreplace $mychangesets $pos $pos]
340 return
341 }
342
343 method selfreferential {} {
344 log write 9 csets {Checking [$self str] /[llength $myitems]}
345
346 if {![struct::set contains [$self successors] $self]} {
347 return 0
348 }
349 if {[log verbosity?] < 8} { return 1 }
@@ -378,40 +403,12 @@
378 #
379 # Note: The item lists found in args are tagged items. They
380 # have to have the same type as the changeset, being subsets
381 # of its items. This is checked in Untag1.
382
383 # Constraints: No fragment must be empty. All fragments have
384 # to be subsets of the cset. The union has to cover the
385 # original. All pairwise intersections have to be empty.
386
387 log write 8 csets {OLD: [lsort [$cset items]]}
388
389 set cover {}
390 foreach fragmentitems $args {
391 log write 8 csets {NEW: [lsort $fragmentitems]}
392
393 integrity assert {
394 ![struct::set empty $fragmentitems]
395 } {changeset fragment is empty}
396 integrity assert {
397 [struct::set subsetof $fragmentitems [$cset items]]
398 } {changeset fragment is not a subset}
399 struct::set add cover $fragmentitems
400 }
401 integrity assert {
402 [struct::set equal $cover [$cset items]]
403 } {The fragments do not cover the original changeset}
404 set i 1
405 foreach fia $args {
406 foreach fib [lrange $args $i end] {
407 integrity assert {
408 [struct::set empty [struct::set intersect $fia $fib]]
409 } {The fragments <$fia> and <$fib> overlap}
410 }
411 incr i
412 }
413
414 # All checks pass, actually perform the split.
415
416 struct::list assign [$cset data] project cstype cssrc
417
@@ -433,10 +430,15 @@
433 }
434
435 trouble abort?
436 return $newcsets
437 }
 
 
 
 
 
438
439 typemethod strlist {changesets} {
440 return [join [struct::list map $changesets [myproc ID]]]
441 }
442
@@ -450,13 +452,55 @@
450 struct::list assign $theitem t i
451 integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'}
452 return $i
453 }
454
455 typemethod itemstr {item} {
456 struct::list assign $item itype iid
457 return [$itype str $iid]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458 }
459
460 # # ## ### ##### ######## #############
461 ## State
462
@@ -747,10 +791,16 @@
747 pragma -hastypeinfo no ; # no type introspection
748 pragma -hasinfo no ; # no object introspection
749
750 # # ## ### ##### ######## #############
751 }
 
 
 
 
 
 
752
753 # # ## ### ##### ######## ############# #####################
754 ## Helper singleton. Commands for revision changesets.
755
756 snit::type ::vc::fossil::import::cvs::project::rev::rev {
757
--- tools/cvs2fossil/lib/c2f_prev.tcl
+++ tools/cvs2fossil/lib/c2f_prev.tcl
@@ -142,10 +142,20 @@
142 set mypremap [array get tmp]
143 return $mypremap
144 }
145
146 method breakinternaldependencies {} {
147
148 ##
149 ## NOTE: This method, maybe in conjunction with its caller
150 ## seems to be a memory hog, especially for large
151 ## changesets, with 'large' meaning to have a 'long list
152 ## of items, several thousand'. Investigate where the
153 ## memory is spent and then look for ways of rectifying
154 ## the problem.
155 ##
156
157 # This method inspects the changesets for internal
158 # dependencies. Nothing is done if there are no
159 # such. Otherwise the changeset is split into a set of
160 # fragments without internal dependencies, transforming the
161 # internal dependencies into external ones. The new changesets
@@ -187,58 +197,73 @@
197 # A dependency is a single-element map parent -> child
198
199 InitializeBreakState $myitems
200
201 set fragments {}
202 set new [list $range]
 
203 array set breaks {}
204
205 # Instead of one list holding both processed and pending
206 # fragments we use two, one for the framents to process, one
207 # to hold the new fragments, and the latter is copied to the
208 # former when they run out. This keeps the list of pending
209 # fragments short without sacrificing speed by shifting stuff
210 # down. We especially drop the memory of fragments broken
211 # during processing after a short time, instead of letting it
212 # consume memory.
213
214 while {[llength $new]} {
215
216 set pending $new
217 set new {}
218 set at 0
219
220 while {$at < [llength $pending]} {
221 set current [lindex $pending $at]
222
223 log write 6 csets {. . .. ... ..... ........ .............}
224 log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]}
225 log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
226
227 set best [FindBestBreak $current]
228
229 if {$best < 0} {
230 # The inspected range has no internal
231 # dependencies. This is a complete fragment.
232 lappend fragments $current
233
234 log write 6 csets "No breaks, final"
235 } else {
236 # Split the range and schedule the resulting
237 # fragments for further inspection. Remember the
238 # number of dependencies cut before we remove them
239 # from consideration, for documentation later.
240
241 set breaks($best) $cross($best)
242
243 log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
244
245 # Note: The value of best is an abolute location
246 # in myitems. Use the start of current to make it
247 # an index absolute to current.
248
249 set brel [expr {$best - [lindex $current 0]}]
250 set bnext $brel ; incr bnext
251 set fragbefore [lrange $current 0 $brel]
252 set fragafter [lrange $current $bnext end]
253
254 log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
255
256 integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
257 integrity assert {[llength $fragafter]} {Found zero-length fragment at the end}
258
259 lappend new $fragbefore $fragafter
260 CutAt $best
261 }
262
263 incr at
264 }
265 }
266
267 log write 6 csets ". . .. ... ..... ........ ............."
268
269 # (*) We clear out the associated part of the myitemmap
@@ -339,11 +364,11 @@
364 set mychangesets [lreplace $mychangesets $pos $pos]
365 return
366 }
367
368 method selfreferential {} {
369 log write 7 csets {Checking [$self str] /[llength $myitems]}
370
371 if {![struct::set contains [$self successors] $self]} {
372 return 0
373 }
374 if {[log verbosity?] < 8} { return 1 }
@@ -378,40 +403,12 @@
403 #
404 # Note: The item lists found in args are tagged items. They
405 # have to have the same type as the changeset, being subsets
406 # of its items. This is checked in Untag1.
407
 
 
 
 
408 log write 8 csets {OLD: [lsort [$cset items]]}
409 ValidateFragments $cset $args
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
411 # All checks pass, actually perform the split.
412
413 struct::list assign [$cset data] project cstype cssrc
414
@@ -433,10 +430,15 @@
430 }
431
432 trouble abort?
433 return $newcsets
434 }
435
436 typemethod itemstr {item} {
437 struct::list assign $item itype iid
438 return [$itype str $iid]
439 }
440
441 typemethod strlist {changesets} {
442 return [join [struct::list map $changesets [myproc ID]]]
443 }
444
@@ -450,13 +452,55 @@
452 struct::list assign $theitem t i
453 integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'}
454 return $i
455 }
456
457 proc ValidateFragments {cset fragments} {
458 # Check the various integrity constraints for the fragments
459 # specifying how to split the changeset:
460 #
461 # * We must have two or more fragments, as splitting a
462 # changeset into one makes no sense.
463 # * No fragment may be empty.
464 # * All fragments have to be true subsets of the items in the
465 # changeset to split. The 'true' is implied because none are
466 # allowed to be empty, so each has to be smaller than the
467 # total.
468 # * The union of the fragments has to be the item set of the
469 # changeset.
470 # * The fragment must not overlap, i.e. their pairwise
471 # intersections have to be empty.
472
473 set cover {}
474 foreach fragmentitems $args {
475 log write 8 csets {NEW: [lsort $fragmentitems]}
476
477 integrity assert {
478 ![struct::set empty $fragmentitems]
479 } {changeset fragment is empty}
480
481 integrity assert {
482 [struct::set subsetof $fragmentitems [$cset items]]
483 } {changeset fragment is not a subset}
484 struct::set add cover $fragmentitems
485 }
486
487 integrity assert {
488 [struct::set equal $cover [$cset items]]
489 } {The fragments do not cover the original changeset}
490
491 set i 1
492 foreach fia $args {
493 foreach fib [lrange $args $i end] {
494 integrity assert {
495 [struct::set empty [struct::set intersect $fia $fib]]
496 } {The fragments <$fia> and <$fib> overlap}
497 }
498 incr i
499 }
500
501 return
502 }
503
504 # # ## ### ##### ######## #############
505 ## State
506
@@ -747,10 +791,16 @@
791 pragma -hastypeinfo no ; # no type introspection
792 pragma -hasinfo no ; # no object introspection
793
794 # # ## ### ##### ######## #############
795 }
796
797 ##
798 ## NOTE: The successor and predecessor methods defined by the classes
799 ## below are -- bottle necks --. Look for ways to make the SQL
800 ## faster.
801 ##
802
803 # # ## ### ##### ######## ############# #####################
804 ## Helper singleton. Commands for revision changesets.
805
806 snit::type ::vc::fossil::import::cvs::project::rev::rev {
807

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button