Fossil SCM

Moved the integrity checks for split fragments into separate command. Reworked breaking of internal dependencies to contrain the length of the pending list. That part of the system is still a memory hog, especially for large changesets. Added notes about this and the successor retrieval being a bottleneck.

aku 2007-11-30 06:50 trunk

Commit c14e8f84cd33db73c2eeee82e2781f964d650752

Parent facb4a8721e735b…

1 file changed +129 -79

~ tools/cvs2fossil/lib/c2f_prev.tcl

M tools/cvs2fossil/lib/c2f_prev.tcl

+129 -79

		--- tools/cvs2fossil/lib/c2f_prev.tcl
		+++ tools/cvs2fossil/lib/c2f_prev.tcl
		@@ -142,10 +142,20 @@
142	142	set mypremap [array get tmp]
143	143	return $mypremap
144	144	}
145	145
146	146	method breakinternaldependencies {} {
	147	+
	148	+ ##
	149	+ ## NOTE: This method, maybe in conjunction with its caller
	150	+ ## seems to be a memory hog, especially for large
	151	+ ## changesets, with 'large' meaning to have a 'long list
	152	+ ## of items, several thousand'. Investigate where the
	153	+ ## memory is spent and then look for ways of rectifying
	154	+ ## the problem.
	155	+ ##
	156	+
147	157	# This method inspects the changesets for internal
148	158	# dependencies. Nothing is done if there are no
149	159	# such. Otherwise the changeset is split into a set of
150	160	# fragments without internal dependencies, transforming the
151	161	# internal dependencies into external ones. The new changesets
		@@ -187,58 +197,73 @@
187	197	# A dependency is a single-element map parent -> child
188	198
189	199	InitializeBreakState $myitems
190	200
191	201	set fragments {}
192		- set pending [list $range]
193		- set at 0
	202	+ set new [list $range]
194	203	array set breaks {}
195	204
196		- while {$at < [llength $pending]} {
197		- set current [lindex $pending $at]
198		-
199		- log write 6 csets {. . .. ... ..... ........ .............}
200		- log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]}
201		- log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
202		-
203		- set best [FindBestBreak $current]
204		-
205		- if {$best < 0} {
206		- # The inspected range has no internal
207		- # dependencies. This is a complete fragment.
208		- lappend fragments $current
209		-
210		- log write 6 csets "No breaks, final"
211		- } else {
212		- # Split the range and schedule the resulting fragments
213		- # for further inspection. Remember the number of
214		- # dependencies cut before we remove them from
215		- # consideration, for documentation later.
216		-
217		- set breaks($best) $cross($best)
218		-
219		- log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
220		-
221		- # Note: The value of best is an abolute location in
222		- # myitems. Use the start of current to make it an
223		- # index absolute to current.
224		-
225		- set brel [expr {$best - [lindex $current 0]}]
226		- set bnext $brel ; incr bnext
227		- set fragbefore [lrange $current 0 $brel]
228		- set fragafter [lrange $current $bnext end]
229		-
230		- log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
231		-
232		- integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
233		- integrity assert {[llength $fragafter]} {Found zero-length fragment at the end}
234		-
235		- lappend pending $fragbefore $fragafter
236		- CutAt $best
237		- }
238		-
239		- incr at
	205	+ # Instead of one list holding both processed and pending
	206	+ # fragments we use two, one for the framents to process, one
	207	+ # to hold the new fragments, and the latter is copied to the
	208	+ # former when they run out. This keeps the list of pending
	209	+ # fragments short without sacrificing speed by shifting stuff
	210	+ # down. We especially drop the memory of fragments broken
	211	+ # during processing after a short time, instead of letting it
	212	+ # consume memory.
	213	+
	214	+ while {[llength $new]} {
	215	+
	216	+ set pending $new
	217	+ set new {}
	218	+ set at 0
	219	+
	220	+ while {$at < [llength $pending]} {
	221	+ set current [lindex $pending $at]
	222	+
	223	+ log write 6 csets {. . .. ... ..... ........ .............}
	224	+ log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]}
	225	+ log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
	226	+
	227	+ set best [FindBestBreak $current]
	228	+
	229	+ if {$best < 0} {
	230	+ # The inspected range has no internal
	231	+ # dependencies. This is a complete fragment.
	232	+ lappend fragments $current
	233	+
	234	+ log write 6 csets "No breaks, final"
	235	+ } else {
	236	+ # Split the range and schedule the resulting
	237	+ # fragments for further inspection. Remember the
	238	+ # number of dependencies cut before we remove them
	239	+ # from consideration, for documentation later.
	240	+
	241	+ set breaks($best) $cross($best)
	242	+
	243	+ log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
	244	+
	245	+ # Note: The value of best is an abolute location
	246	+ # in myitems. Use the start of current to make it
	247	+ # an index absolute to current.
	248	+
	249	+ set brel [expr {$best - [lindex $current 0]}]
	250	+ set bnext $brel ; incr bnext
	251	+ set fragbefore [lrange $current 0 $brel]
	252	+ set fragafter [lrange $current $bnext end]
	253	+
	254	+ log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
	255	+
	256	+ integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
	257	+ integrity assert {[llength $fragafter]} {Found zero-length fragment at the end}
	258	+
	259	+ lappend new $fragbefore $fragafter
	260	+ CutAt $best
	261	+ }
	262	+
	263	+ incr at
	264	+ }
240	265	}
241	266
242	267	log write 6 csets ". . .. ... ..... ........ ............."
243	268
244	269	# (*) We clear out the associated part of the myitemmap
		@@ -339,11 +364,11 @@
339	364	set mychangesets [lreplace $mychangesets $pos $pos]
340	365	return
341	366	}
342	367
343	368	method selfreferential {} {
344		- log write 9 csets {Checking [$self str] /[llength $myitems]}
	369	+ log write 7 csets {Checking [$self str] /[llength $myitems]}
345	370
346	371	if {![struct::set contains [$self successors] $self]} {
347	372	return 0
348	373	}
349	374	if {[log verbosity?] < 8} { return 1 }
		@@ -378,40 +403,12 @@
378	403	#
379	404	# Note: The item lists found in args are tagged items. They
380	405	# have to have the same type as the changeset, being subsets
381	406	# of its items. This is checked in Untag1.
382	407
383		- # Constraints: No fragment must be empty. All fragments have
384		- # to be subsets of the cset. The union has to cover the
385		- # original. All pairwise intersections have to be empty.
386		-
387	408	log write 8 csets {OLD: [lsort [$cset items]]}
388		-
389		- set cover {}
390		- foreach fragmentitems $args {
391		- log write 8 csets {NEW: [lsort $fragmentitems]}
392		-
393		- integrity assert {
394		- ![struct::set empty $fragmentitems]
395		- } {changeset fragment is empty}
396		- integrity assert {
397		- [struct::set subsetof $fragmentitems [$cset items]]
398		- } {changeset fragment is not a subset}
399		- struct::set add cover $fragmentitems
400		- }
401		- integrity assert {
402		- [struct::set equal $cover [$cset items]]
403		- } {The fragments do not cover the original changeset}
404		- set i 1
405		- foreach fia $args {
406		- foreach fib [lrange $args $i end] {
407		- integrity assert {
408		- [struct::set empty [struct::set intersect $fia $fib]]
409		- } {The fragments <$fia> and <$fib> overlap}
410		- }
411		- incr i
412		- }
	409	+ ValidateFragments $cset $args
413	410
414	411	# All checks pass, actually perform the split.
415	412
416	413	struct::list assign [$cset data] project cstype cssrc
417	414
		@@ -433,10 +430,15 @@
433	430	}
434	431
435	432	trouble abort?
436	433	return $newcsets
437	434	}
	435	+
	436	+ typemethod itemstr {item} {
	437	+ struct::list assign $item itype iid
	438	+ return [$itype str $iid]
	439	+ }
438	440
439	441	typemethod strlist {changesets} {
440	442	return [join [struct::list map $changesets [myproc ID]]]
441	443	}
442	444
		@@ -450,13 +452,55 @@
450	452	struct::list assign $theitem t i
451	453	integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'}
452	454	return $i
453	455	}
454	456
455		- typemethod itemstr {item} {
456		- struct::list assign $item itype iid
457		- return [$itype str $iid]
	457	+ proc ValidateFragments {cset fragments} {
	458	+ # Check the various integrity constraints for the fragments
	459	+ # specifying how to split the changeset:
	460	+ #
	461	+ # * We must have two or more fragments, as splitting a
	462	+ # changeset into one makes no sense.
	463	+ # * No fragment may be empty.
	464	+ # * All fragments have to be true subsets of the items in the
	465	+ # changeset to split. The 'true' is implied because none are
	466	+ # allowed to be empty, so each has to be smaller than the
	467	+ # total.
	468	+ # * The union of the fragments has to be the item set of the
	469	+ # changeset.
	470	+ # * The fragment must not overlap, i.e. their pairwise
	471	+ # intersections have to be empty.
	472	+
	473	+ set cover {}
	474	+ foreach fragmentitems $args {
	475	+ log write 8 csets {NEW: [lsort $fragmentitems]}
	476	+
	477	+ integrity assert {
	478	+ ![struct::set empty $fragmentitems]
	479	+ } {changeset fragment is empty}
	480	+
	481	+ integrity assert {
	482	+ [struct::set subsetof $fragmentitems [$cset items]]
	483	+ } {changeset fragment is not a subset}
	484	+ struct::set add cover $fragmentitems
	485	+ }
	486	+
	487	+ integrity assert {
	488	+ [struct::set equal $cover [$cset items]]
	489	+ } {The fragments do not cover the original changeset}
	490	+
	491	+ set i 1
	492	+ foreach fia $args {
	493	+ foreach fib [lrange $args $i end] {
	494	+ integrity assert {
	495	+ [struct::set empty [struct::set intersect $fia $fib]]
	496	+ } {The fragments <$fia> and <$fib> overlap}
	497	+ }
	498	+ incr i
	499	+ }
	500	+
	501	+ return
458	502	}
459	503
460	504	# # ## ### ##### ######## #############
461	505	## State
462	506
		@@ -747,10 +791,16 @@
747	791	pragma -hastypeinfo no ; # no type introspection
748	792	pragma -hasinfo no ; # no object introspection
749	793
750	794	# # ## ### ##### ######## #############
751	795	}
	796	+
	797	+##
	798	+## NOTE: The successor and predecessor methods defined by the classes
	799	+## below are -- bottle necks --. Look for ways to make the SQL
	800	+## faster.
	801	+##
752	802
753	803	# # ## ### ##### ######## ############# #####################
754	804	## Helper singleton. Commands for revision changesets.
755	805
756	806	snit::type ::vc::fossil::import::cvs::project::rev::rev {
757	807

	--- tools/cvs2fossil/lib/c2f_prev.tcl
	+++ tools/cvs2fossil/lib/c2f_prev.tcl
	@@ -142,10 +142,20 @@
142	set mypremap [array get tmp]
143	return $mypremap
144	}
145
146	method breakinternaldependencies {} {










147	# This method inspects the changesets for internal
148	# dependencies. Nothing is done if there are no
149	# such. Otherwise the changeset is split into a set of
150	# fragments without internal dependencies, transforming the
151	# internal dependencies into external ones. The new changesets
	@@ -187,58 +197,73 @@
187	# A dependency is a single-element map parent -> child
188
189	InitializeBreakState $myitems
190
191	set fragments {}
192	set pending [list $range]
193	set at 0
194	array set breaks {}
195
196	while {$at < [llength $pending]} {
197	set current [lindex $pending $at]
198
199	log write 6 csets {. . .. ... ..... ........ .............}
200	log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]}
201	log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
202
203	set best [FindBestBreak $current]
204
205	if {$best < 0} {
206	# The inspected range has no internal
207	# dependencies. This is a complete fragment.
208	lappend fragments $current
209
210	log write 6 csets "No breaks, final"
211	} else {
212	# Split the range and schedule the resulting fragments
213	# for further inspection. Remember the number of
214	# dependencies cut before we remove them from
215	# consideration, for documentation later.
216
217	set breaks($best) $cross($best)
218
219	log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
220
221	# Note: The value of best is an abolute location in
222	# myitems. Use the start of current to make it an
223	# index absolute to current.
224
225	set brel [expr {$best - [lindex $current 0]}]
226	set bnext $brel ; incr bnext
227	set fragbefore [lrange $current 0 $brel]
228	set fragafter [lrange $current $bnext end]
229
230	log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
231
232	integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
233	integrity assert {[llength $fragafter]} {Found zero-length fragment at the end}
234
235	lappend pending $fragbefore $fragafter
236	CutAt $best
237	}
238
239	incr at
















240	}
241
242	log write 6 csets ". . .. ... ..... ........ ............."
243
244	# (*) We clear out the associated part of the myitemmap
	@@ -339,11 +364,11 @@
339	set mychangesets [lreplace $mychangesets $pos $pos]
340	return
341	}
342
343	method selfreferential {} {
344	log write 9 csets {Checking [$self str] /[llength $myitems]}
345
346	if {![struct::set contains [$self successors] $self]} {
347	return 0
348	}
349	if {[log verbosity?] < 8} { return 1 }
	@@ -378,40 +403,12 @@
378	#
379	# Note: The item lists found in args are tagged items. They
380	# have to have the same type as the changeset, being subsets
381	# of its items. This is checked in Untag1.
382
383	# Constraints: No fragment must be empty. All fragments have
384	# to be subsets of the cset. The union has to cover the
385	# original. All pairwise intersections have to be empty.
386
387	log write 8 csets {OLD: [lsort [$cset items]]}
388
389	set cover {}
390	foreach fragmentitems $args {
391	log write 8 csets {NEW: [lsort $fragmentitems]}
392
393	integrity assert {
394	![struct::set empty $fragmentitems]
395	} {changeset fragment is empty}
396	integrity assert {
397	[struct::set subsetof $fragmentitems [$cset items]]
398	} {changeset fragment is not a subset}
399	struct::set add cover $fragmentitems
400	}
401	integrity assert {
402	[struct::set equal $cover [$cset items]]
403	} {The fragments do not cover the original changeset}
404	set i 1
405	foreach fia $args {
406	foreach fib [lrange $args $i end] {
407	integrity assert {
408	[struct::set empty [struct::set intersect $fia $fib]]
409	} {The fragments <$fia> and <$fib> overlap}
410	}
411	incr i
412	}
413
414	# All checks pass, actually perform the split.
415
416	struct::list assign [$cset data] project cstype cssrc
417
	@@ -433,10 +430,15 @@
433	}
434
435	trouble abort?
436	return $newcsets
437	}





438
439	typemethod strlist {changesets} {
440	return [join [struct::list map $changesets [myproc ID]]]
441	}
442
	@@ -450,13 +452,55 @@
450	struct::list assign $theitem t i
451	integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'}
452	return $i
453	}
454
455	typemethod itemstr {item} {
456	struct::list assign $item itype iid
457	return [$itype str $iid]










































458	}
459
460	# # ## ### ##### ######## #############
461	## State
462
	@@ -747,10 +791,16 @@
747	pragma -hastypeinfo no ; # no type introspection
748	pragma -hasinfo no ; # no object introspection
749
750	# # ## ### ##### ######## #############
751	}






752
753	# # ## ### ##### ######## ############# #####################
754	## Helper singleton. Commands for revision changesets.
755
756	snit::type ::vc::fossil::import::cvs::project::rev::rev {
757

	--- tools/cvs2fossil/lib/c2f_prev.tcl
	+++ tools/cvs2fossil/lib/c2f_prev.tcl
	@@ -142,10 +142,20 @@
142	set mypremap [array get tmp]
143	return $mypremap
144	}
145
146	method breakinternaldependencies {} {
147
148	##
149	## NOTE: This method, maybe in conjunction with its caller
150	## seems to be a memory hog, especially for large
151	## changesets, with 'large' meaning to have a 'long list
152	## of items, several thousand'. Investigate where the
153	## memory is spent and then look for ways of rectifying
154	## the problem.
155	##
156
157	# This method inspects the changesets for internal
158	# dependencies. Nothing is done if there are no
159	# such. Otherwise the changeset is split into a set of
160	# fragments without internal dependencies, transforming the
161	# internal dependencies into external ones. The new changesets
	@@ -187,58 +197,73 @@
197	# A dependency is a single-element map parent -> child
198
199	InitializeBreakState $myitems
200
201	set fragments {}
202	set new [list $range]

203	array set breaks {}
204
205	# Instead of one list holding both processed and pending
206	# fragments we use two, one for the framents to process, one
207	# to hold the new fragments, and the latter is copied to the
208	# former when they run out. This keeps the list of pending
209	# fragments short without sacrificing speed by shifting stuff
210	# down. We especially drop the memory of fragments broken
211	# during processing after a short time, instead of letting it
212	# consume memory.
213
214	while {[llength $new]} {
215
216	set pending $new
217	set new {}
218	set at 0
219
220	while {$at < [llength $pending]} {
221	set current [lindex $pending $at]
222
223	log write 6 csets {. . .. ... ..... ........ .............}
224	log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]}
225	log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
226
227	set best [FindBestBreak $current]
228
229	if {$best < 0} {
230	# The inspected range has no internal
231	# dependencies. This is a complete fragment.
232	lappend fragments $current
233
234	log write 6 csets "No breaks, final"
235	} else {
236	# Split the range and schedule the resulting
237	# fragments for further inspection. Remember the
238	# number of dependencies cut before we remove them
239	# from consideration, for documentation later.
240
241	set breaks($best) $cross($best)
242
243	log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
244
245	# Note: The value of best is an abolute location
246	# in myitems. Use the start of current to make it
247	# an index absolute to current.
248
249	set brel [expr {$best - [lindex $current 0]}]
250	set bnext $brel ; incr bnext
251	set fragbefore [lrange $current 0 $brel]
252	set fragafter [lrange $current $bnext end]
253
254	log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
255
256	integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
257	integrity assert {[llength $fragafter]} {Found zero-length fragment at the end}
258
259	lappend new $fragbefore $fragafter
260	CutAt $best
261	}
262
263	incr at
264	}
265	}
266
267	log write 6 csets ". . .. ... ..... ........ ............."
268
269	# (*) We clear out the associated part of the myitemmap
	@@ -339,11 +364,11 @@
364	set mychangesets [lreplace $mychangesets $pos $pos]
365	return
366	}
367
368	method selfreferential {} {
369	log write 7 csets {Checking [$self str] /[llength $myitems]}
370
371	if {![struct::set contains [$self successors] $self]} {
372	return 0
373	}
374	if {[log verbosity?] < 8} { return 1 }
	@@ -378,40 +403,12 @@
403	#
404	# Note: The item lists found in args are tagged items. They
405	# have to have the same type as the changeset, being subsets
406	# of its items. This is checked in Untag1.
407




408	log write 8 csets {OLD: [lsort [$cset items]]}
409	ValidateFragments $cset $args
























410
411	# All checks pass, actually perform the split.
412
413	struct::list assign [$cset data] project cstype cssrc
414
	@@ -433,10 +430,15 @@
430	}
431
432	trouble abort?
433	return $newcsets
434	}
435
436	typemethod itemstr {item} {
437	struct::list assign $item itype iid
438	return [$itype str $iid]
439	}
440
441	typemethod strlist {changesets} {
442	return [join [struct::list map $changesets [myproc ID]]]
443	}
444
	@@ -450,13 +452,55 @@
452	struct::list assign $theitem t i
453	integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'}
454	return $i
455	}
456
457	proc ValidateFragments {cset fragments} {
458	# Check the various integrity constraints for the fragments
459	# specifying how to split the changeset:
460	#
461	# * We must have two or more fragments, as splitting a
462	# changeset into one makes no sense.
463	# * No fragment may be empty.
464	# * All fragments have to be true subsets of the items in the
465	# changeset to split. The 'true' is implied because none are
466	# allowed to be empty, so each has to be smaller than the
467	# total.
468	# * The union of the fragments has to be the item set of the
469	# changeset.
470	# * The fragment must not overlap, i.e. their pairwise
471	# intersections have to be empty.
472
473	set cover {}
474	foreach fragmentitems $args {
475	log write 8 csets {NEW: [lsort $fragmentitems]}
476
477	integrity assert {
478	![struct::set empty $fragmentitems]
479	} {changeset fragment is empty}
480
481	integrity assert {
482	[struct::set subsetof $fragmentitems [$cset items]]
483	} {changeset fragment is not a subset}
484	struct::set add cover $fragmentitems
485	}
486
487	integrity assert {
488	[struct::set equal $cover [$cset items]]
489	} {The fragments do not cover the original changeset}
490
491	set i 1
492	foreach fia $args {
493	foreach fib [lrange $args $i end] {
494	integrity assert {
495	[struct::set empty [struct::set intersect $fia $fib]]
496	} {The fragments <$fia> and <$fib> overlap}
497	}
498	incr i
499	}
500
501	return
502	}
503
504	# # ## ### ##### ######## #############
505	## State
506
	@@ -747,10 +791,16 @@
791	pragma -hastypeinfo no ; # no type introspection
792	pragma -hasinfo no ; # no object introspection
793
794	# # ## ### ##### ######## #############
795	}
796
797	##
798	## NOTE: The successor and predecessor methods defined by the classes
799	## below are -- bottle necks --. Look for ways to make the SQL
800	## faster.
801	##
802
803	# # ## ### ##### ######## ############# #####################
804	## Helper singleton. Commands for revision changesets.
805
806	snit::type ::vc::fossil::import::cvs::project::rev::rev {
807

Fossil SCM

Keyboard Shortcuts