Fossil SCM

Rewrote the algorithm for breaking internal dependencies to my liking. The complex part handling multiple splits has moved from the pass code to the changeset class itself, reusing the state computed for the first split. The state is a bit more complex to allow for its incremental update after a break has been done. Factored major pieces into separate procedures to keep the highlevel code readable. Added lots of official log output to help debugging in case of trouble.

aku 2007-11-10 23:44 trunk

Commit 08ebab80cd39311fed41f400d3dab0c490b029a9

Parent 95af789e1fa51e6…

3 files changed +5 -27 +362 -136 +4

~ tools/cvs2fossil/lib/c2f_pinitcsets.tcl ~ tools/cvs2fossil/lib/c2f_prev.tcl ~ tools/cvs2fossil/lib/log.tcl

M tools/cvs2fossil/lib/c2f_pinitcsets.tcl

+5 -27

		--- tools/cvs2fossil/lib/c2f_pinitcsets.tcl
		+++ tools/cvs2fossil/lib/c2f_pinitcsets.tcl
		@@ -283,39 +283,17 @@
283	283	# by splitting the problematic changeset into multiple
284	284	# fragments. The results are changesets which have no internal
285	285	# dependencies, only external ones.
286	286
287	287	log write 3 initcsets {Break internal dependencies}
288		- set n 0
	288	+ set old [llength $csets]
289	289
290	290	foreach cset $csets {
291		- # The main method for splitting does only one split, which
292		- # may not be enough. The code here iterates until no more
293		- # splits can be performed. An iterative algorithm was
294		- # chosen over a recursive one to prevent running into
295		- # stack limits.
296		-
297		- set tosplit [list $cset]
298		- set at 0
299		- while {$at < [llength $tosplit]} {
300		- # Note here how we are __not__ advancing in the list
301		- # when we were able to break the current
302		- # changeset into two pieces, causing the loop to
303		- # immediately check the first of the two pieces
304		- # again for further break possibilities. The
305		- # other piece is added at the end, thus processed
306		- # later.
307		- while {[[lindex $tosplit $at] breakinternaldependencies tosplit]} {}
308		- incr at
309		- }
310		-
311		- # At last the generated fragments are added to the main
312		- # list of changesets. The first element is skipped as it
313		- # is already in the list.
314		- foreach cset [lrange $tosplit 1 end] { lappend csets $cset ; incr n }
315		- }
316		-
	291	+ $cset breakinternaldependencies csets
	292	+ }
	293	+
	294	+ set n [expr {[llength $csets] - $old}]
317	295	log write 4 initcsets "Created [nsp $n {additional revision changeset}]"
318	296	log write 4 initcsets Ok.
319	297	return
320	298	}
321	299
322	300

	--- tools/cvs2fossil/lib/c2f_pinitcsets.tcl
	+++ tools/cvs2fossil/lib/c2f_pinitcsets.tcl
	@@ -283,39 +283,17 @@
283	# by splitting the problematic changeset into multiple
284	# fragments. The results are changesets which have no internal
285	# dependencies, only external ones.
286
287	log write 3 initcsets {Break internal dependencies}
288	set n 0
289
290	foreach cset $csets {
291	# The main method for splitting does only one split, which
292	# may not be enough. The code here iterates until no more
293	# splits can be performed. An iterative algorithm was
294	# chosen over a recursive one to prevent running into
295	# stack limits.
296
297	set tosplit [list $cset]
298	set at 0
299	while {$at < [llength $tosplit]} {
300	# Note here how we are __not__ advancing in the list
301	# when we were able to break the current
302	# changeset into two pieces, causing the loop to
303	# immediately check the first of the two pieces
304	# again for further break possibilities. The
305	# other piece is added at the end, thus processed
306	# later.
307	while {[[lindex $tosplit $at] breakinternaldependencies tosplit]} {}
308	incr at
309	}
310
311	# At last the generated fragments are added to the main
312	# list of changesets. The first element is skipped as it
313	# is already in the list.
314	foreach cset [lrange $tosplit 1 end] { lappend csets $cset ; incr n }
315	}
316
317	log write 4 initcsets "Created [nsp $n {additional revision changeset}]"
318	log write 4 initcsets Ok.
319	return
320	}
321
322

	--- tools/cvs2fossil/lib/c2f_pinitcsets.tcl
	+++ tools/cvs2fossil/lib/c2f_pinitcsets.tcl
	@@ -283,39 +283,17 @@
283	# by splitting the problematic changeset into multiple
284	# fragments. The results are changesets which have no internal
285	# dependencies, only external ones.
286
287	log write 3 initcsets {Break internal dependencies}
288	set old [llength $csets]
289
290	foreach cset $csets {
291	$cset breakinternaldependencies csets
292	}
293
294	set n [expr {[llength $csets] - $old}]






















295	log write 4 initcsets "Created [nsp $n {additional revision changeset}]"
296	log write 4 initcsets Ok.
297	return
298	}
299
300

M tools/cvs2fossil/lib/c2f_prev.tcl

+362 -136

		--- tools/cvs2fossil/lib/c2f_prev.tcl
		+++ tools/cvs2fossil/lib/c2f_prev.tcl
		@@ -16,10 +16,12 @@
16	16	# # ## ### ##### ######## ############# #####################
17	17	## Requirements
18	18
19	19	package require Tcl 8.4 ; # Required runtime.
20	20	package require snit ; # OO system.
	21	+package require vc::tools::misc ; # Text formatting
	22	+package require vc::tools::trouble ; # Error reporting.
21	23	package require vc::tools::log ; # User feedback.
22	24	package require vc::fossil::import::cvs::state ; # State storage.
23	25
24	26	# # ## ### ##### ######## ############# #####################
25	27	##
		@@ -47,158 +49,139 @@
47	49	# such. Otherwise the changeset is split into a set of
48	50	# fragments without internal dependencies, transforming the
49	51	# internal dependencies into external ones. The new changesets
50	52	# are added to the list of all changesets.
51	53
52		- # Actually at most one split is performed, resulting in at
53		- # most one additional fragment. It is the caller's
54		- # responsibility to spli the resulting fragments further.
	54	+ # We perform all necessary splits in one go, instead of only
	55	+ # one. The previous algorithm, adapted from cvs2svn, computed
	56	+ # a lot of state which was thrown away and then computed again
	57	+ # for each of the fragments. It should be easier to update and
	58	+ # reuse that state.
55	59
56		- # The code checks only sucessor dependencies, automatically
57		- # covering the predecessor dependencies as well (A sucessor
58		- # dependency a -> b is a predecessor dependency b -> a).
	60	+ # The code checks only sucessor dependencies, as this
	61	+ # automatically covers the predecessor dependencies as well (A
	62	+ # successor dependency a -> b is also a predecessor dependency
	63	+ # b -> a).
59	64
60	65	# Array of dependencies (parent -> child). This is pulled from
61	66	# the state, and limited to successors within the changeset.
	67	+
62	68	array set dependencies {}
63		-
64		- set theset ('[join $myrevisions {','}]')
65		-
66		- foreach {rid child} [state run "
67		- SELECT R.rid, R.child
68		- FROM revision R
69		- WHERE R.rid IN $theset
70		- AND R.child IS NOT NULL
71		- AND R.child IN $theset
72		- UNION
73		- SELECT R.rid, R.dbchild
74		- FROM revision R
75		- WHERE R.rid IN $theset
76		- AND R.dbchild IS NOT NULL
77		- AND R.dbchild IN $theset
78		- UNION
79		- SELECT R.rid, B.brid
80		- FROM revision R, revisionbranchchildren B
81		- WHERE R.rid IN $theset
82		- AND R.rid = B.rid
83		- AND B.brid IN $theset
84		- "] {
85		- # Consider moving this to the integrity module.
86		- if {$rid == $child} {
87		- trouble internal "Revision $rid depends on itself."
88		- }
89		- set dependencies($rid) $child
90		- }
91		-
	69	+ PullInternalDependencies dependencies $myrevisions
92	70	if {![array size dependencies]} {return 0} ; # Nothing to break.
	71	+
	72	+ log write 6 csets ...<$myid>.......................................................
93	73
94	74	# We have internal dependencies to break. We now iterate over
95	75	# all positions in the list (which is chronological, at least
96	76	# as far as the timestamps are correct and unique) and
97	77	# determine the best position for the break, by trying to
98		- # break as many dependencies as possible in one go.
99		-
100		- # First we create a map of positions to make it easier to
101		- # determine whether a dependency cross a particular index.
102		-
103		- array set pos {}
104		- array set crossing {}
105		- set n 0
106		- foreach rev $myrevisions {
107		- set pos($rev) $n
108		- set crossing($n) 0
109		- incr n
110		- }
111		-
112		- # Secondly we count the crossings per position, by iterating
113		- # over the recorded internal dependencies.
114		-
115		- foreach {rid child} [array get dependencies] {
116		- set start $pos($rid)
117		- set end $pos($child)
118		-
119		- # Note: If the timestamps are badly out of order it is
120		- # possible to have a backward successor dependency,
121		- # i.e. with start > end. We may have to swap the
122		- # indices to ensure that the following loop runs
123		- # correctly.
124		- #
125		- # Note 2: start == end is not possible. It indicates a
126		- # self-dependency due to the uniqueness of
127		- # positions, and that is something we have ruled
128		- # out already.
129		-
130		- if {$start > $end} {
131		- while {$end < $start} { incr crossing($end) ; incr end }
	78	+ # break as many dependencies as possible in one go. When a
	79	+ # break was found this is redone for the fragments coming and
	80	+ # after, after upding the crossing information.
	81	+
	82	+ # Data structures:
	83	+ # Map: POS revision id -> position in list.
	84	+ # CROSS position in list -> number of dependencies crossing it
	85	+ # DEPC dependency -> positions it crosses
	86	+ # List: RANGE Of the positions itself.
	87	+ # A dependency is a single-element map parent -> child
	88	+
	89	+ InitializeBreakState $myrevisions
	90	+
	91	+ set fragments {}
	92	+ set pending [list $range]
	93	+ set at 0
	94	+ array set breaks {}
	95	+
	96	+ while {$at < [llength $pending]} {
	97	+ set current [lindex $pending $at]
	98	+
	99	+ log write 6 csets ". . .. ... ..... ........ ............."
	100	+ log write 6 csets "Scheduled [join [PRs [lrange $pending $at end]] { }]"
	101	+ log write 6 csets "Considering [PR $current] \[$at/[llength $pending]\]"
	102	+
	103	+ set best [FindBestBreak $current]
	104	+
	105	+ if {$best < 0} {
	106	+ # The inspected range has no internal
	107	+ # dependencies. This is a complete fragment.
	108	+ lappend fragments $current
	109	+
	110	+ log write 6 csets "No breaks, final"
132	111	} else {
133		- while {$start < $end} { incr crossing($start) ; incr start }
134		- }
135		- }
136		-
137		- # Now we can determine the best break location. First we look
138		- # for the locations with the maximal number of crossings. If
139		- # there are several we look for the shortest time interval
140		- # among them. If we still have multiple possibilities after
141		- # that we select the smallest index among these.
142		-
143		- set max -1
144		- set best {}
145		-
146		- foreach key [array names crossing] {
147		- set now $crossing($key)
148		- if {$now > $max} {
149		- set max $now
150		- set best $key
151		- continue
152		- } elseif {$now == $max} {
153		- lappend best $key
154		- }
155		- }
156		-
157		- if {[llength $best] > 1} {
158		- set min -1
159		- set newbest {}
160		- foreach at $best {
161		- set rat [lindex $myrevisions $at] ; incr at
162		- set rnext [lindex $myrevisions $at] ; incr at -1
163		- set tat [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rat }] 0]
164		- set tnext [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rnext}] 0]
165		- set delta [expr {$tnext - $tat}]
166		- if {($min < 0) \|\| ($delta < $min)} {
167		- set min $delta
168		- set newbest $at
169		- } elseif {$delta == $min} {
170		- lappend newbest $at
171		- }
172		- }
173		- set best $newbest
174		- }
175		-
176		- if {[llength $best] > 1} {
177		- set best [lindex [lsort -integer -increasing $best] 0]
178		- }
179		-
180		- # Now we can split off a fragment.
181		-
182		- set bnext $best ; incr bnext
183		- set revbefore [lrange $myrevisions 0 $best]
184		- set revafter [lrange $myrevisions $bnext end]
185		-
186		- if {![llength $revbefore]} {
187		- trouble internal "Tried to split off a zero-length fragment at the beginning"
188		- }
189		- if {![llength $revafter]} {
190		- trouble internal "Tried to split off a zero-length fragment at the end"
191		- }
192		-
193		- lappend csets [set new [$type %AUTO% $myproject $mytype $mysrcid $revafter]]
194		- set myrevisions $revbefore
195		-
196		- log write 4 csets "Breaking <$myid> @$best, making <[$new id]>, cutting $crossing($best)"
197		-
198		- #puts "\tKeeping <$revbefore>"
199		- #puts "\tSplit off <$revafter>"
	112	+ # Split the range and schedule the resulting fragments
	113	+ # for further inspection. Remember the number of
	114	+ # dependencies cut before we remove them from
	115	+ # consideration, for documentation later.
	116	+
	117	+ set breaks($best) $cross($best)
	118	+
	119	+ log write 6 csets "Best break @ $best, cuts [nsp $cross($best) dependency dependencies]"
	120	+
	121	+ # Note: The value of best is an abolute location in
	122	+ # myrevisions. Use the start of current to make it an
	123	+ # index absolute to current.
	124	+
	125	+ set brel [expr {$best - [lindex $current 0]}]
	126	+ set bnext $brel ; incr bnext
	127	+ set fragbefore [lrange $current 0 $brel]
	128	+ set fragafter [lrange $current $bnext end]
	129	+
	130	+ log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
	131	+
	132	+ if {![llength $fragbefore]} {
	133	+ trouble internal "Tried to split off a zero-length fragment at the beginning"
	134	+ }
	135	+ if {![llength $fragafter]} {
	136	+ trouble internal "Tried to split off a zero-length fragment at the end"
	137	+ }
	138	+
	139	+ lappend pending $fragbefore $fragafter
	140	+ CutAt $best
	141	+ }
	142	+
	143	+ incr at
	144	+ }
	145	+
	146	+ log write 6 csets ". . .. ... ..... ........ ............."
	147	+
	148	+ # Create changesets for the fragments, reusing the current one
	149	+ # for the first fragment. We sort them in order to allow
	150	+ # checking for gaps and nice messages.
	151	+
	152	+ set fragments [lsort -index 0 -integer $fragments]
	153	+
	154	+ #puts \t.[join [PRs $fragments] .\n\t.].
	155	+
	156	+ Border [lindex $fragments 0] firsts firste
	157	+
	158	+ if {$firsts != 0} {
	159	+ trouble internal "Bad fragment start @ $firsts, gap, or before beginning of the range"
	160	+ }
	161	+
	162	+ set laste $firste
	163	+ foreach fragment [lrange $fragments 1 end] {
	164	+ Border $fragment s e
	165	+ if {$laste != ($s - 1)} {
	166	+ trouble internal "Bad fragment border <$laste \| $s>, gap or overlap"
	167	+ }
	168	+
	169	+ set new [$type %AUTO% $myproject $mytype $mysrcid [lrange $myrevisions $s $e]]
	170	+ lappend csets $new
	171	+
	172	+ log write 4 csets "Breaking <$myid> @ $laste, new <[$new id]>, cutting $breaks($laste)"
	173	+
	174	+ set laste $e
	175	+ }
	176	+
	177	+ if {$laste != ([llength $myrevisions]-1)} {
	178	+ trouble internal "Bad fragment end @ $laste, gap, or beyond end of the range"
	179	+ }
	180	+
	181	+ # Put the first fragment into the current changeset.
	182	+ set myrevisions [lrange $myrevisions 0 $firste]
200	183
201	184	return 1
202	185	}
203	186
204	187	method persist {} {
		@@ -242,10 +225,251 @@
242	225	foreach {tid name} [state run {
243	226	SELECT tid, name FROM cstype;
244	227	}] { set mycstype($name) $tid }
245	228	return
246	229	}
	230	+
	231	+ proc PullInternalDependencies {dv revisions} {
	232	+ upvar 1 $dv dependencies
	233	+ set theset ('[join $revisions {','}]')
	234	+
	235	+ foreach {rid child} [state run "
	236	+ -- Primary children
	237	+ SELECT R.rid, R.child
	238	+ FROM revision R
	239	+ WHERE R.rid IN $theset
	240	+ AND R.child IS NOT NULL
	241	+ AND R.child IN $theset
	242	+ UNION
	243	+ -- Transition NTDB to trunk
	244	+ SELECT R.rid, R.dbchild
	245	+ FROM revision R
	246	+ WHERE R.rid IN $theset
	247	+ AND R.dbchild IS NOT NULL
	248	+ AND R.dbchild IN $theset
	249	+ UNION
	250	+ -- Secondary (branch) children
	251	+ SELECT R.rid, B.brid
	252	+ FROM revision R, revisionbranchchildren B
	253	+ WHERE R.rid IN $theset
	254	+ AND R.rid = B.rid
	255	+ AND B.brid IN $theset
	256	+ "] {
	257	+ # Consider moving this to the integrity module.
	258	+ if {$rid == $child} {
	259	+ trouble internal "Revision $rid depends on itself."
	260	+ }
	261	+ set dependencies($rid) $child
	262	+ }
	263	+ }
	264	+
	265	+ proc InitializeBreakState {revisions} {
	266	+ upvar 1 pos pos cross cross range range depc depc delta delta \
	267	+ dependencies dependencies
	268	+
	269	+ # First we create a map of positions to make it easier to
	270	+ # determine whether a dependency crosses a particular index.
	271	+
	272	+ array set pos {}
	273	+ array set cross {}
	274	+ array set depc {}
	275	+ set range {}
	276	+ set n 0
	277	+ foreach rev $revisions {
	278	+ lappend range $n
	279	+ set pos($rev) $n
	280	+ set cross($n) 0
	281	+ incr n
	282	+ }
	283	+
	284	+ # Secondly we count the crossings per position, by iterating
	285	+ # over the recorded internal dependencies.
	286	+
	287	+ # Note: If the timestamps are badly out of order it is
	288	+ # possible to have a backward successor dependency,
	289	+ # i.e. with start > end. We may have to swap the indices
	290	+ # to ensure that the following loop runs correctly.
	291	+ #
	292	+ # Note 2: start == end is not possible. It indicates a
	293	+ # self-dependency due to the uniqueness of positions,
	294	+ # and that is something we have ruled out already, see
	295	+ # PullInternalDependencies.
	296	+
	297	+ foreach {rid child} [array get dependencies] {
	298	+ set dkey [list $rid $child]
	299	+ set start $pos($rid)
	300	+ set end $pos($child)
	301	+ set crosses {}
	302	+
	303	+ if {$start > $end} {
	304	+ while {$end < $start} {
	305	+ lappend crosses $end
	306	+ incr cross($end)
	307	+ incr end
	308	+ }
	309	+ } else {
	310	+ while {$start < $end} {
	311	+ lappend crosses $start
	312	+ incr cross($start)
	313	+ incr start
	314	+ }
	315	+ }
	316	+ set depc($dkey) $crosses
	317	+ }
	318	+
	319	+ InitializeDeltas $revisions
	320	+ return
	321	+ }
	322	+
	323	+ proc InitializeDeltas {revisions} {
	324	+ upvar 1 delta delta
	325	+
	326	+ # Pull the timestamps for all revisions in the changesets and
	327	+ # compute their deltas for use by the break finder.
	328	+
	329	+ array set delta {}
	330	+ array set stamp {}
	331	+
	332	+ set theset ('[join $revisions {','}]')
	333	+ foreach {rid time} [state run "
	334	+ SELECT R.rid, R.date
	335	+ FROM revision R
	336	+ WHERE R.rid IN $theset
	337	+ "] {
	338	+ set stamp($rid) $time
	339	+ }
	340	+
	341	+ set n 0
	342	+ foreach rid [lrange $revisions 0 end-1] rnext [lrange $revisions 1 end] {
	343	+ set delta($n) [expr {$stamp($rnext) - $stamp($rid)}]
	344	+ incr n
	345	+ }
	346	+ return
	347	+ }
	348	+
	349	+ proc FindBestBreak {range} {
	350	+ upvar 1 cross cross delta delta
	351	+
	352	+ # Determine the best break location in the given range of
	353	+ # positions. First we look for the locations with the maximal
	354	+ # number of crossings. If there are several we look for the
	355	+ # shortest time interval among them. If we still have multiple
	356	+ # possibilities after that we select the earliest location
	357	+ # among these.
	358	+
	359	+ # Note: If the maximal number of crossings is 0 then the range
	360	+ # has no internal dependencies, and no break location at
	361	+ # all. This possibility is signaled via result -1.
	362	+
	363	+ # Note: A range of length 1 or less cannot have internal
	364	+ # dependencies, as that needs at least two revisions in
	365	+ # the range.
	366	+
	367	+ if {[llength $range] < 2} { return -1 }
	368	+
	369	+ set max -1
	370	+ set best {}
	371	+
	372	+ foreach location $range {
	373	+ set crossings $cross($location)
	374	+ if {$crossings > $max} {
	375	+ set max $crossings
	376	+ set best [list $location]
	377	+ continue
	378	+ } elseif {$crossings == $max} {
	379	+ lappend best $location
	380	+ }
	381	+ }
	382	+
	383	+ if {$max == 0} { return -1 }
	384	+ if {[llength $best] == 1} { return [lindex $best 0] }
	385	+
	386	+ set locations $best
	387	+ set best {}
	388	+ set min -1
	389	+
	390	+ foreach location $locations {
	391	+ set interval $delta($location)
	392	+ if {($min < 0) \|\| ($interval < $min)} {
	393	+ set min $interval
	394	+ set best [list $location]
	395	+ } elseif {$interval == $min} {
	396	+ lappend best $location
	397	+ }
	398	+ }
	399	+
	400	+ if {[llength $best] == 1} { return [lindex $best 0] }
	401	+
	402	+ return [lindex [lsort -integer -increasing $best] 0]
	403	+ }
	404	+
	405	+ proc CutAt {location} {
	406	+ upvar 1 cross cross depc depc
	407	+
	408	+ # It was decided to split the changeset at the given
	409	+ # location. This cuts a number of dependencies. Here we update
	410	+ # the cross information so that the break finder has accurate
	411	+ # data when we look at the generated fragments.
	412	+
	413	+ set six [log visible? 6]
	414	+
	415	+ foreach {dep range} [array get depc] {
	416	+ # Check all dependencies still known, take their range and
	417	+ # see if the break location falls within.
	418	+
	419	+ Border $range s e
	420	+ if {$location < $s} continue ; # break before range, ignore
	421	+ if {$location > $e} continue ; # break after range, ignore.
	422	+
	423	+ # This dependency crosses the break location. We remove it
	424	+ # from the crossings counters, and then also from the set
	425	+ # of known dependencies, as we are done with it.
	426	+
	427	+ foreach loc $depc($dep) { incr cross($loc) -1 }
	428	+ unset depc($dep)
	429	+
	430	+ if {!$six} continue
	431	+
	432	+ struct::list assign $dep parent child
	433	+ log write 6 csets "Broke dependency [PD $parent] --> [PD $child]"
	434	+ }
	435	+
	436	+ return
	437	+ }
	438	+
	439	+ # Print identifying data for a revision (project, file, dotted rev
	440	+ # number), for high verbosity log output.
	441	+
	442	+ proc PD {id} {
	443	+ foreach {p f r} [state run {
	444	+ SELECT P.name , F.name, R.rev
	445	+ FROM revision R, file F, project P
	446	+ WHERE R.rid = $id
	447	+ AND R.fid = F.fid
	448	+ AND F.pid = P.pid
	449	+ }] break
	450	+ return "'$p : $f/$r'"
	451	+ }
	452	+
	453	+ # Printing one or more ranges, formatted, and only their border to
	454	+ # keep the strings short.
	455	+
	456	+ proc PRs {ranges} {
	457	+ return [struct::list map $ranges [myproc PR]]
	458	+ }
	459	+
	460	+ proc PR {range} {
	461	+ Border $range s e
	462	+ return <${s}...${e}>
	463	+ }
	464	+
	465	+ proc Border {range sv ev} {
	466	+ upvar 1 $sv s $ev e
	467	+ set s [lindex $range 0]
	468	+ set e [lindex $range end]
	469	+ return
	470	+ }
247	471
248	472	# # ## ### ##### ######## #############
249	473	## Configuration
250	474
251	475	pragma -hastypeinfo no ; # no type introspection
		@@ -257,10 +481,12 @@
257	481
258	482	namespace eval ::vc::fossil::import::cvs::project {
259	483	namespace export rev
260	484	namespace eval rev {
261	485	namespace import ::vc::fossil::import::cvs::state
	486	+ namespace import ::vc::tools::misc::*
	487	+ namespace import ::vc::tools::trouble
262	488	namespace import ::vc::tools::log
263	489	log register csets
264	490	}
265	491	}
266	492
267	493

	--- tools/cvs2fossil/lib/c2f_prev.tcl
	+++ tools/cvs2fossil/lib/c2f_prev.tcl
	@@ -16,10 +16,12 @@
16	# # ## ### ##### ######## ############# #####################
17	## Requirements
18
19	package require Tcl 8.4 ; # Required runtime.
20	package require snit ; # OO system.


21	package require vc::tools::log ; # User feedback.
22	package require vc::fossil::import::cvs::state ; # State storage.
23
24	# # ## ### ##### ######## ############# #####################
25	##
	@@ -47,158 +49,139 @@
47	# such. Otherwise the changeset is split into a set of
48	# fragments without internal dependencies, transforming the
49	# internal dependencies into external ones. The new changesets
50	# are added to the list of all changesets.
51
52	# Actually at most one split is performed, resulting in at
53	# most one additional fragment. It is the caller's
54	# responsibility to spli the resulting fragments further.


55
56	# The code checks only sucessor dependencies, automatically
57	# covering the predecessor dependencies as well (A sucessor
58	# dependency a -> b is a predecessor dependency b -> a).

59
60	# Array of dependencies (parent -> child). This is pulled from
61	# the state, and limited to successors within the changeset.

62	array set dependencies {}
63
64	set theset ('[join $myrevisions {','}]')
65
66	foreach {rid child} [state run "
67	SELECT R.rid, R.child
68	FROM revision R
69	WHERE R.rid IN $theset
70	AND R.child IS NOT NULL
71	AND R.child IN $theset
72	UNION
73	SELECT R.rid, R.dbchild
74	FROM revision R
75	WHERE R.rid IN $theset
76	AND R.dbchild IS NOT NULL
77	AND R.dbchild IN $theset
78	UNION
79	SELECT R.rid, B.brid
80	FROM revision R, revisionbranchchildren B
81	WHERE R.rid IN $theset
82	AND R.rid = B.rid
83	AND B.brid IN $theset
84	"] {
85	# Consider moving this to the integrity module.
86	if {$rid == $child} {
87	trouble internal "Revision $rid depends on itself."
88	}
89	set dependencies($rid) $child
90	}
91
92	if {![array size dependencies]} {return 0} ; # Nothing to break.


93
94	# We have internal dependencies to break. We now iterate over
95	# all positions in the list (which is chronological, at least
96	# as far as the timestamps are correct and unique) and
97	# determine the best position for the break, by trying to
98	# break as many dependencies as possible in one go.
99
100	# First we create a map of positions to make it easier to
101	# determine whether a dependency cross a particular index.
102
103	array set pos {}
104	array set crossing {}
105	set n 0
106	foreach rev $myrevisions {
107	set pos($rev) $n
108	set crossing($n) 0
109	incr n
110	}
111
112	# Secondly we count the crossings per position, by iterating
113	# over the recorded internal dependencies.
114
115	foreach {rid child} [array get dependencies] {
116	set start $pos($rid)
117	set end $pos($child)
118
119	# Note: If the timestamps are badly out of order it is
120	# possible to have a backward successor dependency,
121	# i.e. with start > end. We may have to swap the
122	# indices to ensure that the following loop runs
123	# correctly.
124	#
125	# Note 2: start == end is not possible. It indicates a
126	# self-dependency due to the uniqueness of
127	# positions, and that is something we have ruled
128	# out already.
129
130	if {$start > $end} {
131	while {$end < $start} { incr crossing($end) ; incr end }
132	} else {
133	while {$start < $end} { incr crossing($start) ; incr start }
134	}
135	}
136
137	# Now we can determine the best break location. First we look
138	# for the locations with the maximal number of crossings. If
139	# there are several we look for the shortest time interval
140	# among them. If we still have multiple possibilities after
141	# that we select the smallest index among these.
142
143	set max -1
144	set best {}
145
146	foreach key [array names crossing] {
147	set now $crossing($key)
148	if {$now > $max} {
149	set max $now
150	set best $key
151	continue
152	} elseif {$now == $max} {
153	lappend best $key
154	}
155	}
156
157	if {[llength $best] > 1} {
158	set min -1
159	set newbest {}
160	foreach at $best {
161	set rat [lindex $myrevisions $at] ; incr at
162	set rnext [lindex $myrevisions $at] ; incr at -1
163	set tat [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rat }] 0]
164	set tnext [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rnext}] 0]
165	set delta [expr {$tnext - $tat}]
166	if {($min < 0) \|\| ($delta < $min)} {
167	set min $delta
168	set newbest $at
169	} elseif {$delta == $min} {
170	lappend newbest $at
171	}
172	}
173	set best $newbest
174	}
175
176	if {[llength $best] > 1} {
177	set best [lindex [lsort -integer -increasing $best] 0]
178	}
179
180	# Now we can split off a fragment.
181
182	set bnext $best ; incr bnext
183	set revbefore [lrange $myrevisions 0 $best]
184	set revafter [lrange $myrevisions $bnext end]
185
186	if {![llength $revbefore]} {
187	trouble internal "Tried to split off a zero-length fragment at the beginning"
188	}
189	if {![llength $revafter]} {
190	trouble internal "Tried to split off a zero-length fragment at the end"
191	}
192
193	lappend csets [set new [$type %AUTO% $myproject $mytype $mysrcid $revafter]]
194	set myrevisions $revbefore
195
196	log write 4 csets "Breaking <$myid> @$best, making <[$new id]>, cutting $crossing($best)"
197
198	#puts "\tKeeping <$revbefore>"
199	#puts "\tSplit off <$revafter>"




200
201	return 1
202	}
203
204	method persist {} {
	@@ -242,10 +225,251 @@
242	foreach {tid name} [state run {
243	SELECT tid, name FROM cstype;
244	}] { set mycstype($name) $tid }
245	return
246	}

















































































































































































































































247
248	# # ## ### ##### ######## #############
249	## Configuration
250
251	pragma -hastypeinfo no ; # no type introspection
	@@ -257,10 +481,12 @@
257
258	namespace eval ::vc::fossil::import::cvs::project {
259	namespace export rev
260	namespace eval rev {
261	namespace import ::vc::fossil::import::cvs::state


262	namespace import ::vc::tools::log
263	log register csets
264	}
265	}
266
267

	--- tools/cvs2fossil/lib/c2f_prev.tcl
	+++ tools/cvs2fossil/lib/c2f_prev.tcl
	@@ -16,10 +16,12 @@
16	# # ## ### ##### ######## ############# #####################
17	## Requirements
18
19	package require Tcl 8.4 ; # Required runtime.
20	package require snit ; # OO system.
21	package require vc::tools::misc ; # Text formatting
22	package require vc::tools::trouble ; # Error reporting.
23	package require vc::tools::log ; # User feedback.
24	package require vc::fossil::import::cvs::state ; # State storage.
25
26	# # ## ### ##### ######## ############# #####################
27	##
	@@ -47,158 +49,139 @@
49	# such. Otherwise the changeset is split into a set of
50	# fragments without internal dependencies, transforming the
51	# internal dependencies into external ones. The new changesets
52	# are added to the list of all changesets.
53
54	# We perform all necessary splits in one go, instead of only
55	# one. The previous algorithm, adapted from cvs2svn, computed
56	# a lot of state which was thrown away and then computed again
57	# for each of the fragments. It should be easier to update and
58	# reuse that state.
59
60	# The code checks only sucessor dependencies, as this
61	# automatically covers the predecessor dependencies as well (A
62	# successor dependency a -> b is also a predecessor dependency
63	# b -> a).
64
65	# Array of dependencies (parent -> child). This is pulled from
66	# the state, and limited to successors within the changeset.
67
68	array set dependencies {}
69	PullInternalDependencies dependencies $myrevisions




























70	if {![array size dependencies]} {return 0} ; # Nothing to break.
71
72	log write 6 csets ...<$myid>.......................................................
73
74	# We have internal dependencies to break. We now iterate over
75	# all positions in the list (which is chronological, at least
76	# as far as the timestamps are correct and unique) and
77	# determine the best position for the break, by trying to
78	# break as many dependencies as possible in one go. When a
79	# break was found this is redone for the fragments coming and
80	# after, after upding the crossing information.
81
82	# Data structures:
83	# Map: POS revision id -> position in list.
84	# CROSS position in list -> number of dependencies crossing it
85	# DEPC dependency -> positions it crosses
86	# List: RANGE Of the positions itself.
87	# A dependency is a single-element map parent -> child
88
89	InitializeBreakState $myrevisions
90
91	set fragments {}
92	set pending [list $range]
93	set at 0
94	array set breaks {}
95
96	while {$at < [llength $pending]} {
97	set current [lindex $pending $at]
98
99	log write 6 csets ". . .. ... ..... ........ ............."
100	log write 6 csets "Scheduled [join [PRs [lrange $pending $at end]] { }]"
101	log write 6 csets "Considering [PR $current] \[$at/[llength $pending]\]"
102
103	set best [FindBestBreak $current]
104
105	if {$best < 0} {
106	# The inspected range has no internal
107	# dependencies. This is a complete fragment.
108	lappend fragments $current
109
110	log write 6 csets "No breaks, final"

111	} else {
112	# Split the range and schedule the resulting fragments
113	# for further inspection. Remember the number of
114	# dependencies cut before we remove them from
115	# consideration, for documentation later.
116
117	set breaks($best) $cross($best)
118
119	log write 6 csets "Best break @ $best, cuts [nsp $cross($best) dependency dependencies]"
120
121	# Note: The value of best is an abolute location in
122	# myrevisions. Use the start of current to make it an
123	# index absolute to current.
124
125	set brel [expr {$best - [lindex $current 0]}]
126	set bnext $brel ; incr bnext
127	set fragbefore [lrange $current 0 $brel]
128	set fragafter [lrange $current $bnext end]
129
130	log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
131
132	if {![llength $fragbefore]} {
133	trouble internal "Tried to split off a zero-length fragment at the beginning"
134	}
135	if {![llength $fragafter]} {
136	trouble internal "Tried to split off a zero-length fragment at the end"
137	}
138
139	lappend pending $fragbefore $fragafter
140	CutAt $best
141	}
142
143	incr at
144	}
145
146	log write 6 csets ". . .. ... ..... ........ ............."
147
148	# Create changesets for the fragments, reusing the current one
149	# for the first fragment. We sort them in order to allow
150	# checking for gaps and nice messages.
151
152	set fragments [lsort -index 0 -integer $fragments]
153
154	#puts \t.[join [PRs $fragments] .\n\t.].
155
156	Border [lindex $fragments 0] firsts firste
157
158	if {$firsts != 0} {
159	trouble internal "Bad fragment start @ $firsts, gap, or before beginning of the range"
160	}
161
162	set laste $firste
163	foreach fragment [lrange $fragments 1 end] {
164	Border $fragment s e
165	if {$laste != ($s - 1)} {
166	trouble internal "Bad fragment border <$laste \| $s>, gap or overlap"
167	}
168
169	set new [$type %AUTO% $myproject $mytype $mysrcid [lrange $myrevisions $s $e]]
170	lappend csets $new
171
172	log write 4 csets "Breaking <$myid> @ $laste, new <[$new id]>, cutting $breaks($laste)"
173
174	set laste $e
175	}
176
177	if {$laste != ([llength $myrevisions]-1)} {
178	trouble internal "Bad fragment end @ $laste, gap, or beyond end of the range"
179	}
180
181	# Put the first fragment into the current changeset.
182	set myrevisions [lrange $myrevisions 0 $firste]
183
184	return 1
185	}
186
187	method persist {} {
	@@ -242,10 +225,251 @@
225	foreach {tid name} [state run {
226	SELECT tid, name FROM cstype;
227	}] { set mycstype($name) $tid }
228	return
229	}
230
231	proc PullInternalDependencies {dv revisions} {
232	upvar 1 $dv dependencies
233	set theset ('[join $revisions {','}]')
234
235	foreach {rid child} [state run "
236	-- Primary children
237	SELECT R.rid, R.child
238	FROM revision R
239	WHERE R.rid IN $theset
240	AND R.child IS NOT NULL
241	AND R.child IN $theset
242	UNION
243	-- Transition NTDB to trunk
244	SELECT R.rid, R.dbchild
245	FROM revision R
246	WHERE R.rid IN $theset
247	AND R.dbchild IS NOT NULL
248	AND R.dbchild IN $theset
249	UNION
250	-- Secondary (branch) children
251	SELECT R.rid, B.brid
252	FROM revision R, revisionbranchchildren B
253	WHERE R.rid IN $theset
254	AND R.rid = B.rid
255	AND B.brid IN $theset
256	"] {
257	# Consider moving this to the integrity module.
258	if {$rid == $child} {
259	trouble internal "Revision $rid depends on itself."
260	}
261	set dependencies($rid) $child
262	}
263	}
264
265	proc InitializeBreakState {revisions} {
266	upvar 1 pos pos cross cross range range depc depc delta delta \
267	dependencies dependencies
268
269	# First we create a map of positions to make it easier to
270	# determine whether a dependency crosses a particular index.
271
272	array set pos {}
273	array set cross {}
274	array set depc {}
275	set range {}
276	set n 0
277	foreach rev $revisions {
278	lappend range $n
279	set pos($rev) $n
280	set cross($n) 0
281	incr n
282	}
283
284	# Secondly we count the crossings per position, by iterating
285	# over the recorded internal dependencies.
286
287	# Note: If the timestamps are badly out of order it is
288	# possible to have a backward successor dependency,
289	# i.e. with start > end. We may have to swap the indices
290	# to ensure that the following loop runs correctly.
291	#
292	# Note 2: start == end is not possible. It indicates a
293	# self-dependency due to the uniqueness of positions,
294	# and that is something we have ruled out already, see
295	# PullInternalDependencies.
296
297	foreach {rid child} [array get dependencies] {
298	set dkey [list $rid $child]
299	set start $pos($rid)
300	set end $pos($child)
301	set crosses {}
302
303	if {$start > $end} {
304	while {$end < $start} {
305	lappend crosses $end
306	incr cross($end)
307	incr end
308	}
309	} else {
310	while {$start < $end} {
311	lappend crosses $start
312	incr cross($start)
313	incr start
314	}
315	}
316	set depc($dkey) $crosses
317	}
318
319	InitializeDeltas $revisions
320	return
321	}
322
323	proc InitializeDeltas {revisions} {
324	upvar 1 delta delta
325
326	# Pull the timestamps for all revisions in the changesets and
327	# compute their deltas for use by the break finder.
328
329	array set delta {}
330	array set stamp {}
331
332	set theset ('[join $revisions {','}]')
333	foreach {rid time} [state run "
334	SELECT R.rid, R.date
335	FROM revision R
336	WHERE R.rid IN $theset
337	"] {
338	set stamp($rid) $time
339	}
340
341	set n 0
342	foreach rid [lrange $revisions 0 end-1] rnext [lrange $revisions 1 end] {
343	set delta($n) [expr {$stamp($rnext) - $stamp($rid)}]
344	incr n
345	}
346	return
347	}
348
349	proc FindBestBreak {range} {
350	upvar 1 cross cross delta delta
351
352	# Determine the best break location in the given range of
353	# positions. First we look for the locations with the maximal
354	# number of crossings. If there are several we look for the
355	# shortest time interval among them. If we still have multiple
356	# possibilities after that we select the earliest location
357	# among these.
358
359	# Note: If the maximal number of crossings is 0 then the range
360	# has no internal dependencies, and no break location at
361	# all. This possibility is signaled via result -1.
362
363	# Note: A range of length 1 or less cannot have internal
364	# dependencies, as that needs at least two revisions in
365	# the range.
366
367	if {[llength $range] < 2} { return -1 }
368
369	set max -1
370	set best {}
371
372	foreach location $range {
373	set crossings $cross($location)
374	if {$crossings > $max} {
375	set max $crossings
376	set best [list $location]
377	continue
378	} elseif {$crossings == $max} {
379	lappend best $location
380	}
381	}
382
383	if {$max == 0} { return -1 }
384	if {[llength $best] == 1} { return [lindex $best 0] }
385
386	set locations $best
387	set best {}
388	set min -1
389
390	foreach location $locations {
391	set interval $delta($location)
392	if {($min < 0) \|\| ($interval < $min)} {
393	set min $interval
394	set best [list $location]
395	} elseif {$interval == $min} {
396	lappend best $location
397	}
398	}
399
400	if {[llength $best] == 1} { return [lindex $best 0] }
401
402	return [lindex [lsort -integer -increasing $best] 0]
403	}
404
405	proc CutAt {location} {
406	upvar 1 cross cross depc depc
407
408	# It was decided to split the changeset at the given
409	# location. This cuts a number of dependencies. Here we update
410	# the cross information so that the break finder has accurate
411	# data when we look at the generated fragments.
412
413	set six [log visible? 6]
414
415	foreach {dep range} [array get depc] {
416	# Check all dependencies still known, take their range and
417	# see if the break location falls within.
418
419	Border $range s e
420	if {$location < $s} continue ; # break before range, ignore
421	if {$location > $e} continue ; # break after range, ignore.
422
423	# This dependency crosses the break location. We remove it
424	# from the crossings counters, and then also from the set
425	# of known dependencies, as we are done with it.
426
427	foreach loc $depc($dep) { incr cross($loc) -1 }
428	unset depc($dep)
429
430	if {!$six} continue
431
432	struct::list assign $dep parent child
433	log write 6 csets "Broke dependency [PD $parent] --> [PD $child]"
434	}
435
436	return
437	}
438
439	# Print identifying data for a revision (project, file, dotted rev
440	# number), for high verbosity log output.
441
442	proc PD {id} {
443	foreach {p f r} [state run {
444	SELECT P.name , F.name, R.rev
445	FROM revision R, file F, project P
446	WHERE R.rid = $id
447	AND R.fid = F.fid
448	AND F.pid = P.pid
449	}] break
450	return "'$p : $f/$r'"
451	}
452
453	# Printing one or more ranges, formatted, and only their border to
454	# keep the strings short.
455
456	proc PRs {ranges} {
457	return [struct::list map $ranges [myproc PR]]
458	}
459
460	proc PR {range} {
461	Border $range s e
462	return <${s}...${e}>
463	}
464
465	proc Border {range sv ev} {
466	upvar 1 $sv s $ev e
467	set s [lindex $range 0]
468	set e [lindex $range end]
469	return
470	}
471
472	# # ## ### ##### ######## #############
473	## Configuration
474
475	pragma -hastypeinfo no ; # no type introspection
	@@ -257,10 +481,12 @@
481
482	namespace eval ::vc::fossil::import::cvs::project {
483	namespace export rev
484	namespace eval rev {
485	namespace import ::vc::fossil::import::cvs::state
486	namespace import ::vc::tools::misc::*
487	namespace import ::vc::tools::trouble
488	namespace import ::vc::tools::log
489	log register csets
490	}
491	}
492
493

M tools/cvs2fossil/lib/log.tcl

		--- tools/cvs2fossil/lib/log.tcl
		+++ tools/cvs2fossil/lib/log.tcl
		@@ -44,10 +44,14 @@
44	44	typemethod progress {verbosity system n max} {
45	45	if {$verbosity > $myloglevel} return
46	46	uplevel #0 [linsert $mylogcmd end progress [System $system] $n $max]
47	47	return
48	48	}
	49	+
	50	+ typemethod visible? {verbosity} {
	51	+ return [expr {$verbosity <= $myloglevel}]
	52	+ }
49	53
50	54	# # ## ### ##### ######## #############
51	55	# Public API, Administrative methods
52	56
53	57	# Set verbosity to the chosen 'level'. Only messages with a level
54	58

	--- tools/cvs2fossil/lib/log.tcl
	+++ tools/cvs2fossil/lib/log.tcl
	@@ -44,10 +44,14 @@
44	typemethod progress {verbosity system n max} {
45	if {$verbosity > $myloglevel} return
46	uplevel #0 [linsert $mylogcmd end progress [System $system] $n $max]
47	return
48	}




49
50	# # ## ### ##### ######## #############
51	# Public API, Administrative methods
52
53	# Set verbosity to the chosen 'level'. Only messages with a level
54

	--- tools/cvs2fossil/lib/log.tcl
	+++ tools/cvs2fossil/lib/log.tcl
	@@ -44,10 +44,14 @@
44	typemethod progress {verbosity system n max} {
45	if {$verbosity > $myloglevel} return
46	uplevel #0 [linsert $mylogcmd end progress [System $system] $n $max]
47	return
48	}
49
50	typemethod visible? {verbosity} {
51	return [expr {$verbosity <= $myloglevel}]
52	}
53
54	# # ## ### ##### ######## #############
55	# Public API, Administrative methods
56
57	# Set verbosity to the chosen 'level'. Only messages with a level
58

Fossil SCM

Keyboard Shortcuts