Fossil SCM

rcs parser rewritten to snit type, singleton, using callbacks to convey information. Got a grip on regexp -start -indices (^ -> \A, indices are absolute). This means that I now don't have to shift the unprocessed string down in memory, it is all index processing now. This makes for a much faster parser, especially for large files with many revisions (example: ChangeLogs).

aku 2007-10-06 21:00 trunk
Commit e7bb3d073d90a44b8de6ecaa99943054d83a06d5
--- tools/cvs2fossil/lib/rcsparser.tcl
+++ tools/cvs2fossil/lib/rcsparser.tcl
@@ -1,378 +1,423 @@
1
-# -----------------------------------------------------------------------------
2
-# Tool packages. Parsing RCS files.
3
-#
4
-# Some of the information in RCS files is skipped over, most
5
-# importantly the actual delta texts. The users of this parser need
6
-# only the meta-data about when revisions were added, the tree
7
-# (branching) structure, commit messages.
8
-#
9
-# The parser is based on Recursive Descent.
10
-
11
-# -----------------------------------------------------------------------------
12
-# Requirements
13
-
14
-package require Tcl 8.4
15
-package require fileutil ; # Tcllib (cat)
16
-package require vc::tools::log ; # User feedback
17
-
18
-namespace eval ::vc::rcs::parser {
19
- namespace import ::vc::tools::log
20
- log register rcs
21
-}
22
-
23
-# -----------------------------------------------------------------------------
24
-# API
25
-
26
-# vc::rcs::parser::process file
27
-#
28
-# Parses the rcs file and returns a dictionary containing the meta
29
-# data. The following keys are used
30
-#
31
-# Key Meaning
32
-# --- -------
33
-# 'head' head revision
34
-# 'branch' ?
35
-# 'symbol' dict (symbol -> revision)
36
-# 'lock' dict (symbol -> revision)
37
-# 'comment' file comment
38
-# 'expand' ?
39
-# 'date' dict (revision -> date)
40
-# 'author' dict (revision -> author)
41
-# 'state' dict (revision -> state)
42
-# 'parent' dict (revision -> parent revision)
43
-# 'commit' dict (revision -> commit message)
44
-#
45
-# The state 'dead' has special meaning, the user should know that.
46
-
47
-# -----------------------------------------------------------------------------
48
-# API Implementation
49
-
50
-proc ::vc::rcs::parser::configure {key value} {
51
- variable cache
52
- switch -exact -- $key {
53
- -cache {
54
- set cache $value
55
- }
56
- default {
57
- return -code error "Unknown switch $key, expected one of -cache"
58
- }
59
- }
60
- return
61
-}
62
-
63
-proc ::vc::rcs::parser::process {path} {
64
- variable cache
65
-
66
- if {!$cache} {
67
- return [Process $path]
68
- }
69
-
70
- set cachefile [Cache $path]
71
- if {
72
- [file exists $cachefile] &&
73
- ([file mtime $cachefile] > [file mtime $path])
74
- } {
75
- # Use preparsed data if not invalidated by changes to the
76
- # archive they are derived from.
77
- write 4 rcs {Load preparsed data block}
78
- return [fileutil::cat -encoding binary $cachefile]
79
- }
80
-
81
- set res [Process $path]
82
-
83
- # Save parse result for quick pickup by future runs.
84
- fileutil::writeFile $cachefile $res
85
-
86
- return $res
87
-}
88
-
89
-# -----------------------------------------------------------------------------
90
-
91
-proc ::vc::rcs::parser::Process {path} {
92
- set data [fileutil::cat -encoding binary $path]
93
- array set res {}
94
- set res(size) [file size $path]
95
- set res(done) 0
96
- set res(nsize) [string length $res(size)]
97
-
98
- Admin
99
- Deltas
100
- Description
101
- DeltaTexts
102
-
103
- # Remove parser state
104
- catch {unset res(id)}
105
- catch {unset res(lastval)}
106
- unset res(size)
107
- unset res(nsize)
108
- unset res(done)
109
-
110
- return [array get res]
111
-}
112
-
113
-proc ::vc::rcs::parser::Cache {path} {
114
- return ${path},,preparsed
115
-}
116
-
117
-# -----------------------------------------------------------------------------
118
-# Internal - Recursive Descent functions implementing the syntax.
119
-
120
-proc ::vc::rcs::parser::Admin {} {
121
- upvar 1 data data res res
122
- Head ; Branch ; Access ; Symbols ; Locks ; Strict ; Comment ; Expand
123
- return
124
-}
125
-
126
-proc ::vc::rcs::parser::Deltas {} {
127
- upvar 1 data data res res
128
- while {[Num 0]} { IsIdent ; Date ; Author ; State ; Branches ; NextRev }
129
- return
130
-}
131
-
132
-proc ::vc::rcs::parser::Description {} {
133
- upvar 1 data data res res
134
- Literal desc
135
- String 1
136
- Def desc
137
- return
138
-}
139
-
140
-proc ::vc::rcs::parser::DeltaTexts {} {
141
- upvar 1 data data res res
142
- while {[Num 0]} { IsIdent ; Log ; Text }
143
- return
144
-}
145
-
146
-proc ::vc::rcs::parser::Head {} {
147
- upvar 1 data data res res
148
- Literal head ; Num 1 ; Literal \;
149
- Def head
150
- return
151
-}
152
-
153
-proc ::vc::rcs::parser::Branch {} {
154
- upvar 1 data data res res
155
- if {![Literal branch 0]} return ; Num 1 ; Literal \;
156
- Def branch
157
- return
158
-}
159
-
160
-proc ::vc::rcs::parser::Access {} {
161
- upvar 1 data data res res
162
- Literal access ; Literal \;
163
- return
164
-}
165
-
166
-proc ::vc::rcs::parser::Symbols {} {
167
- upvar 1 data data res res
168
- Literal symbols
169
- while {[Ident]} { Num 1 ; Map symbol }
170
- Literal \;
171
- return
172
-}
173
-
174
-proc ::vc::rcs::parser::Locks {} {
175
- upvar 1 data data res res
176
- Literal locks
177
- while {[Ident]} { Num 1 ; Map lock }
178
- Literal \;
179
- return
180
-}
181
-
182
-proc ::vc::rcs::parser::Strict {} {
183
- upvar 1 data data res res
184
- if {![Literal strict 0]} return ; Literal \;
185
- return
186
-}
187
-
188
-proc ::vc::rcs::parser::Comment {} {
189
- upvar 1 data data res res
190
- if {![Literal comment 0]} return ;
191
- if {![String 0]} return ;
192
- Literal \;
193
- Def comment
194
- return
195
-}
196
-
197
-proc ::vc::rcs::parser::Expand {} {
198
- upvar 1 data data res res
199
- if {![Literal expand 0]} return ;
200
- if {![String 0]} return ;
201
- Literal \;
202
- Def expand
203
- return
204
-}
205
-
206
-proc ::vc::rcs::parser::Date {} {
207
- upvar 1 data data res res
208
- Literal date ; Num 1 ; Literal \;
209
-
210
- foreach {yr mo dy h m s} [split $res(lastval) .] break
211
- if {$yr < 100} {incr yr 1900}
212
- set res(lastval) [join [list $yr $mo $dy $h $m $s] .]
213
- Map date
214
- return
215
-}
216
-
217
-proc ::vc::rcs::parser::Author {} {
218
- upvar 1 data data res res
219
- Literal author ; Skip ; Literal \; ; Map author
220
- return
221
-}
222
-
223
-proc ::vc::rcs::parser::State {} {
224
- upvar 1 data data res res
225
- Literal state ; Skip ; Literal \; ; Map state
226
- return
227
-}
228
-
229
-proc ::vc::rcs::parser::Branches {} {
230
- upvar 1 data data res res
231
- Literal branches ; Skip ; Literal \;
232
- return
233
-}
234
-
235
-proc ::vc::rcs::parser::NextRev {} {
236
- upvar 1 data data res res
237
- Literal next ; Skip ; Literal \; ; Map parent
238
- return
239
-}
240
-
241
-proc ::vc::rcs::parser::Log {} {
242
- upvar 1 data data res res
243
- Literal log ; String 1 ; Map commit
244
- return
245
-}
246
-
247
-proc ::vc::rcs::parser::Text {} {
248
- upvar 1 data data res res
249
- Literal text ; String 1
250
- return
251
-}
252
-
253
-# -----------------------------------------------------------------------------
254
-# Internal - Lexicographical commands and data aquisition preparation
255
-
256
-proc ::vc::rcs::parser::Ident {} {
257
- upvar 1 data data res res
258
-
259
- #puts I@?<[string range $data 0 10]...>
260
-
261
- if {[regexp -indices -- {^\s*;\s*} $data]} {
262
- return 0
263
- } elseif {![regexp -indices -- {^\s*([^:]*)\s*:\s*} $data match val]} {
264
- return 0
265
- }
266
-
267
- Get $val ; IsIdent
268
- Next
269
- return 1
270
-}
271
-
272
-proc ::vc::rcs::parser::Literal {name {required 1}} {
273
- upvar 1 data data res res
274
- if {![regexp -indices -- "^\\s*$name\\s*" $data match]} {
275
- if {$required} {
276
- return -code error "Expected '$name' @ '[string range $data 0 30]...'"
277
- }
278
- return 0
279
- }
280
-
281
- Next
282
- return 1
283
-}
284
-
285
-proc ::vc::rcs::parser::String {{required 1}} {
286
- upvar 1 data data res res
287
-
288
- if {![regexp -indices -- {^\s*@(([^@]*(@@)*)*)@\s*} $data match val]} {
289
- if {$required} {
290
- return -code error "Expected string @ '[string range $data 0 30]...'"
291
- }
292
- return 0
293
- }
294
-
295
- Get $val
296
- Next
297
- return 1
298
-}
299
-
300
-proc ::vc::rcs::parser::Num {required} {
301
- upvar 1 data data res res
302
- if {![regexp -indices -- {^\s*((\d|\.)+)\s*} $data match val]} {
303
- if {$required} {
304
- return -code error "Expected id @ '[string range $data 0 30]...'"
305
- }
306
- return 0
307
- }
308
-
309
- Get $val
310
- Next
311
- return 1
312
-}
313
-
314
-proc ::vc::rcs::parser::Skip {} {
315
- upvar 1 data data res res
316
- regexp -indices -- {^\s*([^;]*)\s*} $data match val
317
- Get $val
318
- Next
319
- return
320
-}
321
-
322
-# -----------------------------------------------------------------------------
323
-# Internal - Data aquisition
324
-
325
-proc ::vc::rcs::parser::Def {key} {
326
- upvar 1 data data res res
327
- set res($key) $res(lastval)
328
- unset res(lastval)
329
- return
330
-}
331
-
332
-proc ::vc::rcs::parser::Map {key} {
333
- upvar 1 data data res res
334
- lappend res($key) $res(id) $res(lastval)
335
- #puts Map($res(id))=($res(lastval))
336
- unset res(lastval)
337
- #unset res(id);#Keep id for additional mappings.
338
- return
339
-}
340
-
341
-proc ::vc::rcs::parser::IsIdent {} {
342
- upvar 1 data data res res
343
- set res(id) $res(lastval)
344
- unset res(lastval)
345
- return
346
-}
347
-
348
-proc ::vc::rcs::parser::Get {val} {
349
- upvar 1 data data res res
350
- foreach {s e} $val break
351
- set res(lastval) [string range $data $s $e]
352
- #puts G|$res(lastval)
353
- return
354
-}
355
-
356
-proc ::vc::rcs::parser::Next {} {
357
- upvar 1 match match data data res res
358
- foreach {s e} $match break ; incr e
359
- set data [string range $data $e end]
360
- set res(done) [expr {$res(size) - [string length $data]}]
361
-
362
- progress 2 rcs $res(done) $res(size)
363
- return
364
-}
365
-
366
-# -----------------------------------------------------------------------------
367
-
368
-namespace eval ::vc::rcs::parser {
369
- variable cache 0 ; # No result caching by default.
370
-
371
- namespace export process configure
372
-}
373
-
374
-# -----------------------------------------------------------------------------
375
-# Ready
1
+## -*- tcl -*-
2
+# # ## ### ##### ######## ############# #####################
3
+## Copyright (c) 2007 Andreas Kupries.
4
+#
5
+# This software is licensed as described in the file LICENSE, which
6
+# you should have received as part of this distribution.
7
+#
8
+# This software consists of voluntary contributions made by many
9
+# individuals. For exact contribution history, see the revision
10
+# history and logs, available at http://fossil-scm.hwaci.com/fossil
11
+# # ## ### ##### ######## ############# #####################
12
+
13
+# A tool package, provides a parser for RCS archive files. This parser
14
+# is implemented via recursive descent. It is not only given a file to
15
+# process, but also a 'sink', an object it calls out to at important
16
+# places of the parsing process to either signal an event and/or
17
+# convey gathered information to it. The sink is responsible for the
18
+# actual processing of the data in whatever way it desires.
19
+
20
+# # ## ### ##### ######## ############# #####################
21
+## Requirements
22
+
23
+package require Tcl 8.4 ; # Required runtime.
24
+package require snit ; # OO system.
25
+package require fileutil ; # File utilities.
26
+package require vc::tools::log ; # User feedback.
27
+package require struct::list ; # Advanced list ops.
28
+
29
+# # ## ### ##### ######## ############# #####################
30
+##
31
+
32
+snit::type ::vc::rcs::parser {
33
+ # # ## ### ##### ######## #############
34
+ ## Public API
35
+
36
+ typemethod process {path sink} {
37
+ Initialize $path $sink
38
+ Call begin
39
+ Admin ; Deltas ; Description ; DeltaTexts
40
+ Call done
41
+ return
42
+ }
43
+
44
+ # # ## ### ##### ######## #############
45
+ ## Internal methods, recursive descent, syntactical processing
46
+
47
+ proc Admin {} {
48
+ Head ; PrincipalBranch ; Access ; Symbols
49
+ Locks ; Strictness ; FileComment ; Expand
50
+ Call admindone
51
+ return
52
+ }
53
+
54
+ # # ## ### ##### ######## #############
55
+
56
+ proc Head {} {
57
+ RequiredLiteral head
58
+ RequiredNumber -> head
59
+ Semicolon
60
+ Call sethead $head
61
+ return
62
+ }
63
+
64
+ proc PrincipalBranch {} {
65
+ if {![OptionalLiteral branch]} return
66
+ RequiredNumber -> branch
67
+ Semicolon
68
+ Call setprincipalbranch $branch
69
+ return
70
+ }
71
+
72
+ proc Access {} {
73
+ RequiredLiteral access ;
74
+ Semicolon
75
+ return
76
+ }
77
+
78
+ proc Symbols {} {
79
+ set d {}
80
+ RequiredLiteral symbols
81
+ while {[Ident -> symbol]} {
82
+ RequiredNumber -> rev
83
+ # TODO: distinguish branch/tag symbols, transform revision
84
+ # numbers into canonical form.
85
+ lappend d $symbol $rev
86
+ }
87
+ Semicolon
88
+ Call setsymbols $d
89
+ return
90
+ }
91
+
92
+ proc Locks {} {
93
+ # Not saving locks.
94
+ RequiredLiteral locks
95
+ while {[Ident -> symbol]} {
96
+ RequiredNumber -> l
97
+ }
98
+ Semicolon
99
+ return
100
+ }
101
+
102
+ proc Strictness {} {
103
+ # Not saving strictness
104
+ if {![OptionalLiteral strict]} return
105
+ Semicolon
106
+ return
107
+ }
108
+
109
+ proc FileComment {} {
110
+ if {![OptionalLiteral comment]} return
111
+ if {![OptionalString -> c]} return
112
+ Semicolon
113
+ Call setcomment $c
114
+ return
115
+ }
116
+
117
+ proc Expand {} {
118
+ # Not saving expanded keywords
119
+ if {![OptionalLiteral expand]} return
120
+ if {![OptionalString -> dummy]} return
121
+ Semicolon
122
+ return
123
+ }
124
+
125
+ # # ## ### ##### ######## #############
126
+
127
+ proc Deltas {} {
128
+ while {[OptionalNumber -> rev]} {
129
+ Date -> d
130
+ Author -> a
131
+ State -> s
132
+ Branches -> b
133
+ NextRev -> n
134
+ Call def $rev $d $a $s $n $b
135
+ }
136
+ return
137
+ }
138
+
139
+ # # ## ### ##### ######## #############
140
+
141
+ proc Date {_ dv} {
142
+ upvar 1 $dv d
143
+ RequiredLiteral date
144
+ RequiredNumber -> d
145
+ Semicolon
146
+
147
+ struct::list assign [split $d .] year month day hour min sec
148
+ if {$year < 100} {incr year 1900}
149
+ set d [clock scan "${year}-${month}-${day} ${hour}:${min}:${sec}"]
150
+ return
151
+ }
152
+
153
+ proc Author {_ av} {
154
+ upvar 1 $av a
155
+ RequiredLiteral author
156
+ Anything -> a
157
+ Semicolon
158
+ return
159
+ }
160
+
161
+ proc State {_ sv} {
162
+ upvar 1 $sv s
163
+ RequiredLiteral state
164
+ Anything -> s
165
+ Semicolon
166
+ return
167
+ }
168
+
169
+ proc Branches {_ bv} {
170
+ upvar 1 $bv b
171
+ RequiredLiteral branches
172
+ Anything -> b
173
+ Semicolon
174
+ return
175
+ }
176
+
177
+ proc NextRev {_ nv} {
178
+ upvar 1 $nv n
179
+ RequiredLiteral next
180
+ Anything -> n
181
+ Semicolon
182
+ return
183
+ }
184
+
185
+ # # ## ### ##### ######## #############
186
+
187
+ proc Description {} {
188
+ upvar 1 data data res res
189
+ RequiredLiteral desc
190
+ RequiredString -> d
191
+ Call setdesc $d
192
+ return
193
+ }
194
+
195
+ # # ## ### ##### ######## #############
196
+
197
+ proc DeltaTexts {} {
198
+ while {[OptionalNumber -> rev]} {
199
+ RequiredLiteral log
200
+ RequiredString -> cmsg
201
+ RequiredLiteral text
202
+ RequiredStringRange -> delta
203
+ Call extend $rev $cmsg $delta
204
+ }
205
+ return
206
+ }
207
+
208
+ # # ## ### ##### ######## #############
209
+ ## Internal methods, lexiographical processing
210
+
211
+ proc Semicolon {} {
212
+ ::variable mydata
213
+ ::variable mypos
214
+
215
+ set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata match]
216
+ if {!$ok} { Expected ';' }
217
+
218
+ SkipOver match
219
+ return
220
+ }
221
+
222
+ proc RequiredLiteral {name} {
223
+ ::variable mydata
224
+ ::variable mypos
225
+
226
+ set pattern "\\A\\s*$name\\s*"
227
+ set ok [regexp -start $mypos -indices -- $pattern $mydata match]
228
+ if {!$ok} { Expected '$name' }
229
+
230
+ SkipOver match
231
+ return
232
+ }
233
+
234
+ proc OptionalLiteral {name} {
235
+ ::variable mydata
236
+ ::variable mypos
237
+
238
+ set pattern "\\A\\s*$name\\s*"
239
+ set ok [regexp -start $mypos -indices -- $pattern $mydata match]
240
+ if {!$ok} { return 0 }
241
+
242
+ SkipOver match
243
+ return 1
244
+ }
245
+
246
+ proc RequiredNumber {_ v} {
247
+ upvar 1 $v value
248
+ ::variable mydata
249
+ ::variable mypos
250
+
251
+ set pattern {\A\s*((\d|\.)+)\s*}
252
+ set ok [regexp -start $mypos -indices -- $pattern $mydata match v]
253
+ if {!$ok} { Expected id }
254
+
255
+ Extract $v -> value
256
+ SkipOver match
257
+ return
258
+ }
259
+
260
+ proc OptionalNumber {_ v} {
261
+ upvar 1 $v value
262
+ ::variable mydata
263
+ ::variable mypos
264
+
265
+ set pattern {\A\s*((\d|\.)+)\s*}
266
+ set ok [regexp -start $mypos -indices -- $pattern $mydata match v]
267
+ if {!$ok} { return 0 }
268
+
269
+ Extract $v -> value
270
+ SkipOver match
271
+ return 1
272
+ }
273
+
274
+ proc RequiredString {_ v} {
275
+ upvar 1 $v value
276
+ ::variable mydata
277
+ ::variable mypos
278
+
279
+ set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v]
280
+ if {!$ok} { Expected string }
281
+
282
+ Extract $v -> value
283
+ set value [string map {@@ @} $value]
284
+ SkipOver match
285
+ return
286
+ }
287
+
288
+ proc RequiredStringRange {_ v} {
289
+ upvar 1 $v value
290
+ ::variable mydata
291
+ ::variable mypos
292
+
293
+ set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match value]
294
+ if {!$ok} { Expected string }
295
+
296
+ SkipOver match
297
+ return
298
+ }
299
+
300
+ proc OptionalString {_ v} {
301
+ upvar 1 $v value
302
+ ::variable mydata
303
+ ::variable mypos
304
+
305
+ set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v]
306
+ if {!$ok} { return 0 }
307
+
308
+ Extract $v -> value
309
+ set value [string map {@@ @} $value]
310
+ SkipOver match
311
+ return 1
312
+ }
313
+
314
+ proc Ident {_ v} {
315
+ upvar 1 $v value
316
+ ::variable mydata
317
+ ::variable mypos
318
+
319
+ set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata]
320
+ if {$ok} { return 0 }
321
+
322
+ set ok [regexp -start $mypos -indices -- {\A\s*([^:]*)\s*:\s*} $mydata match v]
323
+ if {!$ok} { return 0 }
324
+
325
+ Extract $v -> value
326
+ SkipOver match
327
+ return 1
328
+ }
329
+
330
+ proc Anything {_ v} {
331
+ upvar 1 $v value
332
+ ::variable mydata
333
+ ::variable mypos
334
+
335
+ regexp -start $mypos -indices -- {\A\s*([^;]*)\s*} $mydata match v
336
+
337
+ Extract $v -> value
338
+ SkipOver match
339
+ return
340
+ }
341
+
342
+ # # ## ### ##### ######## #############
343
+ ## Internal methods, input handling
344
+
345
+ proc Extract {range _ v} {
346
+ upvar 1 $v value
347
+ ::variable mydata
348
+ struct::list assign $range s e
349
+ set value [string range $mydata $s $e]
350
+ return
351
+ }
352
+
353
+ proc SkipOver {mv} {
354
+ # Note: The indices are absolute!, not relative to the start
355
+ # location.
356
+ upvar 1 $mv match
357
+ ::variable mypos
358
+ ::variable mysize
359
+
360
+ struct::list assign $match s e
361
+ #puts "<$s $e> [info level -1]"
362
+
363
+ set mypos $e
364
+ incr mypos
365
+
366
+ log progress 2 rcs $mypos $mysize
367
+ #puts $mypos/$mysize
368
+ return
369
+ }
370
+
371
+ proc Expected {x} {
372
+ ::variable mydata
373
+ ::variable mypos
374
+ set e $mypos ; incr e 30
375
+ return -code error "Expected $x @ '[string range $mydata $mypos $e]...'"
376
+ }
377
+
378
+ # # ## ### ##### ######## #############
379
+ ## Setup, callbacks.
380
+
381
+ proc Initialize {path sink} {
382
+ ::variable mypos 0
383
+ ::variable mydata [fileutil::cat -encoding binary $path]
384
+ ::variable mysize [file size $path]
385
+ ::variable mysink $sink
386
+ return
387
+ }
388
+
389
+ proc Call {args} {
390
+ ::variable mysink
391
+ set cmd $mysink
392
+ foreach a $args { lappend cmd $a }
393
+ eval $cmd
394
+ return
395
+ }
396
+
397
+ # # ## ### ##### ######## #############
398
+ ## Configuration
399
+
400
+ typevariable mydata {} ; # Rcs archive contents to process
401
+ typevariable mysize 0 ; # Length of contents
402
+ typevariable mysink {} ; # Sink to report to
403
+
404
+ pragma -hasinstances no ; # singleton
405
+ pragma -hastypeinfo no ; # no introspection
406
+ pragma -hastypedestroy no ; # immortal
407
+
408
+ # # ## ### ##### ######## #############
409
+}
410
+
411
+namespace eval ::vc::rcs {
412
+ namespace export parser
413
+ namespace eval parser {
414
+ namespace import ::vc::tools::log
415
+ log register rcs
416
+ }
417
+}
418
+
419
+# # ## ### ##### ######## ############# #####################
420
+## Ready
376421
377422
package provide vc::rcs::parser 1.0
378423
return
379424
--- tools/cvs2fossil/lib/rcsparser.tcl
+++ tools/cvs2fossil/lib/rcsparser.tcl
@@ -1,378 +1,423 @@
1 # -----------------------------------------------------------------------------
2 # Tool packages. Parsing RCS files.
3 #
4 # Some of the information in RCS files is skipped over, most
5 # importantly the actual delta texts. The users of this parser need
6 # only the meta-data about when revisions were added, the tree
7 # (branching) structure, commit messages.
8 #
9 # The parser is based on Recursive Descent.
10
11 # -----------------------------------------------------------------------------
12 # Requirements
13
14 package require Tcl 8.4
15 package require fileutil ; # Tcllib (cat)
16 package require vc::tools::log ; # User feedback
17
18 namespace eval ::vc::rcs::parser {
19 namespace import ::vc::tools::log
20 log register rcs
21 }
22
23 # -----------------------------------------------------------------------------
24 # API
25
26 # vc::rcs::parser::process file
27 #
28 # Parses the rcs file and returns a dictionary containing the meta
29 # data. The following keys are used
30 #
31 # Key Meaning
32 # --- -------
33 # 'head' head revision
34 # 'branch' ?
35 # 'symbol' dict (symbol -> revision)
36 # 'lock' dict (symbol -> revision)
37 # 'comment' file comment
38 # 'expand' ?
39 # 'date' dict (revision -> date)
40 # 'author' dict (revision -> author)
41 # 'state' dict (revision -> state)
42 # 'parent' dict (revision -> parent revision)
43 # 'commit' dict (revision -> commit message)
44 #
45 # The state 'dead' has special meaning, the user should know that.
46
47 # -----------------------------------------------------------------------------
48 # API Implementation
49
50 proc ::vc::rcs::parser::configure {key value} {
51 variable cache
52 switch -exact -- $key {
53 -cache {
54 set cache $value
55 }
56 default {
57 return -code error "Unknown switch $key, expected one of -cache"
58 }
59 }
60 return
61 }
62
63 proc ::vc::rcs::parser::process {path} {
64 variable cache
65
66 if {!$cache} {
67 return [Process $path]
68 }
69
70 set cachefile [Cache $path]
71 if {
72 [file exists $cachefile] &&
73 ([file mtime $cachefile] > [file mtime $path])
74 } {
75 # Use preparsed data if not invalidated by changes to the
76 # archive they are derived from.
77 write 4 rcs {Load preparsed data block}
78 return [fileutil::cat -encoding binary $cachefile]
79 }
80
81 set res [Process $path]
82
83 # Save parse result for quick pickup by future runs.
84 fileutil::writeFile $cachefile $res
85
86 return $res
87 }
88
89 # -----------------------------------------------------------------------------
90
91 proc ::vc::rcs::parser::Process {path} {
92 set data [fileutil::cat -encoding binary $path]
93 array set res {}
94 set res(size) [file size $path]
95 set res(done) 0
96 set res(nsize) [string length $res(size)]
97
98 Admin
99 Deltas
100 Description
101 DeltaTexts
102
103 # Remove parser state
104 catch {unset res(id)}
105 catch {unset res(lastval)}
106 unset res(size)
107 unset res(nsize)
108 unset res(done)
109
110 return [array get res]
111 }
112
113 proc ::vc::rcs::parser::Cache {path} {
114 return ${path},,preparsed
115 }
116
117 # -----------------------------------------------------------------------------
118 # Internal - Recursive Descent functions implementing the syntax.
119
120 proc ::vc::rcs::parser::Admin {} {
121 upvar 1 data data res res
122 Head ; Branch ; Access ; Symbols ; Locks ; Strict ; Comment ; Expand
123 return
124 }
125
126 proc ::vc::rcs::parser::Deltas {} {
127 upvar 1 data data res res
128 while {[Num 0]} { IsIdent ; Date ; Author ; State ; Branches ; NextRev }
129 return
130 }
131
132 proc ::vc::rcs::parser::Description {} {
133 upvar 1 data data res res
134 Literal desc
135 String 1
136 Def desc
137 return
138 }
139
140 proc ::vc::rcs::parser::DeltaTexts {} {
141 upvar 1 data data res res
142 while {[Num 0]} { IsIdent ; Log ; Text }
143 return
144 }
145
146 proc ::vc::rcs::parser::Head {} {
147 upvar 1 data data res res
148 Literal head ; Num 1 ; Literal \;
149 Def head
150 return
151 }
152
153 proc ::vc::rcs::parser::Branch {} {
154 upvar 1 data data res res
155 if {![Literal branch 0]} return ; Num 1 ; Literal \;
156 Def branch
157 return
158 }
159
160 proc ::vc::rcs::parser::Access {} {
161 upvar 1 data data res res
162 Literal access ; Literal \;
163 return
164 }
165
166 proc ::vc::rcs::parser::Symbols {} {
167 upvar 1 data data res res
168 Literal symbols
169 while {[Ident]} { Num 1 ; Map symbol }
170 Literal \;
171 return
172 }
173
174 proc ::vc::rcs::parser::Locks {} {
175 upvar 1 data data res res
176 Literal locks
177 while {[Ident]} { Num 1 ; Map lock }
178 Literal \;
179 return
180 }
181
182 proc ::vc::rcs::parser::Strict {} {
183 upvar 1 data data res res
184 if {![Literal strict 0]} return ; Literal \;
185 return
186 }
187
188 proc ::vc::rcs::parser::Comment {} {
189 upvar 1 data data res res
190 if {![Literal comment 0]} return ;
191 if {![String 0]} return ;
192 Literal \;
193 Def comment
194 return
195 }
196
197 proc ::vc::rcs::parser::Expand {} {
198 upvar 1 data data res res
199 if {![Literal expand 0]} return ;
200 if {![String 0]} return ;
201 Literal \;
202 Def expand
203 return
204 }
205
206 proc ::vc::rcs::parser::Date {} {
207 upvar 1 data data res res
208 Literal date ; Num 1 ; Literal \;
209
210 foreach {yr mo dy h m s} [split $res(lastval) .] break
211 if {$yr < 100} {incr yr 1900}
212 set res(lastval) [join [list $yr $mo $dy $h $m $s] .]
213 Map date
214 return
215 }
216
217 proc ::vc::rcs::parser::Author {} {
218 upvar 1 data data res res
219 Literal author ; Skip ; Literal \; ; Map author
220 return
221 }
222
223 proc ::vc::rcs::parser::State {} {
224 upvar 1 data data res res
225 Literal state ; Skip ; Literal \; ; Map state
226 return
227 }
228
229 proc ::vc::rcs::parser::Branches {} {
230 upvar 1 data data res res
231 Literal branches ; Skip ; Literal \;
232 return
233 }
234
235 proc ::vc::rcs::parser::NextRev {} {
236 upvar 1 data data res res
237 Literal next ; Skip ; Literal \; ; Map parent
238 return
239 }
240
241 proc ::vc::rcs::parser::Log {} {
242 upvar 1 data data res res
243 Literal log ; String 1 ; Map commit
244 return
245 }
246
247 proc ::vc::rcs::parser::Text {} {
248 upvar 1 data data res res
249 Literal text ; String 1
250 return
251 }
252
253 # -----------------------------------------------------------------------------
254 # Internal - Lexicographical commands and data aquisition preparation
255
256 proc ::vc::rcs::parser::Ident {} {
257 upvar 1 data data res res
258
259 #puts I@?<[string range $data 0 10]...>
260
261 if {[regexp -indices -- {^\s*;\s*} $data]} {
262 return 0
263 } elseif {![regexp -indices -- {^\s*([^:]*)\s*:\s*} $data match val]} {
264 return 0
265 }
266
267 Get $val ; IsIdent
268 Next
269 return 1
270 }
271
272 proc ::vc::rcs::parser::Literal {name {required 1}} {
273 upvar 1 data data res res
274 if {![regexp -indices -- "^\\s*$name\\s*" $data match]} {
275 if {$required} {
276 return -code error "Expected '$name' @ '[string range $data 0 30]...'"
277 }
278 return 0
279 }
280
281 Next
282 return 1
283 }
284
285 proc ::vc::rcs::parser::String {{required 1}} {
286 upvar 1 data data res res
287
288 if {![regexp -indices -- {^\s*@(([^@]*(@@)*)*)@\s*} $data match val]} {
289 if {$required} {
290 return -code error "Expected string @ '[string range $data 0 30]...'"
291 }
292 return 0
293 }
294
295 Get $val
296 Next
297 return 1
298 }
299
300 proc ::vc::rcs::parser::Num {required} {
301 upvar 1 data data res res
302 if {![regexp -indices -- {^\s*((\d|\.)+)\s*} $data match val]} {
303 if {$required} {
304 return -code error "Expected id @ '[string range $data 0 30]...'"
305 }
306 return 0
307 }
308
309 Get $val
310 Next
311 return 1
312 }
313
314 proc ::vc::rcs::parser::Skip {} {
315 upvar 1 data data res res
316 regexp -indices -- {^\s*([^;]*)\s*} $data match val
317 Get $val
318 Next
319 return
320 }
321
322 # -----------------------------------------------------------------------------
323 # Internal - Data aquisition
324
325 proc ::vc::rcs::parser::Def {key} {
326 upvar 1 data data res res
327 set res($key) $res(lastval)
328 unset res(lastval)
329 return
330 }
331
332 proc ::vc::rcs::parser::Map {key} {
333 upvar 1 data data res res
334 lappend res($key) $res(id) $res(lastval)
335 #puts Map($res(id))=($res(lastval))
336 unset res(lastval)
337 #unset res(id);#Keep id for additional mappings.
338 return
339 }
340
341 proc ::vc::rcs::parser::IsIdent {} {
342 upvar 1 data data res res
343 set res(id) $res(lastval)
344 unset res(lastval)
345 return
346 }
347
348 proc ::vc::rcs::parser::Get {val} {
349 upvar 1 data data res res
350 foreach {s e} $val break
351 set res(lastval) [string range $data $s $e]
352 #puts G|$res(lastval)
353 return
354 }
355
356 proc ::vc::rcs::parser::Next {} {
357 upvar 1 match match data data res res
358 foreach {s e} $match break ; incr e
359 set data [string range $data $e end]
360 set res(done) [expr {$res(size) - [string length $data]}]
361
362 progress 2 rcs $res(done) $res(size)
363 return
364 }
365
366 # -----------------------------------------------------------------------------
367
368 namespace eval ::vc::rcs::parser {
369 variable cache 0 ; # No result caching by default.
370
371 namespace export process configure
372 }
373
374 # -----------------------------------------------------------------------------
375 # Ready
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
377 package provide vc::rcs::parser 1.0
378 return
379
--- tools/cvs2fossil/lib/rcsparser.tcl
+++ tools/cvs2fossil/lib/rcsparser.tcl
@@ -1,378 +1,423 @@
1 ## -*- tcl -*-
2 # # ## ### ##### ######## ############# #####################
3 ## Copyright (c) 2007 Andreas Kupries.
4 #
5 # This software is licensed as described in the file LICENSE, which
6 # you should have received as part of this distribution.
7 #
8 # This software consists of voluntary contributions made by many
9 # individuals. For exact contribution history, see the revision
10 # history and logs, available at http://fossil-scm.hwaci.com/fossil
11 # # ## ### ##### ######## ############# #####################
12
13 # A tool package, provides a parser for RCS archive files. This parser
14 # is implemented via recursive descent. It is not only given a file to
15 # process, but also a 'sink', an object it calls out to at important
16 # places of the parsing process to either signal an event and/or
17 # convey gathered information to it. The sink is responsible for the
18 # actual processing of the data in whatever way it desires.
19
20 # # ## ### ##### ######## ############# #####################
21 ## Requirements
22
23 package require Tcl 8.4 ; # Required runtime.
24 package require snit ; # OO system.
25 package require fileutil ; # File utilities.
26 package require vc::tools::log ; # User feedback.
27 package require struct::list ; # Advanced list ops.
28
29 # # ## ### ##### ######## ############# #####################
30 ##
31
32 snit::type ::vc::rcs::parser {
33 # # ## ### ##### ######## #############
34 ## Public API
35
36 typemethod process {path sink} {
37 Initialize $path $sink
38 Call begin
39 Admin ; Deltas ; Description ; DeltaTexts
40 Call done
41 return
42 }
43
44 # # ## ### ##### ######## #############
45 ## Internal methods, recursive descent, syntactical processing
46
47 proc Admin {} {
48 Head ; PrincipalBranch ; Access ; Symbols
49 Locks ; Strictness ; FileComment ; Expand
50 Call admindone
51 return
52 }
53
54 # # ## ### ##### ######## #############
55
56 proc Head {} {
57 RequiredLiteral head
58 RequiredNumber -> head
59 Semicolon
60 Call sethead $head
61 return
62 }
63
64 proc PrincipalBranch {} {
65 if {![OptionalLiteral branch]} return
66 RequiredNumber -> branch
67 Semicolon
68 Call setprincipalbranch $branch
69 return
70 }
71
72 proc Access {} {
73 RequiredLiteral access ;
74 Semicolon
75 return
76 }
77
78 proc Symbols {} {
79 set d {}
80 RequiredLiteral symbols
81 while {[Ident -> symbol]} {
82 RequiredNumber -> rev
83 # TODO: distinguish branch/tag symbols, transform revision
84 # numbers into canonical form.
85 lappend d $symbol $rev
86 }
87 Semicolon
88 Call setsymbols $d
89 return
90 }
91
92 proc Locks {} {
93 # Not saving locks.
94 RequiredLiteral locks
95 while {[Ident -> symbol]} {
96 RequiredNumber -> l
97 }
98 Semicolon
99 return
100 }
101
102 proc Strictness {} {
103 # Not saving strictness
104 if {![OptionalLiteral strict]} return
105 Semicolon
106 return
107 }
108
109 proc FileComment {} {
110 if {![OptionalLiteral comment]} return
111 if {![OptionalString -> c]} return
112 Semicolon
113 Call setcomment $c
114 return
115 }
116
117 proc Expand {} {
118 # Not saving expanded keywords
119 if {![OptionalLiteral expand]} return
120 if {![OptionalString -> dummy]} return
121 Semicolon
122 return
123 }
124
125 # # ## ### ##### ######## #############
126
127 proc Deltas {} {
128 while {[OptionalNumber -> rev]} {
129 Date -> d
130 Author -> a
131 State -> s
132 Branches -> b
133 NextRev -> n
134 Call def $rev $d $a $s $n $b
135 }
136 return
137 }
138
139 # # ## ### ##### ######## #############
140
141 proc Date {_ dv} {
142 upvar 1 $dv d
143 RequiredLiteral date
144 RequiredNumber -> d
145 Semicolon
146
147 struct::list assign [split $d .] year month day hour min sec
148 if {$year < 100} {incr year 1900}
149 set d [clock scan "${year}-${month}-${day} ${hour}:${min}:${sec}"]
150 return
151 }
152
153 proc Author {_ av} {
154 upvar 1 $av a
155 RequiredLiteral author
156 Anything -> a
157 Semicolon
158 return
159 }
160
161 proc State {_ sv} {
162 upvar 1 $sv s
163 RequiredLiteral state
164 Anything -> s
165 Semicolon
166 return
167 }
168
169 proc Branches {_ bv} {
170 upvar 1 $bv b
171 RequiredLiteral branches
172 Anything -> b
173 Semicolon
174 return
175 }
176
177 proc NextRev {_ nv} {
178 upvar 1 $nv n
179 RequiredLiteral next
180 Anything -> n
181 Semicolon
182 return
183 }
184
185 # # ## ### ##### ######## #############
186
187 proc Description {} {
188 upvar 1 data data res res
189 RequiredLiteral desc
190 RequiredString -> d
191 Call setdesc $d
192 return
193 }
194
195 # # ## ### ##### ######## #############
196
197 proc DeltaTexts {} {
198 while {[OptionalNumber -> rev]} {
199 RequiredLiteral log
200 RequiredString -> cmsg
201 RequiredLiteral text
202 RequiredStringRange -> delta
203 Call extend $rev $cmsg $delta
204 }
205 return
206 }
207
208 # # ## ### ##### ######## #############
209 ## Internal methods, lexiographical processing
210
211 proc Semicolon {} {
212 ::variable mydata
213 ::variable mypos
214
215 set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata match]
216 if {!$ok} { Expected ';' }
217
218 SkipOver match
219 return
220 }
221
222 proc RequiredLiteral {name} {
223 ::variable mydata
224 ::variable mypos
225
226 set pattern "\\A\\s*$name\\s*"
227 set ok [regexp -start $mypos -indices -- $pattern $mydata match]
228 if {!$ok} { Expected '$name' }
229
230 SkipOver match
231 return
232 }
233
234 proc OptionalLiteral {name} {
235 ::variable mydata
236 ::variable mypos
237
238 set pattern "\\A\\s*$name\\s*"
239 set ok [regexp -start $mypos -indices -- $pattern $mydata match]
240 if {!$ok} { return 0 }
241
242 SkipOver match
243 return 1
244 }
245
246 proc RequiredNumber {_ v} {
247 upvar 1 $v value
248 ::variable mydata
249 ::variable mypos
250
251 set pattern {\A\s*((\d|\.)+)\s*}
252 set ok [regexp -start $mypos -indices -- $pattern $mydata match v]
253 if {!$ok} { Expected id }
254
255 Extract $v -> value
256 SkipOver match
257 return
258 }
259
260 proc OptionalNumber {_ v} {
261 upvar 1 $v value
262 ::variable mydata
263 ::variable mypos
264
265 set pattern {\A\s*((\d|\.)+)\s*}
266 set ok [regexp -start $mypos -indices -- $pattern $mydata match v]
267 if {!$ok} { return 0 }
268
269 Extract $v -> value
270 SkipOver match
271 return 1
272 }
273
274 proc RequiredString {_ v} {
275 upvar 1 $v value
276 ::variable mydata
277 ::variable mypos
278
279 set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v]
280 if {!$ok} { Expected string }
281
282 Extract $v -> value
283 set value [string map {@@ @} $value]
284 SkipOver match
285 return
286 }
287
288 proc RequiredStringRange {_ v} {
289 upvar 1 $v value
290 ::variable mydata
291 ::variable mypos
292
293 set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match value]
294 if {!$ok} { Expected string }
295
296 SkipOver match
297 return
298 }
299
300 proc OptionalString {_ v} {
301 upvar 1 $v value
302 ::variable mydata
303 ::variable mypos
304
305 set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v]
306 if {!$ok} { return 0 }
307
308 Extract $v -> value
309 set value [string map {@@ @} $value]
310 SkipOver match
311 return 1
312 }
313
314 proc Ident {_ v} {
315 upvar 1 $v value
316 ::variable mydata
317 ::variable mypos
318
319 set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata]
320 if {$ok} { return 0 }
321
322 set ok [regexp -start $mypos -indices -- {\A\s*([^:]*)\s*:\s*} $mydata match v]
323 if {!$ok} { return 0 }
324
325 Extract $v -> value
326 SkipOver match
327 return 1
328 }
329
330 proc Anything {_ v} {
331 upvar 1 $v value
332 ::variable mydata
333 ::variable mypos
334
335 regexp -start $mypos -indices -- {\A\s*([^;]*)\s*} $mydata match v
336
337 Extract $v -> value
338 SkipOver match
339 return
340 }
341
342 # # ## ### ##### ######## #############
343 ## Internal methods, input handling
344
345 proc Extract {range _ v} {
346 upvar 1 $v value
347 ::variable mydata
348 struct::list assign $range s e
349 set value [string range $mydata $s $e]
350 return
351 }
352
353 proc SkipOver {mv} {
354 # Note: The indices are absolute!, not relative to the start
355 # location.
356 upvar 1 $mv match
357 ::variable mypos
358 ::variable mysize
359
360 struct::list assign $match s e
361 #puts "<$s $e> [info level -1]"
362
363 set mypos $e
364 incr mypos
365
366 log progress 2 rcs $mypos $mysize
367 #puts $mypos/$mysize
368 return
369 }
370
371 proc Expected {x} {
372 ::variable mydata
373 ::variable mypos
374 set e $mypos ; incr e 30
375 return -code error "Expected $x @ '[string range $mydata $mypos $e]...'"
376 }
377
378 # # ## ### ##### ######## #############
379 ## Setup, callbacks.
380
381 proc Initialize {path sink} {
382 ::variable mypos 0
383 ::variable mydata [fileutil::cat -encoding binary $path]
384 ::variable mysize [file size $path]
385 ::variable mysink $sink
386 return
387 }
388
389 proc Call {args} {
390 ::variable mysink
391 set cmd $mysink
392 foreach a $args { lappend cmd $a }
393 eval $cmd
394 return
395 }
396
397 # # ## ### ##### ######## #############
398 ## Configuration
399
400 typevariable mydata {} ; # Rcs archive contents to process
401 typevariable mysize 0 ; # Length of contents
402 typevariable mysink {} ; # Sink to report to
403
404 pragma -hasinstances no ; # singleton
405 pragma -hastypeinfo no ; # no introspection
406 pragma -hastypedestroy no ; # immortal
407
408 # # ## ### ##### ######## #############
409 }
410
411 namespace eval ::vc::rcs {
412 namespace export parser
413 namespace eval parser {
414 namespace import ::vc::tools::log
415 log register rcs
416 }
417 }
418
419 # # ## ### ##### ######## ############# #####################
420 ## Ready
421
422 package provide vc::rcs::parser 1.0
423 return
424

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button