convert: fix builtin cvsps under Windows...
Patrick Mezard
r7097:d4218edd default
hgext/convert/cvsps.py
@@ -1,548 +1,587 @@
1 1 #
2 2 # Mercurial built-in replacement for cvsps.
3 3 #
4 4 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
5 5 #
6 6 # This software may be used and distributed according to the terms
7 7 # of the GNU General Public License, incorporated herein by reference.
8 8
9 9 import os
10 10 import re
11 11 import sys
12 12 import cPickle as pickle
13 13 from mercurial import util
14 14 from mercurial.i18n import _
15 15
16 16 def listsort(list, key):
17 17 "helper to sort by key in Python 2.3"
18 18 try:
19 19 list.sort(key=key)
20 20 except TypeError:
21 21 list.sort(lambda l, r: cmp(key(l), key(r)))
22 22
23 23 class logentry(object):
24 24 '''Class logentry has the following attributes:
25 25 .author - author name as CVS knows it
26 26 .branch - name of branch this revision is on
27 27 .branches - revision tuple of branches starting at this revision
28 28 .comment - commit message
29 29 .date - the commit date as a (time, tz) tuple
30 30 .dead - true if file revision is dead
31 31 .file - Name of file
32 32 .lines - a tuple (+lines, -lines) or None
33 33 .parent - Previous revision of this entry
34 34 .rcs - name of file as returned from CVS
35 35 .revision - revision number as tuple
36 36 .tags - list of tags on the file
37 37 '''
38 38 def __init__(self, **entries):
39 39 self.__dict__.update(entries)
40 40
41 41 class logerror(Exception):
42 42 pass
43 43
44 def getrepopath(cvspath):
45 """Return the repository path from a CVS path.
46
47 >>> getrepopath('/foo/bar')
48 '/foo/bar'
49 >>> getrepopath('c:/foo/bar')
50 'c:/foo/bar'
51 >>> getrepopath(':pserver:10/foo/bar')
52 '/foo/bar'
53 >>> getrepopath(':pserver:10c:/foo/bar')
54 '/foo/bar'
55 >>> getrepopath(':pserver:/foo/bar')
56 '/foo/bar'
57 >>> getrepopath(':pserver:c:/foo/bar')
58 'c:/foo/bar'
59 >>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
60 '/foo/bar'
61 >>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
62 'c:/foo/bar'
63 """
64 # According to CVS manual, CVS paths are expressed like:
65 # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
66 #
67 # Unfortunately, Windows absolute paths start with a drive letter
68 # like 'c:' making it harder to parse. Here we assume that drive
69 # letters are only one character long and any CVS component before
70 # the repository path is at least 2 characters long, and use this
71 # to disambiguate.
72 parts = cvspath.split(':')
73 if len(parts) == 1:
74 return parts[0]
75 # Here there is an ambiguous case if we have a port number
76 # immediately followed by a Windows drive letter. We assume this
77 # never happens and decide it must be a CVS path component,
78 # therefore ignoring it.
79 if len(parts[-2]) > 1:
80 return parts[-1].lstrip('0123456789')
81 return parts[-2] + ':' + parts[-1]
82
44 83 def createlog(ui, directory=None, root="", rlog=True, cache=None):
45 84 '''Collect the CVS rlog'''
46 85
47 86 # Because we store many duplicate commit log messages, reusing strings
48 87 # saves a lot of memory and pickle storage space.
49 88 _scache = {}
50 89 def scache(s):
51 90 "return a shared version of a string"
52 91 return _scache.setdefault(s, s)
53 92
54 93 ui.status(_('collecting CVS rlog\n'))
55 94
56 95 log = [] # list of logentry objects containing the CVS state
57 96
58 97 # patterns to match in CVS (r)log output, by state of use
59 98 re_00 = re.compile('RCS file: (.+)$')
60 99 re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
61 100 re_02 = re.compile('cvs (r?log|server): (.+)\n$')
62 101 re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
63 102 re_10 = re.compile('Working file: (.+)$')
64 103 re_20 = re.compile('symbolic names:')
65 104 re_30 = re.compile('\t(.+): ([\\d.]+)$')
66 105 re_31 = re.compile('----------------------------$')
67 106 re_32 = re.compile('=============================================================================$')
68 107 re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
69 108 re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
70 109 re_70 = re.compile('branches: (.+);$')
71 110
72 111 prefix = '' # leading path to strip off what we get from CVS
73 112
74 113 if directory is None:
75 114 # Current working directory
76 115
77 116 # Get the real directory in the repository
78 117 try:
79 118 prefix = file(os.path.join('CVS','Repository')).read().strip()
80 119 if prefix == ".":
81 120 prefix = ""
82 121 directory = prefix
83 122 except IOError:
84 123 raise logerror('Not a CVS sandbox')
85 124
86 if prefix and not prefix.endswith('/'):
87 prefix += '/'
125 if prefix and not prefix.endswith(os.sep):
126 prefix += os.sep
88 127
89 128 # Use the Root file in the sandbox, if it exists
90 129 try:
91 130 root = file(os.path.join('CVS','Root')).read().strip()
92 131 except IOError:
93 132 pass
94 133
95 134 if not root:
96 135 root = os.environ.get('CVSROOT', '')
97 136
98 137 # read log cache if one exists
99 138 oldlog = []
100 139 date = None
101 140
102 141 if cache:
103 142 cachedir = os.path.expanduser('~/.hg.cvsps')
104 143 if not os.path.exists(cachedir):
105 144 os.mkdir(cachedir)
106 145
107 146 # The cvsps cache pickle needs a uniquified name, based on the
108 147 # repository location. The address may have all sorts of nasties
109 148 # in it, slashes, colons and such. So here we take just the
110 149 # alphanumerics, concatenated in a way that does not mix up the
111 150 # various components, so that
112 151 # :pserver:user@server:/path
113 152 # and
114 153 # /pserver/user/server/path
115 154 # are mapped to different cache file names.
116 155 cachefile = root.split(":") + [directory, "cache"]
117 156 cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
118 157 cachefile = os.path.join(cachedir,
119 158 '.'.join([s for s in cachefile if s]))
120 159
121 160 if cache == 'update':
122 161 try:
123 162 ui.note(_('reading cvs log cache %s\n') % cachefile)
124 163 oldlog = pickle.load(file(cachefile))
125 164 ui.note(_('cache has %d log entries\n') % len(oldlog))
126 165 except Exception, e:
127 166 ui.note(_('error reading cache: %r\n') % e)
128 167
129 168 if oldlog:
130 169 date = oldlog[-1].date # last commit date as a (time,tz) tuple
131 170 date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
132 171
133 172 # build the CVS commandline
134 173 cmd = ['cvs', '-q']
135 174 if root:
136 175 cmd.append('-d%s' % root)
137 p = root.split(':')[-1]
176 p = util.normpath(getrepopath(root))
138 177 if not p.endswith('/'):
139 178 p += '/'
140 prefix = p + prefix
179 prefix = p + util.normpath(prefix)
141 180 cmd.append(['log', 'rlog'][rlog])
142 181 if date:
143 182 # no space between option and date string
144 183 cmd.append('-d>%s' % date)
145 184 cmd.append(directory)
146 185
147 186 # state machine begins here
148 187 tags = {} # dictionary of revisions on current file with their tags
149 188 state = 0
150 189 store = False # set when a new record can be appended
151 190
152 191 cmd = [util.shellquote(arg) for arg in cmd]
153 192 ui.note(_("running %s\n") % (' '.join(cmd)))
154 193 ui.debug(_("prefix=%r directory=%r root=%r\n") % (prefix, directory, root))
155 194
156 195 for line in util.popen(' '.join(cmd)):
157 196 if line.endswith('\n'):
158 197 line = line[:-1]
159 198 #ui.debug('state=%d line=%r\n' % (state, line))
160 199
161 200 if state == 0:
162 201 # initial state, consume input until we see 'RCS file'
163 202 match = re_00.match(line)
164 203 if match:
165 204 rcs = match.group(1)
166 205 tags = {}
167 206 if rlog:
168 filename = rcs[:-2]
207 filename = util.normpath(rcs[:-2])
169 208 if filename.startswith(prefix):
170 209 filename = filename[len(prefix):]
171 210 if filename.startswith('/'):
172 211 filename = filename[1:]
173 212 if filename.startswith('Attic/'):
174 213 filename = filename[6:]
175 214 else:
176 215 filename = filename.replace('/Attic/', '/')
177 216 state = 2
178 217 continue
179 218 state = 1
180 219 continue
181 220 match = re_01.match(line)
182 221 if match:
183 222 raise Exception(match.group(1))
184 223 match = re_02.match(line)
185 224 if match:
186 225 raise Exception(match.group(2))
187 226 if re_03.match(line):
188 227 raise Exception(line)
189 228
190 229 elif state == 1:
191 230 # expect 'Working file' (only when using log instead of rlog)
192 231 match = re_10.match(line)
193 232 assert match, _('RCS file must be followed by working file')
194 filename = match.group(1)
233 filename = util.normpath(match.group(1))
195 234 state = 2
196 235
197 236 elif state == 2:
198 237 # expect 'symbolic names'
199 238 if re_20.match(line):
200 239 state = 3
201 240
202 241 elif state == 3:
203 242 # read the symbolic names and store as tags
204 243 match = re_30.match(line)
205 244 if match:
206 245 rev = [int(x) for x in match.group(2).split('.')]
207 246
208 247 # Convert magic branch number to an odd-numbered one
209 248 revn = len(rev)
210 249 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
211 250 rev = rev[:-2] + rev[-1:]
212 251 rev = tuple(rev)
213 252
214 253 if rev not in tags:
215 254 tags[rev] = []
216 255 tags[rev].append(match.group(1))
217 256
218 257 elif re_31.match(line):
219 258 state = 5
220 259 elif re_32.match(line):
221 260 state = 0
222 261
223 262 elif state == 4:
224 263 # expecting '------' separator before first revision
225 264 if re_31.match(line):
226 265 state = 5
227 266 else:
228 267 assert not re_32.match(line), _('Must have at least some revisions')
229 268
230 269 elif state == 5:
231 270 # expecting revision number and possibly (ignored) lock indication
232 271 # we create the logentry here from values stored in states 0 to 4,
233 272 # as this state is re-entered for subsequent revisions of a file.
234 273 match = re_50.match(line)
235 274 assert match, _('expected revision number')
236 275 e = logentry(rcs=scache(rcs), file=scache(filename),
237 276 revision=tuple([int(x) for x in match.group(1).split('.')]),
238 277 branches=[], parent=None)
239 278 state = 6
240 279
241 280 elif state == 6:
242 281 # expecting date, author, state, lines changed
243 282 match = re_60.match(line)
244 283 assert match, _('revision must be followed by date line')
245 284 d = match.group(1)
246 285 if d[2] == '/':
247 286 # Y2K
248 287 d = '19' + d
249 288
250 289 if len(d.split()) != 3:
251 290 # cvs log dates always in GMT
252 291 d = d + ' UTC'
253 292 e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
254 293 e.author = scache(match.group(2))
255 294 e.dead = match.group(3).lower() == 'dead'
256 295
257 296 if match.group(5):
258 297 if match.group(6):
259 298 e.lines = (int(match.group(5)), int(match.group(6)))
260 299 else:
261 300 e.lines = (int(match.group(5)), 0)
262 301 elif match.group(6):
263 302 e.lines = (0, int(match.group(6)))
264 303 else:
265 304 e.lines = None
266 305 e.comment = []
267 306 state = 7
268 307
269 308 elif state == 7:
270 309 # read the revision numbers of branches that start at this revision
271 310 # or store the commit log message otherwise
272 311 m = re_70.match(line)
273 312 if m:
274 313 e.branches = [tuple([int(y) for y in x.strip().split('.')])
275 314 for x in m.group(1).split(';')]
276 315 state = 8
277 316 elif re_31.match(line):
278 317 state = 5
279 318 store = True
280 319 elif re_32.match(line):
281 320 state = 0
282 321 store = True
283 322 else:
284 323 e.comment.append(line)
285 324
286 325 elif state == 8:
287 326 # store commit log message
288 327 if re_31.match(line):
289 328 state = 5
290 329 store = True
291 330 elif re_32.match(line):
292 331 state = 0
293 332 store = True
294 333 else:
295 334 e.comment.append(line)
296 335
297 336 if store:
298 337 # clean up the results and save in the log.
299 338 store = False
300 339 e.tags = util.sort([scache(x) for x in tags.get(e.revision, [])])
301 340 e.comment = scache('\n'.join(e.comment))
302 341
303 342 revn = len(e.revision)
304 343 if revn > 3 and (revn % 2) == 0:
305 344 e.branch = tags.get(e.revision[:-1], [None])[0]
306 345 else:
307 346 e.branch = None
308 347
309 348 log.append(e)
310 349
311 350 if len(log) % 100 == 0:
312 351 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
313 352
314 353 listsort(log, key=lambda x:(x.rcs, x.revision))
315 354
316 355 # find parent revisions of individual files
317 356 versions = {}
318 357 for e in log:
319 358 branch = e.revision[:-1]
320 359 p = versions.get((e.rcs, branch), None)
321 360 if p is None:
322 361 p = e.revision[:-2]
323 362 e.parent = p
324 363 versions[(e.rcs, branch)] = e.revision
325 364
326 365 # update the log cache
327 366 if cache:
328 367 if log:
329 368 # join up the old and new logs
330 369 listsort(log, key=lambda x:x.date)
331 370
332 371 if oldlog and oldlog[-1].date >= log[0].date:
333 372 raise logerror('Log cache overlaps with new log entries,'
334 373 ' re-run without cache.')
335 374
336 375 log = oldlog + log
337 376
338 377 # write the new cachefile
339 378 ui.note(_('writing cvs log cache %s\n') % cachefile)
340 379 pickle.dump(log, file(cachefile, 'w'))
341 380 else:
342 381 log = oldlog
343 382
344 383 ui.status(_('%d log entries\n') % len(log))
345 384
346 385 return log
347 386
348 387
349 388 class changeset(object):
350 389 '''Class changeset has the following attributes:
351 390 .author - author name as CVS knows it
352 391 .branch - name of branch this changeset is on, or None
353 392 .comment - commit message
354 393 .date - the commit date as a (time,tz) tuple
355 394 .entries - list of logentry objects in this changeset
356 395 .parents - list of one or two parent changesets
357 396 .tags - list of tags on this changeset
358 397 '''
359 398 def __init__(self, **entries):
360 399 self.__dict__.update(entries)
361 400
362 401 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
363 402 '''Convert log into changesets.'''
364 403
365 404 ui.status(_('creating changesets\n'))
366 405
367 406 # Merge changesets
368 407
369 408 listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))
370 409
371 410 changesets = []
372 411 files = {}
373 412 c = None
374 413 for i, e in enumerate(log):
375 414
376 415 # Check if log entry belongs to the current changeset or not.
377 416 if not (c and
378 417 e.comment == c.comment and
379 418 e.author == c.author and
380 419 e.branch == c.branch and
381 420 ((c.date[0] + c.date[1]) <=
382 421 (e.date[0] + e.date[1]) <=
383 422 (c.date[0] + c.date[1]) + fuzz) and
384 423 e.file not in files):
385 424 c = changeset(comment=e.comment, author=e.author,
386 425 branch=e.branch, date=e.date, entries=[])
387 426 changesets.append(c)
388 427 files = {}
389 428 if len(changesets) % 100 == 0:
390 429 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
391 430 ui.status(util.ellipsis(t, 80) + '\n')
392 431
393 432 c.entries.append(e)
394 433 files[e.file] = True
395 434 c.date = e.date # changeset date is date of latest commit in it
396 435
397 436 # Sort files in each changeset
398 437
399 438 for c in changesets:
400 439 def pathcompare(l, r):
401 440 'Mimic cvsps sorting order'
402 441 l = l.split('/')
403 442 r = r.split('/')
404 443 nl = len(l)
405 444 nr = len(r)
406 445 n = min(nl, nr)
407 446 for i in range(n):
408 447 if i + 1 == nl and nl < nr:
409 448 return -1
410 449 elif i + 1 == nr and nl > nr:
411 450 return +1
412 451 elif l[i] < r[i]:
413 452 return -1
414 453 elif l[i] > r[i]:
415 454 return +1
416 455 return 0
417 456 def entitycompare(l, r):
418 457 return pathcompare(l.file, r.file)
419 458
420 459 c.entries.sort(entitycompare)
421 460
422 461 # Sort changesets by date
423 462
424 463 def cscmp(l, r):
425 464 d = sum(l.date) - sum(r.date)
426 465 if d:
427 466 return d
428 467
429 468 # detect vendor branches and initial commits on a branch
430 469 le = {}
431 470 for e in l.entries:
432 471 le[e.rcs] = e.revision
433 472 re = {}
434 473 for e in r.entries:
435 474 re[e.rcs] = e.revision
436 475
437 476 d = 0
438 477 for e in l.entries:
439 478 if re.get(e.rcs, None) == e.parent:
440 479 assert not d
441 480 d = 1
442 481 break
443 482
444 483 for e in r.entries:
445 484 if le.get(e.rcs, None) == e.parent:
446 485 assert not d
447 486 d = -1
448 487 break
449 488
450 489 return d
451 490
452 491 changesets.sort(cscmp)
453 492
454 493 # Collect tags
455 494
456 495 globaltags = {}
457 496 for c in changesets:
458 497 tags = {}
459 498 for e in c.entries:
460 499 for tag in e.tags:
461 500 # remember which is the latest changeset to have this tag
462 501 globaltags[tag] = c
463 502
464 503 for c in changesets:
465 504 tags = {}
466 505 for e in c.entries:
467 506 for tag in e.tags:
468 507 tags[tag] = True
469 508 # remember tags only if this is the latest changeset to have it
470 509 c.tags = util.sort([tag for tag in tags if globaltags[tag] is c])
471 510
472 511 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
473 512 # by inserting dummy changesets with two parents, and handle
474 513 # {{mergefrombranch BRANCHNAME}} by setting two parents.
475 514
476 515 if mergeto is None:
477 516 mergeto = r'{{mergetobranch ([-\w]+)}}'
478 517 if mergeto:
479 518 mergeto = re.compile(mergeto)
480 519
481 520 if mergefrom is None:
482 521 mergefrom = r'{{mergefrombranch ([-\w]+)}}'
483 522 if mergefrom:
484 523 mergefrom = re.compile(mergefrom)
485 524
486 525 versions = {} # changeset index where we saw any particular file version
487 526 branches = {} # changeset index where we saw a branch
488 527 n = len(changesets)
489 528 i = 0
490 529 while i<n:
491 530 c = changesets[i]
492 531
493 532 for f in c.entries:
494 533 versions[(f.rcs, f.revision)] = i
495 534
496 535 p = None
497 536 if c.branch in branches:
498 537 p = branches[c.branch]
499 538 else:
500 539 for f in c.entries:
501 540 p = max(p, versions.get((f.rcs, f.parent), None))
502 541
503 542 c.parents = []
504 543 if p is not None:
505 544 c.parents.append(changesets[p])
506 545
507 546 if mergefrom:
508 547 m = mergefrom.search(c.comment)
509 548 if m:
510 549 m = m.group(1)
511 550 if m == 'HEAD':
512 551 m = None
513 552 if m in branches and c.branch != m:
514 553 c.parents.append(changesets[branches[m]])
515 554
516 555 if mergeto:
517 556 m = mergeto.search(c.comment)
518 557 if m:
519 558 try:
520 559 m = m.group(1)
521 560 if m == 'HEAD':
522 561 m = None
523 562 except:
524 563 m = None # if no group found then merge to HEAD
525 564 if m in branches and c.branch != m:
526 565 # insert empty changeset for merge
527 566 cc = changeset(author=c.author, branch=m, date=c.date,
528 567 comment='convert-repo: CVS merge from branch %s' % c.branch,
529 568 entries=[], tags=[], parents=[changesets[branches[m]], c])
530 569 changesets.insert(i + 1, cc)
531 570 branches[m] = i + 1
532 571
533 572 # adjust our loop counters now we have inserted a new entry
534 573 n += 1
535 574 i += 2
536 575 continue
537 576
538 577 branches[c.branch] = i
539 578 i += 1
540 579
541 580 # Number changesets
542 581
543 582 for i, c in enumerate(changesets):
544 583 c.id = i + 1
545 584
546 585 ui.status(_('%d changeset entries\n') % len(changesets))
547 586
548 587 return changesets
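
The core of this fix is the new getrepopath() helper: it strips the connection components of a CVSROOT (method, user, host, port) so that only the repository path remains, without mangling Windows drive letters such as 'c:'. Below is a minimal standalone sketch of the same heuristic, mirroring the hunk above so it can be exercised with doctest outside of Mercurial; the example CVSROOT strings are illustrative only.

def getrepopath(cvspath):
    """Return the repository path portion of a CVSROOT string.

    >>> getrepopath('/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:user@host:2401/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:user@host:c:/foo/bar')
    'c:/foo/bar'
    """
    parts = cvspath.split(':')
    if len(parts) == 1:
        # plain local path, nothing to strip
        return parts[0]
    if len(parts[-2]) > 1:
        # the component before the path is host/method information, not a
        # one-letter drive; a port number glued to the path is dropped
        return parts[-1].lstrip('0123456789')
    # a single-character component is assumed to be a Windows drive letter
    # and is kept as part of the repository path
    return parts[-2] + ':' + parts[-1]

if __name__ == '__main__':
    import doctest
    doctest.testmod()

The disambiguation relies on the assumption stated in the comments of the hunk: drive letters are one character long, while every other CVSROOT component before the path is at least two characters long.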
tests/test-doctest.py
@@ -1,16 +1,19 @@
1 1 # this is a hack to make sure no escape characters are inserted into the output
2 2 import os;
3 3 if 'TERM' in os.environ:
4 4 del os.environ['TERM']
5 5 import doctest
6 6
7 7 import mercurial.changelog
8 8 # test doctest from changelog
9 9
10 10 doctest.testmod(mercurial.changelog)
11 11
12 12 import mercurial.httprepo
13 13 doctest.testmod(mercurial.httprepo)
14 14
15 15 import mercurial.util
16 16 doctest.testmod(mercurial.util)
17
18 import hgext.convert.cvsps
19 doctest.testmod(hgext.convert.cvsps)
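
The second hunk wires the cvsps module into tests/test-doctest.py so that the getrepopath doctests run as part of the Mercurial test suite. A quick way to run just these doctests by hand, assuming a Mercurial source checkout is on PYTHONPATH, is a sketch like:

import doctest
import hgext.convert.cvsps

# testmod returns (failed, attempted); a non-zero 'failed' count means one
# of the getrepopath examples no longer matches its expected output
failed, attempted = doctest.testmod(hgext.convert.cvsps, verbose=True)
print('%d of %d doctest examples failed' % (failed, attempted))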