##// END OF EJS Templates
cvsps: switch a file open to a with statement
Augie Fackler
r43322:3b8a4587 default
parent child Browse files
Show More
@@ -1,965 +1,966 b''
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial import (
15 15 encoding,
16 16 error,
17 17 hook,
18 18 pycompat,
19 19 util,
20 20 )
21 21 from mercurial.utils import (
22 22 dateutil,
23 23 procutil,
24 24 stringutil,
25 25 )
26 26
27 27 pickle = util.pickle
28 28
class logentry(object):
    '''One CVS file revision, as parsed from (r)log output.

    Attributes (filled in via keyword arguments and by the parser):
    .author    - author name as CVS knows it
    .branch    - name of branch this revision is on
    .branches  - revision tuple of branches starting at this revision
    .comment   - commit message
    .commitid  - CVS commitid or None
    .date      - the commit date as a (time, tz) tuple
    .dead      - true if file revision is dead
    .file      - name of file
    .lines     - a tuple (+lines, -lines) or None
    .parent    - previous revision of this entry
    .rcs       - name of file as returned from CVS
    .revision  - revision number as tuple
    .tags      - list of tags on the file
    .synthetic - is this a synthetic "file ... added on ..." revision?
    .mergepoint - the branch that has been merged from (if present in
                  rlog output) or None
    .branchpoints - the branches that start at the current entry or empty
    '''
    def __init__(self, **entries):
        # Every entry starts out non-synthetic; createlog() flips this
        # for CVS's auto-generated "file ... added on branch" revisions.
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        names = sorted(self.__dict__)
        body = r", ".join(r"%s=%r" % (name, self.__dict__[name])
                          for name in names)
        return r"%s(%s)" % (type(self).__name__, body)
56 56
class logerror(Exception):
    """Raised when CVS (r)log output cannot be retrieved or parsed."""
59 59
def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    >>> getrepopath(b'/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'user@server/path/to/repository')
    '/path/to/repository'
    """
    # According to the CVS manual, CVS paths are expressed like:
    #   [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # Only the last ':'-separated field can hold the repository path,
    # possibly preceded by 'user@host'.  The path proper begins at the
    # first '/' found at or after the '@' sign (if there is one).
    tail = cvspath.split(':')[-1]
    at = tail.find('@')
    searchfrom = at if at != -1 else 0
    return tail[tail.find('/', searchfrom):]
98 98
def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog and return a list of logentry objects.

    ui        - Mercurial ui object used for status/debug output
    directory - directory to log; None means the current CVS sandbox
    root      - CVSROOT to use; when empty, falls back to the sandbox's
                CVS/Root file and then the CVSROOT environment variable
    rlog      - run 'cvs rlog' instead of 'cvs log'
    cache     - None, 'write' or 'update': controls the on-disk pickle
                cache kept under ~/.hg.cvsps

    Raises logerror when the current directory is not a CVS sandbox or
    when the cache overlaps with newly collected log entries.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile(b'RCS file: (.+)$')
    re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
    re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
    re_03 = re.compile(b"(Cannot access.+CVSROOT)|"
                       b"(can't create temporary directory.+)$")
    re_10 = re.compile(b'Working file: (.+)$')
    re_20 = re.compile(b'symbolic names:')
    re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
    re_31 = re.compile(b'----------------------------$')
    re_32 = re.compile(b'======================================='
                       b'======================================$')
    re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
                       br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
                       br'(\s+commitid:\s+([^;]+);)?'
                       br'(.*mergepoint:\s+([^;]+);)?')
    re_70 = re.compile(b'branches: (.+);$')

    file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')

    prefix = ''   # leading path to strip of what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            with open(os.path.join(b'CVS', b'Repository'), 'rb') as f:
                prefix = f.read().strip()
            directory = prefix
            if prefix == ".":
                prefix = ""
        except IOError:
            raise logerror(_('not a CVS sandbox'))

        if prefix and not prefix.endswith(pycompat.ossep):
            prefix += pycompat.ossep

        # Use the Root file in the sandbox, if it exists.
        # Use a with statement so the handle is closed promptly (matches
        # the CVS/Repository read above instead of relying on refcounting).
        try:
            with open(os.path.join('CVS', 'Root'), 'rb') as f:
                root = f.read().strip()
        except IOError:
            pass

    if not root:
        root = encoding.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumeric characters, concatenated in a way that does not
        # mix up the various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":") + [directory, "cache"]
        cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir,
                                 '.'.join([s for s in cachefile if s]))

    if cache == 'update':
        try:
            ui.note(_('reading cvs log cache %s\n') % cachefile)
            # close the cache file deterministically instead of leaking
            # the handle returned by a bare open()
            with open(cachefile, 'rb') as f:
                oldlog = pickle.load(f)
            for e in oldlog:
                if not (util.safehasattr(e, 'branchpoints') and
                        util.safehasattr(e, 'commitid') and
                        util.safehasattr(e, 'mergepoint')):
                    ui.status(_('ignoring old cache\n'))
                    oldlog = []
                    break

            ui.note(_('cache has %d log entries\n') % len(oldlog))
        except Exception as e:
            ui.note(_('error reading cache: %r\n') % e)

        if oldlog:
            date = oldlog[-1].date    # last commit date as a (time,tz) tuple
            date = dateutil.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith('/'):
            p += '/'
        if prefix:
            # looks like normpath replaces "" by "."
            prefix = p + util.normpath(prefix)
        else:
            prefix = p
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}     # dictionary of revisions on current file with their tags
    branchmap = {} # mapping between branch names and revision numbers
    rcsmap = {}
    state = 0
    store = False # set when a new record can be appended

    cmd = [procutil.shellquote(arg) for arg in cmd]
    ui.note(_("running %s\n") % (' '.join(cmd)))
    ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))

    pfp = procutil.popen(' '.join(cmd), 'rb')
    peek = util.fromnativeeol(pfp.readline())
    while True:
        line = peek
        if line == '':
            break
        peek = util.fromnativeeol(pfp.readline())
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise logerror(match.group(1))
            match = re_02.match(line)
            if match:
                raise logerror(match.group(2))
            if re_03.match(line):
                raise logerror(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                branchmap = {}
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))
                branchmap[match.group(1)] = match.group(2)

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('must have at least '
                                                'some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs),
                         file=scache(filename),
                         revision=tuple([int(x) for x in
                                         match.group(1).split('.')]),
                         branches=[],
                         parent=None,
                         commitid=None,
                         mergepoint=None,
                         branchpoints=set())

            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            if d[2] == '/':
                # Y2K
                d = '19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + ' UTC'
            e.date = dateutil.parsedate(d, ['%y/%m/%d %H:%M:%S',
                                            '%Y/%m/%d %H:%M:%S',
                                            '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None

            if match.group(7): # cvs 1.12 commitid
                e.commitid = match.group(8)

            if match.group(9): # cvsnt mergepoint
                myrev = match.group(10).split('.')
                if len(myrev) == 2: # head
                    e.mergepoint = 'HEAD'
                else:
                    myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
                    branches = [b for b in branchmap if branchmap[b] == myrev]
                    assert len(branches) == 1, ('unknown branch: %s'
                                                % e.mergepoint)
                    e.mergepoint = branches[0]

            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                              for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line) and re_50.match(peek):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                cpeek = peek
                if cpeek.endswith('\n'):
                    cpeek = cpeek[:-1]
                if re_50.match(cpeek):
                    state = 5
                    store = True
                else:
                    e.comment.append(line)
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        # When a file is added on a branch B1, CVS creates a synthetic
        # dead trunk revision 1.1 so that the branch has a root.
        # Likewise, if you merge such a file to a later branch B2 (one
        # that already existed when the file was added on B1), CVS
        # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
        # these revisions now, but mark them synthetic so
        # createchangeset() can take care of them.
        if (store and
              e.dead and
              e.revision[-1] == 1 and      # 1.1 or 1.1.x.1
              len(e.comment) == 1 and
              file_added_re.match(e.comment[0])):
            ui.debug('found synthetic revision in %s: %r\n'
                     % (e.rcs, e.comment[0]))
            e.synthetic = True

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache('\n'.join(e.comment))

            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            # find the branches starting from this revision
            branchpoints = set()
            # NOTE(review): dict.iteritems is Python 2 only — presumably
            # rewritten by hg's py3 source loader; confirm before use on py3.
            for branch, revision in branchmap.iteritems():
                revparts = tuple([int(i) for i in revision.split('.')])
                if len(revparts) < 2: # bad tags
                    continue
                if revparts[-2] == 0 and revparts[-1] % 2 == 0:
                    # normal branch
                    if revparts[:-2] == e.revision:
                        branchpoints.add(branch)
                elif revparts == (1, 1, 1): # vendor branch
                    if revparts in e.branches:
                        branchpoints.add(branch)
            e.branchpoints = branchpoints

            log.append(e)

            rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs

            if len(log) % 100 == 0:
                ui.status(stringutil.ellipsis('%d %s' % (len(log), e.file), 80)
                          + '\n')

    log.sort(key=lambda x: (x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
        rcs = e.rcs.replace('/Attic/', '/')
        if rcs in rcsmap:
            e.rcs = rcsmap[rcs]
        branch = e.revision[:-1]
        versions[(e.rcs, branch)] = e.revision

    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            log.sort(key=lambda x: x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror(_('log cache overlaps with new log entries,'
                                 ' re-run without cache.'))

            log = oldlog + log

            # write the new cachefile; use a with statement so the cache
            # is flushed and closed even if pickling fails
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            with open(cachefile, 'wb') as f:
                pickle.dump(log, f)
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    encodings = ui.configlist('convert', 'cvsps.logencoding')
    if encodings:
        def revstr(r):
            # this is needed, because logentry.revision is a tuple of "int"
            # (e.g. (1, 2) for "1.2")
            return '.'.join(pycompat.maplist(pycompat.bytestr, r))

        for entry in log:
            comment = entry.comment
            for e in encodings:
                try:
                    entry.comment = comment.decode(
                        pycompat.sysstr(e)).encode('utf-8')
                    if ui.debugflag:
                        ui.debug("transcoding by %s: %s of %s\n" %
                                 (e, revstr(entry.revision), entry.file))
                    break
                except UnicodeDecodeError:
                    pass # try next encoding
                except LookupError as inst: # unknown encoding, maybe
                    raise error.Abort(inst,
                                      hint=_('check convert.cvsps.logencoding'
                                             ' configuration'))
            else:
                raise error.Abort(_("no encoding can transcode"
                                    " CVS log message for %s of %s")
                                  % (revstr(entry.revision), entry.file),
                                  hint=_('check convert.cvsps.logencoding'
                                         ' configuration'))

    hook.hook(ui, None, "cvslog", True, log=log)

    return log
535 536
536 537
class changeset(object):
    '''A changeset assembled from a group of CVS log entries.

    Attributes:
    .id        - integer identifying this changeset (list index)
    .author    - author name as CVS knows it
    .branch    - name of branch this changeset is on, or None
    .comment   - commit message
    .commitid  - CVS commitid or None
    .date      - the commit date as a (time,tz) tuple
    .entries   - list of logentry objects in this changeset
    .parents   - list of one or two parent changesets
    .tags      - list of tags on this changeset
    .synthetic - from synthetic revision "file ... added on branch ..."
    .mergepoint - the branch that has been merged from or None
    .branchpoints - the branches that start at the current entry or empty
    '''
    def __init__(self, **entries):
        # id is assigned only after synthetic changesets are dropped,
        # by createchangeset()'s final numbering pass.
        self.id = None
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        pairs = ("%s=%r" % (name, self.__dict__[name])
                 for name in sorted(self.__dict__))
        return "%s(%s)" % (type(self).__name__, ", ".join(pairs))
560 561
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
    '''Convert log into changesets.

    ui        - Mercurial ui object for status/warning output
    log       - list of logentry objects as produced by createlog();
                sorted in place as a side effect
    fuzz      - maximum difference between two entries' (time + tz)
                values that may still be merged into one changeset when
                no commitid is available
    mergefrom - pattern matched against commit messages to add a second
                parent; defaults to {{mergefrombranch BRANCHNAME}}
    mergeto   - pattern matched against commit messages to insert a
                dummy merge changeset; defaults to
                {{mergetobranch BRANCHNAME}}

    Returns the list of changeset objects, numbered consecutively from 1
    via their .id attribute, with synthetic changesets removed.
    '''

    ui.status(_('creating changesets\n'))

    # try to order commitids by date
    mindate = {}
    for e in log:
        if e.commitid:
            if e.commitid not in mindate:
                mindate[e.commitid] = e.date
            else:
                mindate[e.commitid] = min(e.date, mindate[e.commitid])

    # Merge changesets
    log.sort(key=lambda x: (mindate.get(x.commitid, (-1, 0)),
                            x.commitid or '', x.comment,
                            x.author, x.branch or '', x.date, x.branchpoints))

    changesets = []
    files = set()
    c = None
    for i, e in enumerate(log):

        # Check if log entry belongs to the current changeset or not.

        # Since CVS is file-centric, two different file revisions with
        # different branchpoints should be treated as belonging to two
        # different changesets (and the ordering is important and not
        # honoured by cvsps at this point).
        #
        # Consider the following case:
        # foo 1.1 branchpoints: [MYBRANCH]
        # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
        #
        # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
        # later version of foo may be in MYBRANCH2, so foo should be the
        # first changeset and bar the next and MYBRANCH and MYBRANCH2
        # should both start off of the bar changeset. No provisions are
        # made to ensure that this is, in fact, what happens.
        if not (c and e.branchpoints == c.branchpoints and
                (# cvs commitids
                 (e.commitid is not None and e.commitid == c.commitid) or
                 (# no commitids, use fuzzy commit detection
                  (e.commitid is None or c.commitid is None) and
                   e.comment == c.comment and
                   e.author == c.author and
                   e.branch == c.branch and
                   ((c.date[0] + c.date[1]) <=
                    (e.date[0] + e.date[1]) <=
                    (c.date[0] + c.date[1]) + fuzz) and
                   e.file not in files))):
            # start a fresh changeset seeded from this entry
            c = changeset(comment=e.comment, author=e.author,
                          branch=e.branch, date=e.date,
                          entries=[], mergepoint=e.mergepoint,
                          branchpoints=e.branchpoints, commitid=e.commitid)
            changesets.append(c)

            files = set()
            if len(changesets) % 100 == 0:
                t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
                ui.status(stringutil.ellipsis(t, 80) + '\n')

        c.entries.append(e)
        files.add(e.file)
        c.date = e.date       # changeset date is date of latest commit in it

    # Mark synthetic changesets

    for c in changesets:
        # Synthetic revisions always get their own changeset, because
        # the log message includes the filename.  E.g. if you add file3
        # and file4 on a branch, you get four log entries and three
        # changesets:
        #   "File file3 was added on branch ..." (synthetic, 1 entry)
        #   "File file4 was added on branch ..." (synthetic, 1 entry)
        #   "Add file3 and file4 to fix ..."     (real, 2 entries)
        # Hence the check for 1 entry here.
        c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic

    # Sort files in each changeset

    def entitycompare(l, r):
        'Mimic cvsps sorting order'
        l = l.file.split('/')
        r = r.file.split('/')
        nl = len(l)
        nr = len(r)
        n = min(nl, nr)
        for i in range(n):
            if i + 1 == nl and nl < nr:
                return -1
            elif i + 1 == nr and nl > nr:
                return +1
            elif l[i] < r[i]:
                return -1
            elif l[i] > r[i]:
                return +1
        return 0

    for c in changesets:
        c.entries.sort(key=functools.cmp_to_key(entitycompare))

    # Sort changesets by date

    odd = set()
    def cscmp(l, r):
        d = sum(l.date) - sum(r.date)
        if d:
            return d

        # detect vendor branches and initial commits on a branch
        le = {}
        for e in l.entries:
            le[e.rcs] = e.revision
        re = {}
        for e in r.entries:
            re[e.rcs] = e.revision

        d = 0
        for e in l.entries:
            if re.get(e.rcs, None) == e.parent:
                assert not d
                d = 1
                break

        for e in r.entries:
            if le.get(e.rcs, None) == e.parent:
                if d:
                    # l and r each contain the other's parent: record the
                    # inconsistency so it can be reported after numbering
                    odd.add((l, r))
                d = -1
                break
        # By this point, the changesets are sufficiently compared that
        # we don't really care about ordering. However, this leaves
        # some race conditions in the tests, so we compare on the
        # number of files modified, the files contained in each
        # changeset, and the branchpoints in the change to ensure test
        # output remains stable.

        # recommended replacement for cmp from
        # https://docs.python.org/3.0/whatsnew/3.0.html
        c = lambda x, y: (x > y) - (x < y)
        # Sort bigger changes first.
        if not d:
            d = c(len(l.entries), len(r.entries))
        # Try sorting by filename in the change.
        if not d:
            d = c([e.file for e in l.entries], [e.file for e in r.entries])
        # Try and put changes without a branch point before ones with
        # a branch point.
        if not d:
            d = c(len(l.branchpoints), len(r.branchpoints))
        return d

    changesets.sort(key=functools.cmp_to_key(cscmp))

    # Collect tags

    globaltags = {}
    for c in changesets:
        for e in c.entries:
            for tag in e.tags:
                # remember which is the latest changeset to have this tag
                globaltags[tag] = c

    for c in changesets:
        tags = set()
        for e in c.entries:
            tags.update(e.tags)
        # remember tags only if this is the latest changeset to have it
        c.tags = sorted(tag for tag in tags if globaltags[tag] is c)

    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
    # by inserting dummy changesets with two parents, and handle
    # {{mergefrombranch BRANCHNAME}} by setting two parents.

    if mergeto is None:
        mergeto = br'{{mergetobranch ([-\w]+)}}'
    if mergeto:
        mergeto = re.compile(mergeto)

    if mergefrom is None:
        mergefrom = br'{{mergefrombranch ([-\w]+)}}'
    if mergefrom:
        mergefrom = re.compile(mergefrom)

    versions = {}    # changeset index where we saw any particular file version
    branches = {}    # changeset index where we saw a branch
    n = len(changesets)
    i = 0
    while i < n:
        c = changesets[i]

        for f in c.entries:
            versions[(f.rcs, f.revision)] = i

        p = None
        if c.branch in branches:
            p = branches[c.branch]
        else:
            # first changeset on a new branch
            # the parent is a changeset with the branch in its
            # branchpoints such that it is the latest possible
            # commit without any intervening, unrelated commits.

            for candidate in pycompat.xrange(i):
                if c.branch not in changesets[candidate].branchpoints:
                    if p is not None:
                        break
                    continue
                p = candidate

        c.parents = []
        if p is not None:
            p = changesets[p]

            # Ensure no changeset has a synthetic changeset as a parent.
            while p.synthetic:
                assert len(p.parents) <= 1, (
                    _('synthetic changeset cannot have multiple parents'))
                if p.parents:
                    p = p.parents[0]
                else:
                    p = None
                    break

            if p is not None:
                c.parents.append(p)

        if c.mergepoint:
            if c.mergepoint == 'HEAD':
                c.mergepoint = None
            c.parents.append(changesets[branches[c.mergepoint]])

        if mergefrom:
            m = mergefrom.search(c.comment)
            if m:
                m = m.group(1)
                if m == 'HEAD':
                    m = None
                try:
                    candidate = changesets[branches[m]]
                except KeyError:
                    ui.warn(_("warning: CVS commit message references "
                              "non-existent branch %r:\n%s\n")
                            % (pycompat.bytestr(m), c.comment))
                if m in branches and c.branch != m and not candidate.synthetic:
                    c.parents.append(candidate)

        if mergeto:
            m = mergeto.search(c.comment)
            if m:
                if m.groups():
                    m = m.group(1)
                    if m == 'HEAD':
                        m = None
                else:
                    m = None   # if no group found then merge to HEAD
                if m in branches and c.branch != m:
                    # insert empty changeset for merge
                    cc = changeset(
                        author=c.author, branch=m, date=c.date,
                        comment='convert-repo: CVS merge from branch %s'
                        % c.branch,
                        entries=[], tags=[],
                        parents=[changesets[branches[m]], c])
                    changesets.insert(i + 1, cc)
                    branches[m] = i + 1

                    # adjust our loop counters now we have inserted a new entry
                    n += 1
                    i += 2
                    continue

        branches[c.branch] = i
        i += 1

    # Drop synthetic changesets (safe now that we have ensured no other
    # changesets can have them as parents).
    i = 0
    while i < len(changesets):
        if changesets[i].synthetic:
            del changesets[i]
        else:
            i += 1

    # Number changesets

    for i, c in enumerate(changesets):
        c.id = i + 1

    if odd:
        for l, r in odd:
            if l.id is not None and r.id is not None:
                ui.warn(_('changeset %d is both before and after %d\n')
                        % (l.id, r.id))

    ui.status(_('%d changeset entries\n') % len(changesets))

    hook.hook(ui, None, "cvschangesets", True, changesets=changesets)

    return changesets
863 864
864 865
def debugcvsps(ui, *args, **opts):
    '''Read CVS rlog for current directory or named path in
    repository, and convert the log to changesets based on matching
    commit log entries and dates.

    args holds zero or more directories to log (the current sandbox
    when empty).  opts keys used here: new_cache, update_cache, root,
    fuzz, revisions, branches, ancestors, parents, prefix.
    '''
    opts = pycompat.byteskwargs(opts)
    # translate the two cache flags into createlog()'s cache mode
    if opts["new_cache"]:
        cache = "write"
    elif opts["update_cache"]:
        cache = "update"
    else:
        cache = None

    revisions = opts["revisions"]

    try:
        if args:
            log = []
            for d in args:
                log += createlog(ui, d, root=opts["root"], cache=cache)
        else:
            log = createlog(ui, root=opts["root"], cache=cache)
    except logerror as e:
        # report the parse/collection failure instead of aborting
        ui.write("%r\n"%e)
        return

    changesets = createchangeset(ui, log, opts["fuzz"])
    del log

    # Print changesets (optionally filtered)

    # off is truthy while we have not yet reached the first requested
    # revision/tag; printing is suppressed until it becomes false
    off = len(revisions)
    branches = {}    # latest version number in each branch
    ancestors = {}   # parent branch
    for cs in changesets:

        if opts["ancestors"]:
            if cs.branch not in branches and cs.parents and cs.parents[0].id:
                ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
                                        cs.parents[0].id)
            branches[cs.branch] = cs.id

        # limit by branches
        if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
            continue

        if not off:
            # Note: trailing spaces on several lines here are needed to have
            #       bug-for-bug compatibility with cvsps.
            ui.write('---------------------\n')
            ui.write(('PatchSet %d \n' % cs.id))
            ui.write(('Date: %s\n' % dateutil.datestr(cs.date,
                                                      '%Y/%m/%d %H:%M:%S %1%2')))
            ui.write(('Author: %s\n' % cs.author))
            ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
            ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
                                        ','.join(cs.tags) or '(none)')))
            if cs.branchpoints:
                ui.write(('Branchpoints: %s \n') %
                         ', '.join(sorted(cs.branchpoints)))
            if opts["parents"] and cs.parents:
                if len(cs.parents) > 1:
                    ui.write(('Parents: %s\n' %
                             (','.join([(b"%d" % p.id) for p in cs.parents]))))
                else:
                    ui.write(('Parent: %d\n' % cs.parents[0].id))

            if opts["ancestors"]:
                # walk the ancestor-branch chain recorded above
                b = cs.branch
                r = []
                while b:
                    b, c = ancestors[b]
                    r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
                if r:
                    ui.write(('Ancestors: %s\n' % (','.join(r))))

            ui.write(('Log:\n'))
            ui.write('%s\n\n' % cs.comment)
            ui.write(('Members: \n'))
            for f in cs.entries:
                fn = f.file
                if fn.startswith(opts["prefix"]):
                    fn = fn[len(opts["prefix"]):]
                ui.write('\t%s:%s->%s%s \n' % (
                        fn,
                        '.'.join([b"%d" % x for x in f.parent]) or 'INITIAL',
                        '.'.join([(b"%d" % x) for x in f.revision]),
                        ['', '(DEAD)'][f.dead]))
            ui.write('\n')

        # have we seen the start tag?
        if revisions and off:
            if (revisions[0] == (b"%d" % cs.id) or
                revisions[0] in cs.tags):
                off = False

        # see if we reached the end tag
        if len(revisions) > 1 and not off:
            if (revisions[1] == (b"%d" % cs.id) or
                revisions[1] in cs.tags):
                break
General Comments 0
You need to be logged in to leave comments. Login now