cvsps: wrap bytes in bytestr before %r-ing it...
Augie Fackler
r37906:d4aad0dd default
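The one-line change in this revision wraps the branch name in pycompat.bytestr before interpolating it with %r (line 801 below). A minimal, self-contained sketch of why, assuming Python 3.5+ bytes %-formatting; the bytestr class here is only a stand-in imitating what mercurial.pycompat.bytestr does to repr():

    # %r on a plain bytes object picks up the b'' prefix on Python 3,
    # so the warning text would differ from the Python 2 output.
    m = b'MYBRANCH'
    print(b"non-existent branch %r" % m)   # b"non-existent branch b'MYBRANCH'"

    # A bytes subclass whose repr drops the leading 'b' restores the
    # Python 2 rendering (mercurial.pycompat.bytestr behaves similarly):
    class bytestr(bytes):
        def __repr__(self):
            return bytes.__repr__(self)[1:]   # "b'MYBRANCH'" -> "'MYBRANCH'"

    print(b"non-existent branch %r" % bytestr(m))   # b"non-existent branch 'MYBRANCH'"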
@@ -1,960 +1,960 @@
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial import (
15 15 encoding,
16 16 error,
17 17 hook,
18 18 pycompat,
19 19 util,
20 20 )
21 21 from mercurial.utils import (
22 22 dateutil,
23 23 procutil,
24 24 stringutil,
25 25 )
26 26
27 27 pickle = util.pickle
28 28
29 29 class logentry(object):
30 30 '''Class logentry has the following attributes:
31 31 .author - author name as CVS knows it
32 32 .branch - name of branch this revision is on
33 33 .branches - revision tuple of branches starting at this revision
34 34 .comment - commit message
35 35 .commitid - CVS commitid or None
36 36 .date - the commit date as a (time, tz) tuple
37 37 .dead - true if file revision is dead
38 38 .file - Name of file
39 39 .lines - a tuple (+lines, -lines) or None
40 40 .parent - Previous revision of this entry
41 41 .rcs - name of file as returned from CVS
42 42 .revision - revision number as tuple
43 43 .tags - list of tags on the file
44 44 .synthetic - is this a synthetic "file ... added on ..." revision?
45 45 .mergepoint - the branch that has been merged from (if present in
46 46 rlog output) or None
47 47 .branchpoints - the branches that start at the current entry or empty
48 48 '''
49 49 def __init__(self, **entries):
50 50 self.synthetic = False
51 51 self.__dict__.update(entries)
52 52
53 53 def __repr__(self):
54 54 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
55 55 return "%s(%s)"%(type(self).__name__, ", ".join(items))
56 56
57 57 class logerror(Exception):
58 58 pass
59 59
60 60 def getrepopath(cvspath):
61 61 """Return the repository path from a CVS path.
62 62
63 63 >>> getrepopath(b'/foo/bar')
64 64 '/foo/bar'
65 65 >>> getrepopath(b'c:/foo/bar')
66 66 '/foo/bar'
67 67 >>> getrepopath(b':pserver:10/foo/bar')
68 68 '/foo/bar'
69 69 >>> getrepopath(b':pserver:10c:/foo/bar')
70 70 '/foo/bar'
71 71 >>> getrepopath(b':pserver:/foo/bar')
72 72 '/foo/bar'
73 73 >>> getrepopath(b':pserver:c:/foo/bar')
74 74 '/foo/bar'
75 75 >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
76 76 '/foo/bar'
77 77 >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
78 78 '/foo/bar'
79 79 >>> getrepopath(b'user@server/path/to/repository')
80 80 '/path/to/repository'
81 81 """
82 82 # According to CVS manual, CVS paths are expressed like:
83 83 # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
84 84 #
85 85 # The CVS path is split into parts, and then the position of the first
86 86 # occurrence of the '/' char after the '@' is located. The repository
87 87 # path is the rest of the string from that '/' sign on, including it.
88 88
89 89 parts = cvspath.split(':')
90 90 atposition = parts[-1].find('@')
91 91 start = 0
92 92
93 93 if atposition != -1:
94 94 start = atposition
95 95
96 96 repopath = parts[-1][parts[-1].find('/', start):]
97 97 return repopath
98 98
99 99 def createlog(ui, directory=None, root="", rlog=True, cache=None):
100 100 '''Collect the CVS rlog'''
101 101
102 102 # Because we store many duplicate commit log messages, reusing strings
103 103 # saves a lot of memory and pickle storage space.
104 104 _scache = {}
105 105 def scache(s):
106 106 "return a shared version of a string"
107 107 return _scache.setdefault(s, s)
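# (note: dict.setdefault returns the string already stored under an
#  equal key, so later duplicates all end up sharing that one object)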
108 108
109 109 ui.status(_('collecting CVS rlog\n'))
110 110
111 111 log = [] # list of logentry objects containing the CVS state
112 112
113 113 # patterns to match in CVS (r)log output, by state of use
114 114 re_00 = re.compile(b'RCS file: (.+)$')
115 115 re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
116 116 re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
117 117 re_03 = re.compile(b"(Cannot access.+CVSROOT)|"
118 118 b"(can't create temporary directory.+)$")
119 119 re_10 = re.compile(b'Working file: (.+)$')
120 120 re_20 = re.compile(b'symbolic names:')
121 121 re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
122 122 re_31 = re.compile(b'----------------------------$')
123 123 re_32 = re.compile(b'======================================='
124 124 b'======================================$')
125 125 re_50 = re.compile(b'revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
126 126 re_60 = re.compile(br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
127 127 br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
128 128 br'(\s+commitid:\s+([^;]+);)?'
129 129 br'(.*mergepoint:\s+([^;]+);)?')
130 130 re_70 = re.compile(b'branches: (.+);$')
131 131
132 132 file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')
133 133
134 134 prefix = '' # leading path to strip off what we get from CVS
135 135
136 136 if directory is None:
137 137 # Current working directory
138 138
139 139 # Get the real directory in the repository
140 140 try:
141 141 prefix = open(os.path.join('CVS','Repository'), 'rb').read().strip()
142 142 directory = prefix
143 143 if prefix == ".":
144 144 prefix = ""
145 145 except IOError:
146 146 raise logerror(_('not a CVS sandbox'))
147 147
148 148 if prefix and not prefix.endswith(pycompat.ossep):
149 149 prefix += pycompat.ossep
150 150
151 151 # Use the Root file in the sandbox, if it exists
152 152 try:
153 153 root = open(os.path.join('CVS','Root'), 'rb').read().strip()
154 154 except IOError:
155 155 pass
156 156
157 157 if not root:
158 158 root = encoding.environ.get('CVSROOT', '')
159 159
160 160 # read log cache if one exists
161 161 oldlog = []
162 162 date = None
163 163
164 164 if cache:
165 165 cachedir = os.path.expanduser('~/.hg.cvsps')
166 166 if not os.path.exists(cachedir):
167 167 os.mkdir(cachedir)
168 168
169 169 # The cvsps cache pickle needs a uniquified name, based on the
170 170 # repository location. The address may have all sorts of nasties
171 171 # in it, slashes, colons and such. So here we take just the
172 172 # alphanumeric characters, concatenated in a way that does not
173 173 # mix up the various components, so that
174 174 # :pserver:user@server:/path
175 175 # and
176 176 # /pserver/user/server/path
177 177 # are mapped to different cache file names.
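# (illustrative: under this scheme ':pserver:user@server:/path' yields a
#  name like 'pserver.user-server.path.<directory>.cache', while
#  '/pserver/user/server/path' yields 'pserver-user-server-path.<directory>.cache')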
178 178 cachefile = root.split(":") + [directory, "cache"]
179 179 cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
180 180 cachefile = os.path.join(cachedir,
181 181 '.'.join([s for s in cachefile if s]))
182 182
183 183 if cache == 'update':
184 184 try:
185 185 ui.note(_('reading cvs log cache %s\n') % cachefile)
186 186 oldlog = pickle.load(open(cachefile, 'rb'))
187 187 for e in oldlog:
188 188 if not (util.safehasattr(e, 'branchpoints') and
189 189 util.safehasattr(e, 'commitid') and
190 190 util.safehasattr(e, 'mergepoint')):
191 191 ui.status(_('ignoring old cache\n'))
192 192 oldlog = []
193 193 break
194 194
195 195 ui.note(_('cache has %d log entries\n') % len(oldlog))
196 196 except Exception as e:
197 197 ui.note(_('error reading cache: %r\n') % e)
198 198
199 199 if oldlog:
200 200 date = oldlog[-1].date # last commit date as a (time,tz) tuple
201 201 date = dateutil.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
202 202
203 203 # build the CVS commandline
204 204 cmd = ['cvs', '-q']
205 205 if root:
206 206 cmd.append('-d%s' % root)
207 207 p = util.normpath(getrepopath(root))
208 208 if not p.endswith('/'):
209 209 p += '/'
210 210 if prefix:
211 211 # looks like normpath replaces "" by "."
212 212 prefix = p + util.normpath(prefix)
213 213 else:
214 214 prefix = p
215 215 cmd.append(['log', 'rlog'][rlog])
216 216 if date:
217 217 # no space between option and date string
218 218 cmd.append('-d>%s' % date)
219 219 cmd.append(directory)
220 220
221 221 # state machine begins here
222 222 tags = {} # dictionary of revisions on current file with their tags
223 223 branchmap = {} # mapping between branch names and revision numbers
224 224 rcsmap = {}
225 225 state = 0
226 226 store = False # set when a new record can be appended
227 227
228 228 cmd = [procutil.shellquote(arg) for arg in cmd]
229 229 ui.note(_("running %s\n") % (' '.join(cmd)))
230 230 ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
231 231
232 232 pfp = procutil.popen(' '.join(cmd), 'rb')
233 233 peek = util.fromnativeeol(pfp.readline())
234 234 while True:
235 235 line = peek
236 236 if line == '':
237 237 break
238 238 peek = util.fromnativeeol(pfp.readline())
239 239 if line.endswith('\n'):
240 240 line = line[:-1]
241 241 #ui.debug('state=%d line=%r\n' % (state, line))
242 242
243 243 if state == 0:
244 244 # initial state, consume input until we see 'RCS file'
245 245 match = re_00.match(line)
246 246 if match:
247 247 rcs = match.group(1)
248 248 tags = {}
249 249 if rlog:
250 250 filename = util.normpath(rcs[:-2])
251 251 if filename.startswith(prefix):
252 252 filename = filename[len(prefix):]
253 253 if filename.startswith('/'):
254 254 filename = filename[1:]
255 255 if filename.startswith('Attic/'):
256 256 filename = filename[6:]
257 257 else:
258 258 filename = filename.replace('/Attic/', '/')
259 259 state = 2
260 260 continue
261 261 state = 1
262 262 continue
263 263 match = re_01.match(line)
264 264 if match:
265 265 raise logerror(match.group(1))
266 266 match = re_02.match(line)
267 267 if match:
268 268 raise logerror(match.group(2))
269 269 if re_03.match(line):
270 270 raise logerror(line)
271 271
272 272 elif state == 1:
273 273 # expect 'Working file' (only when using log instead of rlog)
274 274 match = re_10.match(line)
275 275 assert match, _('RCS file must be followed by working file')
276 276 filename = util.normpath(match.group(1))
277 277 state = 2
278 278
279 279 elif state == 2:
280 280 # expect 'symbolic names'
281 281 if re_20.match(line):
282 282 branchmap = {}
283 283 state = 3
284 284
285 285 elif state == 3:
286 286 # read the symbolic names and store as tags
287 287 match = re_30.match(line)
288 288 if match:
289 289 rev = [int(x) for x in match.group(2).split('.')]
290 290
291 291 # Convert magic branch number to an odd-numbered one
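# (illustrative: CVS records a branch tag as a "magic" number such as
#  1.2.0.4, even length with a 0 next-to-last; revisions on that branch
#  are numbered 1.2.4.x, so the 0 is dropped to get the real branch 1.2.4)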
292 292 revn = len(rev)
293 293 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
294 294 rev = rev[:-2] + rev[-1:]
295 295 rev = tuple(rev)
296 296
297 297 if rev not in tags:
298 298 tags[rev] = []
299 299 tags[rev].append(match.group(1))
300 300 branchmap[match.group(1)] = match.group(2)
301 301
302 302 elif re_31.match(line):
303 303 state = 5
304 304 elif re_32.match(line):
305 305 state = 0
306 306
307 307 elif state == 4:
308 308 # expecting '------' separator before first revision
309 309 if re_31.match(line):
310 310 state = 5
311 311 else:
312 312 assert not re_32.match(line), _('must have at least '
313 313 'some revisions')
314 314
315 315 elif state == 5:
316 316 # expecting revision number and possibly (ignored) lock indication
317 317 # we create the logentry here from values stored in states 0 to 4,
318 318 # as this state is re-entered for subsequent revisions of a file.
319 319 match = re_50.match(line)
320 320 assert match, _('expected revision number')
321 321 e = logentry(rcs=scache(rcs),
322 322 file=scache(filename),
323 323 revision=tuple([int(x) for x in
324 324 match.group(1).split('.')]),
325 325 branches=[],
326 326 parent=None,
327 327 commitid=None,
328 328 mergepoint=None,
329 329 branchpoints=set())
330 330
331 331 state = 6
332 332
333 333 elif state == 6:
334 334 # expecting date, author, state, lines changed
335 335 match = re_60.match(line)
336 336 assert match, _('revision must be followed by date line')
337 337 d = match.group(1)
338 338 if d[2] == '/':
339 339 # Y2K
340 340 d = '19' + d
341 341
342 342 if len(d.split()) != 3:
343 343 # cvs log dates always in GMT
344 344 d = d + ' UTC'
345 345 e.date = dateutil.parsedate(d, ['%y/%m/%d %H:%M:%S',
346 346 '%Y/%m/%d %H:%M:%S',
347 347 '%Y-%m-%d %H:%M:%S'])
348 348 e.author = scache(match.group(2))
349 349 e.dead = match.group(3).lower() == 'dead'
350 350
351 351 if match.group(5):
352 352 if match.group(6):
353 353 e.lines = (int(match.group(5)), int(match.group(6)))
354 354 else:
355 355 e.lines = (int(match.group(5)), 0)
356 356 elif match.group(6):
357 357 e.lines = (0, int(match.group(6)))
358 358 else:
359 359 e.lines = None
360 360
361 361 if match.group(7): # cvs 1.12 commitid
362 362 e.commitid = match.group(8)
363 363
364 364 if match.group(9): # cvsnt mergepoint
365 365 myrev = match.group(10).split('.')
366 366 if len(myrev) == 2: # head
367 367 e.mergepoint = 'HEAD'
368 368 else:
369 369 myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
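# (illustrative: a mergepoint revision such as 1.2.4.7 is mapped back to
#  the magic branch number 1.2.0.4, which is what branchmap stores)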
370 370 branches = [b for b in branchmap if branchmap[b] == myrev]
371 371 assert len(branches) == 1, ('unknown branch: %s'
372 372 % e.mergepoint)
373 373 e.mergepoint = branches[0]
374 374
375 375 e.comment = []
376 376 state = 7
377 377
378 378 elif state == 7:
379 379 # read the revision numbers of branches that start at this revision
380 380 # or store the commit log message otherwise
381 381 m = re_70.match(line)
382 382 if m:
383 383 e.branches = [tuple([int(y) for y in x.strip().split('.')])
384 384 for x in m.group(1).split(';')]
385 385 state = 8
386 386 elif re_31.match(line) and re_50.match(peek):
387 387 state = 5
388 388 store = True
389 389 elif re_32.match(line):
390 390 state = 0
391 391 store = True
392 392 else:
393 393 e.comment.append(line)
394 394
395 395 elif state == 8:
396 396 # store commit log message
397 397 if re_31.match(line):
398 398 cpeek = peek
399 399 if cpeek.endswith('\n'):
400 400 cpeek = cpeek[:-1]
401 401 if re_50.match(cpeek):
402 402 state = 5
403 403 store = True
404 404 else:
405 405 e.comment.append(line)
406 406 elif re_32.match(line):
407 407 state = 0
408 408 store = True
409 409 else:
410 410 e.comment.append(line)
411 411
412 412 # When a file is added on a branch B1, CVS creates a synthetic
413 413 # dead trunk revision 1.1 so that the branch has a root.
414 414 # Likewise, if you merge such a file to a later branch B2 (one
415 415 # that already existed when the file was added on B1), CVS
416 416 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
417 417 # these revisions now, but mark them synthetic so
418 418 # createchangeset() can take care of them.
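# (illustrative: such revisions carry a one-line log message matched by
#  file_added_re above, e.g. "file foo.c was initially added on branch B1.")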
419 419 if (store and
420 420 e.dead and
421 421 e.revision[-1] == 1 and # 1.1 or 1.1.x.1
422 422 len(e.comment) == 1 and
423 423 file_added_re.match(e.comment[0])):
424 424 ui.debug('found synthetic revision in %s: %r\n'
425 425 % (e.rcs, e.comment[0]))
426 426 e.synthetic = True
427 427
428 428 if store:
429 429 # clean up the results and save in the log.
430 430 store = False
431 431 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
432 432 e.comment = scache('\n'.join(e.comment))
433 433
434 434 revn = len(e.revision)
435 435 if revn > 3 and (revn % 2) == 0:
436 436 e.branch = tags.get(e.revision[:-1], [None])[0]
437 437 else:
438 438 e.branch = None
439 439
440 440 # find the branches starting from this revision
441 441 branchpoints = set()
442 442 for branch, revision in branchmap.iteritems():
443 443 revparts = tuple([int(i) for i in revision.split('.')])
444 444 if len(revparts) < 2: # bad tags
445 445 continue
446 446 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
447 447 # normal branch
448 448 if revparts[:-2] == e.revision:
449 449 branchpoints.add(branch)
450 450 elif revparts == (1, 1, 1): # vendor branch
451 451 if revparts in e.branches:
452 452 branchpoints.add(branch)
453 453 e.branchpoints = branchpoints
454 454
455 455 log.append(e)
456 456
457 457 rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs
458 458
459 459 if len(log) % 100 == 0:
460 460 ui.status(stringutil.ellipsis('%d %s' % (len(log), e.file), 80)
461 461 + '\n')
462 462
463 463 log.sort(key=lambda x: (x.rcs, x.revision))
464 464
465 465 # find parent revisions of individual files
466 466 versions = {}
467 467 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
468 468 rcs = e.rcs.replace('/Attic/', '/')
469 469 if rcs in rcsmap:
470 470 e.rcs = rcsmap[rcs]
471 471 branch = e.revision[:-1]
472 472 versions[(e.rcs, branch)] = e.revision
473 473
474 474 for e in log:
475 475 branch = e.revision[:-1]
476 476 p = versions.get((e.rcs, branch), None)
477 477 if p is None:
478 478 p = e.revision[:-2]
479 479 e.parent = p
480 480 versions[(e.rcs, branch)] = e.revision
481 481
482 482 # update the log cache
483 483 if cache:
484 484 if log:
485 485 # join up the old and new logs
486 486 log.sort(key=lambda x: x.date)
487 487
488 488 if oldlog and oldlog[-1].date >= log[0].date:
489 489 raise logerror(_('log cache overlaps with new log entries,'
490 490 ' re-run without cache.'))
491 491
492 492 log = oldlog + log
493 493
494 494 # write the new cachefile
495 495 ui.note(_('writing cvs log cache %s\n') % cachefile)
496 496 pickle.dump(log, open(cachefile, 'wb'))
497 497 else:
498 498 log = oldlog
499 499
500 500 ui.status(_('%d log entries\n') % len(log))
501 501
502 502 encodings = ui.configlist('convert', 'cvsps.logencoding')
503 503 if encodings:
504 504 def revstr(r):
505 505 # this is needed, because logentry.revision is a tuple of "int"
506 506 # (e.g. (1, 2) for "1.2")
507 507 return '.'.join(pycompat.maplist(pycompat.bytestr, r))
508 508
509 509 for entry in log:
510 510 comment = entry.comment
511 511 for e in encodings:
512 512 try:
513 513 entry.comment = comment.decode(e).encode('utf-8')
514 514 if ui.debugflag:
515 515 ui.debug("transcoding by %s: %s of %s\n" %
516 516 (e, revstr(entry.revision), entry.file))
517 517 break
518 518 except UnicodeDecodeError:
519 519 pass # try next encoding
520 520 except LookupError as inst: # unknown encoding, maybe
521 521 raise error.Abort(inst,
522 522 hint=_('check convert.cvsps.logencoding'
523 523 ' configuration'))
524 524 else:
525 525 raise error.Abort(_("no encoding can transcode"
526 526 " CVS log message for %s of %s")
527 527 % (revstr(entry.revision), entry.file),
528 528 hint=_('check convert.cvsps.logencoding'
529 529 ' configuration'))
530 530
531 531 hook.hook(ui, None, "cvslog", True, log=log)
532 532
533 533 return log
534 534
535 535
536 536 class changeset(object):
537 537 '''Class changeset has the following attributes:
538 538 .id - integer identifying this changeset (list index)
539 539 .author - author name as CVS knows it
540 540 .branch - name of branch this changeset is on, or None
541 541 .comment - commit message
542 542 .commitid - CVS commitid or None
543 543 .date - the commit date as a (time,tz) tuple
544 544 .entries - list of logentry objects in this changeset
545 545 .parents - list of one or two parent changesets
546 546 .tags - list of tags on this changeset
547 547 .synthetic - from synthetic revision "file ... added on branch ..."
548 548 .mergepoint - the branch that has been merged from or None
549 549 .branchpoints - the branches that start at the current entry or empty
550 550 '''
551 551 def __init__(self, **entries):
552 552 self.id = None
553 553 self.synthetic = False
554 554 self.__dict__.update(entries)
555 555
556 556 def __repr__(self):
557 557 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
558 558 return "%s(%s)"%(type(self).__name__, ", ".join(items))
559 559
560 560 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
561 561 '''Convert log into changesets.'''
562 562
563 563 ui.status(_('creating changesets\n'))
564 564
565 565 # try to order commitids by date
566 566 mindate = {}
567 567 for e in log:
568 568 if e.commitid:
569 569 mindate[e.commitid] = min(e.date, mindate.get(e.commitid))
570 570
571 571 # Merge changesets
572 572 log.sort(key=lambda x: (mindate.get(x.commitid), x.commitid, x.comment,
573 573 x.author, x.branch, x.date, x.branchpoints))
574 574
575 575 changesets = []
576 576 files = set()
577 577 c = None
578 578 for i, e in enumerate(log):
579 579
580 580 # Check if log entry belongs to the current changeset or not.
581 581
582 582 # Since CVS is file-centric, two different file revisions with
583 583 # different branchpoints should be treated as belonging to two
584 584 # different changesets (and the ordering is important and not
585 585 # honoured by cvsps at this point).
586 586 #
587 587 # Consider the following case:
588 588 # foo 1.1 branchpoints: [MYBRANCH]
589 589 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
590 590 #
591 591 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
592 592 # later version of foo may be in MYBRANCH2, so foo should be the
593 593 # first changeset and bar the next and MYBRANCH and MYBRANCH2
594 594 # should both start off of the bar changeset. No provisions are
595 595 # made to ensure that this is, in fact, what happens.
596 596 if not (c and e.branchpoints == c.branchpoints and
597 597 (# cvs commitids
598 598 (e.commitid is not None and e.commitid == c.commitid) or
599 599 (# no commitids, use fuzzy commit detection
600 600 (e.commitid is None or c.commitid is None) and
601 601 e.comment == c.comment and
602 602 e.author == c.author and
603 603 e.branch == c.branch and
604 604 ((c.date[0] + c.date[1]) <=
605 605 (e.date[0] + e.date[1]) <=
606 606 (c.date[0] + c.date[1]) + fuzz) and
607 607 e.file not in files))):
608 608 c = changeset(comment=e.comment, author=e.author,
609 609 branch=e.branch, date=e.date,
610 610 entries=[], mergepoint=e.mergepoint,
611 611 branchpoints=e.branchpoints, commitid=e.commitid)
612 612 changesets.append(c)
613 613
614 614 files = set()
615 615 if len(changesets) % 100 == 0:
616 616 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
617 617 ui.status(stringutil.ellipsis(t, 80) + '\n')
618 618
619 619 c.entries.append(e)
620 620 files.add(e.file)
621 621 c.date = e.date # changeset date is date of latest commit in it
622 622
623 623 # Mark synthetic changesets
624 624
625 625 for c in changesets:
626 626 # Synthetic revisions always get their own changeset, because
627 627 # the log message includes the filename. E.g. if you add file3
628 628 # and file4 on a branch, you get four log entries and three
629 629 # changesets:
630 630 # "File file3 was added on branch ..." (synthetic, 1 entry)
631 631 # "File file4 was added on branch ..." (synthetic, 1 entry)
632 632 # "Add file3 and file4 to fix ..." (real, 2 entries)
633 633 # Hence the check for 1 entry here.
634 634 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
635 635
636 636 # Sort files in each changeset
637 637
638 638 def entitycompare(l, r):
639 639 'Mimic cvsps sorting order'
640 640 l = l.file.split('/')
641 641 r = r.file.split('/')
642 642 nl = len(l)
643 643 nr = len(r)
644 644 n = min(nl, nr)
645 645 for i in range(n):
646 646 if i + 1 == nl and nl < nr:
647 647 return -1
648 648 elif i + 1 == nr and nl > nr:
649 649 return +1
650 650 elif l[i] < r[i]:
651 651 return -1
652 652 elif l[i] > r[i]:
653 653 return +1
654 654 return 0
655 655
656 656 for c in changesets:
657 657 c.entries.sort(key=functools.cmp_to_key(entitycompare))
658 658
659 659 # Sort changesets by date
660 660
661 661 odd = set()
662 662 def cscmp(l, r):
663 663 d = sum(l.date) - sum(r.date)
664 664 if d:
665 665 return d
666 666
667 667 # detect vendor branches and initial commits on a branch
668 668 le = {}
669 669 for e in l.entries:
670 670 le[e.rcs] = e.revision
671 671 re = {}
672 672 for e in r.entries:
673 673 re[e.rcs] = e.revision
674 674
675 675 d = 0
676 676 for e in l.entries:
677 677 if re.get(e.rcs, None) == e.parent:
678 678 assert not d
679 679 d = 1
680 680 break
681 681
682 682 for e in r.entries:
683 683 if le.get(e.rcs, None) == e.parent:
684 684 if d:
685 685 odd.add((l, r))
686 686 d = -1
687 687 break
688 688 # By this point, the changesets are sufficiently compared that
689 689 # we don't really care about ordering. However, this leaves
690 690 # some race conditions in the tests, so we compare on the
691 691 # number of files modified, the files contained in each
692 692 # changeset, and the branchpoints in the change to ensure test
693 693 # output remains stable.
694 694
695 695 # recommended replacement for cmp from
696 696 # https://docs.python.org/3.0/whatsnew/3.0.html
697 697 c = lambda x, y: (x > y) - (x < y)
698 698 # Sort bigger changes first.
699 699 if not d:
700 700 d = c(len(l.entries), len(r.entries))
701 701 # Try sorting by filename in the change.
702 702 if not d:
703 703 d = c([e.file for e in l.entries], [e.file for e in r.entries])
704 704 # Try and put changes without a branch point before ones with
705 705 # a branch point.
706 706 if not d:
707 707 d = c(len(l.branchpoints), len(r.branchpoints))
708 708 return d
709 709
710 710 changesets.sort(key=functools.cmp_to_key(cscmp))
711 711
712 712 # Collect tags
713 713
714 714 globaltags = {}
715 715 for c in changesets:
716 716 for e in c.entries:
717 717 for tag in e.tags:
718 718 # remember which is the latest changeset to have this tag
719 719 globaltags[tag] = c
720 720
721 721 for c in changesets:
722 722 tags = set()
723 723 for e in c.entries:
724 724 tags.update(e.tags)
725 725 # remember tags only if this is the latest changeset to have it
726 726 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
727 727
728 728 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
729 729 # by inserting dummy changesets with two parents, and handle
730 730 # {{mergefrombranch BRANCHNAME}} by setting two parents.
731 731
732 732 if mergeto is None:
733 733 mergeto = br'{{mergetobranch ([-\w]+)}}'
734 734 if mergeto:
735 735 mergeto = re.compile(mergeto)
736 736
737 737 if mergefrom is None:
738 738 mergefrom = br'{{mergefrombranch ([-\w]+)}}'
739 739 if mergefrom:
740 740 mergefrom = re.compile(mergefrom)
741 741
742 742 versions = {} # changeset index where we saw any particular file version
743 743 branches = {} # changeset index where we saw a branch
744 744 n = len(changesets)
745 745 i = 0
746 746 while i < n:
747 747 c = changesets[i]
748 748
749 749 for f in c.entries:
750 750 versions[(f.rcs, f.revision)] = i
751 751
752 752 p = None
753 753 if c.branch in branches:
754 754 p = branches[c.branch]
755 755 else:
756 756 # first changeset on a new branch
757 757 # the parent is a changeset with the branch in its
758 758 # branchpoints such that it is the latest possible
759 759 # commit without any intervening, unrelated commits.
760 760
761 761 for candidate in xrange(i):
762 762 if c.branch not in changesets[candidate].branchpoints:
763 763 if p is not None:
764 764 break
765 765 continue
766 766 p = candidate
767 767
768 768 c.parents = []
769 769 if p is not None:
770 770 p = changesets[p]
771 771
772 772 # Ensure no changeset has a synthetic changeset as a parent.
773 773 while p.synthetic:
774 774 assert len(p.parents) <= 1, \
775 775 _('synthetic changeset cannot have multiple parents')
776 776 if p.parents:
777 777 p = p.parents[0]
778 778 else:
779 779 p = None
780 780 break
781 781
782 782 if p is not None:
783 783 c.parents.append(p)
784 784
785 785 if c.mergepoint:
786 786 if c.mergepoint == 'HEAD':
787 787 c.mergepoint = None
788 788 c.parents.append(changesets[branches[c.mergepoint]])
789 789
790 790 if mergefrom:
791 791 m = mergefrom.search(c.comment)
792 792 if m:
793 793 m = m.group(1)
794 794 if m == 'HEAD':
795 795 m = None
796 796 try:
797 797 candidate = changesets[branches[m]]
798 798 except KeyError:
799 799 ui.warn(_("warning: CVS commit message references "
800 800 "non-existent branch %r:\n%s\n")
801 % (m, c.comment))
801 % (pycompat.bytestr(m), c.comment))
802 802 if m in branches and c.branch != m and not candidate.synthetic:
803 803 c.parents.append(candidate)
804 804
805 805 if mergeto:
806 806 m = mergeto.search(c.comment)
807 807 if m:
808 808 if m.groups():
809 809 m = m.group(1)
810 810 if m == 'HEAD':
811 811 m = None
812 812 else:
813 813 m = None # if no group found then merge to HEAD
814 814 if m in branches and c.branch != m:
815 815 # insert empty changeset for merge
816 816 cc = changeset(
817 817 author=c.author, branch=m, date=c.date,
818 818 comment='convert-repo: CVS merge from branch %s'
819 819 % c.branch,
820 820 entries=[], tags=[],
821 821 parents=[changesets[branches[m]], c])
822 822 changesets.insert(i + 1, cc)
823 823 branches[m] = i + 1
824 824
825 825 # adjust our loop counters now we have inserted a new entry
826 826 n += 1
827 827 i += 2
828 828 continue
829 829
830 830 branches[c.branch] = i
831 831 i += 1
832 832
833 833 # Drop synthetic changesets (safe now that we have ensured no other
834 834 # changesets can have them as parents).
835 835 i = 0
836 836 while i < len(changesets):
837 837 if changesets[i].synthetic:
838 838 del changesets[i]
839 839 else:
840 840 i += 1
841 841
842 842 # Number changesets
843 843
844 844 for i, c in enumerate(changesets):
845 845 c.id = i + 1
846 846
847 847 if odd:
848 848 for l, r in odd:
849 849 if l.id is not None and r.id is not None:
850 850 ui.warn(_('changeset %d is both before and after %d\n')
851 851 % (l.id, r.id))
852 852
853 853 ui.status(_('%d changeset entries\n') % len(changesets))
854 854
855 855 hook.hook(ui, None, "cvschangesets", True, changesets=changesets)
856 856
857 857 return changesets
858 858
859 859
860 860 def debugcvsps(ui, *args, **opts):
861 861 '''Read CVS rlog for current directory or named path in
862 862 repository, and convert the log to changesets based on matching
863 863 commit log entries and dates.
864 864 '''
865 865 opts = pycompat.byteskwargs(opts)
866 866 if opts["new_cache"]:
867 867 cache = "write"
868 868 elif opts["update_cache"]:
869 869 cache = "update"
870 870 else:
871 871 cache = None
872 872
873 873 revisions = opts["revisions"]
874 874
875 875 try:
876 876 if args:
877 877 log = []
878 878 for d in args:
879 879 log += createlog(ui, d, root=opts["root"], cache=cache)
880 880 else:
881 881 log = createlog(ui, root=opts["root"], cache=cache)
882 882 except logerror as e:
883 883 ui.write("%r\n"%e)
884 884 return
885 885
886 886 changesets = createchangeset(ui, log, opts["fuzz"])
887 887 del log
888 888
889 889 # Print changesets (optionally filtered)
890 890
891 891 off = len(revisions)
892 892 branches = {} # latest version number in each branch
893 893 ancestors = {} # parent branch
894 894 for cs in changesets:
895 895
896 896 if opts["ancestors"]:
897 897 if cs.branch not in branches and cs.parents and cs.parents[0].id:
898 898 ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
899 899 cs.parents[0].id)
900 900 branches[cs.branch] = cs.id
901 901
902 902 # limit by branches
903 903 if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
904 904 continue
905 905
906 906 if not off:
907 907 # Note: trailing spaces on several lines here are needed to have
908 908 # bug-for-bug compatibility with cvsps.
909 909 ui.write('---------------------\n')
910 910 ui.write(('PatchSet %d \n' % cs.id))
911 911 ui.write(('Date: %s\n' % dateutil.datestr(cs.date,
912 912 '%Y/%m/%d %H:%M:%S %1%2')))
913 913 ui.write(('Author: %s\n' % cs.author))
914 914 ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
915 915 ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
916 916 ','.join(cs.tags) or '(none)')))
917 917 if cs.branchpoints:
918 918 ui.write(('Branchpoints: %s \n') %
919 919 ', '.join(sorted(cs.branchpoints)))
920 920 if opts["parents"] and cs.parents:
921 921 if len(cs.parents) > 1:
922 922 ui.write(('Parents: %s\n' %
923 923 (','.join([(b"%d" % p.id) for p in cs.parents]))))
924 924 else:
925 925 ui.write(('Parent: %d\n' % cs.parents[0].id))
926 926
927 927 if opts["ancestors"]:
928 928 b = cs.branch
929 929 r = []
930 930 while b:
931 931 b, c = ancestors[b]
932 932 r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
933 933 if r:
934 934 ui.write(('Ancestors: %s\n' % (','.join(r))))
935 935
936 936 ui.write(('Log:\n'))
937 937 ui.write('%s\n\n' % cs.comment)
938 938 ui.write(('Members: \n'))
939 939 for f in cs.entries:
940 940 fn = f.file
941 941 if fn.startswith(opts["prefix"]):
942 942 fn = fn[len(opts["prefix"]):]
943 943 ui.write('\t%s:%s->%s%s \n' % (
944 944 fn,
945 945 '.'.join([b"%d" % x for x in f.parent]) or 'INITIAL',
946 946 '.'.join([(b"%d" % x) for x in f.revision]),
947 947 ['', '(DEAD)'][f.dead]))
948 948 ui.write('\n')
949 949
950 950 # have we seen the start tag?
951 951 if revisions and off:
952 952 if revisions[0] == (b"%d" % cs.id) or \
953 953 revisions[0] in cs.tags:
954 954 off = False
955 955
956 956 # see if we reached the end tag
957 957 if len(revisions) > 1 and not off:
958 958 if revisions[1] == (b"%d" % cs.id) or \
959 959 revisions[1] in cs.tags:
960 960 break