##// END OF EJS Templates
convert: replace repr() by pycompat.byterepr() in cvsps.py (issue6789)
av6 -
r50799:f3e95e5a stable
parent child Browse files
Show More
@@ -1,1068 +1,1071 b''
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import functools
9 9 import os
10 10 import pickle
11 11 import re
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.pycompat import open
15 15 from mercurial import (
16 16 encoding,
17 17 error,
18 18 hook,
19 19 pycompat,
20 20 util,
21 21 )
22 22 from mercurial.utils import (
23 23 dateutil,
24 24 procutil,
25 25 stringutil,
26 26 )
27 27
28 28
class logentry:
    """A single file revision parsed from CVS (r)log output.

    Attributes:
    .author - author name as CVS knows it
    .branch - name of branch this revision is on
    .branches - revision tuple of branches starting at this revision
    .comment - commit message
    .commitid - CVS commitid or None
    .date - the commit date as a (time, tz) tuple
    .dead - true if file revision is dead
    .file - Name of file
    .lines - a tuple (+lines, -lines) or None
    .parent - Previous revision of this entry
    .rcs - name of file as returned from CVS
    .revision - revision number as tuple
    .tags - list of tags on the file
    .synthetic - is this a synthetic "file ... added on ..." revision?
    .mergepoint - the branch that has been merged from (if present in
                  rlog output) or None
    .branchpoints - the branches that start at the current entry or empty
    """

    def __init__(self, **entries):
        # All attributes except 'synthetic' are supplied by the caller;
        # 'synthetic' defaults to False and may be overridden by entries.
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        fields = [
            "%s=%r" % (name, value)
            for name, value in sorted(self.__dict__.items())
        ]
        return "%s(%s)" % (type(self).__name__, ", ".join(fields))
57 57
58 58
class logerror(Exception):
    """Raised when CVS (r)log output cannot be obtained or parsed."""
61 61
62 62
def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    >>> getrepopath(b'/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'user@server/path/to/repository')
    '/path/to/repository'
    """
    # CVS paths look like (per the CVS manual):
    #   [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    # Only the last ':'-separated component can contain the repository
    # path, and the path begins at the first '/' that follows the '@'
    # sign, if any.  Note that find() returning -1 when there is no '@'
    # (or no '/') yields the same slices as the original loop-free code.
    tail = cvspath.split(b':')[-1]
    atpos = tail.find(b'@')
    searchfrom = atpos if atpos != -1 else 0
    return tail[tail.find(b'/', searchfrom) :]
101 101
102 102
def createlog(ui, directory=None, root=b"", rlog=True, cache=None):
    '''Collect the CVS rlog.

    Runs ``cvs rlog`` (or ``cvs log`` when rlog is False) for
    ``directory`` (default: the CVS sandbox in the current working
    directory) against repository ``root``, and parses the output with
    a line-oriented state machine into a list of logentry objects.

    cache may be None (no caching), b'write' (write a pickle cache
    under ~/.hg.cvsps) or b'update' (load the pickle cache first and
    only ask cvs for entries newer than the last cached one).

    Raises logerror when the directory is not a CVS sandbox or when
    cvs reports an error.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}

    def scache(s):
        """return a shared version of a string"""
        return _scache.setdefault(s, s)

    ui.status(_(b'collecting CVS rlog\n'))

    log = []  # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile(b'RCS file: (.+)$')
    re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
    re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
    re_03 = re.compile(
        b"(Cannot access.+CVSROOT)|(can't create temporary directory.+)$"
    )
    re_10 = re.compile(b'Working file: (.+)$')
    re_20 = re.compile(b'symbolic names:')
    re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
    re_31 = re.compile(b'----------------------------$')
    re_32 = re.compile(
        b'======================================='
        b'======================================$'
    )
    re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(
        br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
        br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
        br'(\s+commitid:\s+([^;]+);)?'
        br'(.*mergepoint:\s+([^;]+);)?'
    )
    re_70 = re.compile(b'branches: (.+);$')

    file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')

    prefix = b''  # leading path to strip of what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            with open(os.path.join(b'CVS', b'Repository'), b'rb') as f:
                prefix = f.read().strip()
            directory = prefix
            if prefix == b".":
                prefix = b""
        except IOError:
            raise logerror(_(b'not a CVS sandbox'))

        if prefix and not prefix.endswith(pycompat.ossep):
            prefix += pycompat.ossep

        # Use the Root file in the sandbox, if it exists
        try:
            root = open(os.path.join(b'CVS', b'Root'), b'rb').read().strip()
        except IOError:
            pass

    if not root:
        root = encoding.environ.get(b'CVSROOT', b'')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser(b'~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumeric characters, concatenated in a way that does not
        # mix up the various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(b":") + [directory, b"cache"]
        cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(
            cachedir, b'.'.join([s for s in cachefile if s])
        )

        if cache == b'update':
            try:
                ui.note(_(b'reading cvs log cache %s\n') % cachefile)
                oldlog = pickle.load(open(cachefile, b'rb'))
                for e in oldlog:
                    if not (
                        # Attribute names must be native str: getattr()
                        # raises TypeError for bytes names on Python 3,
                        # and that TypeError was swallowed by the broad
                        # except clause below, silently discarding the
                        # cache on every run.
                        util.safehasattr(e, 'branchpoints')
                        and util.safehasattr(e, 'commitid')
                        and util.safehasattr(e, 'mergepoint')
                    ):
                        ui.status(_(b'ignoring old cache\n'))
                        oldlog = []
                        break

                ui.note(_(b'cache has %d log entries\n') % len(oldlog))
            except Exception as e:
                ui.note(_(b'error reading cache: %r\n') % e)

            if oldlog:
                date = oldlog[-1].date  # last commit date as a (time,tz) tuple
                date = dateutil.datestr(date, b'%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = [b'cvs', b'-q']
    if root:
        cmd.append(b'-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith(b'/'):
            p += b'/'
        if prefix:
            # looks like normpath replaces "" by "."
            prefix = p + util.normpath(prefix)
        else:
            prefix = p
    cmd.append([b'log', b'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append(b'-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}  # dictionary of revisions on current file with their tags
    branchmap = {}  # mapping between branch names and revision numbers
    rcsmap = {}
    state = 0
    store = False  # set when a new record can be appended

    cmd = [procutil.shellquote(arg) for arg in cmd]
    ui.note(_(b"running %s\n") % (b' '.join(cmd)))
    ui.debug(b"prefix=%r directory=%r root=%r\n" % (prefix, directory, root))

    pfp = procutil.popen(b' '.join(cmd), b'rb')
    peek = util.fromnativeeol(pfp.readline())
    while True:
        line = peek
        if line == b'':
            break
        peek = util.fromnativeeol(pfp.readline())
        if line.endswith(b'\n'):
            line = line[:-1]
        # ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix) :]
                    if filename.startswith(b'/'):
                        filename = filename[1:]
                    if filename.startswith(b'Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace(b'/Attic/', b'/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise logerror(match.group(1))
            match = re_02.match(line)
            if match:
                raise logerror(match.group(2))
            if re_03.match(line):
                raise logerror(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _(b'RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                branchmap = {}
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split(b'.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))
                branchmap[match.group(1)] = match.group(2)

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _(
                    b'must have at least some revisions'
                )

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _(b'expected revision number')
            e = logentry(
                rcs=scache(rcs),
                file=scache(filename),
                revision=tuple([int(x) for x in match.group(1).split(b'.')]),
                branches=[],
                parent=None,
                commitid=None,
                mergepoint=None,
                branchpoints=set(),
            )

            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _(b'revision must be followed by date line')
            d = match.group(1)
            if d[2] == b'/':
                # Y2K
                d = b'19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + b' UTC'
            e.date = dateutil.parsedate(
                d,
                [
                    b'%y/%m/%d %H:%M:%S',
                    b'%Y/%m/%d %H:%M:%S',
                    b'%Y-%m-%d %H:%M:%S',
                ],
            )
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == b'dead'

            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None

            if match.group(7):  # cvs 1.12 commitid
                e.commitid = match.group(8)

            if match.group(9):  # cvsnt mergepoint
                myrev = match.group(10).split(b'.')
                if len(myrev) == 2:  # head
                    e.mergepoint = b'HEAD'
                else:
                    myrev = b'.'.join(myrev[:-2] + [b'0', myrev[-2]])
                    branches = [b for b in branchmap if branchmap[b] == myrev]
                    assert len(branches) == 1, (
                        b'unknown branch: %s' % e.mergepoint
                    )
                    e.mergepoint = branches[0]

            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [
                    tuple([int(y) for y in x.strip().split(b'.')])
                    for x in m.group(1).split(b';')
                ]
                state = 8
            elif re_31.match(line) and re_50.match(peek):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                cpeek = peek
                if cpeek.endswith(b'\n'):
                    cpeek = cpeek[:-1]
                if re_50.match(cpeek):
                    state = 5
                    store = True
                else:
                    e.comment.append(line)
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        # When a file is added on a branch B1, CVS creates a synthetic
        # dead trunk revision 1.1 so that the branch has a root.
        # Likewise, if you merge such a file to a later branch B2 (one
        # that already existed when the file was added on B1), CVS
        # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
        # these revisions now, but mark them synthetic so
        # createchangeset() can take care of them.
        if (
            store
            and e.dead
            and e.revision[-1] == 1
            and len(e.comment) == 1  # 1.1 or 1.1.x.1
            and file_added_re.match(e.comment[0])
        ):
            ui.debug(
                b'found synthetic revision in %s: %r\n' % (e.rcs, e.comment[0])
            )
            e.synthetic = True

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache(b'\n'.join(e.comment))

            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            # find the branches starting from this revision
            branchpoints = set()
            for branch, revision in branchmap.items():
                revparts = tuple([int(i) for i in revision.split(b'.')])
                if len(revparts) < 2:  # bad tags
                    continue
                if revparts[-2] == 0 and revparts[-1] % 2 == 0:
                    # normal branch
                    if revparts[:-2] == e.revision:
                        branchpoints.add(branch)
                elif revparts == (1, 1, 1):  # vendor branch
                    if revparts in e.branches:
                        branchpoints.add(branch)
            e.branchpoints = branchpoints

            log.append(e)

            # remember the mapping from the Attic-less path to the rcs path
            rcsmap[e.rcs.replace(b'/Attic/', b'/')] = e.rcs

            if len(log) % 100 == 0:
                ui.status(
                    stringutil.ellipsis(b'%d %s' % (len(log), e.file), 80)
                    + b'\n'
                )

    log.sort(key=lambda x: (x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
        rcs = e.rcs.replace(b'/Attic/', b'/')
        if rcs in rcsmap:
            e.rcs = rcsmap[rcs]
        branch = e.revision[:-1]
        versions[(e.rcs, branch)] = e.revision

    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            log.sort(key=lambda x: x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror(
                    _(
                        b'log cache overlaps with new log entries,'
                        b' re-run without cache.'
                    )
                )

            log = oldlog + log

            # write the new cachefile
            ui.note(_(b'writing cvs log cache %s\n') % cachefile)
            pickle.dump(log, open(cachefile, b'wb'))
        else:
            log = oldlog

    ui.status(_(b'%d log entries\n') % len(log))

    encodings = ui.configlist(b'convert', b'cvsps.logencoding')
    if encodings:

        def revstr(r):
            # this is needed, because logentry.revision is a tuple of "int"
            # (e.g. (1, 2) for "1.2")
            return b'.'.join(pycompat.maplist(pycompat.bytestr, r))

        for entry in log:
            comment = entry.comment
            for e in encodings:
                try:
                    entry.comment = comment.decode(pycompat.sysstr(e)).encode(
                        'utf-8'
                    )
                    if ui.debugflag:
                        ui.debug(
                            b"transcoding by %s: %s of %s\n"
                            % (e, revstr(entry.revision), entry.file)
                        )
                    break
                except UnicodeDecodeError:
                    pass  # try next encoding
                except LookupError as inst:  # unknown encoding, maybe
                    raise error.Abort(
                        pycompat.bytestr(inst),
                        hint=_(
                            b'check convert.cvsps.logencoding configuration'
                        ),
                    )
            else:
                raise error.Abort(
                    _(
                        b"no encoding can transcode"
                        b" CVS log message for %s of %s"
                    )
                    % (revstr(entry.revision), entry.file),
                    hint=_(b'check convert.cvsps.logencoding configuration'),
                )

    hook.hook(ui, None, b"cvslog", True, log=log)

    return log
578 578
579 579
class changeset:
    """Class changeset has the following attributes:
    .id - integer identifying this changeset (list index)
    .author - author name as CVS knows it
    .branch - name of branch this changeset is on, or None
    .comment - commit message
    .commitid - CVS commitid or None
    .date - the commit date as a (time,tz) tuple
    .entries - list of logentry objects in this changeset
    .parents - list of one or two parent changesets
    .tags - list of tags on this changeset
    .synthetic - from synthetic revision "file ... added on branch ..."
    .mergepoint- the branch that has been merged from or None
    .branchpoints- the branches that start at the current entry or empty
    """

    def __init__(self, **entries):
        # id and synthetic get defaults; everything else comes from the
        # caller's keyword arguments.
        self.id = None
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        # Use native str formatting here: __dict__ keys are str, and on
        # Python 3 bytes formatting (b"%s" % ...) raises TypeError when
        # handed a str, which made this repr always crash.  This also
        # matches logentry.__repr__ above.
        items = (
            "%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__)
        )
        return "%s(%s)" % (type(self).__name__, ", ".join(items))
606 606
607 607
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
    '''Convert log into changesets.

    log is a list of logentry objects (one per file revision).  Entries
    sharing a CVS commitid, or (when commitids are absent) the same
    comment/author/branch within ``fuzz`` seconds, are grouped into
    changeset objects.  mergefrom/mergeto are regex patterns (None
    selects the default {{mergefrombranch ...}}/{{mergetobranch ...}}
    markers; an empty value disables the feature) used to recognize
    merge instructions embedded in commit messages.

    Returns the list of changeset objects, sorted and numbered, with
    parents linked and synthetic changesets dropped.
    '''

    ui.status(_(b'creating changesets\n'))

    # try to order commitids by date
    mindate = {}
    for e in log:
        if e.commitid:
            if e.commitid not in mindate:
                mindate[e.commitid] = e.date
            else:
                mindate[e.commitid] = min(e.date, mindate[e.commitid])

    # Merge changesets
    # Sort so that entries belonging to the same changeset end up
    # adjacent: first by the earliest date of the commitid group, then
    # by the fuzzy-grouping keys used in the loop below.
    log.sort(
        key=lambda x: (
            mindate.get(x.commitid, (-1, 0)),
            x.commitid or b'',
            x.comment,
            x.author,
            x.branch or b'',
            x.date,
            x.branchpoints,
        )
    )

    changesets = []
    files = set()
    c = None
    for i, e in enumerate(log):

        # Check if log entry belongs to the current changeset or not.

        # Since CVS is file-centric, two different file revisions with
        # different branchpoints should be treated as belonging to two
        # different changesets (and the ordering is important and not
        # honoured by cvsps at this point).
        #
        # Consider the following case:
        # foo 1.1 branchpoints: [MYBRANCH]
        # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
        #
        # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
        # later version of foo may be in MYBRANCH2, so foo should be the
        # first changeset and bar the next and MYBRANCH and MYBRANCH2
        # should both start off of the bar changeset. No provisions are
        # made to ensure that this is, in fact, what happens.
        if not (
            c
            and e.branchpoints == c.branchpoints
            and (  # cvs commitids
                (e.commitid is not None and e.commitid == c.commitid)
                or (  # no commitids, use fuzzy commit detection
                    (e.commitid is None or c.commitid is None)
                    and e.comment == c.comment
                    and e.author == c.author
                    and e.branch == c.branch
                    and (
                        (c.date[0] + c.date[1])
                        <= (e.date[0] + e.date[1])
                        <= (c.date[0] + c.date[1]) + fuzz
                    )
                    and e.file not in files
                )
            )
        ):
            # Entry does not match the open changeset: start a new one.
            c = changeset(
                comment=e.comment,
                author=e.author,
                branch=e.branch,
                date=e.date,
                entries=[],
                mergepoint=e.mergepoint,
                branchpoints=e.branchpoints,
                commitid=e.commitid,
            )
            changesets.append(c)

            files = set()
            if len(changesets) % 100 == 0:
                # progress output; byterepr keeps the message printable
                # and [2:-1] strips the leading b' and trailing '
                t = b'%d %s' % (
                    len(changesets),
                    pycompat.byterepr(e.comment)[2:-1],
                )
                ui.status(stringutil.ellipsis(t, 80) + b'\n')

        c.entries.append(e)
        files.add(e.file)
        c.date = e.date  # changeset date is date of latest commit in it

    # Mark synthetic changesets

    for c in changesets:
        # Synthetic revisions always get their own changeset, because
        # the log message includes the filename. E.g. if you add file3
        # and file4 on a branch, you get four log entries and three
        # changesets:
        #   "File file3 was added on branch ..." (synthetic, 1 entry)
        #   "File file4 was added on branch ..." (synthetic, 1 entry)
        #   "Add file3 and file4 to fix ..."     (real, 2 entries)
        # Hence the check for 1 entry here.
        c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic

    # Sort files in each changeset

    def entitycompare(l, r):
        """Mimic cvsps sorting order"""
        l = l.file.split(b'/')
        r = r.file.split(b'/')
        nl = len(l)
        nr = len(r)
        n = min(nl, nr)
        for i in range(n):
            if i + 1 == nl and nl < nr:
                return -1
            elif i + 1 == nr and nl > nr:
                return +1
            elif l[i] < r[i]:
                return -1
            elif l[i] > r[i]:
                return +1
        return 0

    for c in changesets:
        c.entries.sort(key=functools.cmp_to_key(entitycompare))

    # Sort changesets by date

    odd = set()

    def cscmp(l, r):
        # cmp-style comparator: negative/zero/positive result.
        d = sum(l.date) - sum(r.date)
        if d:
            return d

        # detect vendor branches and initial commits on a branch
        le = {}
        for e in l.entries:
            le[e.rcs] = e.revision
        re = {}
        for e in r.entries:
            re[e.rcs] = e.revision

        d = 0
        for e in l.entries:
            if re.get(e.rcs, None) == e.parent:
                assert not d
                d = 1
                break

        for e in r.entries:
            if le.get(e.rcs, None) == e.parent:
                if d:
                    # contradictory ordering evidence; remember the
                    # pair so a warning can be emitted at the end
                    odd.add((l, r))
                d = -1
                break
        # By this point, the changesets are sufficiently compared that
        # we don't really care about ordering. However, this leaves
        # some race conditions in the tests, so we compare on the
        # number of files modified, the files contained in each
        # changeset, and the branchpoints in the change to ensure test
        # output remains stable.

        # recommended replacement for cmp from
        # https://docs.python.org/3.0/whatsnew/3.0.html
        c = lambda x, y: (x > y) - (x < y)
        # Sort bigger changes first.
        if not d:
            d = c(len(l.entries), len(r.entries))
        # Try sorting by filename in the change.
        if not d:
            d = c([e.file for e in l.entries], [e.file for e in r.entries])
        # Try and put changes without a branch point before ones with
        # a branch point.
        if not d:
            d = c(len(l.branchpoints), len(r.branchpoints))
        return d

    changesets.sort(key=functools.cmp_to_key(cscmp))

    # Collect tags

    globaltags = {}
    for c in changesets:
        for e in c.entries:
            for tag in e.tags:
                # remember which is the latest changeset to have this tag
                globaltags[tag] = c

    for c in changesets:
        tags = set()
        for e in c.entries:
            tags.update(e.tags)
        # remember tags only if this is the latest changeset to have it
        c.tags = sorted(tag for tag in tags if globaltags[tag] is c)

    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
    # by inserting dummy changesets with two parents, and handle
    # {{mergefrombranch BRANCHNAME}} by setting two parents.

    if mergeto is None:
        mergeto = br'{{mergetobranch ([-\w]+)}}'
    if mergeto:
        mergeto = re.compile(mergeto)

    if mergefrom is None:
        mergefrom = br'{{mergefrombranch ([-\w]+)}}'
    if mergefrom:
        mergefrom = re.compile(mergefrom)

    versions = {}  # changeset index where we saw any particular file version
    branches = {}  # changeset index where we saw a branch
    n = len(changesets)
    i = 0
    while i < n:
        c = changesets[i]

        for f in c.entries:
            versions[(f.rcs, f.revision)] = i

        p = None
        if c.branch in branches:
            p = branches[c.branch]
        else:
            # first changeset on a new branch
            # the parent is a changeset with the branch in its
            # branchpoints such that it is the latest possible
            # commit without any intervening, unrelated commits.

            for candidate in range(i):
                if c.branch not in changesets[candidate].branchpoints:
                    if p is not None:
                        break
                    continue
                p = candidate

        c.parents = []
        if p is not None:
            p = changesets[p]

            # Ensure no changeset has a synthetic changeset as a parent.
            while p.synthetic:
                assert len(p.parents) <= 1, _(
                    b'synthetic changeset cannot have multiple parents'
                )
                if p.parents:
                    p = p.parents[0]
                else:
                    p = None
                    break

            if p is not None:
                c.parents.append(p)

        if c.mergepoint:
            if c.mergepoint == b'HEAD':
                c.mergepoint = None
            c.parents.append(changesets[branches[c.mergepoint]])

        if mergefrom:
            m = mergefrom.search(c.comment)
            if m:
                m = m.group(1)
                if m == b'HEAD':
                    m = None
                try:
                    candidate = changesets[branches[m]]
                except KeyError:
                    # branches[m] failed, so m is not in branches and the
                    # 'm in branches' check below short-circuits before
                    # touching the unbound 'candidate'
                    ui.warn(
                        _(
                            b"warning: CVS commit message references "
                            b"non-existent branch %r:\n%s\n"
                        )
                        % (pycompat.bytestr(m), c.comment)
                    )
                if m in branches and c.branch != m and not candidate.synthetic:
                    c.parents.append(candidate)

        if mergeto:
            m = mergeto.search(c.comment)
            if m:
                if m.groups():
                    m = m.group(1)
                    if m == b'HEAD':
                        m = None
                else:
                    m = None  # if no group found then merge to HEAD
                if m in branches and c.branch != m:
                    # insert empty changeset for merge
                    cc = changeset(
                        author=c.author,
                        branch=m,
                        date=c.date,
                        comment=b'convert-repo: CVS merge from branch %s'
                        % c.branch,
                        entries=[],
                        tags=[],
                        parents=[changesets[branches[m]], c],
                    )
                    changesets.insert(i + 1, cc)
                    branches[m] = i + 1

                    # adjust our loop counters now we have inserted a new entry
                    n += 1
                    i += 2
                    continue

        branches[c.branch] = i
        i += 1

    # Drop synthetic changesets (safe now that we have ensured no other
    # changesets can have them as parents).
    i = 0
    while i < len(changesets):
        if changesets[i].synthetic:
            del changesets[i]
        else:
            i += 1

    # Number changesets

    for i, c in enumerate(changesets):
        c.id = i + 1

    if odd:
        for l, r in odd:
            if l.id is not None and r.id is not None:
                ui.warn(
                    _(b'changeset %d is both before and after %d\n')
                    % (l.id, r.id)
                )

    ui.status(_(b'%d changeset entries\n') % len(changesets))

    hook.hook(ui, None, b"cvschangesets", True, changesets=changesets)

    return changesets
943 946
944 947
def debugcvsps(ui, *args, **opts):
    """Read CVS rlog for current directory or named path in
    repository, and convert the log to changesets based on matching
    commit log entries and dates.
    """
    # opts come in with str keys from the command dispatcher; convert
    # them to bytes to match the rest of this module.
    opts = pycompat.byteskwargs(opts)
    if opts[b"new_cache"]:
        cache = b"write"
    elif opts[b"update_cache"]:
        cache = b"update"
    else:
        cache = None

    revisions = opts[b"revisions"]

    try:
        if args:
            log = []
            for d in args:
                log += createlog(ui, d, root=opts[b"root"], cache=cache)
        else:
            log = createlog(ui, root=opts[b"root"], cache=cache)
    except logerror as e:
        ui.write(b"%r\n" % e)
        return

    changesets = createchangeset(ui, log, opts[b"fuzz"])
    del log

    # Print changesets (optionally filtered)

    # 'off' stays truthy until the start revision/tag given in
    # 'revisions' has been seen; nothing is printed while it is set.
    off = len(revisions)
    branches = {}  # latest version number in each branch
    ancestors = {}  # parent branch
    for cs in changesets:

        if opts[b"ancestors"]:
            if cs.branch not in branches and cs.parents and cs.parents[0].id:
                ancestors[cs.branch] = (
                    changesets[cs.parents[0].id - 1].branch,
                    cs.parents[0].id,
                )
            branches[cs.branch] = cs.id

        # limit by branches
        if (
            opts[b"branches"]
            and (cs.branch or b'HEAD') not in opts[b"branches"]
        ):
            continue

        if not off:
            # Note: trailing spaces on several lines here are needed to have
            # bug-for-bug compatibility with cvsps.
            ui.write(b'---------------------\n')
            ui.write((b'PatchSet %d \n' % cs.id))
            ui.write(
                (
                    b'Date: %s\n'
                    % dateutil.datestr(cs.date, b'%Y/%m/%d %H:%M:%S %1%2')
                )
            )
            ui.write((b'Author: %s\n' % cs.author))
            ui.write((b'Branch: %s\n' % (cs.branch or b'HEAD')))
            ui.write(
                (
                    b'Tag%s: %s \n'
                    % (
                        [b'', b's'][len(cs.tags) > 1],
                        b','.join(cs.tags) or b'(none)',
                    )
                )
            )
            if cs.branchpoints:
                ui.writenoi18n(
                    b'Branchpoints: %s \n' % b', '.join(sorted(cs.branchpoints))
                )
            if opts[b"parents"] and cs.parents:
                if len(cs.parents) > 1:
                    ui.write(
                        (
                            b'Parents: %s\n'
                            % (b','.join([(b"%d" % p.id) for p in cs.parents]))
                        )
                    )
                else:
                    ui.write((b'Parent: %d\n' % cs.parents[0].id))

            if opts[b"ancestors"]:
                # walk the parent-branch chain collected above up to the
                # trunk, printing branch:parent-id:latest-id triples
                b = cs.branch
                r = []
                while b:
                    b, c = ancestors[b]
                    r.append(b'%s:%d:%d' % (b or b"HEAD", c, branches[b]))
                if r:
                    ui.write((b'Ancestors: %s\n' % (b','.join(r))))

            ui.writenoi18n(b'Log:\n')
            ui.write(b'%s\n\n' % cs.comment)
            ui.writenoi18n(b'Members: \n')
            for f in cs.entries:
                fn = f.file
                if fn.startswith(opts[b"prefix"]):
                    fn = fn[len(opts[b"prefix"]) :]
                ui.write(
                    b'\t%s:%s->%s%s \n'
                    % (
                        fn,
                        b'.'.join([b"%d" % x for x in f.parent]) or b'INITIAL',
                        b'.'.join([(b"%d" % x) for x in f.revision]),
                        [b'', b'(DEAD)'][f.dead],
                    )
                )
            ui.write(b'\n')

        # have we seen the start tag?
        if revisions and off:
            if revisions[0] == (b"%d" % cs.id) or revisions[0] in cs.tags:
                off = False

        # see if we reached the end tag
        if len(revisions) > 1 and not off:
            if revisions[1] == (b"%d" % cs.id) or revisions[1] in cs.tags:
                break
General Comments 0
You need to be logged in to leave comments. Login now