##// END OF EJS Templates
cvsps: convert encoding name to sysstr...
Augie Fackler -
r37937:120c343c default
parent child Browse files
Show More
@@ -1,960 +1,961 b''
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial import (
15 15 encoding,
16 16 error,
17 17 hook,
18 18 pycompat,
19 19 util,
20 20 )
21 21 from mercurial.utils import (
22 22 dateutil,
23 23 procutil,
24 24 stringutil,
25 25 )
26 26
27 27 pickle = util.pickle
28 28
class logentry(object):
    '''Class logentry has the following attributes:
        .author    - author name as CVS knows it
        .branch    - name of branch this revision is on
        .branches  - revision tuple of branches starting at this revision
        .comment   - commit message
        .commitid  - CVS commitid or None
        .date      - the commit date as a (time, tz) tuple
        .dead      - true if file revision is dead
        .file      - Name of file
        .lines     - a tuple (+lines, -lines) or None
        .parent    - Previous revision of this entry
        .rcs       - name of file as returned from CVS
        .revision  - revision number as tuple
        .tags      - list of tags on the file
        .synthetic - is this a synthetic "file ... added on ..." revision?
        .mergepoint - the branch that has been merged from (if present in
                      rlog output) or None
        .branchpoints - the branches that start at the current entry or empty
    '''
    def __init__(self, **entries):
        # synthetic defaults to False; createlog() flips it on for the
        # dead placeholder revisions CVS invents for branch adds
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        # stable, sorted key order so reprs are comparable in tests
        pairs = ["%s=%r" % (key, self.__dict__[key])
                 for key in sorted(self.__dict__)]
        return "%s(%s)" % (type(self).__name__, ", ".join(pairs))
56 56
class logerror(Exception):
    '''Raised when CVS (r)log output cannot be retrieved or parsed.'''
59 59
def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    >>> getrepopath(b'/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'user@server/path/to/repository')
    '/path/to/repository'
    """
    # According to CVS manual, CVS paths are expressed like:
    # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # We keep only the last ':'-separated component, then take everything
    # from the first '/' that follows the '@' sign (or from the first '/'
    # at all when there is no '@'), including that '/'.
    tail = cvspath.split(':')[-1]
    at = tail.find('@')
    searchfrom = at if at != -1 else 0
    return tail[tail.find('/', searchfrom):]
98 98
def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog and return a list of logentry objects.

    ui        - Mercurial ui, used for status/debug output and config
    directory - path to log; None means the current working directory
                must be a CVS sandbox (CVS/Repository is consulted)
    root      - CVSROOT; if empty, taken from the sandbox CVS/Root file
                or the CVSROOT environment variable
    rlog      - run 'cvs rlog' instead of 'cvs log'
    cache     - None disables caching; 'update' reuses and extends a
                cache under ~/.hg.cvsps; any other true value writes a
                fresh cache

    Raises logerror when the sandbox is missing or CVS reports an error.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile(b'RCS file: (.+)$')
    re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
    re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
    re_03 = re.compile(b"(Cannot access.+CVSROOT)|"
                       b"(can't create temporary directory.+)$")
    re_10 = re.compile(b'Working file: (.+)$')
    re_20 = re.compile(b'symbolic names:')
    re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
    re_31 = re.compile(b'----------------------------$')
    re_32 = re.compile(b'======================================='
                       b'======================================$')
    # use a raw bytes literal: '\s' in a non-raw literal is an invalid
    # escape sequence (deprecated, and a syntax error in future Python)
    re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
                       br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
                       br'(\s+commitid:\s+([^;]+);)?'
                       br'(.*mergepoint:\s+([^;]+);)?')
    re_70 = re.compile(b'branches: (.+);$')

    file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')

    prefix = ''   # leading path to strip off what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository; close the handle
        # explicitly instead of leaking it
        try:
            with open(os.path.join('CVS', 'Repository'), 'rb') as f:
                prefix = f.read().strip()
            directory = prefix
            if prefix == ".":
                prefix = ""
        except IOError:
            raise logerror(_('not a CVS sandbox'))

        if prefix and not prefix.endswith(pycompat.ossep):
            prefix += pycompat.ossep

        # Use the Root file in the sandbox, if it exists
        try:
            with open(os.path.join('CVS', 'Root'), 'rb') as f:
                root = f.read().strip()
        except IOError:
            pass

    if not root:
        root = encoding.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumeric characters, concatenated in a way that does not
        # mix up the various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":") + [directory, "cache"]
        cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir,
                                 '.'.join([s for s in cachefile if s]))

        if cache == 'update':
            try:
                ui.note(_('reading cvs log cache %s\n') % cachefile)
                with open(cachefile, 'rb') as f:
                    oldlog = pickle.load(f)
                for e in oldlog:
                    if not (util.safehasattr(e, 'branchpoints') and
                            util.safehasattr(e, 'commitid') and
                            util.safehasattr(e, 'mergepoint')):
                        # cache written by an older hg lacks attributes
                        # we rely on below; discard it
                        ui.status(_('ignoring old cache\n'))
                        oldlog = []
                        break

                ui.note(_('cache has %d log entries\n') % len(oldlog))
            except Exception as e:
                # a broken cache is not fatal; fall back to a full rlog
                ui.note(_('error reading cache: %r\n') % e)

        if oldlog:
            # only ask CVS for entries newer than the newest cached one
            date = oldlog[-1].date    # last commit date as a (time,tz) tuple
            date = dateutil.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith('/'):
            p += '/'
        if prefix:
            # looks like normpath replaces "" by "."
            prefix = p + util.normpath(prefix)
        else:
            prefix = p
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here; states are:
    #   0 - looking for 'RCS file'    1 - looking for 'Working file'
    #   2 - looking for 'symbolic names'   3 - reading tags/branches
    #   4 - waiting for '-----' separator  5 - reading 'revision'
    #   6 - reading date/author line       7/8 - reading commit message
    tags = {}     # dictionary of revisions on current file with their tags
    branchmap = {} # mapping between branch names and revision numbers
    rcsmap = {}
    state = 0
    store = False # set when a new record can be appended

    cmd = [procutil.shellquote(arg) for arg in cmd]
    ui.note(_("running %s\n") % (' '.join(cmd)))
    ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))

    pfp = procutil.popen(' '.join(cmd), 'rb')
    peek = util.fromnativeeol(pfp.readline())
    while True:
        line = peek
        if line == '':
            break
        peek = util.fromnativeeol(pfp.readline())
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # strip the trailing ',v' and our path prefix to get
                    # the working-file name
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise logerror(match.group(1))
            match = re_02.match(line)
            if match:
                raise logerror(match.group(2))
            if re_03.match(line):
                raise logerror(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                branchmap = {}
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))
                branchmap[match.group(1)] = match.group(2)

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('must have at least '
                                                'some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs),
                         file=scache(filename),
                         revision=tuple([int(x) for x in
                                         match.group(1).split('.')]),
                         branches=[],
                         parent=None,
                         commitid=None,
                         mergepoint=None,
                         branchpoints=set())

            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            if d[2] == '/':
                # Y2K
                d = '19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + ' UTC'
            e.date = dateutil.parsedate(d, ['%y/%m/%d %H:%M:%S',
                                            '%Y/%m/%d %H:%M:%S',
                                            '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None

            if match.group(7): # cvs 1.12 commitid
                e.commitid = match.group(8)

            if match.group(9): # cvsnt mergepoint
                myrev = match.group(10).split('.')
                if len(myrev) == 2: # head
                    e.mergepoint = 'HEAD'
                else:
                    myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
                    branches = [b for b in branchmap if branchmap[b] == myrev]
                    assert len(branches) == 1, ('unknown branch: %s'
                                                % e.mergepoint)
                    e.mergepoint = branches[0]

            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                              for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line) and re_50.match(peek):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                # '-----' may be part of the message; only treat it as a
                # separator when the next line starts a new revision
                cpeek = peek
                if cpeek.endswith('\n'):
                    cpeek = cpeek[:-1]
                if re_50.match(cpeek):
                    state = 5
                    store = True
                else:
                    e.comment.append(line)
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        # When a file is added on a branch B1, CVS creates a synthetic
        # dead trunk revision 1.1 so that the branch has a root.
        # Likewise, if you merge such a file to a later branch B2 (one
        # that already existed when the file was added on B1), CVS
        # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
        # these revisions now, but mark them synthetic so
        # createchangeset() can take care of them.
        if (store and
              e.dead and
              e.revision[-1] == 1 and      # 1.1 or 1.1.x.1
              len(e.comment) == 1 and
              file_added_re.match(e.comment[0])):
            ui.debug('found synthetic revision in %s: %r\n'
                     % (e.rcs, e.comment[0]))
            e.synthetic = True

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache('\n'.join(e.comment))

            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            # find the branches starting from this revision
            branchpoints = set()
            for branch, revision in branchmap.iteritems():
                revparts = tuple([int(i) for i in revision.split('.')])
                if len(revparts) < 2: # bad tags
                    continue
                if revparts[-2] == 0 and revparts[-1] % 2 == 0:
                    # normal branch
                    if revparts[:-2] == e.revision:
                        branchpoints.add(branch)
                elif revparts == (1, 1, 1): # vendor branch
                    if revparts in e.branches:
                        branchpoints.add(branch)
            e.branchpoints = branchpoints

            log.append(e)

            # map the non-Attic name back to the rcs name so cached
            # entries can be matched after a file moved in/out of Attic
            rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs

            if len(log) % 100 == 0:
                ui.status(stringutil.ellipsis('%d %s' % (len(log), e.file), 80)
                          + '\n')

    log.sort(key=lambda x: (x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
        rcs = e.rcs.replace('/Attic/', '/')
        if rcs in rcsmap:
            e.rcs = rcsmap[rcs]
        branch = e.revision[:-1]
        versions[(e.rcs, branch)] = e.revision

    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            log.sort(key=lambda x: x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror(_('log cache overlaps with new log entries,'
                                 ' re-run without cache.'))

            log = oldlog + log

            # write the new cachefile
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            with open(cachefile, 'wb') as f:
                pickle.dump(log, f)
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    encodings = ui.configlist('convert', 'cvsps.logencoding')
    if encodings:
        def revstr(r):
            # this is needed, because logentry.revision is a tuple of "int"
            # (e.g. (1, 2) for "1.2")
            return '.'.join(pycompat.maplist(pycompat.bytestr, r))

        for entry in log:
            comment = entry.comment
            for e in encodings:
                try:
                    # codecs want native-str encoding names
                    entry.comment = comment.decode(
                        pycompat.sysstr(e)).encode('utf-8')
                    if ui.debugflag:
                        ui.debug("transcoding by %s: %s of %s\n" %
                                 (e, revstr(entry.revision), entry.file))
                    break
                except UnicodeDecodeError:
                    pass # try next encoding
                except LookupError as inst: # unknown encoding, maybe
                    raise error.Abort(inst,
                                      hint=_('check convert.cvsps.logencoding'
                                             ' configuration'))
            else:
                raise error.Abort(_("no encoding can transcode"
                                    " CVS log message for %s of %s")
                                  % (revstr(entry.revision), entry.file),
                                  hint=_('check convert.cvsps.logencoding'
                                         ' configuration'))

    hook.hook(ui, None, "cvslog", True, log=log)

    return log
534 535
535 536
class changeset(object):
    '''Class changeset has the following attributes:
        .id        - integer identifying this changeset (list index)
        .author    - author name as CVS knows it
        .branch    - name of branch this changeset is on, or None
        .comment   - commit message
        .commitid  - CVS commitid or None
        .date      - the commit date as a (time,tz) tuple
        .entries   - list of logentry objects in this changeset
        .parents   - list of one or two parent changesets
        .tags      - list of tags on this changeset
        .synthetic - from synthetic revision "file ... added on branch ..."
        .mergepoint - the branch that has been merged from or None
        .branchpoints - the branches that start at the current entry or empty
    '''
    def __init__(self, **entries):
        # id is assigned later by createchangeset() once ordering is final
        self.id = None
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        # stable, sorted key order so reprs are comparable in tests
        pairs = ["%s=%r" % (key, self.__dict__[key])
                 for key in sorted(self.__dict__)]
        return "%s(%s)" % (type(self).__name__, ", ".join(pairs))
559 560
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
    '''Convert log into changesets.

    ui        - Mercurial ui for status/warning output
    log       - list of logentry objects, as returned by createlog()
    fuzz      - maximum number of seconds separating commits that may be
                merged into one changeset by the fuzzy detection below
    mergefrom - regex (pattern or None) matching commit messages naming a
                branch merged into this one; None selects the default
                {{mergefrombranch NAME}} marker, a false value disables it
    mergeto   - likewise for merges of this changeset into another branch;
                default marker is {{mergetobranch NAME}}

    Returns the list of changeset objects, numbered from 1 via .id.
    '''

    ui.status(_('creating changesets\n'))

    # try to order commitids by date
    mindate = {}
    for e in log:
        if e.commitid:
            # Fix: the old `min(e.date, mindate.get(e.commitid))` compared
            # against a default of None, and min(x, None) is always None in
            # Python 2 (and a TypeError on Python 3), so every commitid
            # mapped to None and the date ordering never happened.  Seed
            # with the first date seen instead.
            if e.commitid not in mindate:
                mindate[e.commitid] = e.date
            else:
                mindate[e.commitid] = min(e.date, mindate[e.commitid])

    # Merge changesets
    log.sort(key=lambda x: (mindate.get(x.commitid), x.commitid, x.comment,
                            x.author, x.branch, x.date, x.branchpoints))

    changesets = []
    files = set()
    c = None
    for i, e in enumerate(log):

        # Check if log entry belongs to the current changeset or not.

        # Since CVS is file-centric, two different file revisions with
        # different branchpoints should be treated as belonging to two
        # different changesets (and the ordering is important and not
        # honoured by cvsps at this point).
        #
        # Consider the following case:
        # foo 1.1 branchpoints: [MYBRANCH]
        # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
        #
        # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
        # later version of foo may be in MYBRANCH2, so foo should be the
        # first changeset and bar the next and MYBRANCH and MYBRANCH2
        # should both start off of the bar changeset. No provisions are
        # made to ensure that this is, in fact, what happens.
        if not (c and e.branchpoints == c.branchpoints and
                (# cvs commitids
                 (e.commitid is not None and e.commitid == c.commitid) or
                 (# no commitids, use fuzzy commit detection
                  (e.commitid is None or c.commitid is None) and
                   e.comment == c.comment and
                   e.author == c.author and
                   e.branch == c.branch and
                   ((c.date[0] + c.date[1]) <=
                    (e.date[0] + e.date[1]) <=
                    (c.date[0] + c.date[1]) + fuzz) and
                   e.file not in files))):
            c = changeset(comment=e.comment, author=e.author,
                          branch=e.branch, date=e.date,
                          entries=[], mergepoint=e.mergepoint,
                          branchpoints=e.branchpoints, commitid=e.commitid)
            changesets.append(c)

            files = set()
            if len(changesets) % 100 == 0:
                t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
                ui.status(stringutil.ellipsis(t, 80) + '\n')

        c.entries.append(e)
        files.add(e.file)
        c.date = e.date       # changeset date is date of latest commit in it

    # Mark synthetic changesets

    for c in changesets:
        # Synthetic revisions always get their own changeset, because
        # the log message includes the filename.  E.g. if you add file3
        # and file4 on a branch, you get four log entries and three
        # changesets:
        #   "File file3 was added on branch ..." (synthetic, 1 entry)
        #   "File file4 was added on branch ..." (synthetic, 1 entry)
        #   "Add file3 and file4 to fix ..."     (real, 2 entries)
        # Hence the check for 1 entry here.
        c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic

    # Sort files in each changeset

    def entitycompare(l, r):
        'Mimic cvsps sorting order'
        l = l.file.split('/')
        r = r.file.split('/')
        nl = len(l)
        nr = len(r)
        n = min(nl, nr)
        for i in range(n):
            if i + 1 == nl and nl < nr:
                return -1
            elif i + 1 == nr and nl > nr:
                return +1
            elif l[i] < r[i]:
                return -1
            elif l[i] > r[i]:
                return +1
        return 0

    for c in changesets:
        c.entries.sort(key=functools.cmp_to_key(entitycompare))

    # Sort changesets by date

    odd = set()
    def cscmp(l, r):
        d = sum(l.date) - sum(r.date)
        if d:
            return d

        # detect vendor branches and initial commits on a branch
        le = {}
        for e in l.entries:
            le[e.rcs] = e.revision
        re = {}
        for e in r.entries:
            re[e.rcs] = e.revision

        d = 0
        for e in l.entries:
            if re.get(e.rcs, None) == e.parent:
                assert not d
                d = 1
                break

        for e in r.entries:
            if le.get(e.rcs, None) == e.parent:
                if d:
                    odd.add((l, r))
                d = -1
                break
        # By this point, the changesets are sufficiently compared that
        # we don't really care about ordering. However, this leaves
        # some race conditions in the tests, so we compare on the
        # number of files modified, the files contained in each
        # changeset, and the branchpoints in the change to ensure test
        # output remains stable.

        # recommended replacement for cmp from
        # https://docs.python.org/3.0/whatsnew/3.0.html
        c = lambda x, y: (x > y) - (x < y)
        # Sort bigger changes first.
        if not d:
            d = c(len(l.entries), len(r.entries))
        # Try sorting by filename in the change.
        if not d:
            d = c([e.file for e in l.entries], [e.file for e in r.entries])
        # Try and put changes without a branch point before ones with
        # a branch point.
        if not d:
            d = c(len(l.branchpoints), len(r.branchpoints))
        return d

    changesets.sort(key=functools.cmp_to_key(cscmp))

    # Collect tags

    globaltags = {}
    for c in changesets:
        for e in c.entries:
            for tag in e.tags:
                # remember which is the latest changeset to have this tag
                globaltags[tag] = c

    for c in changesets:
        tags = set()
        for e in c.entries:
            tags.update(e.tags)
        # remember tags only if this is the latest changeset to have it
        c.tags = sorted(tag for tag in tags if globaltags[tag] is c)

    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
    # by inserting dummy changesets with two parents, and handle
    # {{mergefrombranch BRANCHNAME}} by setting two parents.

    if mergeto is None:
        mergeto = br'{{mergetobranch ([-\w]+)}}'
    if mergeto:
        mergeto = re.compile(mergeto)

    if mergefrom is None:
        mergefrom = br'{{mergefrombranch ([-\w]+)}}'
    if mergefrom:
        mergefrom = re.compile(mergefrom)

    versions = {}    # changeset index where we saw any particular file version
    branches = {}    # changeset index where we saw a branch
    n = len(changesets)
    i = 0
    while i < n:
        c = changesets[i]

        for f in c.entries:
            versions[(f.rcs, f.revision)] = i

        p = None
        if c.branch in branches:
            p = branches[c.branch]
        else:
            # first changeset on a new branch
            # the parent is a changeset with the branch in its
            # branchpoints such that it is the latest possible
            # commit without any intervening, unrelated commits.

            for candidate in xrange(i):
                if c.branch not in changesets[candidate].branchpoints:
                    if p is not None:
                        break
                    continue
                p = candidate

        c.parents = []
        if p is not None:
            p = changesets[p]

            # Ensure no changeset has a synthetic changeset as a parent.
            while p.synthetic:
                assert len(p.parents) <= 1, \
                       _('synthetic changeset cannot have multiple parents')
                if p.parents:
                    p = p.parents[0]
                else:
                    p = None
                    break

            if p is not None:
                c.parents.append(p)

        if c.mergepoint:
            if c.mergepoint == 'HEAD':
                c.mergepoint = None
            c.parents.append(changesets[branches[c.mergepoint]])

        if mergefrom:
            m = mergefrom.search(c.comment)
            if m:
                m = m.group(1)
                if m == 'HEAD':
                    m = None
                try:
                    candidate = changesets[branches[m]]
                except KeyError:
                    ui.warn(_("warning: CVS commit message references "
                              "non-existent branch %r:\n%s\n")
                            % (pycompat.bytestr(m), c.comment))
                if m in branches and c.branch != m and not candidate.synthetic:
                    c.parents.append(candidate)

        if mergeto:
            m = mergeto.search(c.comment)
            if m:
                if m.groups():
                    m = m.group(1)
                    if m == 'HEAD':
                        m = None
                else:
                    m = None   # if no group found then merge to HEAD
                if m in branches and c.branch != m:
                    # insert empty changeset for merge
                    cc = changeset(
                        author=c.author, branch=m, date=c.date,
                        comment='convert-repo: CVS merge from branch %s'
                        % c.branch,
                        entries=[], tags=[],
                        parents=[changesets[branches[m]], c])
                    changesets.insert(i + 1, cc)
                    branches[m] = i + 1

                    # adjust our loop counters now we have inserted a new entry
                    n += 1
                    i += 2
                    continue

        branches[c.branch] = i
        i += 1

    # Drop synthetic changesets (safe now that we have ensured no other
    # changesets can have them as parents).
    i = 0
    while i < len(changesets):
        if changesets[i].synthetic:
            del changesets[i]
        else:
            i += 1

    # Number changesets

    for i, c in enumerate(changesets):
        c.id = i + 1

    if odd:
        for l, r in odd:
            if l.id is not None and r.id is not None:
                ui.warn(_('changeset %d is both before and after %d\n')
                        % (l.id, r.id))

    ui.status(_('%d changeset entries\n') % len(changesets))

    hook.hook(ui, None, "cvschangesets", True, changesets=changesets)

    return changesets
858 859
859 860
def debugcvsps(ui, *args, **opts):
    '''Read CVS rlog for current directory or named path in
    repository, and convert the log to changesets based on matching
    commit log entries and dates.

    Output mimics the classic cvsps tool line-for-line (including its
    trailing-space quirks); changesets may be filtered by --branches and
    by a start/end tag or id pair in opts["revisions"].
    '''
    opts = pycompat.byteskwargs(opts)
    # map the two cache flags onto createlog()'s cache mode
    if opts["new_cache"]:
        cache = "write"
    elif opts["update_cache"]:
        cache = "update"
    else:
        cache = None

    revisions = opts["revisions"]

    try:
        if args:
            # concatenate the logs of every named path
            log = []
            for d in args:
                log += createlog(ui, d, root=opts["root"], cache=cache)
        else:
            log = createlog(ui, root=opts["root"], cache=cache)
    except logerror as e:
        ui.write("%r\n"%e)
        return

    changesets = createchangeset(ui, log, opts["fuzz"])
    del log

    # Print changesets (optionally filtered)

    # 'off' suppresses output until the start revision/tag is seen
    off = len(revisions)
    branches = {}    # latest version number in each branch
    ancestors = {}   # parent branch
    for cs in changesets:

        if opts["ancestors"]:
            # record where each branch forked off its parent branch
            if cs.branch not in branches and cs.parents and cs.parents[0].id:
                ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
                                        cs.parents[0].id)
            branches[cs.branch] = cs.id

        # limit by branches
        if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
            continue

        if not off:
            # Note: trailing spaces on several lines here are needed to have
            #       bug-for-bug compatibility with cvsps.
            ui.write('---------------------\n')
            ui.write(('PatchSet %d \n' % cs.id))
            ui.write(('Date: %s\n' % dateutil.datestr(cs.date,
                                                      '%Y/%m/%d %H:%M:%S %1%2')))
            ui.write(('Author: %s\n' % cs.author))
            ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
            ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
                                  ','.join(cs.tags) or '(none)')))
            if cs.branchpoints:
                ui.write(('Branchpoints: %s \n') %
                         ', '.join(sorted(cs.branchpoints)))
            if opts["parents"] and cs.parents:
                if len(cs.parents) > 1:
                    ui.write(('Parents: %s\n' %
                             (','.join([(b"%d" % p.id) for p in cs.parents]))))
                else:
                    ui.write(('Parent: %d\n' % cs.parents[0].id))

            if opts["ancestors"]:
                # walk the ancestors map back to the trunk (b becomes
                # falsy at HEAD) and print branch:forkid:latestid triples
                b = cs.branch
                r = []
                while b:
                    b, c = ancestors[b]
                    r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
                if r:
                    ui.write(('Ancestors: %s\n' % (','.join(r))))

            ui.write(('Log:\n'))
            ui.write('%s\n\n' % cs.comment)
            ui.write(('Members: \n'))
            for f in cs.entries:
                fn = f.file
                if fn.startswith(opts["prefix"]):
                    fn = fn[len(opts["prefix"]):]
                # old-revision->new-revision, with INITIAL for first revs
                # and (DEAD) appended for removals
                ui.write('\t%s:%s->%s%s \n' % (
                        fn,
                        '.'.join([b"%d" % x for x in f.parent]) or 'INITIAL',
                        '.'.join([(b"%d" % x) for x in f.revision]),
                        ['', '(DEAD)'][f.dead]))
            ui.write('\n')

        # have we seen the start tag?
        if revisions and off:
            if revisions[0] == (b"%d" % cs.id) or \
                revisions[0] in cs.tags:
                off = False

        # see if we reached the end tag
        if len(revisions) > 1 and not off:
            if revisions[1] == (b"%d" % cs.id) or \
                revisions[1] in cs.tags:
                break
General Comments 0
You need to be logged in to leave comments. Login now