##// END OF EJS Templates
cvsps: add b prefixes to regular expressions...
Augie Fackler -
r37901:72284d29 default
parent child Browse files
Show More
@@ -1,958 +1,958 b''
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import os
10 10 import re
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial import (
14 14 encoding,
15 15 error,
16 16 hook,
17 17 pycompat,
18 18 util,
19 19 )
20 20 from mercurial.utils import (
21 21 dateutil,
22 22 procutil,
23 23 stringutil,
24 24 )
25 25
26 26 pickle = util.pickle
27 27
class logentry(object):
    '''One file revision as parsed from CVS (r)log output.

    Instances are plain attribute bags: every keyword argument given to
    the constructor becomes an attribute. The attributes used by this
    module are:
        .author       - author name as CVS knows it
        .branch       - name of branch this revision is on
        .branches     - revision tuple of branches starting at this revision
        .comment      - commit message
        .commitid     - CVS commitid or None
        .date         - the commit date as a (time, tz) tuple
        .dead         - true if file revision is dead
        .file         - Name of file
        .lines        - a tuple (+lines, -lines) or None
        .parent       - Previous revision of this entry
        .rcs          - name of file as returned from CVS
        .revision     - revision number as tuple
        .tags         - list of tags on the file
        .synthetic    - is this a synthetic "file ... added on ..." revision?
        .mergepoint   - the branch that has been merged from (if present in
                        rlog output) or None
        .branchpoints - the branches that start at the current entry or empty
    '''
    def __init__(self, **entries):
        # 'synthetic' defaults to False but may be overridden by a keyword.
        attrs = {'synthetic': False}
        attrs.update(entries)
        self.__dict__.update(attrs)

    def __repr__(self):
        # Attributes are listed in sorted order so the output is stable.
        state = self.__dict__
        pairs = ["%s=%r" % (name, state[name]) for name in sorted(state)]
        return "%s(%s)" % (type(self).__name__, ", ".join(pairs))
55 55
class logerror(Exception):
    '''Raised by createlog() when the CVS log cannot be collected or merged
    with the cache; debugcvsps() catches it and prints it.'''
    pass
58 58
def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    The result has the same string type (bytes or text) as ``cvspath``.

    >>> getrepopath(b'/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'user@server/path/to/repository')
    '/path/to/repository'
    """
    # According to CVS manual, CVS paths are expressed like:
    # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # The path is split on ':' and, in the last part, the first '/' found
    # at or after the '@' (if any) is located. The result is the rest of
    # the string from that '/' on, including it.

    # Separators must match the argument's type: on Python 3, bytes methods
    # reject text arguments (and vice versa).
    if isinstance(cvspath, bytes):
        colon, at, slash = b':', b'@', b'/'
    else:
        colon, at, slash = u':', u'@', u'/'

    lastpart = cvspath.split(colon)[-1]
    atposition = lastpart.find(at)
    start = atposition if atposition != -1 else 0

    return lastpart[lastpart.find(slash, start):]
97 97
def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog and return it as a list of logentry objects.

    ui        - used for status/note/debug output and to read the
                convert.cvsps.logencoding configuration
    directory - path handed to "cvs (r)log"; when None, it is read from
                CVS/Repository in the current working directory
    root      - CVSROOT; when empty it falls back to CVS/Root (only if
                directory was None) and then to $CVSROOT
    rlog      - run "cvs rlog" when true, "cvs log" otherwise
    cache     - false value: no cache is used; 'update': an existing cache
                in ~/.hg.cvsps is read and only newer entries are fetched;
                any true value: the resulting log is written to the cache

    Raises logerror when not in a CVS sandbox, when cvs reports an error,
    or when cached and new entries overlap; raises error.Abort when the
    configured log encodings cannot transcode a commit message.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile(b'RCS file: (.+)$')
    re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
    re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
    re_03 = re.compile(b"(Cannot access.+CVSROOT)|"
                       b"(can't create temporary directory.+)$")
    re_10 = re.compile(b'Working file: (.+)$')
    re_20 = re.compile(b'symbolic names:')
    re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
    re_31 = re.compile(b'----------------------------$')
    re_32 = re.compile(b'======================================='
                       b'======================================$')
    # raw literal: same pattern, without relying on invalid '\s' escapes
    re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
                       br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
                       br'(\s+commitid:\s+([^;]+);)?'
                       br'(.*mergepoint:\s+([^;]+);)?')
    re_70 = re.compile(b'branches: (.+);$')

    file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')

    prefix = ''   # leading path to strip of what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            with open(os.path.join('CVS', 'Repository'), 'rb') as fp:
                prefix = fp.read().strip()
            directory = prefix
            if prefix == ".":
                prefix = ""
        except IOError:
            raise logerror(_('not a CVS sandbox'))

        if prefix and not prefix.endswith(pycompat.ossep):
            prefix += pycompat.ossep

        # Use the Root file in the sandbox, if it exists
        try:
            with open(os.path.join('CVS', 'Root'), 'rb') as fp:
                root = fp.read().strip()
        except IOError:
            pass

    if not root:
        root = encoding.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumeric characters, concatenated in a way that does not
        # mix up the various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":") + [directory, "cache"]
        cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir,
                                 '.'.join([s for s in cachefile if s]))

    if cache == 'update':
        try:
            ui.note(_('reading cvs log cache %s\n') % cachefile)
            # NOTE(review): the cache is a pickle; unpickling executes
            # whatever the file describes, so this is only safe because the
            # file lives in the user's own ~/.hg.cvsps directory.
            with open(cachefile, 'rb') as fp:
                oldlog = pickle.load(fp)
            for e in oldlog:
                # entries written by older versions lack these attributes
                if not (util.safehasattr(e, 'branchpoints') and
                        util.safehasattr(e, 'commitid') and
                        util.safehasattr(e, 'mergepoint')):
                    ui.status(_('ignoring old cache\n'))
                    oldlog = []
                    break

            ui.note(_('cache has %d log entries\n') % len(oldlog))
        except Exception as e:
            # best effort: an unreadable cache just means a full fetch
            ui.note(_('error reading cache: %r\n') % e)

        if oldlog:
            date = oldlog[-1].date    # last commit date as a (time,tz) tuple
            date = dateutil.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith('/'):
            p += '/'
        if prefix:
            # looks like normpath replaces "" by "."
            prefix = p + util.normpath(prefix)
        else:
            prefix = p
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}      # dictionary of revisions on current file with their tags
    branchmap = {} # mapping between branch names and revision numbers
    rcsmap = {}
    state = 0
    store = False  # set when a new record can be appended

    cmd = [procutil.shellquote(arg) for arg in cmd]
    ui.note(_("running %s\n") % (' '.join(cmd)))
    ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))

    pfp = procutil.popen(' '.join(cmd), 'rb')
    # one line of lookahead is kept in 'peek' so that a '-----' separator
    # can be told apart from the same text inside a commit message
    peek = util.fromnativeeol(pfp.readline())
    while True:
        line = peek
        if not line:
            # readline() returns an empty string at end of output
            break
        peek = util.fromnativeeol(pfp.readline())
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # strip the trailing ',v' and the repository prefix
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise logerror(match.group(1))
            match = re_02.match(line)
            if match:
                raise logerror(match.group(2))
            if re_03.match(line):
                raise logerror(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                branchmap = {}
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))
                branchmap[match.group(1)] = match.group(2)

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('must have at least '
                                                'some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs),
                         file=scache(filename),
                         revision=tuple([int(x) for x in
                                         match.group(1).split('.')]),
                         branches=[],
                         parent=None,
                         commitid=None,
                         mergepoint=None,
                         branchpoints=set())

            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            if d[2] == '/':
                # Y2K
                d = '19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + ' UTC'
            e.date = dateutil.parsedate(d, ['%y/%m/%d %H:%M:%S',
                                            '%Y/%m/%d %H:%M:%S',
                                            '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None

            if match.group(7): # cvs 1.12 commitid
                e.commitid = match.group(8)

            if match.group(9): # cvsnt mergepoint
                myrev = match.group(10).split('.')
                if len(myrev) == 2: # head
                    e.mergepoint = 'HEAD'
                else:
                    # rebuild the magic branch number and look up its name
                    myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
                    branches = [b for b in branchmap if branchmap[b] == myrev]
                    # report the revision being looked up; e.mergepoint is
                    # still None at this point
                    assert len(branches) == 1, ('unknown branch: %s'
                                                % myrev)
                    e.mergepoint = branches[0]

            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                              for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line) and re_50.match(peek):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                cpeek = peek
                if cpeek.endswith('\n'):
                    cpeek = cpeek[:-1]
                if re_50.match(cpeek):
                    state = 5
                    store = True
                else:
                    e.comment.append(line)
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        # When a file is added on a branch B1, CVS creates a synthetic
        # dead trunk revision 1.1 so that the branch has a root.
        # Likewise, if you merge such a file to a later branch B2 (one
        # that already existed when the file was added on B1), CVS
        # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
        # these revisions now, but mark them synthetic so
        # createchangeset() can take care of them.
        if (store and
              e.dead and
              e.revision[-1] == 1 and      # 1.1 or 1.1.x.1
              len(e.comment) == 1 and
              file_added_re.match(e.comment[0])):
            ui.debug('found synthetic revision in %s: %r\n'
                     % (e.rcs, e.comment[0]))
            e.synthetic = True

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache('\n'.join(e.comment))

            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            # find the branches starting from this revision
            branchpoints = set()
            # items() works on both Python 2 and 3 (iteritems() does not)
            for branch, revision in branchmap.items():
                revparts = tuple([int(i) for i in revision.split('.')])
                if len(revparts) < 2: # bad tags
                    continue
                if revparts[-2] == 0 and revparts[-1] % 2 == 0:
                    # normal branch
                    if revparts[:-2] == e.revision:
                        branchpoints.add(branch)
                elif revparts == (1, 1, 1): # vendor branch
                    if revparts in e.branches:
                        branchpoints.add(branch)
            e.branchpoints = branchpoints

            log.append(e)

            rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs

            if len(log) % 100 == 0:
                ui.status(stringutil.ellipsis('%d %s' % (len(log), e.file), 80)
                          + '\n')

    log.sort(key=lambda x: (x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
        rcs = e.rcs.replace('/Attic/', '/')
        if rcs in rcsmap:
            e.rcs = rcsmap[rcs]
        branch = e.revision[:-1]
        versions[(e.rcs, branch)] = e.revision

    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            # first revision seen on this branch: parent is the branch root
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            log.sort(key=lambda x: x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror(_('log cache overlaps with new log entries,'
                                 ' re-run without cache.'))

            log = oldlog + log

            # write the new cachefile
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            with open(cachefile, 'wb') as fp:
                pickle.dump(log, fp)
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    encodings = ui.configlist('convert', 'cvsps.logencoding')
    if encodings:
        def revstr(r):
            # this is needed, because logentry.revision is a tuple of "int"
            # (e.g. (1, 2) for "1.2")
            return '.'.join(pycompat.maplist(pycompat.bytestr, r))

        for entry in log:
            comment = entry.comment
            for e in encodings:
                try:
                    entry.comment = comment.decode(e).encode('utf-8')
                    if ui.debugflag:
                        ui.debug("transcoding by %s: %s of %s\n" %
                                 (e, revstr(entry.revision), entry.file))
                    break
                except UnicodeDecodeError:
                    pass # try next encoding
                except LookupError as inst: # unknown encoding, maybe
                    raise error.Abort(inst,
                                      hint=_('check convert.cvsps.logencoding'
                                             ' configuration'))
            else:
                # no configured encoding could decode this message
                raise error.Abort(_("no encoding can transcode"
                                    " CVS log message for %s of %s")
                                  % (revstr(entry.revision), entry.file),
                                  hint=_('check convert.cvsps.logencoding'
                                         ' configuration'))

    hook.hook(ui, None, "cvslog", True, log=log)

    return log
533 533
534 534
class changeset(object):
    '''A group of logentry objects that form one commit.

    Instances are plain attribute bags: every keyword argument given to
    the constructor becomes an attribute. The attributes used by this
    module are:
        .id           - integer identifying this changeset (list index)
        .author       - author name as CVS knows it
        .branch       - name of branch this changeset is on, or None
        .comment      - commit message
        .commitid     - CVS commitid or None
        .date         - the commit date as a (time,tz) tuple
        .entries      - list of logentry objects in this changeset
        .parents      - list of one or two parent changesets
        .tags         - list of tags on this changeset
        .synthetic    - from synthetic revision "file ... added on branch ..."
        .mergepoint   - the branch that has been merged from or None
        .branchpoints - the branches that start at the current entry or empty
    '''
    def __init__(self, **entries):
        # Defaults first; keywords may override either of them.
        attrs = {'id': None, 'synthetic': False}
        attrs.update(entries)
        self.__dict__.update(attrs)

    def __repr__(self):
        # Attributes are listed in sorted order so the output is stable.
        state = self.__dict__
        pairs = ["%s=%r" % (name, state[name]) for name in sorted(state)]
        return "%s(%s)" % (type(self).__name__, ", ".join(pairs))
558 558
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
    '''Convert log into changesets.

    ui        - used for status and warning output
    log       - list of logentry objects (as returned by createlog()); it
                is sorted in place
    fuzz      - largest difference between the summed (time, tz) date
                values of two entries that may still share one changeset
                when no commitid is available
    mergefrom - regular expression whose first group names the branch a
                commit message says it was merged from; None selects
                '{{mergefrombranch BRANCHNAME}}', any other false value
                disables the check
    mergeto   - likewise for '{{mergetobranch BRANCHNAME}}'

    Returns the list of changeset objects, numbered from 1 in their .id,
    with synthetic changesets removed.
    '''
    # cmp_to_key lets the comparison functions below be used as sort keys;
    # list.sort() only accepts a comparison function on Python 2.
    import functools

    ui.status(_('creating changesets\n'))

    # try to order commitids by date
    mindate = {}
    for e in log:
        if e.commitid:
            # do not feed None into min(): on Python 2 it always wins, on
            # Python 3 it raises TypeError
            if e.commitid not in mindate:
                mindate[e.commitid] = e.date
            else:
                mindate[e.commitid] = min(e.date, mindate[e.commitid])

    # Merge changesets
    # None is replaced by values that sort first, so that the key tuples
    # stay comparable on Python 3.
    log.sort(key=lambda x: (mindate.get(x.commitid, (-1, 0)),
                            x.commitid or b'', x.comment,
                            x.author, x.branch or b'', x.date,
                            x.branchpoints))

    changesets = []
    files = set()
    c = None
    for e in log:

        # Check if log entry belongs to the current changeset or not.

        # Since CVS is file-centric, two different file revisions with
        # different branchpoints should be treated as belonging to two
        # different changesets (and the ordering is important and not
        # honoured by cvsps at this point).
        #
        # Consider the following case:
        # foo 1.1 branchpoints: [MYBRANCH]
        # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
        #
        # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
        # later version of foo may be in MYBRANCH2, so foo should be the
        # first changeset and bar the next and MYBRANCH and MYBRANCH2
        # should both start off of the bar changeset. No provisions are
        # made to ensure that this is, in fact, what happens.
        if not (c and e.branchpoints == c.branchpoints and
                (# cvs commitids
                 (e.commitid is not None and e.commitid == c.commitid) or
                 (# no commitids, use fuzzy commit detection
                  (e.commitid is None or c.commitid is None) and
                   e.comment == c.comment and
                   e.author == c.author and
                   e.branch == c.branch and
                   ((c.date[0] + c.date[1]) <=
                    (e.date[0] + e.date[1]) <=
                    (c.date[0] + c.date[1]) + fuzz) and
                   e.file not in files))):
            c = changeset(comment=e.comment, author=e.author,
                          branch=e.branch, date=e.date,
                          entries=[], mergepoint=e.mergepoint,
                          branchpoints=e.branchpoints, commitid=e.commitid)
            changesets.append(c)

            files = set()
            if len(changesets) % 100 == 0:
                t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
                ui.status(stringutil.ellipsis(t, 80) + '\n')

        c.entries.append(e)
        files.add(e.file)
        c.date = e.date       # changeset date is date of latest commit in it

    # Mark synthetic changesets

    for c in changesets:
        # Synthetic revisions always get their own changeset, because
        # the log message includes the filename.  E.g. if you add file3
        # and file4 on a branch, you get four log entries and three
        # changesets:
        #   "File file3 was added on branch ..." (synthetic, 1 entry)
        #   "File file4 was added on branch ..." (synthetic, 1 entry)
        #   "Add file3 and file4 to fix ..."     (real, 2 entries)
        # Hence the check for 1 entry here.
        c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic

    # Sort files in each changeset

    def entitycompare(l, r):
        'Mimic cvsps sorting order'
        l = l.file.split('/')
        r = r.file.split('/')
        nl = len(l)
        nr = len(r)
        n = min(nl, nr)
        for i in range(n):
            if i + 1 == nl and nl < nr:
                return -1
            elif i + 1 == nr and nl > nr:
                return +1
            elif l[i] < r[i]:
                return -1
            elif l[i] > r[i]:
                return +1
        return 0

    for c in changesets:
        c.entries.sort(key=functools.cmp_to_key(entitycompare))

    # Sort changesets by date

    odd = set()
    def cscmp(l, r):
        d = sum(l.date) - sum(r.date)
        if d:
            return d

        # detect vendor branches and initial commits on a branch
        lrevs = {}
        for e in l.entries:
            lrevs[e.rcs] = e.revision
        rrevs = {}
        for e in r.entries:
            rrevs[e.rcs] = e.revision

        d = 0
        for e in l.entries:
            if rrevs.get(e.rcs, None) == e.parent:
                assert not d
                d = 1
                break

        for e in r.entries:
            if lrevs.get(e.rcs, None) == e.parent:
                if d:
                    # each changeset contains a parent of the other
                    odd.add((l, r))
                d = -1
                break
        # By this point, the changesets are sufficiently compared that
        # we don't really care about ordering. However, this leaves
        # some race conditions in the tests, so we compare on the
        # number of files modified, the files contained in each
        # changeset, and the branchpoints in the change to ensure test
        # output remains stable.

        # recommended replacement for cmp from
        # https://docs.python.org/3.0/whatsnew/3.0.html
        c = lambda x, y: (x > y) - (x < y)
        # Sort bigger changes first.
        if not d:
            d = c(len(l.entries), len(r.entries))
        # Try sorting by filename in the change.
        if not d:
            d = c([e.file for e in l.entries], [e.file for e in r.entries])
        # Try and put changes without a branch point before ones with
        # a branch point.
        if not d:
            d = c(len(l.branchpoints), len(r.branchpoints))
        return d

    changesets.sort(key=functools.cmp_to_key(cscmp))

    # Collect tags

    globaltags = {}
    for c in changesets:
        for e in c.entries:
            for tag in e.tags:
                # remember which is the latest changeset to have this tag
                globaltags[tag] = c

    for c in changesets:
        tags = set()
        for e in c.entries:
            tags.update(e.tags)
        # remember tags only if this is the latest changeset to have it
        c.tags = sorted(tag for tag in tags if globaltags[tag] is c)

    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
    # by inserting dummy changesets with two parents, and handle
    # {{mergefrombranch BRANCHNAME}} by setting two parents.

    if mergeto is None:
        mergeto = br'{{mergetobranch ([-\w]+)}}'
    if mergeto:
        mergeto = re.compile(mergeto)

    if mergefrom is None:
        mergefrom = br'{{mergefrombranch ([-\w]+)}}'
    if mergefrom:
        mergefrom = re.compile(mergefrom)

    versions = {}    # changeset index where we saw any particular file version
    branches = {}    # changeset index where we saw a branch
    n = len(changesets)
    i = 0
    while i < n:
        c = changesets[i]

        for f in c.entries:
            versions[(f.rcs, f.revision)] = i

        p = None
        if c.branch in branches:
            p = branches[c.branch]
        else:
            # first changeset on a new branch
            # the parent is a changeset with the branch in its
            # branchpoints such that it is the latest possible
            # commit without any intervening, unrelated commits.

            for candidate in range(i):
                if c.branch not in changesets[candidate].branchpoints:
                    if p is not None:
                        break
                    continue
                p = candidate

        c.parents = []
        if p is not None:
            p = changesets[p]

            # Ensure no changeset has a synthetic changeset as a parent.
            while p.synthetic:
                assert len(p.parents) <= 1, \
                       _('synthetic changeset cannot have multiple parents')
                if p.parents:
                    p = p.parents[0]
                else:
                    p = None
                    break

            if p is not None:
                c.parents.append(p)

        if c.mergepoint:
            # branches uses None as the key for HEAD
            if c.mergepoint == 'HEAD':
                c.mergepoint = None
            c.parents.append(changesets[branches[c.mergepoint]])

        if mergefrom:
            m = mergefrom.search(c.comment)
            if m:
                m = m.group(1)
                if m == 'HEAD':
                    m = None
                try:
                    candidate = changesets[branches[m]]
                except KeyError:
                    ui.warn(_("warning: CVS commit message references "
                              "non-existent branch %r:\n%s\n")
                            % (m, c.comment))
                # 'm in branches' is false after a KeyError, so candidate
                # is only inspected when the lookup above succeeded
                if m in branches and c.branch != m and not candidate.synthetic:
                    c.parents.append(candidate)

        if mergeto:
            m = mergeto.search(c.comment)
            if m:
                if m.groups():
                    m = m.group(1)
                    if m == 'HEAD':
                        m = None
                else:
                    m = None   # if no group found then merge to HEAD
                if m in branches and c.branch != m:
                    # insert empty changeset for merge
                    cc = changeset(
                        author=c.author, branch=m, date=c.date,
                        comment='convert-repo: CVS merge from branch %s'
                        % c.branch,
                        entries=[], tags=[],
                        parents=[changesets[branches[m]], c])
                    changesets.insert(i + 1, cc)
                    branches[m] = i + 1

                    # adjust our loop counters now we have inserted a new entry
                    n += 1
                    i += 2
                    continue

        branches[c.branch] = i
        i += 1

    # Drop synthetic changesets (safe now that we have ensured no other
    # changesets can have them as parents).
    changesets = [c for c in changesets if not c.synthetic]

    # Number changesets

    for i, c in enumerate(changesets):
        c.id = i + 1

    if odd:
        for l, r in odd:
            # dropped synthetic changesets never received an id
            if l.id is not None and r.id is not None:
                ui.warn(_('changeset %d is both before and after %d\n')
                        % (l.id, r.id))

    ui.status(_('%d changeset entries\n') % len(changesets))

    hook.hook(ui, None, "cvschangesets", True, changesets=changesets)

    return changesets
857 857
858 858
def debugcvsps(ui, *args, **opts):
    '''Read CVS rlog for current directory or named path in
    repository, and convert the log to changesets based on matching
    commit log entries and dates.

    Each positional argument is passed to createlog() as a directory; with
    no arguments the current directory is used. The options read from opts
    are new_cache, update_cache, revisions, root, fuzz, ancestors,
    branches, parents and prefix. The changesets are written to ui in
    cvsps output format; a logerror from createlog() is printed and ends
    the command.
    '''
    opts = pycompat.byteskwargs(opts)
    if opts["new_cache"]:
        cache = "write"
    elif opts["update_cache"]:
        cache = "update"
    else:
        cache = None

    revisions = opts["revisions"]

    try:
        if args:
            log = []
            for d in args:
                log += createlog(ui, d, root=opts["root"], cache=cache)
        else:
            log = createlog(ui, root=opts["root"], cache=cache)
    except logerror as e:
        ui.write("%r\n"%e)
        return

    changesets = createchangeset(ui, log, opts["fuzz"])
    del log

    # Print changesets (optionally filtered)

    # 'off' stays true until the first requested revision has been seen;
    # with no revisions requested, printing starts immediately
    off = len(revisions)
    branches = {}    # latest version number in each branch
    ancestors = {}   # parent branch
    for cs in changesets:

        if opts["ancestors"]:
            if cs.branch not in branches and cs.parents and cs.parents[0].id:
                ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
                                        cs.parents[0].id)
            branches[cs.branch] = cs.id

        # limit by branches
        if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
            continue

        if not off:
            # Note: trailing spaces on several lines here are needed to have
            #       bug-for-bug compatibility with cvsps.
            ui.write('---------------------\n')
            ui.write(('PatchSet %d \n' % cs.id))
            ui.write(('Date: %s\n' % dateutil.datestr(cs.date,
                                                 '%Y/%m/%d %H:%M:%S %1%2')))
            ui.write(('Author: %s\n' % cs.author))
            ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
            ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
                                  ','.join(cs.tags) or '(none)')))
            if cs.branchpoints:
                ui.write(('Branchpoints: %s \n') %
                         ', '.join(sorted(cs.branchpoints)))
            if opts["parents"] and cs.parents:
                if len(cs.parents) > 1:
                    ui.write(('Parents: %s\n' %
                             (','.join([(b"%d" % p.id) for p in cs.parents]))))
                else:
                    ui.write(('Parent: %d\n' % cs.parents[0].id))

            if opts["ancestors"]:
                b = cs.branch
                r = []
                while b:
                    b, c = ancestors[b]
                    r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
                if r:
                    ui.write(('Ancestors: %s\n' % (','.join(r))))

            ui.write(('Log:\n'))
            ui.write('%s\n\n' % cs.comment)
            ui.write(('Members: \n'))
            for f in cs.entries:
                fn = f.file
                if fn.startswith(opts["prefix"]):
                    fn = fn[len(opts["prefix"]):]
                # format both revisions the same way; str() would yield
                # text rather than bytes on Python 3
                ui.write('\t%s:%s->%s%s \n' % (
                        fn,
                        '.'.join([(b"%d" % x) for x in f.parent]) or 'INITIAL',
                        '.'.join([(b"%d" % x) for x in f.revision]),
                        ['', '(DEAD)'][f.dead]))
            ui.write('\n')

        # have we seen the start tag?
        if revisions and off:
            if revisions[0] == (b"%d" % cs.id) or \
                revisions[0] in cs.tags:
                off = False

        # see if we reached the end tag
        if len(revisions) > 1 and not off:
            if revisions[1] == (b"%d" % cs.id) or \
                revisions[1] in cs.tags:
                break
General Comments 0
You need to be logged in to leave comments. Login now