cvsps.py
965 lines
| 34.2 KiB
| text/x-python
|
PythonLexer
Frank Kingswood
|
r6687 | # Mercurial built-in replacement for cvsps. | ||
# | ||||
# Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk> | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
timeless
|
r28369 | from __future__ import absolute_import | ||
Frank Kingswood
|
r6687 | |||
Augie Fackler
|
r37903 | import functools | ||
Frank Kingswood
|
r6687 | import os | ||
import re | ||||
timeless
|
r28369 | |||
Yuya Nishihara
|
r29205 | from mercurial.i18n import _ | ||
timeless
|
r28369 | from mercurial import ( | ||
Pulkit Goyal
|
r30638 | encoding, | ||
FUJIWARA Katsunori
|
r33388 | error, | ||
timeless
|
r28369 | hook, | ||
Pulkit Goyal
|
r30616 | pycompat, | ||
timeless
|
r28369 | util, | ||
) | ||||
Yuya Nishihara
|
r37102 | from mercurial.utils import ( | ||
dateutil, | ||||
Yuya Nishihara
|
r37138 | procutil, | ||
Yuya Nishihara
|
r37102 | stringutil, | ||
) | ||||
Frank Kingswood
|
r6687 | |||
Pulkit Goyal
|
r29324 | pickle = util.pickle | ||
Frank Kingswood
|
class logentry(object):
    '''One revision of one file, as collected from the CVS (r)log output.

    Instances are plain attribute bags; the creator passes the attributes
    as keyword arguments. The attributes used by this module are:
        .author       - author name as CVS knows it
        .branch       - name of branch this revision is on
        .branches     - revision tuple of branches starting at this revision
        .comment      - commit message
        .commitid     - CVS commitid or None
        .date         - the commit date as a (time, tz) tuple
        .dead         - true if file revision is dead
        .file         - Name of file
        .lines        - a tuple (+lines, -lines) or None
        .parent       - Previous revision of this entry
        .rcs          - name of file as returned from CVS
        .revision     - revision number as tuple
        .tags         - list of tags on the file
        .synthetic    - is this a synthetic "file ... added on ..." revision?
        .mergepoint   - the branch that has been merged from (if present in
                        rlog output) or None
        .branchpoints - the branches that start at the current entry or empty
    '''
    def __init__(self, **entries):
        # Set the default first so an explicit synthetic=... keyword wins.
        self.synthetic = False
        vars(self).update(entries)

    def __repr__(self):
        # Attributes are listed in sorted order to keep the output stable.
        state = vars(self)
        pairs = [r"%s=%r" % (name, state[name]) for name in sorted(state)]
        return r"%s(%s)" % (type(self).__name__, r", ".join(pairs))
Greg Ward
|
r8080 | |||
Frank Kingswood
|
class logerror(Exception):
    '''Error raised while collecting or caching the CVS log.'''
Patrick Mezard
|
def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    >>> getrepopath(b'/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(b'user@server/path/to/repository')
    '/path/to/repository'
    """
    # According to CVS manual, CVS paths are expressed like:
    # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # Only the text after the last ':' can contain the path. Within that
    # text the path starts at the first '/' found at or after the '@'
    # (or at the first '/' at all when there is no '@').
    tail = cvspath.rpartition(':')[2]
    searchfrom = max(tail.find('@'), 0)
    return tail[tail.find('/', searchfrom):]
Patrick Mezard
|
r7097 | |||
Frank Kingswood
|
def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog

    Runs "cvs log" or "cvs rlog", parses its output with a small state
    machine and returns a list of logentry objects.

    ui        - used for status/note/debug output and to read the
                convert.cvsps.logencoding configuration
    directory - module/directory to log; when None the current directory
                must be a CVS sandbox and CVS/Repository (and CVS/Root,
                if present) are read from it
    root      - CVSROOT; when empty the CVSROOT environment variable is
                used
    rlog      - run "cvs rlog" when true, "cvs log" otherwise
    cache     - false: do not use a cache; 'update': read the existing
                cache and only ask CVS for newer revisions; any true
                value: (re)write the cache file under ~/.hg.cvsps

    Raises logerror when the sandbox files are missing, when cvs reports
    an error, or when cached and new entries overlap; raises error.Abort
    when the commit messages cannot be transcoded.
    '''

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile(b'RCS file: (.+)$')
    re_01 = re.compile(b'cvs \\[r?log aborted\\]: (.+)$')
    re_02 = re.compile(b'cvs (r?log|server): (.+)\n$')
    re_03 = re.compile(b"(Cannot access.+CVSROOT)|"
                       b"(can't create temporary directory.+)$")
    re_10 = re.compile(b'Working file: (.+)$')
    re_20 = re.compile(b'symbolic names:')
    re_30 = re.compile(b'\t(.+): ([\\d.]+)$')
    re_31 = re.compile(b'----------------------------$')
    re_32 = re.compile(b'======================================='
                       b'======================================$')
    # raw literal: '\s' is not a valid escape in a non-raw literal
    re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
                       br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
                       br'(\s+commitid:\s+([^;]+);)?'
                       br'(.*mergepoint:\s+([^;]+);)?')
    re_70 = re.compile(b'branches: (.+);$')

    file_added_re = re.compile(br'file [^/]+ was (initially )?added on branch')

    prefix = ''   # leading path to strip of what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            with open(os.path.join('CVS', 'Repository'), 'rb') as fp:
                prefix = fp.read().strip()
            directory = prefix
            if prefix == ".":
                prefix = ""
        except IOError:
            raise logerror(_('not a CVS sandbox'))

        if prefix and not prefix.endswith(pycompat.ossep):
            prefix += pycompat.ossep

        # Use the Root file in the sandbox, if it exists
        try:
            with open(os.path.join('CVS', 'Root'), 'rb') as fp:
                root = fp.read().strip()
        except IOError:
            pass

    if not root:
        root = encoding.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumeric characters, concatenated in a way that does not
        # mix up the various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":") + [directory, "cache"]
        cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir,
                                 '.'.join([s for s in cachefile if s]))

    if cache == 'update':
        try:
            ui.note(_('reading cvs log cache %s\n') % cachefile)
            # NOTE: unpickling executes whatever the file describes, so the
            # cache must only ever be a file written by this function.
            with open(cachefile, 'rb') as fp:
                oldlog = pickle.load(fp)
            for e in oldlog:
                # caches written before these attributes existed are
                # discarded as a whole
                if not (util.safehasattr(e, 'branchpoints') and
                        util.safehasattr(e, 'commitid') and
                        util.safehasattr(e, 'mergepoint')):
                    ui.status(_('ignoring old cache\n'))
                    oldlog = []
                    break

            ui.note(_('cache has %d log entries\n') % len(oldlog))
        except Exception as e:
            # best effort: an unreadable cache just means a full log run
            ui.note(_('error reading cache: %r\n') % e)

        if oldlog:
            date = oldlog[-1].date    # last commit date as a (time,tz) tuple
            date = dateutil.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith('/'):
            p += '/'
        if prefix:
            # looks like normpath replaces "" by "."
            prefix = p + util.normpath(prefix)
        else:
            prefix = p
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}      # dictionary of revisions on current file with their tags
    branchmap = {} # mapping between branch names and revision numbers
    rcsmap = {}    # rcs name without '/Attic/' -> rcs name as last seen
    state = 0
    store = False  # set when a new record can be appended

    cmd = [procutil.shellquote(arg) for arg in cmd]
    ui.note(_("running %s\n") % (' '.join(cmd)))
    ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))

    pfp = procutil.popen(' '.join(cmd), 'rb')
    # One line of lookahead is kept in 'peek': a '-----' separator only
    # ends a log message when the next line is a 'revision' line.
    peek = util.fromnativeeol(pfp.readline())
    while True:
        line = peek
        if line == '':
            break
        peek = util.fromnativeeol(pfp.readline())
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # rcs[:-2] drops the last two characters of the name
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise logerror(match.group(1))
            match = re_02.match(line)
            if match:
                raise logerror(match.group(2))
            if re_03.match(line):
                raise logerror(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                branchmap = {}
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))
                branchmap[match.group(1)] = match.group(2)

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('must have at least '
                                                'some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs),
                         file=scache(filename),
                         revision=tuple([int(x) for x in
                                         match.group(1).split('.')]),
                         branches=[],
                         parent=None,
                         commitid=None,
                         mergepoint=None,
                         branchpoints=set())

            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            # slice instead of index so the test also works on bytes,
            # where indexing yields an int
            if d[2:3] == '/':
                # Y2K
                d = '19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + ' UTC'
            e.date = dateutil.parsedate(d, ['%y/%m/%d %H:%M:%S',
                                            '%Y/%m/%d %H:%M:%S',
                                            '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None

            if match.group(7): # cvs 1.12 commitid
                e.commitid = match.group(8)

            if match.group(9): # cvsnt mergepoint
                myrev = match.group(10).split('.')
                if len(myrev) == 2: # head
                    e.mergepoint = 'HEAD'
                else:
                    # rebuild the magic branch number (x.y.0.z) under
                    # which the branch is listed in the symbolic names
                    myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
                    branches = [b for b in branchmap if branchmap[b] == myrev]
                    # e.mergepoint is still None here, so report the
                    # revision that was looked up instead
                    assert len(branches) == 1, ('unknown branch: %s'
                                                % myrev)
                    e.mergepoint = branches[0]

            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                              for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line) and re_50.match(peek):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                cpeek = peek
                if cpeek.endswith('\n'):
                    cpeek = cpeek[:-1]
                if re_50.match(cpeek):
                    state = 5
                    store = True
                else:
                    e.comment.append(line)
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        # When a file is added on a branch B1, CVS creates a synthetic
        # dead trunk revision 1.1 so that the branch has a root.
        # Likewise, if you merge such a file to a later branch B2 (one
        # that already existed when the file was added on B1), CVS
        # creates a synthetic dead revision 1.1.x.1 on B2.  Don't drop
        # these revisions now, but mark them synthetic so
        # createchangeset() can take care of them.
        if (store and
              e.dead and
              e.revision[-1] == 1 and      # 1.1 or 1.1.x.1
              len(e.comment) == 1 and
              file_added_re.match(e.comment[0])):
            ui.debug('found synthetic revision in %s: %r\n'
                     % (e.rcs, e.comment[0]))
            e.synthetic = True

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache('\n'.join(e.comment))

            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            # find the branches starting from this revision
            branchpoints = set()
            for branch, revision in branchmap.iteritems():
                revparts = tuple([int(i) for i in revision.split('.')])
                if len(revparts) < 2: # bad tags
                    continue
                if revparts[-2] == 0 and revparts[-1] % 2 == 0:
                    # normal branch
                    if revparts[:-2] == e.revision:
                        branchpoints.add(branch)
                elif revparts == (1, 1, 1): # vendor branch
                    if revparts in e.branches:
                        branchpoints.add(branch)
            e.branchpoints = branchpoints

            log.append(e)

            rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs

            if len(log) % 100 == 0:
                ui.status(stringutil.ellipsis('%d %s' % (len(log), e.file), 80)
                          + '\n')

    log.sort(key=lambda x: (x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
        # a cached entry may name the file with or without '/Attic/';
        # use the name seen in this run so the keys below match
        rcs = e.rcs.replace('/Attic/', '/')
        if rcs in rcsmap:
            e.rcs = rcsmap[rcs]
        branch = e.revision[:-1]
        versions[(e.rcs, branch)] = e.revision

    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            # first revision seen on this branch: the parent is the
            # revision the branch number was derived from
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            log.sort(key=lambda x: x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror(_('log cache overlaps with new log entries,'
                                 ' re-run without cache.'))

            log = oldlog + log

            # write the new cachefile
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            # close explicitly so the pickle is flushed before returning
            with open(cachefile, 'wb') as fp:
                pickle.dump(log, fp)
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    encodings = ui.configlist('convert', 'cvsps.logencoding')
    if encodings:
        def revstr(r):
            # this is needed, because logentry.revision is a tuple of "int"
            # (e.g. (1, 2) for "1.2")
            return '.'.join(pycompat.maplist(pycompat.bytestr, r))

        for entry in log:
            comment = entry.comment
            # the first configured encoding that decodes the message wins
            for e in encodings:
                try:
                    entry.comment = comment.decode(
                        pycompat.sysstr(e)).encode('utf-8')
                    if ui.debugflag:
                        ui.debug("transcoding by %s: %s of %s\n" %
                                 (e, revstr(entry.revision), entry.file))
                    break
                except UnicodeDecodeError:
                    pass # try next encoding
                except LookupError as inst: # unknown encoding, maybe
                    raise error.Abort(inst,
                                      hint=_('check convert.cvsps.logencoding'
                                             ' configuration'))
            else:
                raise error.Abort(_("no encoding can transcode"
                                    " CVS log message for %s of %s")
                                  % (revstr(entry.revision), entry.file),
                                  hint=_('check convert.cvsps.logencoding'
                                         ' configuration'))

    hook.hook(ui, None, "cvslog", True, log=log)

    return log
class changeset(object):
    '''Class changeset has the following attributes:
        .id        - integer identifying this changeset (list index)
        .author    - author name as CVS knows it
        .branch    - name of branch this changeset is on, or None
        .comment   - commit message
        .commitid  - CVS commitid or None
        .date      - the commit date as a (time,tz) tuple
        .entries   - list of logentry objects in this changeset
        .parents   - list of one or two parent changesets
        .tags      - list of tags on this changeset
        .synthetic - from synthetic revision "file ... added on branch ..."
        .mergepoint- the branch that has been merged from or None
        .branchpoints- the branches that start at the current entry or empty

    The attributes are passed to the constructor as keyword arguments;
    only .id (None) and .synthetic (False) have defaults.
    '''
    def __init__(self, **entries):
        self.id = None
        self.synthetic = False
        self.__dict__.update(entries)

    def __repr__(self):
        # r"" literals, as in logentry.__repr__, so that both classes
        # build their repr from the same kind of string as the attribute
        # and class names they are formatted with
        items = (r"%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
        return r"%s(%s)"%(type(self).__name__, r", ".join(items))
Greg Ward
|
r8080 | |||
Frank Kingswood
|
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
    '''Convert log into changesets.

    ui        - used for status and warning output
    log       - list of logentry objects; it is sorted in place
    fuzz      - largest difference between the date of a log entry and
                the date of the current changeset for the entry to be
                added to it when no commitid is available (presumably
                seconds - confirm against dateutil)
    mergefrom - regular expression searched for in commit messages whose
                first group names the branch that was merged from; None
                selects the default '{{mergefrombranch BRANCHNAME}}'
                pattern, any other false value disables the search
    mergeto   - same for the branch merged to; the default pattern is
                '{{mergetobranch BRANCHNAME}}'

    Returns the list of changeset objects with .id numbered from 1.
    '''

    ui.status(_('creating changesets\n'))

    # try to order commitids by date
    mindate = {}
    for e in log:
        if e.commitid:
            if e.commitid not in mindate:
                mindate[e.commitid] = e.date
            else:
                mindate[e.commitid] = min(e.date, mindate[e.commitid])

    # Merge changesets
    log.sort(key=lambda x: (mindate.get(x.commitid, (-1, 0)),
                            x.commitid or '', x.comment,
                            x.author, x.branch or '', x.date, x.branchpoints))

    changesets = []
    files = set()
    c = None
    for e in log:

        # Check if log entry belongs to the current changeset or not.

        # Since CVS is file-centric, two different file revisions with
        # different branchpoints should be treated as belonging to two
        # different changesets (and the ordering is important and not
        # honoured by cvsps at this point).
        #
        # Consider the following case:
        # foo 1.1 branchpoints: [MYBRANCH]
        # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
        #
        # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
        # later version of foo may be in MYBRANCH2, so foo should be the
        # first changeset and bar the next and MYBRANCH and MYBRANCH2
        # should both start off of the bar changeset. No provisions are
        # made to ensure that this is, in fact, what happens.
        if not (c and e.branchpoints == c.branchpoints and
                (# cvs commitids
                 (e.commitid is not None and e.commitid == c.commitid) or
                 (# no commitids, use fuzzy commit detection
                  (e.commitid is None or c.commitid is None) and
                   e.comment == c.comment and
                   e.author == c.author and
                   e.branch == c.branch and
                   ((c.date[0] + c.date[1]) <=
                    (e.date[0] + e.date[1]) <=
                    (c.date[0] + c.date[1]) + fuzz) and
                   e.file not in files))):
            c = changeset(comment=e.comment, author=e.author,
                          branch=e.branch, date=e.date,
                          entries=[], mergepoint=e.mergepoint,
                          branchpoints=e.branchpoints, commitid=e.commitid)
            changesets.append(c)

            files = set()
            if len(changesets) % 100 == 0:
                t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
                ui.status(stringutil.ellipsis(t, 80) + '\n')

        c.entries.append(e)
        files.add(e.file)
        c.date = e.date       # changeset date is date of latest commit in it

    # Mark synthetic changesets
    for c in changesets:
        # Synthetic revisions always get their own changeset, because
        # the log message includes the filename.  E.g. if you add file3
        # and file4 on a branch, you get four log entries and three
        # changesets:
        #   "File file3 was added on branch ..." (synthetic, 1 entry)
        #   "File file4 was added on branch ..." (synthetic, 1 entry)
        #   "Add file3 and file4 to fix ..."     (real, 2 entries)
        # Hence the check for 1 entry here.
        c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic

    # Sort files in each changeset

    def entitycompare(l, r):
        'Mimic cvsps sorting order'
        l = l.file.split('/')
        r = r.file.split('/')
        nl = len(l)
        nr = len(r)
        n = min(nl, nr)
        for i in range(n):
            # once the shorter path reaches its last component it sorts
            # before the longer one, whatever the names are
            if i + 1 == nl and nl < nr:
                return -1
            elif i + 1 == nr and nl > nr:
                return +1
            elif l[i] < r[i]:
                return -1
            elif l[i] > r[i]:
                return +1
        return 0

    for c in changesets:
        c.entries.sort(key=functools.cmp_to_key(entitycompare))

    # Sort changesets by date

    odd = set()   # pairs of changesets that are both before and after
    def cscmp(l, r):
        d = sum(l.date) - sum(r.date)
        if d:
            return d

        # detect vendor branches and initial commits on a branch
        # (named lrevs/rrevs so that the re module is not shadowed)
        lrevs = {}
        for e in l.entries:
            lrevs[e.rcs] = e.revision
        rrevs = {}
        for e in r.entries:
            rrevs[e.rcs] = e.revision

        d = 0
        for e in l.entries:
            if rrevs.get(e.rcs, None) == e.parent:
                assert not d
                d = 1
                break

        for e in r.entries:
            if lrevs.get(e.rcs, None) == e.parent:
                if d:
                    odd.add((l, r))
                d = -1
                break
        # By this point, the changesets are sufficiently compared that
        # we don't really care about ordering. However, this leaves
        # some race conditions in the tests, so we compare on the
        # number of files modified, the files contained in each
        # changeset, and the branchpoints in the change to ensure test
        # output remains stable.

        # recommended replacement for cmp from
        # https://docs.python.org/3.0/whatsnew/3.0.html
        c = lambda x, y: (x > y) - (x < y)
        # Sort bigger changes first.
        if not d:
            d = c(len(l.entries), len(r.entries))
        # Try sorting by filename in the change.
        if not d:
            d = c([e.file for e in l.entries], [e.file for e in r.entries])
        # Try and put changes without a branch point before ones with
        # a branch point.
        if not d:
            d = c(len(l.branchpoints), len(r.branchpoints))
        return d

    changesets.sort(key=functools.cmp_to_key(cscmp))

    # Collect tags

    globaltags = {}
    for c in changesets:
        for e in c.entries:
            for tag in e.tags:
                # remember which is the latest changeset to have this tag
                globaltags[tag] = c

    for c in changesets:
        tags = set()
        for e in c.entries:
            tags.update(e.tags)
        # remember tags only if this is the latest changeset to have it
        c.tags = sorted(tag for tag in tags if globaltags[tag] is c)

    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
    # by inserting dummy changesets with two parents, and handle
    # {{mergefrombranch BRANCHNAME}} by setting two parents.

    if mergeto is None:
        mergeto = br'{{mergetobranch ([-\w]+)}}'
    if mergeto:
        mergeto = re.compile(mergeto)

    if mergefrom is None:
        mergefrom = br'{{mergefrombranch ([-\w]+)}}'
    if mergefrom:
        mergefrom = re.compile(mergefrom)

    branches = {}    # changeset index where we saw a branch
    n = len(changesets)
    i = 0
    while i < n:
        c = changesets[i]

        p = None
        if c.branch in branches:
            p = branches[c.branch]
        else:
            # first changeset on a new branch
            # the parent is a changeset with the branch in its
            # branchpoints such that it is the latest possible
            # commit without any intervening, unrelated commits.

            for candidate in xrange(i):
                if c.branch not in changesets[candidate].branchpoints:
                    if p is not None:
                        break
                    continue
                p = candidate

        c.parents = []
        if p is not None:
            p = changesets[p]

            # Ensure no changeset has a synthetic changeset as a parent.
            while p.synthetic:
                assert len(p.parents) <= 1, \
                       _('synthetic changeset cannot have multiple parents')
                if p.parents:
                    p = p.parents[0]
                else:
                    p = None
                    break

            if p is not None:
                c.parents.append(p)

        if c.mergepoint:
            # the trunk is stored under the key None in 'branches'
            if c.mergepoint == 'HEAD':
                c.mergepoint = None
            c.parents.append(changesets[branches[c.mergepoint]])

        if mergefrom:
            m = mergefrom.search(c.comment)
            if m:
                m = m.group(1)
                if m == 'HEAD':
                    m = None
                try:
                    candidate = changesets[branches[m]]
                except KeyError:
                    # set explicitly so that no stale or unbound value
                    # is looked at below
                    candidate = None
                    ui.warn(_("warning: CVS commit message references "
                              "non-existent branch %r:\n%s\n")
                            % (pycompat.bytestr(m), c.comment))
                if (candidate is not None and c.branch != m
                    and not candidate.synthetic):
                    c.parents.append(candidate)

        if mergeto:
            m = mergeto.search(c.comment)
            if m:
                if m.groups():
                    m = m.group(1)
                    if m == 'HEAD':
                        m = None
                else:
                    m = None   # if no group found then merge to HEAD
                if m in branches and c.branch != m:
                    # insert empty changeset for merge
                    cc = changeset(
                        author=c.author, branch=m, date=c.date,
                        comment='convert-repo: CVS merge from branch %s'
                        % c.branch,
                        entries=[], tags=[],
                        parents=[changesets[branches[m]], c])
                    changesets.insert(i + 1, cc)
                    branches[m] = i + 1

                    # adjust our loop counters now we have inserted a new entry
                    n += 1
                    i += 2
                    continue

        branches[c.branch] = i
        i += 1

    # Drop synthetic changesets (safe now that we have ensured no other
    # changesets can have them as parents).
    # One pass instead of repeated del, which shifts the tail each time.
    changesets[:] = [cs for cs in changesets if not cs.synthetic]

    # Number changesets
    for i, c in enumerate(changesets):
        c.id = i + 1

    if odd:
        for l, r in odd:
            if l.id is not None and r.id is not None:
                ui.warn(_('changeset %d is both before and after %d\n')
                        % (l.id, r.id))

    ui.status(_('%d changeset entries\n') % len(changesets))

    hook.hook(ui, None, "cvschangesets", True, changesets=changesets)

    return changesets
Frank Kingswood
|
r7502 | |||
def debugcvsps(ui, *args, **opts):
    '''Print CVS history as cvsps-style patch sets.

    The CVS rlog is read with createlog() -- once for every path named
    in ``args``, or once without an explicit path when ``args`` is
    empty -- and grouped into changesets by createchangeset().  The
    changesets are then written to ``ui``, optionally limited by the
    "branches" and "revisions" options.

    If reading the log raises logerror, the error is written to ``ui``
    and nothing else is printed.
    '''
    opts = pycompat.byteskwargs(opts)

    # cache mode handed through to createlog()
    cache = None
    if opts["new_cache"]:
        cache = "write"
    elif opts["update_cache"]:
        cache = "update"

    revisions = opts["revisions"]

    try:
        if args:
            log = []
            for directory in args:
                log.extend(createlog(ui, directory, root=opts["root"],
                                     cache=cache))
        else:
            log = createlog(ui, root=opts["root"], cache=cache)
    except logerror as e:
        ui.write("%r\n"%e)
        return

    changesets = createchangeset(ui, log, opts["fuzz"])
    del log

    # When a start revision is given, nothing is printed until a
    # changeset matching it has gone by (that changeset itself is not
    # printed, because the check happens after the printing step).
    skipping = len(revisions)
    latest = {}      # branch -> id of the newest changeset seen on it
    forkedfrom = {}  # branch -> (parent branch, id of parent changeset)

    def matches(rev, cset):
        '''Is rev the number of cset, or one of its tags?'''
        return rev == (b"%d" % cset.id) or rev in cset.tags

    def show(cset):
        '''Write one changeset to ui in cvsps layout.'''
        # Note: trailing spaces on several lines here are needed to have
        # bug-for-bug compatibility with cvsps.
        ui.write('---------------------\n')
        ui.write(('PatchSet %d \n' % cset.id))
        ui.write(('Date: %s\n' % dateutil.datestr(cset.date,
                                                  '%Y/%m/%d %H:%M:%S %1%2')))
        ui.write(('Author: %s\n' % cset.author))
        ui.write(('Branch: %s\n' % (cset.branch or 'HEAD')))
        plural = 's' if len(cset.tags) > 1 else ''
        ui.write(('Tag%s: %s \n' % (plural,
                                    ','.join(cset.tags) or '(none)')))
        if cset.branchpoints:
            points = ', '.join(sorted(cset.branchpoints))
            ui.write(('Branchpoints: %s \n') % points)
        if opts["parents"] and cset.parents:
            if len(cset.parents) > 1:
                ids = ','.join([(b"%d" % p.id) for p in cset.parents])
                ui.write(('Parents: %s\n' % ids))
            else:
                ui.write(('Parent: %d\n' % cset.parents[0].id))

        if opts["ancestors"]:
            # follow the recorded fork points back until the branch
            # name becomes false (printed as HEAD)
            branch = cset.branch
            chain = []
            while branch:
                branch, forkid = forkedfrom[branch]
                chain.append('%s:%d:%d' % (branch or "HEAD", forkid,
                                           latest[branch]))
            if chain:
                ui.write(('Ancestors: %s\n' % (','.join(chain))))

        ui.write(('Log:\n'))
        ui.write('%s\n\n' % cset.comment)
        ui.write(('Members: \n'))
        for entry in cset.entries:
            name = entry.file
            prefix = opts["prefix"]
            if name.startswith(prefix):
                name = name[len(prefix):]
            oldrev = '.'.join([b"%d" % x for x in entry.parent]) or 'INITIAL'
            newrev = '.'.join([(b"%d" % x) for x in entry.revision])
            ui.write('\t%s:%s->%s%s \n' % (name, oldrev, newrev,
                                           ['', '(DEAD)'][entry.dead]))
        ui.write('\n')

    # Print changesets (optionally filtered)
    for cs in changesets:
        if opts["ancestors"]:
            # the first changeset seen on a branch records where that
            # branch came from
            if (cs.branch not in latest and cs.parents
                and cs.parents[0].id):
                parentid = cs.parents[0].id
                forkedfrom[cs.branch] = (changesets[parentid - 1].branch,
                                         cs.parents[0].id)
            latest[cs.branch] = cs.id

        # limit by branches
        if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
            continue

        if not skipping:
            show(cs)

        # have we seen the start tag?
        if revisions and skipping and matches(revisions[0], cs):
            skipping = False

        # see if we reached the end tag
        if len(revisions) > 1 and not skipping and matches(revisions[1], cs):
            break