cvsps.py
586 lines
| 19.4 KiB
| text/x-python
|
PythonLexer
Frank Kingswood
|
r6687 | # | ||
# Mercurial built-in replacement for cvsps. | ||||
# | ||||
# Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk> | ||||
# | ||||
# This software may be used and distributed according to the terms | ||||
# of the GNU General Public License, incorporated herein by reference. | ||||
import os | ||||
import re | ||||
import cPickle as pickle | ||||
from mercurial import util | ||||
from mercurial.i18n import _ | ||||
def listsort(list, key):
    """Sort `list` in place, ordering its items by `key(item)`.

    The keyword form of sort() is tried first.  If that raises
    TypeError (the original helper was written for Python 2.3), the
    sort is redone with an equivalent cmp-style comparison function.
    Nothing is returned.
    """
    try:
        list.sort(key=key)
    except TypeError:
        def bykey(left, right):
            # compare two items through their keys
            return cmp(key(left), key(right))
        list.sort(bykey)
Frank Kingswood
|
r6687 | |||
class logentry(object):
    '''One revision of one file, as read from the CVS log.

    Instances are plain attribute bags; the code in this file sets:
        .author    - author name as CVS knows it
        .branch    - name of branch this revision is on
        .branches  - revision tuple of branches starting at this revision
        .comment   - commit message
        .date      - the commit date as a (time, tz) tuple
        .dead      - true if file revision is dead
        .file      - Name of file
        .lines     - a tuple (+lines, -lines) or None
        .parent    - Previous revision of this entry
        .rcs       - name of file as returned from CVS
        .revision  - revision number as tuple
        .tags      - list of tags on the file
    '''
    def __init__(self, **entries):
        # every keyword argument becomes an attribute of the same name
        for name, value in entries.items():
            setattr(self, name, value)
class logerror(Exception):
    '''Error raised by this module while collecting or caching the CVS log.'''
Patrick Mezard
|
def getrepopath(cvspath):
    """Return the repository path from a CVS path.

    >>> getrepopath('/foo/bar')
    '/foo/bar'
    >>> getrepopath('c:/foo/bar')
    'c:/foo/bar'
    >>> getrepopath(':pserver:10/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:10c:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:c:/foo/bar')
    'c:/foo/bar'
    >>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
    '/foo/bar'
    >>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
    'c:/foo/bar'
    """
    # The CVS manual gives the general shape of a CVS path as
    #   [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
    #
    # A Windows absolute path begins with a drive letter ('c:'), which
    # adds one more colon.  The heuristic used to tell the two apart:
    # a drive letter is exactly one character, whereas every genuine
    # CVS component ahead of the repository path has at least two.
    pieces = cvspath.split(':')
    if len(pieces) == 1:
        # no colon at all: the whole string is the path
        return cvspath

    before, path = pieces[-2], pieces[-1]
    if len(before) <= 1:
        # a (at most) one-character component is taken to be a drive
        # letter and is glued back onto the path
        return ':'.join([before, path])

    # Otherwise the last field may still start with a port number.  A
    # port immediately followed by a drive letter is ambiguous; it is
    # assumed never to occur, so such a component is treated as part
    # of the CVS prefix and dropped.
    return path.lstrip('0123456789')
Frank Kingswood
|
def _readstripped(path):
    "return the stripped contents of the file at path, closing it afterwards"
    fp = open(path)
    try:
        return fp.read().strip()
    finally:
        fp.close()

def createlog(ui, directory=None, root="", rlog=True, cache=None):
    '''Collect the CVS rlog

    Runs "cvs log" or "cvs rlog" and parses its output with a small
    line-driven state machine into a list of logentry objects.

    ui        - object providing status(), note() and debug()
    directory - passed to cvs as its last argument.  When None, the
                current directory must be a CVS sandbox: the directory
                is read from CVS/Repository and root, if CVS/Root is
                readable, from that file.
    root      - CVS root; when empty, $CVSROOT is used.
    rlog      - true to run "cvs rlog", false to run "cvs log"
    cache     - false: no cache is used.  Any true value: the resulting
                log is pickled below ~/.hg.cvsps.  'update': an existing
                cache is loaded first and cvs is only asked for entries
                later than the last cached one.

    Returns the list of logentry objects.  With a cache this is the old
    entries followed by the new ones sorted by date; without one the
    entries are sorted by (rcs, revision).

    Raises logerror when directory is None outside a CVS sandbox or when
    new entries overlap the cache, Exception for the cvs error lines the
    patterns below recognise, and AssertionError when the output does
    not have the expected shape.
    '''
    # sys is only needed to fetch the active exception without
    # version-specific "except ..., e" syntax
    import sys

    # Because we store many duplicate commit log messages, reusing strings
    # saves a lot of memory and pickle storage space.
    _scache = {}
    def scache(s):
        "return a shared version of a string"
        return _scache.setdefault(s, s)

    ui.status(_('collecting CVS rlog\n'))

    log = []      # list of logentry objects containing the CVS state

    # patterns to match in CVS (r)log output, by state of use
    re_00 = re.compile('RCS file: (.+)$')
    re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
    # NOTE(review): the trailing newline is stripped from every line
    # before matching below, so this pattern (which requires '\n') does
    # not appear able to match; left unchanged, confirm the intent.
    re_02 = re.compile('cvs (r?log|server): (.+)\n$')
    re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
    re_10 = re.compile('Working file: (.+)$')
    re_20 = re.compile('symbolic names:')
    re_30 = re.compile('\t(.+): ([\\d.]+)$')
    re_31 = re.compile('----------------------------$')
    re_32 = re.compile('=============================================================================$')
    # raw string: same pattern text as before, without relying on '\s'
    # being an unrecognised string escape
    re_50 = re.compile(r'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
    re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
    re_70 = re.compile('branches: (.+);$')

    prefix = ''   # leading path to strip of what we get from CVS

    if directory is None:
        # Current working directory

        # Get the real directory in the repository
        try:
            prefix = _readstripped(os.path.join('CVS','Repository'))
        except IOError:
            raise logerror('Not a CVS sandbox')
        if prefix == ".":
            prefix = ""
        directory = prefix

        if prefix and not prefix.endswith(os.sep):
            prefix += os.sep

        # Use the Root file in the sandbox, if it exists
        try:
            root = _readstripped(os.path.join('CVS','Root'))
        except IOError:
            pass

    if not root:
        root = os.environ.get('CVSROOT', '')

    # read log cache if one exists
    oldlog = []
    date = None

    if cache:
        cachedir = os.path.expanduser('~/.hg.cvsps')
        if not os.path.exists(cachedir):
            os.mkdir(cachedir)

        # The cvsps cache pickle needs a uniquified name, based on the
        # repository location. The address may have all sort of nasties
        # in it, slashes, colons and such. So here we take just the
        # alphanumerics, concatenated in a way that does not mix up the
        # various components, so that
        #    :pserver:user@server:/path
        # and
        #    /pserver/user/server/path
        # are mapped to different cache file names.
        cachefile = root.split(":") + [directory, "cache"]
        cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
        cachefile = os.path.join(cachedir,
                                 '.'.join([s for s in cachefile if s]))

    if cache == 'update':
        # NOTE: the cache is a pickle; unpickling runs whatever the file
        # describes, so it is only as trustworthy as ~/.hg.cvsps itself.
        try:
            ui.note(_('reading cvs log cache %s\n') % cachefile)
            fp = open(cachefile)
            try:
                oldlog = pickle.load(fp)
            finally:
                fp.close()
            ui.note(_('cache has %d log entries\n') % len(oldlog))
        except Exception:
            # best effort: an unreadable cache just means a full rlog
            ui.note(_('error reading cache: %r\n') % (sys.exc_info()[1],))

        if oldlog:
            date = oldlog[-1].date    # last commit date as a (time,tz) tuple
            date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')

    # build the CVS commandline
    cmd = ['cvs', '-q']
    if root:
        cmd.append('-d%s' % root)
        p = util.normpath(getrepopath(root))
        if not p.endswith('/'):
            p += '/'
        prefix = p + util.normpath(prefix)
    # rlog is used as a list index: false picks 'log', true picks 'rlog'
    cmd.append(['log', 'rlog'][rlog])
    if date:
        # no space between option and date string
        cmd.append('-d>%s' % date)
    cmd.append(directory)

    # state machine begins here
    tags = {}     # dictionary of revisions on current file with their tags
    state = 0
    store = False # set when a new record can be appended

    cmd = [util.shellquote(arg) for arg in cmd]
    ui.note(_("running %s\n") % (' '.join(cmd)))
    ui.debug(_("prefix=%r directory=%r root=%r\n") % (prefix, directory, root))

    for line in util.popen(' '.join(cmd)):
        if line.endswith('\n'):
            line = line[:-1]
        #ui.debug('state=%d line=%r\n' % (state, line))

        if state == 0:
            # initial state, consume input until we see 'RCS file'
            match = re_00.match(line)
            if match:
                rcs = match.group(1)
                tags = {}
                if rlog:
                    # drop the last two characters of the RCS name, then
                    # the repository prefix and any Attic component
                    filename = util.normpath(rcs[:-2])
                    if filename.startswith(prefix):
                        filename = filename[len(prefix):]
                    if filename.startswith('/'):
                        filename = filename[1:]
                    if filename.startswith('Attic/'):
                        filename = filename[6:]
                    else:
                        filename = filename.replace('/Attic/', '/')
                    state = 2
                    continue
                state = 1
                continue
            match = re_01.match(line)
            if match:
                raise Exception(match.group(1))
            match = re_02.match(line)
            if match:
                raise Exception(match.group(2))
            if re_03.match(line):
                raise Exception(line)

        elif state == 1:
            # expect 'Working file' (only when using log instead of rlog)
            match = re_10.match(line)
            assert match, _('RCS file must be followed by working file')
            filename = util.normpath(match.group(1))
            state = 2

        elif state == 2:
            # expect 'symbolic names'
            if re_20.match(line):
                state = 3

        elif state == 3:
            # read the symbolic names and store as tags
            match = re_30.match(line)
            if match:
                rev = [int(x) for x in match.group(2).split('.')]

                # Convert magic branch number to an odd-numbered one
                revn = len(rev)
                if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
                    rev = rev[:-2] + rev[-1:]
                rev = tuple(rev)

                if rev not in tags:
                    tags[rev] = []
                tags[rev].append(match.group(1))

            elif re_31.match(line):
                state = 5
            elif re_32.match(line):
                state = 0

        elif state == 4:
            # expecting '------' separator before first revision
            # NOTE(review): nothing in this function sets state to 4
            # (state 3 goes straight to 5), so this branch looks
            # unreachable.
            if re_31.match(line):
                state = 5
            else:
                assert not re_32.match(line), _('Must have at least some revisions')

        elif state == 5:
            # expecting revision number and possibly (ignored) lock indication
            # we create the logentry here from values stored in states 0 to 4,
            # as this state is re-entered for subsequent revisions of a file.
            match = re_50.match(line)
            assert match, _('expected revision number')
            e = logentry(rcs=scache(rcs), file=scache(filename),
                    revision=tuple([int(x) for x in match.group(1).split('.')]),
                    branches=[], parent=None)
            state = 6

        elif state == 6:
            # expecting date, author, state, lines changed
            match = re_60.match(line)
            assert match, _('revision must be followed by date line')
            d = match.group(1)
            if d[2] == '/':
                # Y2K
                d = '19' + d

            if len(d.split()) != 3:
                # cvs log dates always in GMT
                d = d + ' UTC'
            e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
            e.author = scache(match.group(2))
            e.dead = match.group(3).lower() == 'dead'

            # groups 5 and 6 are the optional '+N' and '-N' line counts
            if match.group(5):
                if match.group(6):
                    e.lines = (int(match.group(5)), int(match.group(6)))
                else:
                    e.lines = (int(match.group(5)), 0)
            elif match.group(6):
                e.lines = (0, int(match.group(6)))
            else:
                e.lines = None
            e.comment = []
            state = 7

        elif state == 7:
            # read the revision numbers of branches that start at this revision
            # or store the commit log message otherwise
            m = re_70.match(line)
            if m:
                e.branches = [tuple([int(y) for y in x.strip().split('.')])
                                for x in m.group(1).split(';')]
                state = 8
            elif re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        elif state == 8:
            # store commit log message
            if re_31.match(line):
                state = 5
                store = True
            elif re_32.match(line):
                state = 0
                store = True
            else:
                e.comment.append(line)

        if store:
            # clean up the results and save in the log.
            store = False
            e.tags = util.sort([scache(x) for x in tags.get(e.revision, [])])
            e.comment = scache('\n'.join(e.comment))

            # revisions with an even number (> 3) of components take the
            # first tag recorded for their branch number, if any
            revn = len(e.revision)
            if revn > 3 and (revn % 2) == 0:
                e.branch = tags.get(e.revision[:-1], [None])[0]
            else:
                e.branch = None

            log.append(e)

            if len(log) % 100 == 0:
                ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')

    listsort(log, key=lambda x:(x.rcs, x.revision))

    # find parent revisions of individual files
    versions = {}
    for e in log:
        branch = e.revision[:-1]
        p = versions.get((e.rcs, branch), None)
        if p is None:
            # first revision seen on this branch: parent is the
            # revision number with its last two components removed
            p = e.revision[:-2]
        e.parent = p
        versions[(e.rcs, branch)] = e.revision

    # update the log cache
    if cache:
        if log:
            # join up the old and new logs
            listsort(log, key=lambda x:x.date)

            if oldlog and oldlog[-1].date >= log[0].date:
                raise logerror('Log cache overlaps with new log entries,'
                               ' re-run without cache.')

            log = oldlog + log

            # write the new cachefile
            ui.note(_('writing cvs log cache %s\n') % cachefile)
            # mode is kept as 'w' so the file is written exactly as
            # before; the handle is now closed explicitly
            fp = open(cachefile, 'w')
            try:
                pickle.dump(log, fp)
            finally:
                fp.close()
        else:
            log = oldlog

    ui.status(_('%d log entries\n') % len(log))

    return log
class changeset(object):
    '''A group of log entries treated as a single commit.

    Instances are plain attribute bags; the code in this file sets:
        .author    - author name as CVS knows it
        .branch    - name of branch this changeset is on, or None
        .comment   - commit message
        .date      - the commit date as a (time,tz) tuple
        .entries   - list of logentry objects in this changeset
        .parents   - list of one or two parent changesets
        .tags      - list of tags on this changeset
    '''
    def __init__(self, **entries):
        # every keyword argument becomes an attribute of the same name
        for name, value in entries.items():
            setattr(self, name, value)
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
    '''Convert log into changesets.

    ui        - object providing status()
    log       - list of logentry objects; it is sorted in place
    fuzz      - an entry joins the current changeset only if its
                date[0] + date[1] is at most this much later than that
                of the entry added last
    mergefrom - regular expression searched for in commit messages;
                group 1 names a branch that becomes a second parent.
                None selects '{{mergefrombranch BRANCHNAME}}', any other
                false value disables the feature.
    mergeto   - regular expression searched for in commit messages;
                group 1 names a branch onto which an empty merge
                changeset is inserted.  None selects
                '{{mergetobranch BRANCHNAME}}', any other false value
                disables the feature.

    In both patterns the branch name 'HEAD' is mapped to branch None.

    Returns the list of changeset objects, each with .parents, .tags
    and .id (numbered from 1) set.
    '''

    ui.status(_('creating changesets\n'))

    # Merge changesets

    listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))

    changesets = []
    files = {}    # files already present in the current changeset
    c = None
    for e in log:

        # Check if log entry belongs to the current changeset or not.
        if not (c and
                  e.comment == c.comment and
                  e.author == c.author and
                  e.branch == c.branch and
                  ((c.date[0] + c.date[1]) <=
                   (e.date[0] + e.date[1]) <=
                   (c.date[0] + c.date[1]) + fuzz) and
                  e.file not in files):
            c = changeset(comment=e.comment, author=e.author,
                          branch=e.branch, date=e.date, entries=[])
            changesets.append(c)
            files = {}
            if len(changesets) % 100 == 0:
                t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
                ui.status(util.ellipsis(t, 80) + '\n')

        c.entries.append(e)
        files[e.file] = True
        c.date = e.date       # changeset date is date of latest commit in it

    # Sort files in each changeset

    # The comparison helpers do not depend on the changeset, so they
    # are defined once instead of once per loop iteration.
    def pathcompare(l, r):
        'Mimic cvsps sorting order'
        l = l.split('/')
        r = r.split('/')
        nl = len(l)
        nr = len(r)
        n = min(nl, nr)
        for i in range(n):
            # once the shorter path reaches its last component it is
            # ordered first, before that component is even compared
            if i + 1 == nl and nl < nr:
                return -1
            elif i + 1 == nr and nl > nr:
                return +1
            elif l[i] < r[i]:
                return -1
            elif l[i] > r[i]:
                return +1
        return 0

    def entitycompare(l, r):
        return pathcompare(l.file, r.file)

    for c in changesets:
        c.entries.sort(entitycompare)

    # Sort changesets by date

    def cscmp(l, r):
        d = sum(l.date) - sum(r.date)
        if d:
            return d

        # detect vendor branches and initial commits on a branch
        # (local names chosen so the 're' module is not shadowed)
        lrevs = {}
        for e in l.entries:
            lrevs[e.rcs] = e.revision
        rrevs = {}
        for e in r.entries:
            rrevs[e.rcs] = e.revision

        d = 0
        for e in l.entries:
            # l holds the successor of a revision in r: l sorts later
            if rrevs.get(e.rcs, None) == e.parent:
                assert not d
                d = 1
                break

        for e in r.entries:
            # r holds the successor of a revision in l: l sorts earlier
            if lrevs.get(e.rcs, None) == e.parent:
                assert not d
                d = -1
                break

        return d

    changesets.sort(cscmp)

    # Collect tags

    globaltags = {}
    for c in changesets:
        for e in c.entries:
            for tag in e.tags:
                # remember which is the latest changeset to have this tag
                globaltags[tag] = c

    for c in changesets:
        tags = {}
        for e in c.entries:
            for tag in e.tags:
                tags[tag] = True
        # remember tags only if this is the latest changeset to have it
        c.tags = util.sort([tag for tag in tags if globaltags[tag] is c])

    # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
    # by inserting dummy changesets with two parents, and handle
    # {{mergefrombranch BRANCHNAME}} by setting two parents.

    if mergeto is None:
        mergeto = r'{{mergetobranch ([-\w]+)}}'
    if mergeto:
        mergeto = re.compile(mergeto)

    if mergefrom is None:
        mergefrom = r'{{mergefrombranch ([-\w]+)}}'
    if mergefrom:
        mergefrom = re.compile(mergefrom)

    versions = {}    # changeset index where we saw any particular file version
    branches = {}    # changeset index where we saw a branch
    n = len(changesets)
    i = 0
    while i<n:
        c = changesets[i]

        for f in c.entries:
            versions[(f.rcs, f.revision)] = i

        p = None
        if c.branch in branches:
            p = branches[c.branch]
        else:
            # take the highest changeset index among the parents of the
            # entries; written out explicitly so that None is never
            # ordered against an integer
            for f in c.entries:
                q = versions.get((f.rcs, f.parent), None)
                if q is not None and (p is None or q > p):
                    p = q

        c.parents = []
        if p is not None:
            c.parents.append(changesets[p])

        if mergefrom:
            m = mergefrom.search(c.comment)
            if m:
                m = m.group(1)
                if m == 'HEAD':
                    m = None
                if m in branches and c.branch != m:
                    c.parents.append(changesets[branches[m]])

        if mergeto:
            m = mergeto.search(c.comment)
            if m:
                try:
                    m = m.group(1)
                    if m == 'HEAD':
                        m = None
                except IndexError:
                    # group(1) raises IndexError when the pattern
                    # defines no group
                    m = None   # if no group found then merge to HEAD
                if m in branches and c.branch != m:
                    # insert empty changeset for merge
                    cc = changeset(author=c.author, branch=m, date=c.date,
                            comment='convert-repo: CVS merge from branch %s' % c.branch,
                            entries=[], tags=[], parents=[changesets[branches[m]], c])
                    changesets.insert(i + 1, cc)
                    branches[m] = i + 1

                    # adjust our loop counters now we have inserted a new entry
                    # NOTE(review): this 'continue' also skips recording
                    # c itself in branches[c.branch]; behaviour kept as
                    # is, confirm that it is intended.
                    n += 1
                    i += 2
                    continue

        branches[c.branch] = i
        i += 1

    # Number changesets

    for i, c in enumerate(changesets):
        c.id = i + 1

    ui.status(_('%d changeset entries\n') % len(changesets))

    return changesets