|
|
#
|
|
|
# Mercurial built-in replacement for cvsps.
|
|
|
#
|
|
|
# Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
|
|
|
#
|
|
|
# This software may be used and distributed according to the terms
|
|
|
# of the GNU General Public License, incorporated herein by reference.
|
|
|
|
|
|
import os
|
|
|
import re
|
|
|
import cPickle as pickle
|
|
|
from mercurial import util
|
|
|
from mercurial.i18n import _
|
|
|
|
|
|
def listsort(list, key):
|
|
|
"helper to sort by key in Python 2.3"
|
|
|
try:
|
|
|
list.sort(key=key)
|
|
|
except TypeError:
|
|
|
list.sort(lambda l, r: cmp(key(l), key(r)))
|
|
|
|
|
|
class logentry(object):
|
|
|
'''Class logentry has the following attributes:
|
|
|
.author - author name as CVS knows it
|
|
|
.branch - name of branch this revision is on
|
|
|
.branches - revision tuple of branches starting at this revision
|
|
|
.comment - commit message
|
|
|
.date - the commit date as a (time, tz) tuple
|
|
|
.dead - true if file revision is dead
|
|
|
.file - Name of file
|
|
|
.lines - a tuple (+lines, -lines) or None
|
|
|
.parent - Previous revision of this entry
|
|
|
.rcs - name of file as returned from CVS
|
|
|
.revision - revision number as tuple
|
|
|
.tags - list of tags on the file
|
|
|
'''
|
|
|
def __init__(self, **entries):
|
|
|
self.__dict__.update(entries)
|
|
|
|
|
|
class logerror(Exception):
|
|
|
pass
|
|
|
|
|
|
def getrepopath(cvspath):
|
|
|
"""Return the repository path from a CVS path.
|
|
|
|
|
|
>>> getrepopath('/foo/bar')
|
|
|
'/foo/bar'
|
|
|
>>> getrepopath('c:/foo/bar')
|
|
|
'c:/foo/bar'
|
|
|
>>> getrepopath(':pserver:10/foo/bar')
|
|
|
'/foo/bar'
|
|
|
>>> getrepopath(':pserver:10c:/foo/bar')
|
|
|
'/foo/bar'
|
|
|
>>> getrepopath(':pserver:/foo/bar')
|
|
|
'/foo/bar'
|
|
|
>>> getrepopath(':pserver:c:/foo/bar')
|
|
|
'c:/foo/bar'
|
|
|
>>> getrepopath(':pserver:truc@foo.bar:/foo/bar')
|
|
|
'/foo/bar'
|
|
|
>>> getrepopath(':pserver:truc@foo.bar:c:/foo/bar')
|
|
|
'c:/foo/bar'
|
|
|
"""
|
|
|
# According to CVS manual, CVS paths are expressed like:
|
|
|
# [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
|
|
|
#
|
|
|
# Unfortunately, Windows absolute paths start with a drive letter
|
|
|
# like 'c:' making it harder to parse. Here we assume that drive
|
|
|
# letters are only one character long and any CVS component before
|
|
|
# the repository path is at least 2 characters long, and use this
|
|
|
# to disambiguate.
|
|
|
parts = cvspath.split(':')
|
|
|
if len(parts) == 1:
|
|
|
return parts[0]
|
|
|
# Here there is an ambiguous case if we have a port number
|
|
|
# immediately followed by a Windows driver letter. We assume this
|
|
|
# never happens and decide it must be CVS path component,
|
|
|
# therefore ignoring it.
|
|
|
if len(parts[-2]) > 1:
|
|
|
return parts[-1].lstrip('0123456789')
|
|
|
return parts[-2] + ':' + parts[-1]
|
|
|
|
|
|
def createlog(ui, directory=None, root="", rlog=True, cache=None):
|
|
|
'''Collect the CVS rlog'''
|
|
|
|
|
|
# Because we store many duplicate commit log messages, reusing strings
|
|
|
# saves a lot of memory and pickle storage space.
|
|
|
_scache = {}
|
|
|
def scache(s):
|
|
|
"return a shared version of a string"
|
|
|
return _scache.setdefault(s, s)
|
|
|
|
|
|
ui.status(_('collecting CVS rlog\n'))
|
|
|
|
|
|
log = [] # list of logentry objects containing the CVS state
|
|
|
|
|
|
# patterns to match in CVS (r)log output, by state of use
|
|
|
re_00 = re.compile('RCS file: (.+)$')
|
|
|
re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
|
|
|
re_02 = re.compile('cvs (r?log|server): (.+)\n$')
|
|
|
re_03 = re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
|
|
|
re_10 = re.compile('Working file: (.+)$')
|
|
|
re_20 = re.compile('symbolic names:')
|
|
|
re_30 = re.compile('\t(.+): ([\\d.]+)$')
|
|
|
re_31 = re.compile('----------------------------$')
|
|
|
re_32 = re.compile('=============================================================================$')
|
|
|
re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
|
|
|
re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
|
|
|
re_70 = re.compile('branches: (.+);$')
|
|
|
|
|
|
prefix = '' # leading path to strip of what we get from CVS
|
|
|
|
|
|
if directory is None:
|
|
|
# Current working directory
|
|
|
|
|
|
# Get the real directory in the repository
|
|
|
try:
|
|
|
prefix = file(os.path.join('CVS','Repository')).read().strip()
|
|
|
if prefix == ".":
|
|
|
prefix = ""
|
|
|
directory = prefix
|
|
|
except IOError:
|
|
|
raise logerror('Not a CVS sandbox')
|
|
|
|
|
|
if prefix and not prefix.endswith(os.sep):
|
|
|
prefix += os.sep
|
|
|
|
|
|
# Use the Root file in the sandbox, if it exists
|
|
|
try:
|
|
|
root = file(os.path.join('CVS','Root')).read().strip()
|
|
|
except IOError:
|
|
|
pass
|
|
|
|
|
|
if not root:
|
|
|
root = os.environ.get('CVSROOT', '')
|
|
|
|
|
|
# read log cache if one exists
|
|
|
oldlog = []
|
|
|
date = None
|
|
|
|
|
|
if cache:
|
|
|
cachedir = os.path.expanduser('~/.hg.cvsps')
|
|
|
if not os.path.exists(cachedir):
|
|
|
os.mkdir(cachedir)
|
|
|
|
|
|
# The cvsps cache pickle needs a uniquified name, based on the
|
|
|
# repository location. The address may have all sort of nasties
|
|
|
# in it, slashes, colons and such. So here we take just the
|
|
|
# alphanumerics, concatenated in a way that does not mix up the
|
|
|
# various components, so that
|
|
|
# :pserver:user@server:/path
|
|
|
# and
|
|
|
# /pserver/user/server/path
|
|
|
# are mapped to different cache file names.
|
|
|
cachefile = root.split(":") + [directory, "cache"]
|
|
|
cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
|
|
|
cachefile = os.path.join(cachedir,
|
|
|
'.'.join([s for s in cachefile if s]))
|
|
|
|
|
|
if cache == 'update':
|
|
|
try:
|
|
|
ui.note(_('reading cvs log cache %s\n') % cachefile)
|
|
|
oldlog = pickle.load(file(cachefile))
|
|
|
ui.note(_('cache has %d log entries\n') % len(oldlog))
|
|
|
except Exception, e:
|
|
|
ui.note(_('error reading cache: %r\n') % e)
|
|
|
|
|
|
if oldlog:
|
|
|
date = oldlog[-1].date # last commit date as a (time,tz) tuple
|
|
|
date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
|
|
|
|
|
|
# build the CVS commandline
|
|
|
cmd = ['cvs', '-q']
|
|
|
if root:
|
|
|
cmd.append('-d%s' % root)
|
|
|
p = util.normpath(getrepopath(root))
|
|
|
if not p.endswith('/'):
|
|
|
p += '/'
|
|
|
prefix = p + util.normpath(prefix)
|
|
|
cmd.append(['log', 'rlog'][rlog])
|
|
|
if date:
|
|
|
# no space between option and date string
|
|
|
cmd.append('-d>%s' % date)
|
|
|
cmd.append(directory)
|
|
|
|
|
|
# state machine begins here
|
|
|
tags = {} # dictionary of revisions on current file with their tags
|
|
|
state = 0
|
|
|
store = False # set when a new record can be appended
|
|
|
|
|
|
cmd = [util.shellquote(arg) for arg in cmd]
|
|
|
ui.note(_("running %s\n") % (' '.join(cmd)))
|
|
|
ui.debug(_("prefix=%r directory=%r root=%r\n") % (prefix, directory, root))
|
|
|
|
|
|
for line in util.popen(' '.join(cmd)):
|
|
|
if line.endswith('\n'):
|
|
|
line = line[:-1]
|
|
|
#ui.debug('state=%d line=%r\n' % (state, line))
|
|
|
|
|
|
if state == 0:
|
|
|
# initial state, consume input until we see 'RCS file'
|
|
|
match = re_00.match(line)
|
|
|
if match:
|
|
|
rcs = match.group(1)
|
|
|
tags = {}
|
|
|
if rlog:
|
|
|
filename = util.normpath(rcs[:-2])
|
|
|
if filename.startswith(prefix):
|
|
|
filename = filename[len(prefix):]
|
|
|
if filename.startswith('/'):
|
|
|
filename = filename[1:]
|
|
|
if filename.startswith('Attic/'):
|
|
|
filename = filename[6:]
|
|
|
else:
|
|
|
filename = filename.replace('/Attic/', '/')
|
|
|
state = 2
|
|
|
continue
|
|
|
state = 1
|
|
|
continue
|
|
|
match = re_01.match(line)
|
|
|
if match:
|
|
|
raise Exception(match.group(1))
|
|
|
match = re_02.match(line)
|
|
|
if match:
|
|
|
raise Exception(match.group(2))
|
|
|
if re_03.match(line):
|
|
|
raise Exception(line)
|
|
|
|
|
|
elif state == 1:
|
|
|
# expect 'Working file' (only when using log instead of rlog)
|
|
|
match = re_10.match(line)
|
|
|
assert match, _('RCS file must be followed by working file')
|
|
|
filename = util.normpath(match.group(1))
|
|
|
state = 2
|
|
|
|
|
|
elif state == 2:
|
|
|
# expect 'symbolic names'
|
|
|
if re_20.match(line):
|
|
|
state = 3
|
|
|
|
|
|
elif state == 3:
|
|
|
# read the symbolic names and store as tags
|
|
|
match = re_30.match(line)
|
|
|
if match:
|
|
|
rev = [int(x) for x in match.group(2).split('.')]
|
|
|
|
|
|
# Convert magic branch number to an odd-numbered one
|
|
|
revn = len(rev)
|
|
|
if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
|
|
|
rev = rev[:-2] + rev[-1:]
|
|
|
rev = tuple(rev)
|
|
|
|
|
|
if rev not in tags:
|
|
|
tags[rev] = []
|
|
|
tags[rev].append(match.group(1))
|
|
|
|
|
|
elif re_31.match(line):
|
|
|
state = 5
|
|
|
elif re_32.match(line):
|
|
|
state = 0
|
|
|
|
|
|
elif state == 4:
|
|
|
# expecting '------' separator before first revision
|
|
|
if re_31.match(line):
|
|
|
state = 5
|
|
|
else:
|
|
|
assert not re_32.match(line), _('Must have at least some revisions')
|
|
|
|
|
|
elif state == 5:
|
|
|
# expecting revision number and possibly (ignored) lock indication
|
|
|
# we create the logentry here from values stored in states 0 to 4,
|
|
|
# as this state is re-entered for subsequent revisions of a file.
|
|
|
match = re_50.match(line)
|
|
|
assert match, _('expected revision number')
|
|
|
e = logentry(rcs=scache(rcs), file=scache(filename),
|
|
|
revision=tuple([int(x) for x in match.group(1).split('.')]),
|
|
|
branches=[], parent=None)
|
|
|
state = 6
|
|
|
|
|
|
elif state == 6:
|
|
|
# expecting date, author, state, lines changed
|
|
|
match = re_60.match(line)
|
|
|
assert match, _('revision must be followed by date line')
|
|
|
d = match.group(1)
|
|
|
if d[2] == '/':
|
|
|
# Y2K
|
|
|
d = '19' + d
|
|
|
|
|
|
if len(d.split()) != 3:
|
|
|
# cvs log dates always in GMT
|
|
|
d = d + ' UTC'
|
|
|
e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'])
|
|
|
e.author = scache(match.group(2))
|
|
|
e.dead = match.group(3).lower() == 'dead'
|
|
|
|
|
|
if match.group(5):
|
|
|
if match.group(6):
|
|
|
e.lines = (int(match.group(5)), int(match.group(6)))
|
|
|
else:
|
|
|
e.lines = (int(match.group(5)), 0)
|
|
|
elif match.group(6):
|
|
|
e.lines = (0, int(match.group(6)))
|
|
|
else:
|
|
|
e.lines = None
|
|
|
e.comment = []
|
|
|
state = 7
|
|
|
|
|
|
elif state == 7:
|
|
|
# read the revision numbers of branches that start at this revision
|
|
|
# or store the commit log message otherwise
|
|
|
m = re_70.match(line)
|
|
|
if m:
|
|
|
e.branches = [tuple([int(y) for y in x.strip().split('.')])
|
|
|
for x in m.group(1).split(';')]
|
|
|
state = 8
|
|
|
elif re_31.match(line):
|
|
|
state = 5
|
|
|
store = True
|
|
|
elif re_32.match(line):
|
|
|
state = 0
|
|
|
store = True
|
|
|
else:
|
|
|
e.comment.append(line)
|
|
|
|
|
|
elif state == 8:
|
|
|
# store commit log message
|
|
|
if re_31.match(line):
|
|
|
state = 5
|
|
|
store = True
|
|
|
elif re_32.match(line):
|
|
|
state = 0
|
|
|
store = True
|
|
|
else:
|
|
|
e.comment.append(line)
|
|
|
|
|
|
if store:
|
|
|
# clean up the results and save in the log.
|
|
|
store = False
|
|
|
e.tags = util.sort([scache(x) for x in tags.get(e.revision, [])])
|
|
|
e.comment = scache('\n'.join(e.comment))
|
|
|
|
|
|
revn = len(e.revision)
|
|
|
if revn > 3 and (revn % 2) == 0:
|
|
|
e.branch = tags.get(e.revision[:-1], [None])[0]
|
|
|
else:
|
|
|
e.branch = None
|
|
|
|
|
|
log.append(e)
|
|
|
|
|
|
if len(log) % 100 == 0:
|
|
|
ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
|
|
|
|
|
|
listsort(log, key=lambda x:(x.rcs, x.revision))
|
|
|
|
|
|
# find parent revisions of individual files
|
|
|
versions = {}
|
|
|
for e in log:
|
|
|
branch = e.revision[:-1]
|
|
|
p = versions.get((e.rcs, branch), None)
|
|
|
if p is None:
|
|
|
p = e.revision[:-2]
|
|
|
e.parent = p
|
|
|
versions[(e.rcs, branch)] = e.revision
|
|
|
|
|
|
# update the log cache
|
|
|
if cache:
|
|
|
if log:
|
|
|
# join up the old and new logs
|
|
|
listsort(log, key=lambda x:x.date)
|
|
|
|
|
|
if oldlog and oldlog[-1].date >= log[0].date:
|
|
|
raise logerror('Log cache overlaps with new log entries,'
|
|
|
' re-run without cache.')
|
|
|
|
|
|
log = oldlog + log
|
|
|
|
|
|
# write the new cachefile
|
|
|
ui.note(_('writing cvs log cache %s\n') % cachefile)
|
|
|
pickle.dump(log, file(cachefile, 'w'))
|
|
|
else:
|
|
|
log = oldlog
|
|
|
|
|
|
ui.status(_('%d log entries\n') % len(log))
|
|
|
|
|
|
return log
|
|
|
|
|
|
|
|
|
class changeset(object):
|
|
|
'''Class changeset has the following attributes:
|
|
|
.author - author name as CVS knows it
|
|
|
.branch - name of branch this changeset is on, or None
|
|
|
.comment - commit message
|
|
|
.date - the commit date as a (time,tz) tuple
|
|
|
.entries - list of logentry objects in this changeset
|
|
|
.parents - list of one or two parent changesets
|
|
|
.tags - list of tags on this changeset
|
|
|
'''
|
|
|
def __init__(self, **entries):
|
|
|
self.__dict__.update(entries)
|
|
|
|
|
|
def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
|
|
|
'''Convert log into changesets.'''
|
|
|
|
|
|
ui.status(_('creating changesets\n'))
|
|
|
|
|
|
# Merge changesets
|
|
|
|
|
|
listsort(log, key=lambda x:(x.comment, x.author, x.branch, x.date))
|
|
|
|
|
|
changesets = []
|
|
|
files = {}
|
|
|
c = None
|
|
|
for i, e in enumerate(log):
|
|
|
|
|
|
# Check if log entry belongs to the current changeset or not.
|
|
|
if not (c and
|
|
|
e.comment == c.comment and
|
|
|
e.author == c.author and
|
|
|
e.branch == c.branch and
|
|
|
((c.date[0] + c.date[1]) <=
|
|
|
(e.date[0] + e.date[1]) <=
|
|
|
(c.date[0] + c.date[1]) + fuzz) and
|
|
|
e.file not in files):
|
|
|
c = changeset(comment=e.comment, author=e.author,
|
|
|
branch=e.branch, date=e.date, entries=[])
|
|
|
changesets.append(c)
|
|
|
files = {}
|
|
|
if len(changesets) % 100 == 0:
|
|
|
t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
|
|
|
ui.status(util.ellipsis(t, 80) + '\n')
|
|
|
|
|
|
c.entries.append(e)
|
|
|
files[e.file] = True
|
|
|
c.date = e.date # changeset date is date of latest commit in it
|
|
|
|
|
|
# Sort files in each changeset
|
|
|
|
|
|
for c in changesets:
|
|
|
def pathcompare(l, r):
|
|
|
'Mimic cvsps sorting order'
|
|
|
l = l.split('/')
|
|
|
r = r.split('/')
|
|
|
nl = len(l)
|
|
|
nr = len(r)
|
|
|
n = min(nl, nr)
|
|
|
for i in range(n):
|
|
|
if i + 1 == nl and nl < nr:
|
|
|
return -1
|
|
|
elif i + 1 == nr and nl > nr:
|
|
|
return +1
|
|
|
elif l[i] < r[i]:
|
|
|
return -1
|
|
|
elif l[i] > r[i]:
|
|
|
return +1
|
|
|
return 0
|
|
|
def entitycompare(l, r):
|
|
|
return pathcompare(l.file, r.file)
|
|
|
|
|
|
c.entries.sort(entitycompare)
|
|
|
|
|
|
# Sort changesets by date
|
|
|
|
|
|
def cscmp(l, r):
|
|
|
d = sum(l.date) - sum(r.date)
|
|
|
if d:
|
|
|
return d
|
|
|
|
|
|
# detect vendor branches and initial commits on a branch
|
|
|
le = {}
|
|
|
for e in l.entries:
|
|
|
le[e.rcs] = e.revision
|
|
|
re = {}
|
|
|
for e in r.entries:
|
|
|
re[e.rcs] = e.revision
|
|
|
|
|
|
d = 0
|
|
|
for e in l.entries:
|
|
|
if re.get(e.rcs, None) == e.parent:
|
|
|
assert not d
|
|
|
d = 1
|
|
|
break
|
|
|
|
|
|
for e in r.entries:
|
|
|
if le.get(e.rcs, None) == e.parent:
|
|
|
assert not d
|
|
|
d = -1
|
|
|
break
|
|
|
|
|
|
return d
|
|
|
|
|
|
changesets.sort(cscmp)
|
|
|
|
|
|
# Collect tags
|
|
|
|
|
|
globaltags = {}
|
|
|
for c in changesets:
|
|
|
tags = {}
|
|
|
for e in c.entries:
|
|
|
for tag in e.tags:
|
|
|
# remember which is the latest changeset to have this tag
|
|
|
globaltags[tag] = c
|
|
|
|
|
|
for c in changesets:
|
|
|
tags = {}
|
|
|
for e in c.entries:
|
|
|
for tag in e.tags:
|
|
|
tags[tag] = True
|
|
|
# remember tags only if this is the latest changeset to have it
|
|
|
c.tags = util.sort([tag for tag in tags if globaltags[tag] is c])
|
|
|
|
|
|
# Find parent changesets, handle {{mergetobranch BRANCHNAME}}
|
|
|
# by inserting dummy changesets with two parents, and handle
|
|
|
# {{mergefrombranch BRANCHNAME}} by setting two parents.
|
|
|
|
|
|
if mergeto is None:
|
|
|
mergeto = r'{{mergetobranch ([-\w]+)}}'
|
|
|
if mergeto:
|
|
|
mergeto = re.compile(mergeto)
|
|
|
|
|
|
if mergefrom is None:
|
|
|
mergefrom = r'{{mergefrombranch ([-\w]+)}}'
|
|
|
if mergefrom:
|
|
|
mergefrom = re.compile(mergefrom)
|
|
|
|
|
|
versions = {} # changeset index where we saw any particular file version
|
|
|
branches = {} # changeset index where we saw a branch
|
|
|
n = len(changesets)
|
|
|
i = 0
|
|
|
while i<n:
|
|
|
c = changesets[i]
|
|
|
|
|
|
for f in c.entries:
|
|
|
versions[(f.rcs, f.revision)] = i
|
|
|
|
|
|
p = None
|
|
|
if c.branch in branches:
|
|
|
p = branches[c.branch]
|
|
|
else:
|
|
|
for f in c.entries:
|
|
|
p = max(p, versions.get((f.rcs, f.parent), None))
|
|
|
|
|
|
c.parents = []
|
|
|
if p is not None:
|
|
|
c.parents.append(changesets[p])
|
|
|
|
|
|
if mergefrom:
|
|
|
m = mergefrom.search(c.comment)
|
|
|
if m:
|
|
|
m = m.group(1)
|
|
|
if m == 'HEAD':
|
|
|
m = None
|
|
|
if m in branches and c.branch != m:
|
|
|
c.parents.append(changesets[branches[m]])
|
|
|
|
|
|
if mergeto:
|
|
|
m = mergeto.search(c.comment)
|
|
|
if m:
|
|
|
try:
|
|
|
m = m.group(1)
|
|
|
if m == 'HEAD':
|
|
|
m = None
|
|
|
except:
|
|
|
m = None # if no group found then merge to HEAD
|
|
|
if m in branches and c.branch != m:
|
|
|
# insert empty changeset for merge
|
|
|
cc = changeset(author=c.author, branch=m, date=c.date,
|
|
|
comment='convert-repo: CVS merge from branch %s' % c.branch,
|
|
|
entries=[], tags=[], parents=[changesets[branches[m]], c])
|
|
|
changesets.insert(i + 1, cc)
|
|
|
branches[m] = i + 1
|
|
|
|
|
|
# adjust our loop counters now we have inserted a new entry
|
|
|
n += 1
|
|
|
i += 2
|
|
|
continue
|
|
|
|
|
|
branches[c.branch] = i
|
|
|
i += 1
|
|
|
|
|
|
# Number changesets
|
|
|
|
|
|
for i, c in enumerate(changesets):
|
|
|
c.id = i + 1
|
|
|
|
|
|
ui.status(_('%d changeset entries\n') % len(changesets))
|
|
|
|
|
|
return changesets
|
|
|
|