##// END OF EJS Templates
convert: hg: optionally create branches as clones...
convert: hg: optionally create branches as clones If convert.hg.clonebranches is set, branches will be created as clones of their parent revisions. All clones will be subdirectories of the destination path.

File last commit:

r5145:0751c25e default
r5173:6b4c332f default
Show More
subversion.py
646 lines | 26.2 KiB | text/x-python | PythonLexer
# Subversion 1.4/1.5 Python API backend
#
# Copyright(C) 2007 Daniel Holth et al
#
# Configuration options:
#
# convert.svn.trunk
# Relative path to the trunk (default: "trunk")
# convert.svn.branches
# Relative path to tree of branches (default: "branches")
#
# Set these in a hgrc, or on the command line as follows:
#
# hg convert --config convert.svn.trunk=wackoname [...]
import locale
import os
import sys
import cPickle as pickle
from mercurial import util
# Subversion stuff. Works best with very recent Python SVN bindings
# e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
# these bindings.
from cStringIO import StringIO
from common import NoRepo, commit, converter_source, encodeargs, decodeargs
try:
from svn.core import SubversionException, Pool
import svn
import svn.client
import svn.core
import svn.ra
import svn.delta
import transport
except ImportError:
pass
def geturl(path):
try:
return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
except SubversionException:
pass
if os.path.isdir(path):
return 'file://%s' % os.path.normpath(os.path.abspath(path))
return path
def optrev(number):
optrev = svn.core.svn_opt_revision_t()
optrev.kind = svn.core.svn_opt_revision_number
optrev.value.number = number
return optrev
class changedpath(object):
def __init__(self, p):
self.copyfrom_path = p.copyfrom_path
self.copyfrom_rev = p.copyfrom_rev
self.action = p.action
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
strict_node_history=False):
protocol = -1
def receiver(orig_paths, revnum, author, date, message, pool):
if orig_paths is not None:
for k, v in orig_paths.iteritems():
orig_paths[k] = changedpath(v)
pickle.dump((orig_paths, revnum, author, date, message),
fp, protocol)
try:
# Use an ra of our own so that our parent can consume
# our results without confusing the server.
t = transport.SvnRaTransport(url=url)
svn.ra.get_log(t.ra, paths, start, end, limit,
discover_changed_paths,
strict_node_history,
receiver)
except SubversionException, (inst, num):
pickle.dump(num, fp, protocol)
else:
pickle.dump(None, fp, protocol)
fp.close()
def debugsvnlog(ui, **opts):
"""Fetch SVN log in a subprocess and channel them back to parent to
avoid memory collection issues.
"""
util.set_binary(sys.stdin)
util.set_binary(sys.stdout)
args = decodeargs(sys.stdin.read())
get_log_child(sys.stdout, *args)
# SVN conversion code stolen from bzr-svn and tailor
class convert_svn(converter_source):
def __init__(self, ui, url, rev=None):
super(convert_svn, self).__init__(ui, url, rev=rev)
try:
SubversionException
except NameError:
msg = 'subversion python bindings could not be loaded\n'
ui.warn(msg)
raise NoRepo(msg)
self.encoding = locale.getpreferredencoding()
self.lastrevs = {}
latest = None
try:
# Support file://path@rev syntax. Useful e.g. to convert
# deleted branches.
at = url.rfind('@')
if at >= 0:
latest = int(url[at+1:])
url = url[:at]
except ValueError, e:
pass
self.url = geturl(url)
self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
try:
self.transport = transport.SvnRaTransport(url=self.url)
self.ra = self.transport.ra
self.ctx = self.transport.client
self.base = svn.ra.get_repos_root(self.ra)
self.module = self.url[len(self.base):]
self.modulemap = {} # revision, module
self.commits = {}
self.paths = {}
self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
except SubversionException, e:
raise NoRepo("couldn't open SVN repo %s" % self.url)
if rev:
try:
latest = int(rev)
except ValueError:
raise util.Abort('svn: revision %s is not an integer' % rev)
try:
self.get_blacklist()
except IOError, e:
pass
self.last_changed = self.latest(self.module, latest)
self.head = self.revid(self.last_changed)
def setrevmap(self, revmap):
lastrevs = {}
for revid in revmap.keys():
uuid, module, revnum = self.revsplit(revid)
lastrevnum = lastrevs.setdefault(module, revnum)
if revnum > lastrevnum:
lastrevs[module] = revnum
self.lastrevs = lastrevs
def exists(self, path, optrev):
try:
return svn.client.ls(self.url.rstrip('/') + '/' + path,
optrev, False, self.ctx)
except SubversionException, err:
return []
def getheads(self):
# detect standard /branches, /tags, /trunk layout
rev = optrev(self.last_changed)
rpath = self.url.strip('/')
cfgtrunk = self.ui.config('convert', 'svn.trunk')
cfgbranches = self.ui.config('convert', 'svn.branches')
trunk = (cfgtrunk or 'trunk').strip('/')
branches = (cfgbranches or 'branches').strip('/')
if self.exists(trunk, rev) and self.exists(branches, rev):
self.ui.note('found trunk at %r and branches at %r\n' %
(trunk, branches))
oldmodule = self.module
self.module += '/' + trunk
lt = self.latest(self.module, self.last_changed)
self.head = self.revid(lt)
self.heads = [self.head]
branchnames = svn.client.ls(rpath + '/' + branches, rev, False,
self.ctx)
for branch in branchnames.keys():
if oldmodule:
module = '/' + oldmodule + '/' + branches + '/' + branch
else:
module = '/' + branches + '/' + branch
brevnum = self.latest(module, self.last_changed)
brev = self.revid(brevnum, module)
self.ui.note('found branch %s at %d\n' % (branch, brevnum))
self.heads.append(brev)
elif cfgtrunk or cfgbranches:
raise util.Abort('trunk/branch layout expected, but not found')
else:
self.ui.note('working with one branch\n')
self.heads = [self.head]
return self.heads
def getfile(self, file, rev):
data, mode = self._getfile(file, rev)
self.modecache[(file, rev)] = mode
return data
def getmode(self, file, rev):
return self.modecache[(file, rev)]
def getchanges(self, rev):
self.modecache = {}
(paths, parents) = self.paths[rev]
files, copies = self.expandpaths(rev, paths, parents)
files.sort()
files = zip(files, [rev] * len(files))
# caller caches the result, so free it here to release memory
del self.paths[rev]
return (files, copies)
def getcommit(self, rev):
if rev not in self.commits:
uuid, module, revnum = self.revsplit(rev)
self.module = module
self.reparent(module)
stop = self.lastrevs.get(module, 0)
self._fetch_revisions(from_revnum=revnum, to_revnum=stop)
commit = self.commits[rev]
# caller caches the result, so free it here to release memory
del self.commits[rev]
return commit
def get_log(self, paths, start, end, limit=0, discover_changed_paths=True,
strict_node_history=False):
def parent(fp):
while True:
entry = pickle.load(fp)
try:
orig_paths, revnum, author, date, message = entry
except:
if entry is None:
break
raise SubversionException("child raised exception", entry)
yield entry
args = [self.url, paths, start, end, limit, discover_changed_paths,
strict_node_history]
arg = encodeargs(args)
hgexe = util.hgexecutable()
cmd = '%s debugsvnlog' % util.shellquote(hgexe)
stdin, stdout = os.popen2(cmd, 'b')
stdin.write(arg)
stdin.close()
for p in parent(stdout):
yield p
def gettags(self):
tags = {}
start = self.revnum(self.head)
try:
for entry in self.get_log(['/tags'], 0, start):
orig_paths, revnum, author, date, message = entry
for path in orig_paths:
if not path.startswith('/tags/'):
continue
ent = orig_paths[path]
source = ent.copyfrom_path
rev = ent.copyfrom_rev
tag = path.split('/', 2)[2]
tags[tag] = self.revid(rev, module=source)
except SubversionException, (inst, num):
self.ui.note('no tags found at revision %d\n' % start)
return tags
# -- helper functions --
def revid(self, revnum, module=None):
if not module:
module = self.module
return (u"svn:%s%s@%s" % (self.uuid, module, revnum)).decode(self.encoding)
def revnum(self, rev):
return int(rev.split('@')[-1])
def revsplit(self, rev):
url, revnum = rev.encode(self.encoding).split('@', 1)
revnum = int(revnum)
parts = url.split('/', 1)
uuid = parts.pop(0)[4:]
mod = ''
if parts:
mod = '/' + parts[0]
return uuid, mod, revnum
def latest(self, path, stop=0):
'find the latest revision affecting path, up to stop'
if not stop:
stop = svn.ra.get_latest_revnum(self.ra)
try:
self.reparent('')
dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
self.reparent(self.module)
except SubversionException:
dirent = None
if not dirent:
raise util.Abort('%s not found up to revision %d' % (path, stop))
return dirent.created_rev
def get_blacklist(self):
"""Avoid certain revision numbers.
It is not uncommon for two nearby revisions to cancel each other
out, e.g. 'I copied trunk into a subdirectory of itself instead
of making a branch'. The converted repository is significantly
smaller if we ignore such revisions."""
self.blacklist = set()
blacklist = self.blacklist
for line in file("blacklist.txt", "r"):
if not line.startswith("#"):
try:
svn_rev = int(line.strip())
blacklist.add(svn_rev)
except ValueError, e:
pass # not an integer or a comment
def is_blacklisted(self, svn_rev):
return svn_rev in self.blacklist
def reparent(self, module):
svn_url = self.base + module
self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding))
svn.ra.reparent(self.ra, svn_url.encode(self.encoding))
def expandpaths(self, rev, paths, parents):
def get_entry_from_path(path, module=self.module):
# Given the repository url of this wc, say
# "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
# extract the "entry" portion (a relative path) from what
# svn log --xml says, ie
# "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
# that is to say "tests/PloneTestCase.py"
if path.startswith(module):
relative = path[len(module):]
if relative.startswith('/'):
return relative[1:]
else:
return relative
# The path is outside our tracked tree...
self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
return None
entries = []
copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
copies = {}
revnum = self.revnum(rev)
if revnum in self.modulemap:
new_module = self.modulemap[revnum]
if new_module != self.module:
self.module = new_module
self.reparent(self.module)
for path, ent in paths:
entrypath = get_entry_from_path(path, module=self.module)
entry = entrypath.decode(self.encoding)
kind = svn.ra.check_path(self.ra, entrypath, revnum)
if kind == svn.core.svn_node_file:
if ent.copyfrom_path:
copyfrom_path = get_entry_from_path(ent.copyfrom_path)
if copyfrom_path:
self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
# It's probably important for hg that the source
# exists in the revision's parent, not just the
# ent.copyfrom_rev
fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
if fromkind != 0:
copies[self.recode(entry)] = self.recode(copyfrom_path)
entries.append(self.recode(entry))
elif kind == 0: # gone, but had better be a deleted *file*
self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
# if a branch is created but entries are removed in the same
# changeset, get the right fromrev
if parents:
uuid, old_module, fromrev = self.revsplit(parents[0])
else:
fromrev = revnum - 1
# might always need to be revnum - 1 in these 3 lines?
old_module = self.modulemap.get(fromrev, self.module)
basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
def lookup_parts(p):
rc = None
parts = p.split("/")
for i in range(len(parts)):
part = "/".join(parts[:i])
info = part, copyfrom.get(part, None)
if info[1] is not None:
self.ui.debug("Found parent directory %s\n" % info[1])
rc = info
return rc
self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
# need to remove fragment from lookup_parts and replace with copyfrom_path
if frompath is not None:
self.ui.debug("munge-o-matic\n")
self.ui.debug(entrypath + '\n')
self.ui.debug(entrypath[len(frompath):] + '\n')
entrypath = froment.copyfrom_path + entrypath[len(frompath):]
fromrev = froment.copyfrom_rev
self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
if fromkind == svn.core.svn_node_file: # a deleted file
entries.append(self.recode(entry))
elif fromkind == svn.core.svn_node_dir:
# print "Deleted/moved non-file:", revnum, path, ent
# children = self._find_children(path, revnum - 1)
# print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
# Sometimes this is tricky. For example: in
# The Subversion Repository revision 6940 a dir
# was copied and one of its files was deleted
# from the new location in the same commit. This
# code can't deal with that yet.
if ent.action == 'C':
children = self._find_children(path, fromrev)
else:
oroot = entrypath.strip('/')
nroot = path.strip('/')
children = self._find_children(oroot, fromrev)
children = [s.replace(oroot,nroot) for s in children]
# Mark all [files, not directories] as deleted.
for child in children:
# Can we move a child directory and its
# parent in the same commit? (probably can). Could
# cause problems if instead of revnum -1,
# we have to look in (copyfrom_path, revnum - 1)
entrypath = get_entry_from_path("/" + child, module=old_module)
if entrypath:
entry = self.recode(entrypath.decode(self.encoding))
if entry in copies:
# deleted file within a copy
del copies[entry]
else:
entries.append(entry)
else:
self.ui.debug('unknown path in revision %d: %s\n' % \
(revnum, path))
elif kind == svn.core.svn_node_dir:
# Should probably synthesize normal file entries
# and handle as above to clean up copy/rename handling.
# If the directory just had a prop change,
# then we shouldn't need to look for its children.
# Also this could create duplicate entries. Not sure
# whether this will matter. Maybe should make entries a set.
# print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
# This will fail if a directory was copied
# from another branch and then some of its files
# were deleted in the same transaction.
children = self._find_children(path, revnum)
children.sort()
for child in children:
# Can we move a child directory and its
# parent in the same commit? (probably can). Could
# cause problems if instead of revnum -1,
# we have to look in (copyfrom_path, revnum - 1)
entrypath = get_entry_from_path("/" + child, module=self.module)
# print child, self.module, entrypath
if entrypath:
# Need to filter out directories here...
kind = svn.ra.check_path(self.ra, entrypath, revnum)
if kind != svn.core.svn_node_dir:
entries.append(self.recode(entrypath))
# Copies here (must copy all from source)
# Probably not a real problem for us if
# source does not exist
# Can do this with the copy command "hg copy"
# if ent.copyfrom_path:
# copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
# module=self.module)
# copyto_entry = entrypath
#
# print "copy directory", copyfrom_entry, 'to', copyto_entry
#
# copies.append((copyfrom_entry, copyto_entry))
if ent.copyfrom_path:
copyfrom_path = ent.copyfrom_path.decode(self.encoding)
copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
if copyfrom_entry:
copyfrom[path] = ent
self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
# Good, /probably/ a regular copy. Really should check
# to see whether the parent revision actually contains
# the directory in question.
children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
children.sort()
for child in children:
entrypath = get_entry_from_path("/" + child, module=self.module)
if entrypath:
entry = entrypath.decode(self.encoding)
# print "COPY COPY From", copyfrom_entry, entry
copyto_path = path + entry[len(copyfrom_entry):]
copyto_entry = get_entry_from_path(copyto_path, module=self.module)
# print "COPY", entry, "COPY To", copyto_entry
copies[self.recode(copyto_entry)] = self.recode(entry)
# copy from quux splort/quuxfile
return (entries, copies)
def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
self.child_cset = None
def parselogentry(orig_paths, revnum, author, date, message):
self.ui.debug("parsing revision %d (%d changes)\n" %
(revnum, len(orig_paths)))
if revnum in self.modulemap:
new_module = self.modulemap[revnum]
if new_module != self.module:
self.module = new_module
self.reparent(self.module)
rev = self.revid(revnum)
# branch log might return entries for a parent we already have
if (rev in self.commits or
(revnum < self.lastrevs.get(self.module, 0))):
return
parents = []
orig_paths = orig_paths.items()
orig_paths.sort()
# check whether this revision is the start of a branch
path, ent = orig_paths and orig_paths[0] or (None, None)
if ent and path == self.module:
if ent.copyfrom_path:
# ent.copyfrom_rev may not be the actual last revision
prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
self.modulemap[prev] = ent.copyfrom_path
parents = [self.revid(prev, ent.copyfrom_path)]
self.ui.note('found parent of branch %s at %d: %s\n' % \
(self.module, prev, ent.copyfrom_path))
else:
self.ui.debug("No copyfrom path, don't know what to do.\n")
self.modulemap[revnum] = self.module # track backwards in time
paths = []
# filter out unrelated paths
for path, ent in orig_paths:
if not path.startswith(self.module):
self.ui.debug("boring@%s: %s\n" % (revnum, path))
continue
paths.append((path, ent))
self.paths[rev] = (paths, parents)
# Example SVN datetime. Includes microseconds.
# ISO-8601 conformant
# '2007-01-04T17:35:00.902377Z'
date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
log = message and self.recode(message)
author = author and self.recode(author) or ''
try:
branch = self.module.split("/")[-1]
if branch == 'trunk':
branch = ''
except IndexError:
branch = None
cset = commit(author=author,
date=util.datestr(date),
desc=log,
parents=parents,
branch=branch,
rev=rev.encode('utf-8'))
self.commits[rev] = cset
if self.child_cset and not self.child_cset.parents:
self.child_cset.parents = [rev]
self.child_cset = cset
self.ui.note('fetching revision log for "%s" from %d to %d\n' %
(self.module, from_revnum, to_revnum))
try:
for entry in self.get_log([self.module], from_revnum, to_revnum):
orig_paths, revnum, author, date, message = entry
if self.is_blacklisted(revnum):
self.ui.note('skipping blacklisted revision %d\n' % revnum)
continue
if orig_paths is None:
self.ui.debug('revision %d has no entries\n' % revnum)
continue
parselogentry(orig_paths, revnum, author, date, message)
except SubversionException, (inst, num):
if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
raise NoSuchRevision(branch=self,
revision="Revision number %d" % to_revnum)
raise
def _getfile(self, file, rev):
io = StringIO()
# TODO: ra.get_file transmits the whole file instead of diffs.
mode = ''
try:
revnum = self.revnum(rev)
if self.module != self.modulemap[revnum]:
self.module = self.modulemap[revnum]
self.reparent(self.module)
info = svn.ra.get_file(self.ra, file, revnum, io)
if isinstance(info, list):
info = info[-1]
mode = ("svn:executable" in info) and 'x' or ''
mode = ("svn:special" in info) and 'l' or mode
except SubversionException, e:
notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
if e.apr_err in notfound: # File not found
raise IOError()
raise
data = io.getvalue()
if mode == 'l':
link_prefix = "link "
if data.startswith(link_prefix):
data = data[len(link_prefix):]
return data, mode
def _find_children(self, path, revnum):
path = path.strip('/')
pool = Pool()
rpath = '/'.join([self.base, path]).strip('/')
return ['%s/%s' % (path, x) for x in svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool).keys()]