##// END OF EJS Templates
convert: parse sort mode sooner
convert: parse sort mode sooner

File last commit:

r8660:b9ec04eb default
r8689:9bc95f8e default
Show More
subversion.py
1239 lines | 48.0 KiB | text/x-python | PythonLexer
# Subversion 1.4/1.5 Python API backend
#
# Copyright(C) 2007 Daniel Holth et al
#
# Configuration options:
#
# convert.svn.trunk
# Relative path to the trunk (default: "trunk")
# convert.svn.branches
# Relative path to tree of branches (default: "branches")
# convert.svn.tags
# Relative path to tree of tags (default: "tags")
#
# Set these in a hgrc, or on the command line as follows:
#
# hg convert --config convert.svn.trunk=wackoname [...]
import locale
import os
import re
import sys
import cPickle as pickle
import tempfile
import urllib
from mercurial import strutil, util
from mercurial.i18n import _
# Subversion stuff. Works best with very recent Python SVN bindings
# e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
# these bindings.
from cStringIO import StringIO
from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
from common import commandline, converter_source, converter_sink, mapfile
try:
from svn.core import SubversionException, Pool
import svn
import svn.client
import svn.core
import svn.ra
import svn.delta
import transport
import warnings
warnings.filterwarnings('ignore',
module='svn.core',
category=DeprecationWarning)
except ImportError:
pass
class SvnPathNotFound(Exception):
pass
def geturl(path):
try:
return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
except SubversionException:
pass
if os.path.isdir(path):
path = os.path.normpath(os.path.abspath(path))
if os.name == 'nt':
path = '/' + util.normpath(path)
return 'file://%s' % urllib.quote(path)
return path
def optrev(number):
optrev = svn.core.svn_opt_revision_t()
optrev.kind = svn.core.svn_opt_revision_number
optrev.value.number = number
return optrev
class changedpath(object):
def __init__(self, p):
self.copyfrom_path = p.copyfrom_path
self.copyfrom_rev = p.copyfrom_rev
self.action = p.action
def get_log_child(fp, url, paths, start, end, limit=0, discover_changed_paths=True,
strict_node_history=False):
protocol = -1
def receiver(orig_paths, revnum, author, date, message, pool):
if orig_paths is not None:
for k, v in orig_paths.iteritems():
orig_paths[k] = changedpath(v)
pickle.dump((orig_paths, revnum, author, date, message),
fp, protocol)
try:
# Use an ra of our own so that our parent can consume
# our results without confusing the server.
t = transport.SvnRaTransport(url=url)
svn.ra.get_log(t.ra, paths, start, end, limit,
discover_changed_paths,
strict_node_history,
receiver)
except SubversionException, (inst, num):
pickle.dump(num, fp, protocol)
except IOError:
# Caller may interrupt the iteration
pickle.dump(None, fp, protocol)
else:
pickle.dump(None, fp, protocol)
fp.close()
# With large history, cleanup process goes crazy and suddenly
# consumes *huge* amount of memory. The output file being closed,
# there is no need for clean termination.
os._exit(0)
def debugsvnlog(ui, **opts):
"""Fetch SVN log in a subprocess and channel them back to parent to
avoid memory collection issues.
"""
util.set_binary(sys.stdin)
util.set_binary(sys.stdout)
args = decodeargs(sys.stdin.read())
get_log_child(sys.stdout, *args)
class logstream:
"""Interruptible revision log iterator."""
def __init__(self, stdout):
self._stdout = stdout
def __iter__(self):
while True:
entry = pickle.load(self._stdout)
try:
orig_paths, revnum, author, date, message = entry
except:
if entry is None:
break
raise SubversionException("child raised exception", entry)
yield entry
def close(self):
if self._stdout:
self._stdout.close()
self._stdout = None
# Check to see if the given path is a local Subversion repo. Verify this by
# looking for several svn-specific files and directories in the given
# directory.
def filecheck(path, proto):
for x in ('locks', 'hooks', 'format', 'db', ):
if not os.path.exists(os.path.join(path, x)):
return False
return True
# Check to see if a given path is the root of an svn repo over http. We verify
# this by requesting a version-controlled URL we know can't exist and looking
# for the svn-specific "not found" XML.
def httpcheck(path, proto):
return ('<m:human-readable errcode="160013">' in
urllib.urlopen('%s://%s/!svn/ver/0/.svn' % (proto, path)).read())
protomap = {'http': httpcheck,
'https': httpcheck,
'file': filecheck,
}
def issvnurl(url):
if not '://' in url:
return False
proto, path = url.split('://', 1)
path = urllib.url2pathname(path).replace(os.sep, '/')
check = protomap.get(proto, lambda p, p2: False)
while '/' in path:
if check(path, proto):
return True
path = path.rsplit('/', 1)[0]
return False
# SVN conversion code stolen from bzr-svn and tailor
#
# Subversion looks like a versioned filesystem, branches structures
# are defined by conventions and not enforced by the tool. First,
# we define the potential branches (modules) as "trunk" and "branches"
# children directories. Revisions are then identified by their
# module and revision number (and a repository identifier).
#
# The revision graph is really a tree (or a forest). By default, a
# revision parent is the previous revision in the same module. If the
# module directory is copied/moved from another module then the
# revision is the module root and its parent the source revision in
# the parent module. A revision has at most one parent.
#
class svn_source(converter_source):
def __init__(self, ui, url, rev=None):
super(svn_source, self).__init__(ui, url, rev=rev)
if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
(os.path.exists(url) and
os.path.exists(os.path.join(url, '.svn'))) or
issvnurl(url)):
raise NoRepo("%s does not look like a Subversion repo" % url)
try:
SubversionException
except NameError:
raise MissingTool(_('Subversion python bindings could not be loaded'))
try:
version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
if version < (1, 4):
raise MissingTool(_('Subversion python bindings %d.%d found, '
'1.4 or later required') % version)
except AttributeError:
raise MissingTool(_('Subversion python bindings are too old, 1.4 '
'or later required'))
self.encoding = locale.getpreferredencoding()
self.lastrevs = {}
latest = None
try:
# Support file://path@rev syntax. Useful e.g. to convert
# deleted branches.
at = url.rfind('@')
if at >= 0:
latest = int(url[at+1:])
url = url[:at]
except ValueError:
pass
self.url = geturl(url)
self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
try:
self.transport = transport.SvnRaTransport(url=self.url)
self.ra = self.transport.ra
self.ctx = self.transport.client
self.baseurl = svn.ra.get_repos_root(self.ra)
# Module is either empty or a repository path starting with
# a slash and not ending with a slash.
self.module = urllib.unquote(self.url[len(self.baseurl):])
self.prevmodule = None
self.rootmodule = self.module
self.commits = {}
self.paths = {}
self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
except SubversionException:
ui.traceback()
raise NoRepo("%s does not look like a Subversion repo" % self.url)
if rev:
try:
latest = int(rev)
except ValueError:
raise util.Abort(_('svn: revision %s is not an integer') % rev)
self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
try:
self.startrev = int(self.startrev)
if self.startrev < 0:
self.startrev = 0
except ValueError:
raise util.Abort(_('svn: start revision %s is not an integer')
% self.startrev)
try:
self.get_blacklist()
except IOError:
pass
self.head = self.latest(self.module, latest)
if not self.head:
raise util.Abort(_('no revision found in module %s') %
self.module.encode(self.encoding))
self.last_changed = self.revnum(self.head)
self._changescache = None
if os.path.exists(os.path.join(url, '.svn/entries')):
self.wc = url
else:
self.wc = None
self.convertfp = None
def setrevmap(self, revmap):
lastrevs = {}
for revid in revmap.iterkeys():
uuid, module, revnum = self.revsplit(revid)
lastrevnum = lastrevs.setdefault(module, revnum)
if revnum > lastrevnum:
lastrevs[module] = revnum
self.lastrevs = lastrevs
def exists(self, path, optrev):
try:
svn.client.ls(self.url.rstrip('/') + '/' + urllib.quote(path),
optrev, False, self.ctx)
return True
except SubversionException:
return False
def getheads(self):
def isdir(path, revnum):
kind = self._checkpath(path, revnum)
return kind == svn.core.svn_node_dir
def getcfgpath(name, rev):
cfgpath = self.ui.config('convert', 'svn.' + name)
if cfgpath is not None and cfgpath.strip() == '':
return None
path = (cfgpath or name).strip('/')
if not self.exists(path, rev):
if cfgpath:
raise util.Abort(_('expected %s to be at %r, but not found')
% (name, path))
return None
self.ui.note(_('found %s at %r\n') % (name, path))
return path
rev = optrev(self.last_changed)
oldmodule = ''
trunk = getcfgpath('trunk', rev)
self.tags = getcfgpath('tags', rev)
branches = getcfgpath('branches', rev)
# If the project has a trunk or branches, we will extract heads
# from them. We keep the project root otherwise.
if trunk:
oldmodule = self.module or ''
self.module += '/' + trunk
self.head = self.latest(self.module, self.last_changed)
if not self.head:
raise util.Abort(_('no revision found in module %s') %
self.module.encode(self.encoding))
# First head in the list is the module's head
self.heads = [self.head]
if self.tags is not None:
self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))
# Check if branches bring a few more heads to the list
if branches:
rpath = self.url.strip('/')
branchnames = svn.client.ls(rpath + '/' + urllib.quote(branches),
rev, False, self.ctx)
for branch in branchnames.keys():
module = '%s/%s/%s' % (oldmodule, branches, branch)
if not isdir(module, self.last_changed):
continue
brevid = self.latest(module, self.last_changed)
if not brevid:
self.ui.note(_('ignoring empty branch %s\n') %
branch.encode(self.encoding))
continue
self.ui.note(_('found branch %s at %d\n') %
(branch, self.revnum(brevid)))
self.heads.append(brevid)
if self.startrev and self.heads:
if len(self.heads) > 1:
raise util.Abort(_('svn: start revision is not supported '
'with more than one branch'))
revnum = self.revnum(self.heads[0])
if revnum < self.startrev:
raise util.Abort(_('svn: no revision found after start revision %d')
% self.startrev)
return self.heads
def getfile(self, file, rev):
data, mode = self._getfile(file, rev)
self.modecache[(file, rev)] = mode
return data
def getmode(self, file, rev):
return self.modecache[(file, rev)]
def getchanges(self, rev):
if self._changescache and self._changescache[0] == rev:
return self._changescache[1]
self._changescache = None
self.modecache = {}
(paths, parents) = self.paths[rev]
if parents:
files, copies = self.expandpaths(rev, paths, parents)
else:
# Perform a full checkout on roots
uuid, module, revnum = self.revsplit(rev)
entries = svn.client.ls(self.baseurl + urllib.quote(module),
optrev(revnum), True, self.ctx)
files = [n for n,e in entries.iteritems()
if e.kind == svn.core.svn_node_file]
copies = {}
files.sort()
files = zip(files, [rev] * len(files))
# caller caches the result, so free it here to release memory
del self.paths[rev]
return (files, copies)
def getchangedfiles(self, rev, i):
changes = self.getchanges(rev)
self._changescache = (rev, changes)
return [f[0] for f in changes[0]]
def getcommit(self, rev):
if rev not in self.commits:
uuid, module, revnum = self.revsplit(rev)
self.module = module
self.reparent(module)
# We assume that:
# - requests for revisions after "stop" come from the
# revision graph backward traversal. Cache all of them
# down to stop, they will be used eventually.
# - requests for revisions before "stop" come to get
# isolated branches parents. Just fetch what is needed.
stop = self.lastrevs.get(module, 0)
if revnum < stop:
stop = revnum + 1
self._fetch_revisions(revnum, stop)
commit = self.commits[rev]
# caller caches the result, so free it here to release memory
del self.commits[rev]
return commit
def gettags(self):
tags = {}
if self.tags is None:
return tags
# svn tags are just a convention, project branches left in a
# 'tags' directory. There is no other relationship than
# ancestry, which is expensive to discover and makes them hard
# to update incrementally. Worse, past revisions may be
# referenced by tags far away in the future, requiring a deep
# history traversal on every calculation. Current code
# performs a single backward traversal, tracking moves within
# the tags directory (tag renaming) and recording a new tag
# everytime a project is copied from outside the tags
# directory. It also lists deleted tags, this behaviour may
# change in the future.
pendings = []
tagspath = self.tags
start = svn.ra.get_latest_revnum(self.ra)
try:
for entry in self._getlog([self.tags], start, self.startrev):
origpaths, revnum, author, date, message = entry
copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
in origpaths.iteritems() if e.copyfrom_path]
# Apply moves/copies from more specific to general
copies.sort(reverse=True)
srctagspath = tagspath
if copies and copies[-1][2] == tagspath:
# Track tags directory moves
srctagspath = copies.pop()[0]
for source, sourcerev, dest in copies:
if not dest.startswith(tagspath + '/'):
continue
for tag in pendings:
if tag[0].startswith(dest):
tagpath = source + tag[0][len(dest):]
tag[:2] = [tagpath, sourcerev]
break
else:
pendings.append([source, sourcerev, dest])
# Filter out tags with children coming from different
# parts of the repository like:
# /tags/tag.1 (from /trunk:10)
# /tags/tag.1/foo (from /branches/foo:12)
# Here/tags/tag.1 discarded as well as its children.
# It happens with tools like cvs2svn. Such tags cannot
# be represented in mercurial.
addeds = dict((p, e.copyfrom_path) for p,e
in origpaths.iteritems() if e.action == 'A')
badroots = set()
for destroot in addeds:
for source, sourcerev, dest in pendings:
if (not dest.startswith(destroot + '/')
or source.startswith(addeds[destroot] + '/')):
continue
badroots.add(destroot)
break
for badroot in badroots:
pendings = [p for p in pendings if p[2] != badroot
and not p[2].startswith(badroot + '/')]
# Tell tag renamings from tag creations
remainings = []
for source, sourcerev, dest in pendings:
tagname = dest.split('/')[-1]
if source.startswith(srctagspath):
remainings.append([source, sourcerev, tagname])
continue
if tagname in tags:
# Keep the latest tag value
continue
# From revision may be fake, get one with changes
try:
tagid = self.latest(source, sourcerev)
if tagid and tagname not in tags:
tags[tagname] = tagid
except SvnPathNotFound:
# It happens when we are following directories
# we assumed were copied with their parents
# but were really created in the tag
# directory.
pass
pendings = remainings
tagspath = srctagspath
except SubversionException:
self.ui.note(_('no tags found at revision %d\n') % start)
return tags
def converted(self, rev, destrev):
if not self.wc:
return
if self.convertfp is None:
self.convertfp = open(os.path.join(self.wc, '.svn', 'hg-shamap'),
'a')
self.convertfp.write('%s %d\n' % (destrev, self.revnum(rev)))
self.convertfp.flush()
# -- helper functions --
def revid(self, revnum, module=None):
if not module:
module = self.module
return u"svn:%s%s@%s" % (self.uuid, module.decode(self.encoding),
revnum)
def revnum(self, rev):
return int(rev.split('@')[-1])
def revsplit(self, rev):
url, revnum = rev.encode(self.encoding).rsplit('@', 1)
revnum = int(revnum)
parts = url.split('/', 1)
uuid = parts.pop(0)[4:]
mod = ''
if parts:
mod = '/' + parts[0]
return uuid, mod, revnum
def latest(self, path, stop=0):
"""Find the latest revid affecting path, up to stop. It may return
a revision in a different module, since a branch may be moved without
a change being reported. Return None if computed module does not
belong to rootmodule subtree.
"""
if not path.startswith(self.rootmodule):
# Requests on foreign branches may be forbidden at server level
self.ui.debug(_('ignoring foreign branch %r\n') % path)
return None
if not stop:
stop = svn.ra.get_latest_revnum(self.ra)
try:
prevmodule = self.reparent('')
dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
self.reparent(prevmodule)
except SubversionException:
dirent = None
if not dirent:
raise SvnPathNotFound(_('%s not found up to revision %d') % (path, stop))
# stat() gives us the previous revision on this line of
# development, but it might be in *another module*. Fetch the
# log and detect renames down to the latest revision.
stream = self._getlog([path], stop, dirent.created_rev)
try:
for entry in stream:
paths, revnum, author, date, message = entry
if revnum <= dirent.created_rev:
break
for p in paths:
if not path.startswith(p) or not paths[p].copyfrom_path:
continue
newpath = paths[p].copyfrom_path + path[len(p):]
self.ui.debug(_("branch renamed from %s to %s at %d\n") %
(path, newpath, revnum))
path = newpath
break
finally:
stream.close()
if not path.startswith(self.rootmodule):
self.ui.debug(_('ignoring foreign branch %r\n') % path)
return None
return self.revid(dirent.created_rev, path)
def get_blacklist(self):
"""Avoid certain revision numbers.
It is not uncommon for two nearby revisions to cancel each other
out, e.g. 'I copied trunk into a subdirectory of itself instead
of making a branch'. The converted repository is significantly
smaller if we ignore such revisions."""
self.blacklist = set()
blacklist = self.blacklist
for line in file("blacklist.txt", "r"):
if not line.startswith("#"):
try:
svn_rev = int(line.strip())
blacklist.add(svn_rev)
except ValueError:
pass # not an integer or a comment
def is_blacklisted(self, svn_rev):
return svn_rev in self.blacklist
def reparent(self, module):
"""Reparent the svn transport and return the previous parent."""
if self.prevmodule == module:
return module
svnurl = self.baseurl + urllib.quote(module)
prevmodule = self.prevmodule
if prevmodule is None:
prevmodule = ''
self.ui.debug(_("reparent to %s\n") % svnurl)
svn.ra.reparent(self.ra, svnurl)
self.prevmodule = module
return prevmodule
def expandpaths(self, rev, paths, parents):
entries = []
# Map of entrypath, revision for finding source of deleted
# revisions.
copyfrom = {}
copies = {}
new_module, revnum = self.revsplit(rev)[1:]
if new_module != self.module:
self.module = new_module
self.reparent(self.module)
for path, ent in paths:
entrypath = self.getrelpath(path)
entry = entrypath.decode(self.encoding)
kind = self._checkpath(entrypath, revnum)
if kind == svn.core.svn_node_file:
entries.append(self.recode(entry))
if not ent.copyfrom_path or not parents:
continue
# Copy sources not in parent revisions cannot be
# represented, ignore their origin for now
pmodule, prevnum = self.revsplit(parents[0])[1:]
if ent.copyfrom_rev < prevnum:
continue
copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
if not copyfrom_path:
continue
self.ui.debug(_("copied to %s from %s@%s\n") %
(entrypath, copyfrom_path, ent.copyfrom_rev))
copies[self.recode(entry)] = self.recode(copyfrom_path)
elif kind == 0: # gone, but had better be a deleted *file*
self.ui.debug(_("gone from %s\n") % ent.copyfrom_rev)
# if a branch is created but entries are removed in
# the same changeset, get the right fromrev
# parents cannot be empty here, you cannot remove
# things from a root revision.
uuid, old_module, fromrev = self.revsplit(parents[0])
basepath = old_module + "/" + self.getrelpath(path)
entrypath = basepath
def lookup_parts(p):
rc = None
parts = p.split("/")
for i in range(len(parts)):
part = "/".join(parts[:i])
info = part, copyfrom.get(part, None)
if info[1] is not None:
self.ui.debug(_("found parent directory %s\n") % info[1])
rc = info
return rc
self.ui.debug(_("base, entry %s %s\n") % (basepath, entrypath))
frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
# need to remove fragment from lookup_parts and
# replace with copyfrom_path
if frompath is not None:
self.ui.debug(_("munge-o-matic\n"))
self.ui.debug(entrypath + '\n')
self.ui.debug(entrypath[len(frompath):] + '\n')
entrypath = froment.copyfrom_path + entrypath[len(frompath):]
fromrev = froment.copyfrom_rev
self.ui.debug(_("info: %s %s %s %s\n") % (frompath, froment, ent, entrypath))
# We can avoid the reparent calls if the module has
# not changed but it probably does not worth the pain.
prevmodule = self.reparent('')
fromkind = svn.ra.check_path(self.ra, entrypath.strip('/'), fromrev)
self.reparent(prevmodule)
if fromkind == svn.core.svn_node_file: # a deleted file
entries.append(self.recode(entry))
elif fromkind == svn.core.svn_node_dir:
# print "Deleted/moved non-file:", revnum, path, ent
# children = self._find_children(path, revnum - 1)
# print ("find children %s@%d from %d action %s" %
# (path, revnum, ent.copyfrom_rev, ent.action))
# Sometimes this is tricky. For example: in
# The Subversion Repository revision 6940 a dir
# was copied and one of its files was deleted
# from the new location in the same commit. This
# code can't deal with that yet.
if ent.action == 'C':
children = self._find_children(path, fromrev)
else:
oroot = entrypath.strip('/')
nroot = path.strip('/')
children = self._find_children(oroot, fromrev)
children = [s.replace(oroot,nroot) for s in children]
# Mark all [files, not directories] as deleted.
for child in children:
# Can we move a child directory and its
# parent in the same commit? (probably can). Could
# cause problems if instead of revnum -1,
# we have to look in (copyfrom_path, revnum - 1)
entrypath = self.getrelpath("/" + child, module=old_module)
if entrypath:
entry = self.recode(entrypath.decode(self.encoding))
if entry in copies:
# deleted file within a copy
del copies[entry]
else:
entries.append(entry)
else:
self.ui.debug(_('unknown path in revision %d: %s\n') % \
(revnum, path))
elif kind == svn.core.svn_node_dir:
# Should probably synthesize normal file entries
# and handle as above to clean up copy/rename handling.
# If the directory just had a prop change,
# then we shouldn't need to look for its children.
if ent.action == 'M':
continue
# Also this could create duplicate entries. Not sure
# whether this will matter. Maybe should make entries a set.
# print "Changed directory", revnum, path, ent.action, \
# ent.copyfrom_path, ent.copyfrom_rev
# This will fail if a directory was copied
# from another branch and then some of its files
# were deleted in the same transaction.
children = sorted(self._find_children(path, revnum))
for child in children:
# Can we move a child directory and its
# parent in the same commit? (probably can). Could
# cause problems if instead of revnum -1,
# we have to look in (copyfrom_path, revnum - 1)
entrypath = self.getrelpath("/" + child)
# print child, self.module, entrypath
if entrypath:
# Need to filter out directories here...
kind = self._checkpath(entrypath, revnum)
if kind != svn.core.svn_node_dir:
entries.append(self.recode(entrypath))
# Copies here (must copy all from source) Probably not
# a real problem for us if source does not exist
if not ent.copyfrom_path or not parents:
continue
# Copy sources not in parent revisions cannot be
# represented, ignore their origin for now
pmodule, prevnum = self.revsplit(parents[0])[1:]
if ent.copyfrom_rev < prevnum:
continue
copyfrompath = ent.copyfrom_path.decode(self.encoding)
copyfrompath = self.getrelpath(copyfrompath, pmodule)
if not copyfrompath:
continue
copyfrom[path] = ent
self.ui.debug(_("mark %s came from %s:%d\n")
% (path, copyfrompath, ent.copyfrom_rev))
children = self._find_children(ent.copyfrom_path, ent.copyfrom_rev)
children.sort()
for child in children:
entrypath = self.getrelpath("/" + child, pmodule)
if not entrypath:
continue
entry = entrypath.decode(self.encoding)
copytopath = path + entry[len(copyfrompath):]
copytopath = self.getrelpath(copytopath)
copies[self.recode(copytopath)] = self.recode(entry, pmodule)
return (list(set(entries)), copies)
def _fetch_revisions(self, from_revnum, to_revnum):
if from_revnum < to_revnum:
from_revnum, to_revnum = to_revnum, from_revnum
self.child_cset = None
def parselogentry(orig_paths, revnum, author, date, message):
"""Return the parsed commit object or None, and True if
the revision is a branch root.
"""
self.ui.debug(_("parsing revision %d (%d changes)\n") %
(revnum, len(orig_paths)))
branched = False
rev = self.revid(revnum)
# branch log might return entries for a parent we already have
if rev in self.commits or revnum < to_revnum:
return None, branched
parents = []
# check whether this revision is the start of a branch or part
# of a branch renaming
orig_paths = sorted(orig_paths.iteritems())
root_paths = [(p,e) for p,e in orig_paths if self.module.startswith(p)]
if root_paths:
path, ent = root_paths[-1]
if ent.copyfrom_path:
branched = True
newpath = ent.copyfrom_path + self.module[len(path):]
# ent.copyfrom_rev may not be the actual last revision
previd = self.latest(newpath, ent.copyfrom_rev)
if previd is not None:
prevmodule, prevnum = self.revsplit(previd)[1:]
if prevnum >= self.startrev:
parents = [previd]
self.ui.note(_('found parent of branch %s at %d: %s\n') %
(self.module, prevnum, prevmodule))
else:
self.ui.debug(_("no copyfrom path, don't know what to do.\n"))
paths = []
# filter out unrelated paths
for path, ent in orig_paths:
if self.getrelpath(path) is None:
continue
paths.append((path, ent))
# Example SVN datetime. Includes microseconds.
# ISO-8601 conformant
# '2007-01-04T17:35:00.902377Z'
date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
log = message and self.recode(message) or ''
author = author and self.recode(author) or ''
try:
branch = self.module.split("/")[-1]
if branch == 'trunk':
branch = ''
except IndexError:
branch = None
cset = commit(author=author,
date=util.datestr(date),
desc=log,
parents=parents,
branch=branch,
rev=rev.encode('utf-8'))
self.commits[rev] = cset
# The parents list is *shared* among self.paths and the
# commit object. Both will be updated below.
self.paths[rev] = (paths, cset.parents)
if self.child_cset and not self.child_cset.parents:
self.child_cset.parents[:] = [rev]
self.child_cset = cset
return cset, branched
self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
(self.module, from_revnum, to_revnum))
try:
firstcset = None
lastonbranch = False
stream = self._getlog([self.module], from_revnum, to_revnum)
try:
for entry in stream:
paths, revnum, author, date, message = entry
if revnum < self.startrev:
lastonbranch = True
break
if self.is_blacklisted(revnum):
self.ui.note(_('skipping blacklisted revision %d\n')
% revnum)
continue
if not paths:
self.ui.debug(_('revision %d has no entries\n') % revnum)
continue
cset, lastonbranch = parselogentry(paths, revnum, author,
date, message)
if cset:
firstcset = cset
if lastonbranch:
break
finally:
stream.close()
if not lastonbranch and firstcset and not firstcset.parents:
# The first revision of the sequence (the last fetched one)
# has invalid parents if not a branch root. Find the parent
# revision now, if any.
try:
firstrevnum = self.revnum(firstcset.rev)
if firstrevnum > 1:
latest = self.latest(self.module, firstrevnum - 1)
if latest:
firstcset.parents.append(latest)
except SvnPathNotFound:
pass
except SubversionException, (inst, num):
if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
raise util.Abort(_('svn: branch has no revision %s') % to_revnum)
raise
def _getfile(self, file, rev):
# TODO: ra.get_file transmits the whole file instead of diffs.
mode = ''
try:
new_module, revnum = self.revsplit(rev)[1:]
if self.module != new_module:
self.module = new_module
self.reparent(self.module)
io = StringIO()
info = svn.ra.get_file(self.ra, file, revnum, io)
data = io.getvalue()
# ra.get_files() seems to keep a reference on the input buffer
# preventing collection. Release it explicitely.
io.close()
if isinstance(info, list):
info = info[-1]
mode = ("svn:executable" in info) and 'x' or ''
mode = ("svn:special" in info) and 'l' or mode
except SubversionException, e:
notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
if e.apr_err in notfound: # File not found
raise IOError()
raise
if mode == 'l':
link_prefix = "link "
if data.startswith(link_prefix):
data = data[len(link_prefix):]
return data, mode
def _find_children(self, path, revnum):
path = path.strip('/')
pool = Pool()
rpath = '/'.join([self.baseurl, urllib.quote(path)]).strip('/')
return ['%s/%s' % (path, x) for x in
svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool).keys()]
def getrelpath(self, path, module=None):
if module is None:
module = self.module
# Given the repository url of this wc, say
# "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
# extract the "entry" portion (a relative path) from what
# svn log --xml says, ie
# "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
# that is to say "tests/PloneTestCase.py"
if path.startswith(module):
relative = path.rstrip('/')[len(module):]
if relative.startswith('/'):
return relative[1:]
elif relative == '':
return relative
# The path is outside our tracked tree...
self.ui.debug(_('%r is not under %r, ignoring\n') % (path, module))
return None
def _checkpath(self, path, revnum):
# ra.check_path does not like leading slashes very much, it leads
# to PROPFIND subversion errors
return svn.ra.check_path(self.ra, path.strip('/'), revnum)
def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
strict_node_history=False):
# Normalize path names, svn >= 1.5 only wants paths relative to
# supplied URL
relpaths = []
for p in paths:
if not p.startswith('/'):
p = self.module + '/' + p
relpaths.append(p.strip('/'))
args = [self.baseurl, relpaths, start, end, limit, discover_changed_paths,
strict_node_history]
arg = encodeargs(args)
hgexe = util.hgexecutable()
cmd = '%s debugsvnlog' % util.shellquote(hgexe)
stdin, stdout = util.popen2(cmd)
stdin.write(arg)
stdin.close()
return logstream(stdout)
pre_revprop_change = '''#!/bin/sh
REPOS="$1"
REV="$2"
USER="$3"
PROPNAME="$4"
ACTION="$5"
if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi
echo "Changing prohibited revision property" >&2
exit 1
'''
class svn_sink(converter_sink, commandline):
commit_re = re.compile(r'Committed revision (\d+).', re.M)
def prerun(self):
if self.wc:
os.chdir(self.wc)
def postrun(self):
if self.wc:
os.chdir(self.cwd)
def join(self, name):
return os.path.join(self.wc, '.svn', name)
def revmapfile(self):
return self.join('hg-shamap')
def authorfile(self):
return self.join('hg-authormap')
def __init__(self, ui, path):
converter_sink.__init__(self, ui, path)
commandline.__init__(self, ui, 'svn')
self.delete = []
self.setexec = []
self.delexec = []
self.copies = []
self.wc = None
self.cwd = os.getcwd()
path = os.path.realpath(path)
created = False
if os.path.isfile(os.path.join(path, '.svn', 'entries')):
self.wc = path
self.run0('update')
else:
wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')
if os.path.isdir(os.path.dirname(path)):
if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
ui.status(_('initializing svn repo %r\n') %
os.path.basename(path))
commandline(ui, 'svnadmin').run0('create', path)
created = path
path = util.normpath(path)
if not path.startswith('/'):
path = '/' + path
path = 'file://' + path
ui.status(_('initializing svn wc %r\n') % os.path.basename(wcpath))
self.run0('checkout', path, wcpath)
self.wc = wcpath
self.opener = util.opener(self.wc)
self.wopener = util.opener(self.wc)
self.childmap = mapfile(ui, self.join('hg-childmap'))
self.is_exec = util.checkexec(self.wc) and util.is_exec or None
if created:
hook = os.path.join(created, 'hooks', 'pre-revprop-change')
fp = open(hook, 'w')
fp.write(pre_revprop_change)
fp.close()
util.set_flags(hook, False, True)
xport = transport.SvnRaTransport(url=geturl(path))
self.uuid = svn.ra.get_uuid(xport.ra)
def wjoin(self, *names):
return os.path.join(self.wc, *names)
def putfile(self, filename, flags, data):
if 'l' in flags:
self.wopener.symlink(data, filename)
else:
try:
if os.path.islink(self.wjoin(filename)):
os.unlink(filename)
except OSError:
pass
self.wopener(filename, 'w').write(data)
if self.is_exec:
was_exec = self.is_exec(self.wjoin(filename))
else:
# On filesystems not supporting execute-bit, there is no way
# to know if it is set but asking subversion. Setting it
# systematically is just as expensive and much simpler.
was_exec = 'x' not in flags
util.set_flags(self.wjoin(filename), False, 'x' in flags)
if was_exec:
if 'x' not in flags:
self.delexec.append(filename)
else:
if 'x' in flags:
self.setexec.append(filename)
def _copyfile(self, source, dest):
# SVN's copy command pukes if the destination file exists, but
# our copyfile method expects to record a copy that has
# already occurred. Cross the semantic gap.
wdest = self.wjoin(dest)
exists = os.path.exists(wdest)
if exists:
fd, tempname = tempfile.mkstemp(
prefix='hg-copy-', dir=os.path.dirname(wdest))
os.close(fd)
os.unlink(tempname)
os.rename(wdest, tempname)
try:
self.run0('copy', source, dest)
finally:
if exists:
try:
os.unlink(wdest)
except OSError:
pass
os.rename(tempname, wdest)
def dirs_of(self, files):
dirs = set()
for f in files:
if os.path.isdir(self.wjoin(f)):
dirs.add(f)
for i in strutil.rfindall(f, '/'):
dirs.add(f[:i])
return dirs
def add_dirs(self, files):
add_dirs = [d for d in sorted(self.dirs_of(files))
if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
if add_dirs:
self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
return add_dirs
def add_files(self, files):
if files:
self.xargs(files, 'add', quiet=True)
return files
def tidy_dirs(self, names):
deleted = []
for d in sorted(self.dirs_of(names), reverse=True):
wd = self.wjoin(d)
if os.listdir(wd) == '.svn':
self.run0('delete', d)
deleted.append(d)
return deleted
def addchild(self, parent, child):
self.childmap[parent] = child
def revid(self, rev):
return u"svn:%s@%s" % (self.uuid, rev)
def putcommit(self, files, copies, parents, commit, source):
# Apply changes to working copy
for f, v in files:
try:
data = source.getfile(f, v)
except IOError:
self.delete.append(f)
else:
e = source.getmode(f, v)
self.putfile(f, e, data)
if f in copies:
self.copies.append([copies[f], f])
files = [f[0] for f in files]
for parent in parents:
try:
return self.revid(self.childmap[parent])
except KeyError:
pass
entries = set(self.delete)
files = frozenset(files)
entries.update(self.add_dirs(files.difference(entries)))
if self.copies:
for s, d in self.copies:
self._copyfile(s, d)
self.copies = []
if self.delete:
self.xargs(self.delete, 'delete')
self.delete = []
entries.update(self.add_files(files.difference(entries)))
entries.update(self.tidy_dirs(entries))
if self.delexec:
self.xargs(self.delexec, 'propdel', 'svn:executable')
self.delexec = []
if self.setexec:
self.xargs(self.setexec, 'propset', 'svn:executable', '*')
self.setexec = []
fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
fp = os.fdopen(fd, 'w')
fp.write(commit.desc)
fp.close()
try:
output = self.run0('commit',
username=util.shortuser(commit.author),
file=messagefile,
encoding='utf-8')
try:
rev = self.commit_re.search(output).group(1)
except AttributeError:
self.ui.warn(_('unexpected svn output:\n'))
self.ui.warn(output)
raise util.Abort(_('unable to cope with svn output'))
if commit.rev:
self.run('propset', 'hg:convert-rev', commit.rev,
revprop=True, revision=rev)
if commit.branch and commit.branch != 'default':
self.run('propset', 'hg:convert-branch', commit.branch,
revprop=True, revision=rev)
for parent in parents:
self.addchild(parent, rev)
return self.revid(rev)
finally:
os.unlink(messagefile)
def puttags(self, tags):
self.ui.warn(_('XXX TAGS NOT IMPLEMENTED YET\n'))