##// END OF EJS Templates
convert: bail out in Subversion source if encountering non-ASCII HTTP(S) URL...
convert: bail out in Subversion source if encountering non-ASCII HTTP(S) URL Before this patch, in the tested case, urllib raised `httplib.InvalidURL: URL can't contain control characters. '/\xff/!svn/ver/0/.svn' (found at least '\xff')`, with the result that the URL was never recognized as a Subversion repository. This patch adds a check that bails out if the URL contains non-ASCII characters. The warning is not overly user-friendly, but giving the user something to type into a search engine is definitely better than not explaining why the repository was not recognized. We could support non-ASCII characters by quoting them before passing them to urllib. However, we would want to be compatible with what the `svn` command does, which converts the URL from the locale encoding to UTF-8, percent-encodes it and sends it to the server. If the locale encoding is not UTF-8, the behavior is IMHO not very intuitive, as the `svn` command may send different (percent-encoded) octets than what was passed on the console. Instead of copying this behavior, we had better leave it forbidden.

File last commit:

r44187:c190f271 default
r45559:697212a8 stable
Show More
p4.py
405 lines | 12.8 KiB | text/x-python | PythonLexer
# Perforce source for convert extension.
#
# Copyright 2009, Frank Kingswood <frank@kingswood-consulting.co.uk>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import marshal
import re
from mercurial.i18n import _
from mercurial import (
error,
util,
)
from mercurial.utils import (
dateutil,
procutil,
stringutil,
)
from . import common
def loaditer(f):
    """Iterate over the marshalled objects that p4 wrote to *f*.

    Objects are read one at a time with ``marshal.load``.  Iteration stops
    silently at end of input, or as soon as a falsy object (such as an
    empty dictionary) is read.
    """
    try:
        record = marshal.load(f)
        while record:
            yield record
            record = marshal.load(f)
    except EOFError:
        # running out of input is the normal way for the stream to end
        return
def decodefilename(filename):
    """Undo Perforce's escaping of the special characters @, #, * and %.

    Perforce stores these characters as %40, %23, %2A and %25
    respectively; this returns *filename* with them restored.

    >>> decodefilename(b'portable-net45%252Bnetcore45%252Bwp8%252BMonoAndroid')
    'portable-net45%2Bnetcore45%2Bwp8%2BMonoAndroid'
    >>> decodefilename(b'//Depot/Directory/%2525/%2523/%23%40.%2A')
    '//Depot/Directory/%25/%23/#@.*'
    """
    # b'%25' is handled last so that a '%' it produces is never mistaken
    # for the start of one of the other escape sequences.
    return (
        filename.replace(b'%2A', b'*')
        .replace(b'%23', b'#')
        .replace(b'%40', b'@')
        .replace(b'%25', b'%')
    )
class p4_source(common.converter_source):
    """Convert source that reads history by running the ``p4`` command.

    All data is obtained from ``p4 -G ...`` invocations, whose marshalled
    output is decoded with ``marshal.load``/``loaditer``.  The expensive
    scan of all changelists (``_parse``) is performed lazily, once, the
    first time any of the cached properties below is accessed.
    """

    def __init__(self, ui, repotype, path, revs=None):
        """Set up the source for *path*.

        *path* is either a depot path (starting with ``//``) or, when it
        contains no ``/``, a name that is later passed to
        ``p4 client -o``.

        Raises ``common.NoRepo`` when *path* contains a ``/`` but does not
        start with ``//``, and ``error.Abort`` when more than one revision
        is given in *revs*.
        """
        # avoid import cycle
        from . import convcmd

        super(p4_source, self).__init__(ui, repotype, path, revs=revs)

        if b"/" in path and not path.startswith(b'//'):
            raise common.NoRepo(
                _(b'%s does not look like a P4 repository') % path
            )

        # NOTE(review): presumably verifies that the p4 executable is
        # available; the exact effect of abort=False is defined in common.
        common.checktool(b'p4', abort=False)

        self.revmap = {}
        self.encoding = self.ui.config(
            b'convert', b'p4.encoding', convcmd.orig_encoding
        )
        # Perforce file type: optional prefix, base type, optional
        # "+modifiers" suffix.
        self.re_type = re.compile(
            br"([a-z]+)?(text|binary|symlink|apple|resource|unicode|utf\d+)"
            br"(\+\w+)?$"
        )
        # Expanded RCS-style keywords, e.g. "$Id: ... $".
        self.re_keywords = re.compile(
            br"\$(Id|Header|Date|DateTime|Change|File|Revision|Author)"
            br":[^$\n]*\$"
        )
        # Reduced keyword set, used for files carrying the "ko" flag.
        self.re_keywords_old = re.compile(br"\$(Id|Header):[^$\n]*\$")

        if revs and len(revs) > 1:
            raise error.Abort(
                _(
                    b"p4 source does not support specifying "
                    b"multiple revisions"
                )
            )

    def setrevmap(self, revmap):
        """Sets the parsed revmap dictionary.

        Revmap stores mappings from a source revision to a target revision.
        It is set by the convert command and provided by the user as a file
        on the command line.

        Revisions in the map are considered to be present in the
        repository and are ignored during _parse(). This allows for
        incremental imports if a revmap is provided.
        """
        self.revmap = revmap

    def _parse_view(self, path):
        """Read changes affecting the path.

        Runs ``p4 -G changes -s submitted`` on *path* and returns a
        dictionary whose keys are the change numbers (as bytes) found in
        the output; every value is ``True``.
        """
        cmd = b'p4 -G changes -s submitted %s' % procutil.shellquote(path)
        stdout = procutil.popen(cmd, mode=b'rb')
        p4changes = {}
        for d in loaditer(stdout):
            c = d.get(b"change", None)
            if c:
                p4changes[c] = True
        return p4changes

    def _parse(self, ui, path):
        """Prepare list of P4 filenames and revisions to import.

        Returns a dictionary with the keys ``b'changeset'`` (change ->
        commit object), ``b'files'`` (change -> list of
        ``(filename, revision)``), ``b'copies'`` (change -> mapping of
        destination to source filename), ``b'heads'`` (list holding the
        last processed change, or empty) and ``b'depotname'`` (local
        filename -> depot filename).
        """
        p4changes = {}
        changeset = {}
        files_map = {}
        copies_map = {}
        localname = {}
        depotname = {}
        heads = []

        ui.status(_(b'reading p4 views\n'))

        # read client spec or view
        if b"/" in path:
            p4changes.update(self._parse_view(path))
            if path.startswith(b"//") and path.endswith(b"/..."):
                views = {path[:-3]: b""}
            else:
                views = {b"//": b""}
        else:
            cmd = b'p4 -G client -o %s' % procutil.shellquote(path)
            clientspec = marshal.load(procutil.popen(cmd, mode=b'rb'))

            views = {}
            for client in clientspec:
                if client.startswith(b"View"):
                    sview, cview = clientspec[client].split()
                    p4changes.update(self._parse_view(sview))
                    if sview.endswith(b"...") and cview.endswith(b"..."):
                        sview = sview[:-3]
                        cview = cview[:-3]
                    # drop the leading "//" and the first path component
                    # of the client side of the mapping
                    cview = cview[2:]
                    cview = cview[cview.find(b"/") + 1 :]
                    views[sview] = cview

        # list of changes that affect our source files
        # (sorted() rather than keys().sort(): dict views cannot be sorted
        # in place on Python 3)
        p4changes = sorted(p4changes, key=int)

        # list with depot pathnames, longest first
        vieworder = sorted(views, key=len, reverse=True)

        # handle revision limiting
        startrev = self.ui.config(b'convert', b'p4.startrev')

        # now read the full changelists to get the list of file revisions
        ui.status(_(b'collecting p4 changelists\n'))
        lastid = None
        for change in p4changes:
            if startrev and int(change) < int(startrev):
                continue
            if self.revs and int(change) > int(self.revs[0]):
                continue
            if change in self.revmap:
                # Ignore already present revisions, but set the parent pointer.
                lastid = change
                continue

            if lastid:
                parents = [lastid]
            else:
                parents = []

            d = self._fetch_revision(change)
            c = self._construct_commit(d, parents)

            descarr = c.desc.splitlines(True)
            if descarr:
                shortdesc = descarr[0].rstrip(b'\r\n')
            else:
                shortdesc = b'**empty changelist description**'

            # escapestr() keeps the line printable and yields bytes on both
            # Python 2 and 3; repr() of bytes is a unicode string on
            # Python 3 and cannot be %-formatted into bytes.
            t = b'%s %s' % (c.rev, stringutil.escapestr(shortdesc))
            ui.status(stringutil.ellipsis(t, 80) + b'\n')

            files = []
            copies = {}
            copiedfiles = []
            i = 0
            while (b"depotFile%d" % i) in d and (b"rev%d" % i) in d:
                oldname = d[b"depotFile%d" % i]
                filename = None
                # the first (i.e. longest) matching view wins; the
                # comparison is case-insensitive
                for v in vieworder:
                    if oldname.lower().startswith(v.lower()):
                        filename = decodefilename(views[v] + oldname[len(v) :])
                        break
                if filename:
                    files.append((filename, d[b"rev%d" % i]))
                    depotname[filename] = oldname
                    if d.get(b"action%d" % i) == b"move/add":
                        copiedfiles.append(filename)
                    localname[oldname] = filename
                i += 1

            # Collect information about copied files
            for filename in copiedfiles:
                oldname = depotname[filename]

                flcmd = b'p4 -G filelog %s' % procutil.shellquote(oldname)
                flstdout = procutil.popen(flcmd, mode=b'rb')

                copiedfilename = None
                for d in loaditer(flstdout):
                    copiedoldname = None

                    i = 0
                    while (b"change%d" % i) in d:
                        if (
                            d[b"change%d" % i] == change
                            and d[b"action%d" % i] == b"move/add"
                        ):
                            j = 0
                            while (b"file%d,%d" % (i, j)) in d:
                                if d[b"how%d,%d" % (i, j)] == b"moved from":
                                    copiedoldname = d[b"file%d,%d" % (i, j)]
                                    break
                                j += 1
                        i += 1

                    # only sources that are part of this conversion count
                    if copiedoldname and copiedoldname in localname:
                        copiedfilename = localname[copiedoldname]
                        break

                if copiedfilename:
                    copies[filename] = copiedfilename
                else:
                    ui.warn(
                        _(b"cannot find source for copied file: %s@%s\n")
                        % (filename, change)
                    )

            changeset[change] = c
            files_map[change] = files
            copies_map[change] = copies
            lastid = change

        if lastid and len(changeset) > 0:
            heads = [lastid]

        return {
            b'changeset': changeset,
            b'files': files_map,
            b'copies': copies_map,
            b'heads': heads,
            b'depotname': depotname,
        }

    @util.propertycache
    def _parse_once(self):
        """Cached result of ``_parse`` for this source's ui and path."""
        return self._parse(self.ui, self.path)

    @util.propertycache
    def copies(self):
        """The ``b'copies'`` entry of the parse result."""
        return self._parse_once[b'copies']

    @util.propertycache
    def files(self):
        """The ``b'files'`` entry of the parse result."""
        return self._parse_once[b'files']

    @util.propertycache
    def changeset(self):
        """The ``b'changeset'`` entry of the parse result."""
        return self._parse_once[b'changeset']

    @util.propertycache
    def heads(self):
        """The ``b'heads'`` entry of the parse result."""
        return self._parse_once[b'heads']

    @util.propertycache
    def depotname(self):
        """The ``b'depotname'`` entry of the parse result."""
        return self._parse_once[b'depotname']

    def getheads(self):
        """Return the list of heads computed by ``_parse``."""
        return self.heads

    def getfile(self, name, rev):
        """Return ``(contents, mode)`` for revision *rev* of file *name*.

        The file is fetched with ``p4 -G print``.  *mode* is ``b""``,
        ``b"x"`` or ``b"l"`` (symlink).  ``(None, None)`` is returned when
        the action is a purge/delete/move-delete, or when no file type
        could be determined.  Expanded keywords are collapsed back to
        their bare ``$Keyword$`` form for files with the ``k``/``ko``
        flags.

        An error reported by p4 is retried once; if the retry fails as
        well, the first error is raised as an ``IOError``.
        """
        cmd = b'p4 -G print %s' % procutil.shellquote(
            b"%s#%s" % (self.depotname[name], rev)
        )

        lasterror = None
        while True:
            stdout = procutil.popen(cmd, mode=b'rb')

            mode = None
            contents = []
            keywords = None

            for d in loaditer(stdout):
                code = d[b"code"]
                data = d.get(b"data")

                if code == b"error":
                    # if this is the first time error happened
                    # re-attempt getting the file
                    if not lasterror:
                        lasterror = IOError(d[b"generic"], data)
                        # this will exit inner-most for-loop
                        break
                    else:
                        raise lasterror

                elif code == b"stat":
                    action = d.get(b"action")
                    if action in [b"purge", b"delete", b"move/delete"]:
                        return None, None
                    p4type = self.re_type.match(d[b"type"])
                    if p4type:
                        mode = b""
                        # modifiers may appear before or after the base type
                        flags = (p4type.group(1) or b"") + (
                            p4type.group(3) or b""
                        )
                        if b"x" in flags:
                            mode = b"x"
                        if p4type.group(2) == b"symlink":
                            mode = b"l"
                        if b"ko" in flags:
                            keywords = self.re_keywords_old
                        elif b"k" in flags:
                            keywords = self.re_keywords

                elif code == b"text" or code == b"binary":
                    contents.append(data)

                # a successfully handled record clears any earlier error
                lasterror = None

            if not lasterror:
                break

        if mode is None:
            return None, None

        contents = b''.join(contents)

        if keywords:
            contents = keywords.sub(b"$\\1$", contents)
        if mode == b"l" and contents.endswith(b"\n"):
            contents = contents[:-1]

        return contents, mode

    def getchanges(self, rev, full):
        """Return ``(files, copies, set())`` for change *rev*.

        Raises ``error.Abort`` when *full* is true, as that mode is not
        supported by this source.
        """
        if full:
            raise error.Abort(_(b"convert from p4 does not support --full"))
        return self.files[rev], self.copies[rev], set()

    def _construct_commit(self, obj, parents=None):
        """
        Constructs a common.commit object from an unmarshalled
        `p4 describe` output

        *parents* defaults to an empty list.
        """
        desc = self.recode(obj.get(b"desc", b""))
        date = (int(obj[b"time"]), 0)  # timezone not set
        if parents is None:
            parents = []

        return common.commit(
            author=self.recode(obj[b"user"]),
            date=dateutil.datestr(date, b'%Y-%m-%d %H:%M:%S %1%2'),
            parents=parents,
            desc=desc,
            branch=None,
            rev=obj[b'change'],
            extra={b"p4": obj[b'change'], b"convert_revision": obj[b'change']},
        )

    def _fetch_revision(self, rev):
        """Return an output of `p4 describe` including author, commit date as
        a dictionary."""
        # quote the revision like every other argument handed to the shell
        cmd = b"p4 -G describe -s %s" % procutil.shellquote(rev)
        stdout = procutil.popen(cmd, mode=b'rb')
        return marshal.load(stdout)

    def getcommit(self, rev):
        """Return the commit object for change *rev*.

        Changes found by ``_parse`` are served from the parsed data;
        changes known only through the revmap are fetched from p4 and
        built without parents.  Raises ``error.Abort`` for any other
        revision.
        """
        if rev in self.changeset:
            return self.changeset[rev]
        elif rev in self.revmap:
            d = self._fetch_revision(rev)
            return self._construct_commit(d, parents=None)
        raise error.Abort(
            _(b"cannot find %s in the revmap or parsed changesets") % rev
        )

    def gettags(self):
        """Return an empty dictionary; this source reports no tags."""
        return {}

    def getchangedfiles(self, rev, i):
        """Return the sorted filenames touched by change *rev*.

        The *i* argument is not used.
        """
        return sorted(x[0] for x in self.files[rev])