py3: make sure regexes are bytes...
Pulkit Goyal
r36473:9e3cb58c default
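The fix is easiest to see in isolation. Mercurial passes its internal data, including the CVSROOT strings handled below, around as bytes; Python 2 str patterns match str data, but on Python 3 a str (unicode) pattern applied to bytes raises TypeError, so the patterns themselves have to become bytes (br'...'). A minimal sketch of the failure mode and the fix, using illustrative values that are not part of the diff:

    import re

    s = b':pserver:user@example.com:/cvsroot'  # convert code passes paths as bytes

    # Python 2: str pattern, str data -- works.
    # Python 3: str pattern, bytes data -- raises
    #   TypeError: cannot use a string pattern on a bytes-like object
    #re.findall(r'\w+', s)

    # The fix this commit applies: make the pattern bytes as well.
    print(re.findall(br'\w+', s))
    # [b'pserver', b'user', b'example', b'com', b'cvsroot']

The one hunk visible in this view is line 173 of cvsps.py, where cache-file name components are reduced to alphanumerics with re.findall(br'\w+', s).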
@@ -1,952 +1,952 @@ hgext/convert/cvsps.py
1 1 # Mercurial built-in replacement for cvsps.
2 2 #
3 3 # Copyright 2008, Frank Kingswood <frank@kingswood-consulting.co.uk>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import os
10 10 import re
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial import (
14 14 encoding,
15 15 error,
16 16 hook,
17 17 pycompat,
18 18 util,
19 19 )
20 20
21 21 pickle = util.pickle
22 22
23 23 class logentry(object):
24 24 '''Class logentry has the following attributes:
25 25 .author - author name as CVS knows it
26 26 .branch - name of branch this revision is on
27 27 .branches - revision tuple of branches starting at this revision
28 28 .comment - commit message
29 29 .commitid - CVS commitid or None
30 30 .date - the commit date as a (time, tz) tuple
31 31 .dead - true if file revision is dead
32 32 .file - Name of file
33 33 .lines - a tuple (+lines, -lines) or None
34 34 .parent - Previous revision of this entry
35 35 .rcs - name of file as returned from CVS
36 36 .revision - revision number as tuple
37 37 .tags - list of tags on the file
38 38 .synthetic - is this a synthetic "file ... added on ..." revision?
39 39 .mergepoint - the branch that has been merged from (if present in
40 40 rlog output) or None
41 41 .branchpoints - the branches that start at the current entry or empty
42 42 '''
43 43 def __init__(self, **entries):
44 44 self.synthetic = False
45 45 self.__dict__.update(entries)
46 46
47 47 def __repr__(self):
48 48 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
49 49 return "%s(%s)"%(type(self).__name__, ", ".join(items))
50 50
51 51 class logerror(Exception):
52 52 pass
53 53
54 54 def getrepopath(cvspath):
55 55 """Return the repository path from a CVS path.
56 56
57 57 >>> getrepopath(b'/foo/bar')
58 58 '/foo/bar'
59 59 >>> getrepopath(b'c:/foo/bar')
60 60 '/foo/bar'
61 61 >>> getrepopath(b':pserver:10/foo/bar')
62 62 '/foo/bar'
63 63 >>> getrepopath(b':pserver:10c:/foo/bar')
64 64 '/foo/bar'
65 65 >>> getrepopath(b':pserver:/foo/bar')
66 66 '/foo/bar'
67 67 >>> getrepopath(b':pserver:c:/foo/bar')
68 68 '/foo/bar'
69 69 >>> getrepopath(b':pserver:truc@foo.bar:/foo/bar')
70 70 '/foo/bar'
71 71 >>> getrepopath(b':pserver:truc@foo.bar:c:/foo/bar')
72 72 '/foo/bar'
73 73 >>> getrepopath(b'user@server/path/to/repository')
74 74 '/path/to/repository'
75 75 """
76 76 # According to the CVS manual, CVS paths are expressed like:
77 77 # [:method:][[user][:password]@]hostname[:[port]]/path/to/repository
78 78 #
79 79 # The CVS path is split into parts, and then the position of the first
80 80 # occurrence of the '/' char after the '@' is located. The repository
81 81 # path is the rest of the string from that '/' sign on, including it.
82 82
83 83 parts = cvspath.split(':')
84 84 atposition = parts[-1].find('@')
85 85 start = 0
86 86
87 87 if atposition != -1:
88 88 start = atposition
89 89
90 90 repopath = parts[-1][parts[-1].find('/', start):]
91 91 return repopath
92 92
93 93 def createlog(ui, directory=None, root="", rlog=True, cache=None):
94 94 '''Collect the CVS rlog'''
95 95
96 96 # Because we store many duplicate commit log messages, reusing strings
97 97 # saves a lot of memory and pickle storage space.
98 98 _scache = {}
99 99 def scache(s):
100 100 "return a shared version of a string"
101 101 return _scache.setdefault(s, s)
102 102
103 103 ui.status(_('collecting CVS rlog\n'))
104 104
105 105 log = [] # list of logentry objects containing the CVS state
106 106
107 107 # patterns to match in CVS (r)log output, by state of use
108 108 re_00 = re.compile('RCS file: (.+)$')
109 109 re_01 = re.compile('cvs \\[r?log aborted\\]: (.+)$')
110 110 re_02 = re.compile('cvs (r?log|server): (.+)\n$')
111 111 re_03 = re.compile("(Cannot access.+CVSROOT)|"
112 112 "(can't create temporary directory.+)$")
113 113 re_10 = re.compile('Working file: (.+)$')
114 114 re_20 = re.compile('symbolic names:')
115 115 re_30 = re.compile('\t(.+): ([\\d.]+)$')
116 116 re_31 = re.compile('----------------------------$')
117 117 re_32 = re.compile('======================================='
118 118 '======================================$')
119 119 re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
120 120 re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
121 121 r'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
122 122 r'(\s+commitid:\s+([^;]+);)?'
123 123 r'(.*mergepoint:\s+([^;]+);)?')
124 124 re_70 = re.compile('branches: (.+);$')
125 125
126 126 file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')
127 127
128 128 prefix = '' # leading path to strip off what we get from CVS
129 129
130 130 if directory is None:
131 131 # Current working directory
132 132
133 133 # Get the real directory in the repository
134 134 try:
135 135 prefix = open(os.path.join('CVS','Repository'), 'rb').read().strip()
136 136 directory = prefix
137 137 if prefix == ".":
138 138 prefix = ""
139 139 except IOError:
140 140 raise logerror(_('not a CVS sandbox'))
141 141
142 142 if prefix and not prefix.endswith(pycompat.ossep):
143 143 prefix += pycompat.ossep
144 144
145 145 # Use the Root file in the sandbox, if it exists
146 146 try:
147 147 root = open(os.path.join('CVS','Root'), 'rb').read().strip()
148 148 except IOError:
149 149 pass
150 150
151 151 if not root:
152 152 root = encoding.environ.get('CVSROOT', '')
153 153
154 154 # read log cache if one exists
155 155 oldlog = []
156 156 date = None
157 157
158 158 if cache:
159 159 cachedir = os.path.expanduser('~/.hg.cvsps')
160 160 if not os.path.exists(cachedir):
161 161 os.mkdir(cachedir)
162 162
163 163 # The cvsps cache pickle needs a uniquified name, based on the
164 164 # repository location. The address may have all sorts of nasties
165 165 # in it, slashes, colons and such. So here we take just the
166 166 # alphanumeric characters, concatenated in a way that does not
167 167 # mix up the various components, so that
168 168 # :pserver:user@server:/path
169 169 # and
170 170 # /pserver/user/server/path
171 171 # are mapped to different cache file names.
172 172 cachefile = root.split(":") + [directory, "cache"]
173 cachefile = ['-'.join(re.findall(r'\w+', s)) for s in cachefile if s]
173 cachefile = ['-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
174 174 cachefile = os.path.join(cachedir,
175 175 '.'.join([s for s in cachefile if s]))
176 176
177 177 if cache == 'update':
178 178 try:
179 179 ui.note(_('reading cvs log cache %s\n') % cachefile)
180 180 oldlog = pickle.load(open(cachefile, 'rb'))
181 181 for e in oldlog:
182 182 if not (util.safehasattr(e, 'branchpoints') and
183 183 util.safehasattr(e, 'commitid') and
184 184 util.safehasattr(e, 'mergepoint')):
185 185 ui.status(_('ignoring old cache\n'))
186 186 oldlog = []
187 187 break
188 188
189 189 ui.note(_('cache has %d log entries\n') % len(oldlog))
190 190 except Exception as e:
191 191 ui.note(_('error reading cache: %r\n') % e)
192 192
193 193 if oldlog:
194 194 date = oldlog[-1].date # last commit date as a (time,tz) tuple
195 195 date = util.datestr(date, '%Y/%m/%d %H:%M:%S %1%2')
196 196
197 197 # build the CVS commandline
198 198 cmd = ['cvs', '-q']
199 199 if root:
200 200 cmd.append('-d%s' % root)
201 201 p = util.normpath(getrepopath(root))
202 202 if not p.endswith('/'):
203 203 p += '/'
204 204 if prefix:
205 205 # looks like normpath replaces "" by "."
206 206 prefix = p + util.normpath(prefix)
207 207 else:
208 208 prefix = p
209 209 cmd.append(['log', 'rlog'][rlog])
210 210 if date:
211 211 # no space between option and date string
212 212 cmd.append('-d>%s' % date)
213 213 cmd.append(directory)
214 214
215 215 # state machine begins here
216 216 tags = {} # dictionary of revisions on current file with their tags
217 217 branchmap = {} # mapping between branch names and revision numbers
218 218 rcsmap = {}
219 219 state = 0
220 220 store = False # set when a new record can be appended
221 221
222 222 cmd = [util.shellquote(arg) for arg in cmd]
223 223 ui.note(_("running %s\n") % (' '.join(cmd)))
224 224 ui.debug("prefix=%r directory=%r root=%r\n" % (prefix, directory, root))
225 225
226 226 pfp = util.popen(' '.join(cmd))
227 227 peek = pfp.readline()
228 228 while True:
229 229 line = peek
230 230 if line == '':
231 231 break
232 232 peek = pfp.readline()
233 233 if line.endswith('\n'):
234 234 line = line[:-1]
235 235 #ui.debug('state=%d line=%r\n' % (state, line))
236 236
237 237 if state == 0:
238 238 # initial state, consume input until we see 'RCS file'
239 239 match = re_00.match(line)
240 240 if match:
241 241 rcs = match.group(1)
242 242 tags = {}
243 243 if rlog:
244 244 filename = util.normpath(rcs[:-2])
245 245 if filename.startswith(prefix):
246 246 filename = filename[len(prefix):]
247 247 if filename.startswith('/'):
248 248 filename = filename[1:]
249 249 if filename.startswith('Attic/'):
250 250 filename = filename[6:]
251 251 else:
252 252 filename = filename.replace('/Attic/', '/')
253 253 state = 2
254 254 continue
255 255 state = 1
256 256 continue
257 257 match = re_01.match(line)
258 258 if match:
259 259 raise logerror(match.group(1))
260 260 match = re_02.match(line)
261 261 if match:
262 262 raise logerror(match.group(2))
263 263 if re_03.match(line):
264 264 raise logerror(line)
265 265
266 266 elif state == 1:
267 267 # expect 'Working file' (only when using log instead of rlog)
268 268 match = re_10.match(line)
269 269 assert match, _('RCS file must be followed by working file')
270 270 filename = util.normpath(match.group(1))
271 271 state = 2
272 272
273 273 elif state == 2:
274 274 # expect 'symbolic names'
275 275 if re_20.match(line):
276 276 branchmap = {}
277 277 state = 3
278 278
279 279 elif state == 3:
280 280 # read the symbolic names and store as tags
281 281 match = re_30.match(line)
282 282 if match:
283 283 rev = [int(x) for x in match.group(2).split('.')]
284 284
285 285 # Convert magic branch number to an odd-numbered one
286 286 revn = len(rev)
287 287 if revn > 3 and (revn % 2) == 0 and rev[-2] == 0:
288 288 rev = rev[:-2] + rev[-1:]
289 289 rev = tuple(rev)
290 290
291 291 if rev not in tags:
292 292 tags[rev] = []
293 293 tags[rev].append(match.group(1))
294 294 branchmap[match.group(1)] = match.group(2)
295 295
296 296 elif re_31.match(line):
297 297 state = 5
298 298 elif re_32.match(line):
299 299 state = 0
300 300
301 301 elif state == 4:
302 302 # expecting '------' separator before first revision
303 303 if re_31.match(line):
304 304 state = 5
305 305 else:
306 306 assert not re_32.match(line), _('must have at least '
307 307 'some revisions')
308 308
309 309 elif state == 5:
310 310 # expecting revision number and possibly (ignored) lock indication
311 311 # we create the logentry here from values stored in states 0 to 4,
312 312 # as this state is re-entered for subsequent revisions of a file.
313 313 match = re_50.match(line)
314 314 assert match, _('expected revision number')
315 315 e = logentry(rcs=scache(rcs),
316 316 file=scache(filename),
317 317 revision=tuple([int(x) for x in
318 318 match.group(1).split('.')]),
319 319 branches=[],
320 320 parent=None,
321 321 commitid=None,
322 322 mergepoint=None,
323 323 branchpoints=set())
324 324
325 325 state = 6
326 326
327 327 elif state == 6:
328 328 # expecting date, author, state, lines changed
329 329 match = re_60.match(line)
330 330 assert match, _('revision must be followed by date line')
331 331 d = match.group(1)
332 332 if d[2] == '/':
333 333 # Y2K
334 334 d = '19' + d
335 335
336 336 if len(d.split()) != 3:
337 337 # cvs log dates always in GMT
338 338 d = d + ' UTC'
339 339 e.date = util.parsedate(d, ['%y/%m/%d %H:%M:%S',
340 340 '%Y/%m/%d %H:%M:%S',
341 341 '%Y-%m-%d %H:%M:%S'])
342 342 e.author = scache(match.group(2))
343 343 e.dead = match.group(3).lower() == 'dead'
344 344
345 345 if match.group(5):
346 346 if match.group(6):
347 347 e.lines = (int(match.group(5)), int(match.group(6)))
348 348 else:
349 349 e.lines = (int(match.group(5)), 0)
350 350 elif match.group(6):
351 351 e.lines = (0, int(match.group(6)))
352 352 else:
353 353 e.lines = None
354 354
355 355 if match.group(7): # cvs 1.12 commitid
356 356 e.commitid = match.group(8)
357 357
358 358 if match.group(9): # cvsnt mergepoint
359 359 myrev = match.group(10).split('.')
360 360 if len(myrev) == 2: # head
361 361 e.mergepoint = 'HEAD'
362 362 else:
363 363 myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
364 364 branches = [b for b in branchmap if branchmap[b] == myrev]
365 365 assert len(branches) == 1, ('unknown branch: %s'
366 366 % e.mergepoint)
367 367 e.mergepoint = branches[0]
368 368
369 369 e.comment = []
370 370 state = 7
371 371
372 372 elif state == 7:
373 373 # read the revision numbers of branches that start at this revision
374 374 # or store the commit log message otherwise
375 375 m = re_70.match(line)
376 376 if m:
377 377 e.branches = [tuple([int(y) for y in x.strip().split('.')])
378 378 for x in m.group(1).split(';')]
379 379 state = 8
380 380 elif re_31.match(line) and re_50.match(peek):
381 381 state = 5
382 382 store = True
383 383 elif re_32.match(line):
384 384 state = 0
385 385 store = True
386 386 else:
387 387 e.comment.append(line)
388 388
389 389 elif state == 8:
390 390 # store commit log message
391 391 if re_31.match(line):
392 392 cpeek = peek
393 393 if cpeek.endswith('\n'):
394 394 cpeek = cpeek[:-1]
395 395 if re_50.match(cpeek):
396 396 state = 5
397 397 store = True
398 398 else:
399 399 e.comment.append(line)
400 400 elif re_32.match(line):
401 401 state = 0
402 402 store = True
403 403 else:
404 404 e.comment.append(line)
405 405
406 406 # When a file is added on a branch B1, CVS creates a synthetic
407 407 # dead trunk revision 1.1 so that the branch has a root.
408 408 # Likewise, if you merge such a file to a later branch B2 (one
409 409 # that already existed when the file was added on B1), CVS
410 410 # creates a synthetic dead revision 1.1.x.1 on B2. Don't drop
411 411 # these revisions now, but mark them synthetic so
412 412 # createchangeset() can take care of them.
413 413 if (store and
414 414 e.dead and
415 415 e.revision[-1] == 1 and # 1.1 or 1.1.x.1
416 416 len(e.comment) == 1 and
417 417 file_added_re.match(e.comment[0])):
418 418 ui.debug('found synthetic revision in %s: %r\n'
419 419 % (e.rcs, e.comment[0]))
420 420 e.synthetic = True
421 421
422 422 if store:
423 423 # clean up the results and save in the log.
424 424 store = False
425 425 e.tags = sorted([scache(x) for x in tags.get(e.revision, [])])
426 426 e.comment = scache('\n'.join(e.comment))
427 427
428 428 revn = len(e.revision)
429 429 if revn > 3 and (revn % 2) == 0:
430 430 e.branch = tags.get(e.revision[:-1], [None])[0]
431 431 else:
432 432 e.branch = None
433 433
434 434 # find the branches starting from this revision
435 435 branchpoints = set()
436 436 for branch, revision in branchmap.iteritems():
437 437 revparts = tuple([int(i) for i in revision.split('.')])
438 438 if len(revparts) < 2: # bad tags
439 439 continue
440 440 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
441 441 # normal branch
442 442 if revparts[:-2] == e.revision:
443 443 branchpoints.add(branch)
444 444 elif revparts == (1, 1, 1): # vendor branch
445 445 if revparts in e.branches:
446 446 branchpoints.add(branch)
447 447 e.branchpoints = branchpoints
448 448
449 449 log.append(e)
450 450
451 451 rcsmap[e.rcs.replace('/Attic/', '/')] = e.rcs
452 452
453 453 if len(log) % 100 == 0:
454 454 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
455 455
456 456 log.sort(key=lambda x: (x.rcs, x.revision))
457 457
458 458 # find parent revisions of individual files
459 459 versions = {}
460 460 for e in sorted(oldlog, key=lambda x: (x.rcs, x.revision)):
461 461 rcs = e.rcs.replace('/Attic/', '/')
462 462 if rcs in rcsmap:
463 463 e.rcs = rcsmap[rcs]
464 464 branch = e.revision[:-1]
465 465 versions[(e.rcs, branch)] = e.revision
466 466
467 467 for e in log:
468 468 branch = e.revision[:-1]
469 469 p = versions.get((e.rcs, branch), None)
470 470 if p is None:
471 471 p = e.revision[:-2]
472 472 e.parent = p
473 473 versions[(e.rcs, branch)] = e.revision
474 474
475 475 # update the log cache
476 476 if cache:
477 477 if log:
478 478 # join up the old and new logs
479 479 log.sort(key=lambda x: x.date)
480 480
481 481 if oldlog and oldlog[-1].date >= log[0].date:
482 482 raise logerror(_('log cache overlaps with new log entries,'
483 483 ' re-run without cache.'))
484 484
485 485 log = oldlog + log
486 486
487 487 # write the new cachefile
488 488 ui.note(_('writing cvs log cache %s\n') % cachefile)
489 489 pickle.dump(log, open(cachefile, 'wb'))
490 490 else:
491 491 log = oldlog
492 492
493 493 ui.status(_('%d log entries\n') % len(log))
494 494
495 495 encodings = ui.configlist('convert', 'cvsps.logencoding')
496 496 if encodings:
497 497 def revstr(r):
498 498 # this is needed, because logentry.revision is a tuple of "int"
499 499 # (e.g. (1, 2) for "1.2")
500 500 return '.'.join(pycompat.maplist(pycompat.bytestr, r))
501 501
502 502 for entry in log:
503 503 comment = entry.comment
504 504 for e in encodings:
505 505 try:
506 506 entry.comment = comment.decode(e).encode('utf-8')
507 507 if ui.debugflag:
508 508 ui.debug("transcoding by %s: %s of %s\n" %
509 509 (e, revstr(entry.revision), entry.file))
510 510 break
511 511 except UnicodeDecodeError:
512 512 pass # try next encoding
513 513 except LookupError as inst: # unknown encoding, maybe
514 514 raise error.Abort(inst,
515 515 hint=_('check convert.cvsps.logencoding'
516 516 ' configuration'))
517 517 else:
518 518 raise error.Abort(_("no encoding can transcode"
519 519 " CVS log message for %s of %s")
520 520 % (revstr(entry.revision), entry.file),
521 521 hint=_('check convert.cvsps.logencoding'
522 522 ' configuration'))
523 523
524 524 hook.hook(ui, None, "cvslog", True, log=log)
525 525
526 526 return log
527 527
528 528
529 529 class changeset(object):
530 530 '''Class changeset has the following attributes:
531 531 .id - integer identifying this changeset (list index)
532 532 .author - author name as CVS knows it
533 533 .branch - name of branch this changeset is on, or None
534 534 .comment - commit message
535 535 .commitid - CVS commitid or None
536 536 .date - the commit date as a (time,tz) tuple
537 537 .entries - list of logentry objects in this changeset
538 538 .parents - list of one or two parent changesets
539 539 .tags - list of tags on this changeset
540 540 .synthetic - from synthetic revision "file ... added on branch ..."
541 541 .mergepoint - the branch that has been merged from or None
542 542 .branchpoints - the branches that start at the current entry or empty
543 543 '''
544 544 def __init__(self, **entries):
545 545 self.id = None
546 546 self.synthetic = False
547 547 self.__dict__.update(entries)
548 548
549 549 def __repr__(self):
550 550 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
551 551 return "%s(%s)"%(type(self).__name__, ", ".join(items))
552 552
553 553 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
554 554 '''Convert log into changesets.'''
555 555
556 556 ui.status(_('creating changesets\n'))
557 557
558 558 # try to order commitids by date
559 559 mindate = {}
560 560 for e in log:
561 561 if e.commitid:
562 562 mindate[e.commitid] = min(e.date, mindate.get(e.commitid, e.date))
563 563
564 564 # Merge changesets
565 565 log.sort(key=lambda x: (mindate.get(x.commitid), x.commitid, x.comment,
566 566 x.author, x.branch, x.date, x.branchpoints))
567 567
568 568 changesets = []
569 569 files = set()
570 570 c = None
571 571 for i, e in enumerate(log):
572 572
573 573 # Check if log entry belongs to the current changeset or not.
574 574
575 575 # Since CVS is file-centric, two different file revisions with
576 576 # different branchpoints should be treated as belonging to two
577 577 # different changesets (and the ordering is important and not
578 578 # honoured by cvsps at this point).
579 579 #
580 580 # Consider the following case:
581 581 # foo 1.1 branchpoints: [MYBRANCH]
582 582 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
583 583 #
584 584 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
585 585 # later version of foo may be in MYBRANCH2, so foo should be the
586 586 # first changeset and bar the next and MYBRANCH and MYBRANCH2
587 587 # should both start off of the bar changeset. No provisions are
588 588 # made to ensure that this is, in fact, what happens.
589 589 if not (c and e.branchpoints == c.branchpoints and
590 590 (# cvs commitids
591 591 (e.commitid is not None and e.commitid == c.commitid) or
592 592 (# no commitids, use fuzzy commit detection
593 593 (e.commitid is None or c.commitid is None) and
594 594 e.comment == c.comment and
595 595 e.author == c.author and
596 596 e.branch == c.branch and
597 597 ((c.date[0] + c.date[1]) <=
598 598 (e.date[0] + e.date[1]) <=
599 599 (c.date[0] + c.date[1]) + fuzz) and
600 600 e.file not in files))):
601 601 c = changeset(comment=e.comment, author=e.author,
602 602 branch=e.branch, date=e.date,
603 603 entries=[], mergepoint=e.mergepoint,
604 604 branchpoints=e.branchpoints, commitid=e.commitid)
605 605 changesets.append(c)
606 606
607 607 files = set()
608 608 if len(changesets) % 100 == 0:
609 609 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
610 610 ui.status(util.ellipsis(t, 80) + '\n')
611 611
612 612 c.entries.append(e)
613 613 files.add(e.file)
614 614 c.date = e.date # changeset date is date of latest commit in it
615 615
616 616 # Mark synthetic changesets
617 617
618 618 for c in changesets:
619 619 # Synthetic revisions always get their own changeset, because
620 620 # the log message includes the filename. E.g. if you add file3
621 621 # and file4 on a branch, you get four log entries and three
622 622 # changesets:
623 623 # "File file3 was added on branch ..." (synthetic, 1 entry)
624 624 # "File file4 was added on branch ..." (synthetic, 1 entry)
625 625 # "Add file3 and file4 to fix ..." (real, 2 entries)
626 626 # Hence the check for 1 entry here.
627 627 c.synthetic = len(c.entries) == 1 and c.entries[0].synthetic
628 628
629 629 # Sort files in each changeset
630 630
631 631 def entitycompare(l, r):
632 632 'Mimic cvsps sorting order'
633 633 l = l.file.split('/')
634 634 r = r.file.split('/')
635 635 nl = len(l)
636 636 nr = len(r)
637 637 n = min(nl, nr)
638 638 for i in range(n):
639 639 if i + 1 == nl and nl < nr:
640 640 return -1
641 641 elif i + 1 == nr and nl > nr:
642 642 return +1
643 643 elif l[i] < r[i]:
644 644 return -1
645 645 elif l[i] > r[i]:
646 646 return +1
647 647 return 0
648 648
649 649 for c in changesets:
650 650 c.entries.sort(entitycompare)
651 651
652 652 # Sort changesets by date
653 653
654 654 odd = set()
655 655 def cscmp(l, r):
656 656 d = sum(l.date) - sum(r.date)
657 657 if d:
658 658 return d
659 659
660 660 # detect vendor branches and initial commits on a branch
661 661 le = {}
662 662 for e in l.entries:
663 663 le[e.rcs] = e.revision
664 664 re = {}
665 665 for e in r.entries:
666 666 re[e.rcs] = e.revision
667 667
668 668 d = 0
669 669 for e in l.entries:
670 670 if re.get(e.rcs, None) == e.parent:
671 671 assert not d
672 672 d = 1
673 673 break
674 674
675 675 for e in r.entries:
676 676 if le.get(e.rcs, None) == e.parent:
677 677 if d:
678 678 odd.add((l, r))
679 679 d = -1
680 680 break
681 681 # By this point, the changesets are sufficiently compared that
682 682 # we don't really care about ordering. However, this leaves
683 683 # some race conditions in the tests, so we compare on the
684 684 # number of files modified, the files contained in each
685 685 # changeset, and the branchpoints in the change to ensure test
686 686 # output remains stable.
687 687
688 688 # recommended replacement for cmp from
689 689 # https://docs.python.org/3.0/whatsnew/3.0.html
690 690 c = lambda x, y: (x > y) - (x < y)
691 691 # Sort bigger changes first.
692 692 if not d:
693 693 d = c(len(l.entries), len(r.entries))
694 694 # Try sorting by filename in the change.
695 695 if not d:
696 696 d = c([e.file for e in l.entries], [e.file for e in r.entries])
697 697 # Try and put changes without a branch point before ones with
698 698 # a branch point.
699 699 if not d:
700 700 d = c(len(l.branchpoints), len(r.branchpoints))
701 701 return d
702 702
703 703 changesets.sort(cscmp)
704 704
705 705 # Collect tags
706 706
707 707 globaltags = {}
708 708 for c in changesets:
709 709 for e in c.entries:
710 710 for tag in e.tags:
711 711 # remember which is the latest changeset to have this tag
712 712 globaltags[tag] = c
713 713
714 714 for c in changesets:
715 715 tags = set()
716 716 for e in c.entries:
717 717 tags.update(e.tags)
718 718 # remember tags only if this is the latest changeset to have it
719 719 c.tags = sorted(tag for tag in tags if globaltags[tag] is c)
720 720
721 721 # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
722 722 # by inserting dummy changesets with two parents, and handle
723 723 # {{mergefrombranch BRANCHNAME}} by setting two parents.
724 724
725 725 if mergeto is None:
726 726 mergeto = r'{{mergetobranch ([-\w]+)}}'
727 727 if mergeto:
728 728 mergeto = re.compile(mergeto)
729 729
730 730 if mergefrom is None:
731 731 mergefrom = r'{{mergefrombranch ([-\w]+)}}'
732 732 if mergefrom:
733 733 mergefrom = re.compile(mergefrom)
734 734
735 735 versions = {} # changeset index where we saw any particular file version
736 736 branches = {} # changeset index where we saw a branch
737 737 n = len(changesets)
738 738 i = 0
739 739 while i < n:
740 740 c = changesets[i]
741 741
742 742 for f in c.entries:
743 743 versions[(f.rcs, f.revision)] = i
744 744
745 745 p = None
746 746 if c.branch in branches:
747 747 p = branches[c.branch]
748 748 else:
749 749 # first changeset on a new branch
750 750 # the parent is a changeset with the branch in its
751 751 # branchpoints such that it is the latest possible
752 752 # commit without any intervening, unrelated commits.
753 753
754 754 for candidate in xrange(i):
755 755 if c.branch not in changesets[candidate].branchpoints:
756 756 if p is not None:
757 757 break
758 758 continue
759 759 p = candidate
760 760
761 761 c.parents = []
762 762 if p is not None:
763 763 p = changesets[p]
764 764
765 765 # Ensure no changeset has a synthetic changeset as a parent.
766 766 while p.synthetic:
767 767 assert len(p.parents) <= 1, \
768 768 _('synthetic changeset cannot have multiple parents')
769 769 if p.parents:
770 770 p = p.parents[0]
771 771 else:
772 772 p = None
773 773 break
774 774
775 775 if p is not None:
776 776 c.parents.append(p)
777 777
778 778 if c.mergepoint:
779 779 if c.mergepoint == 'HEAD':
780 780 c.mergepoint = None
781 781 c.parents.append(changesets[branches[c.mergepoint]])
782 782
783 783 if mergefrom:
784 784 m = mergefrom.search(c.comment)
785 785 if m:
786 786 m = m.group(1)
787 787 if m == 'HEAD':
788 788 m = None
789 789 try:
790 790 candidate = changesets[branches[m]]
791 791 except KeyError:
792 792 ui.warn(_("warning: CVS commit message references "
793 793 "non-existent branch %r:\n%s\n")
794 794 % (m, c.comment))
795 795 if m in branches and c.branch != m and not candidate.synthetic:
796 796 c.parents.append(candidate)
797 797
798 798 if mergeto:
799 799 m = mergeto.search(c.comment)
800 800 if m:
801 801 if m.groups():
802 802 m = m.group(1)
803 803 if m == 'HEAD':
804 804 m = None
805 805 else:
806 806 m = None # if no group found then merge to HEAD
807 807 if m in branches and c.branch != m:
808 808 # insert empty changeset for merge
809 809 cc = changeset(
810 810 author=c.author, branch=m, date=c.date,
811 811 comment='convert-repo: CVS merge from branch %s'
812 812 % c.branch,
813 813 entries=[], tags=[],
814 814 parents=[changesets[branches[m]], c])
815 815 changesets.insert(i + 1, cc)
816 816 branches[m] = i + 1
817 817
818 818 # adjust our loop counters now we have inserted a new entry
819 819 n += 1
820 820 i += 2
821 821 continue
822 822
823 823 branches[c.branch] = i
824 824 i += 1
825 825
826 826 # Drop synthetic changesets (safe now that we have ensured no other
827 827 # changesets can have them as parents).
828 828 i = 0
829 829 while i < len(changesets):
830 830 if changesets[i].synthetic:
831 831 del changesets[i]
832 832 else:
833 833 i += 1
834 834
835 835 # Number changesets
836 836
837 837 for i, c in enumerate(changesets):
838 838 c.id = i + 1
839 839
840 840 if odd:
841 841 for l, r in odd:
842 842 if l.id is not None and r.id is not None:
843 843 ui.warn(_('changeset %d is both before and after %d\n')
844 844 % (l.id, r.id))
845 845
846 846 ui.status(_('%d changeset entries\n') % len(changesets))
847 847
848 848 hook.hook(ui, None, "cvschangesets", True, changesets=changesets)
849 849
850 850 return changesets
851 851
852 852
853 853 def debugcvsps(ui, *args, **opts):
854 854 '''Read CVS rlog for current directory or named path in
855 855 repository, and convert the log to changesets based on matching
856 856 commit log entries and dates.
857 857 '''
858 858 opts = pycompat.byteskwargs(opts)
859 859 if opts["new_cache"]:
860 860 cache = "write"
861 861 elif opts["update_cache"]:
862 862 cache = "update"
863 863 else:
864 864 cache = None
865 865
866 866 revisions = opts["revisions"]
867 867
868 868 try:
869 869 if args:
870 870 log = []
871 871 for d in args:
872 872 log += createlog(ui, d, root=opts["root"], cache=cache)
873 873 else:
874 874 log = createlog(ui, root=opts["root"], cache=cache)
875 875 except logerror as e:
876 876 ui.write("%r\n"%e)
877 877 return
878 878
879 879 changesets = createchangeset(ui, log, opts["fuzz"])
880 880 del log
881 881
882 882 # Print changesets (optionally filtered)
883 883
884 884 off = len(revisions)
885 885 branches = {} # latest version number in each branch
886 886 ancestors = {} # parent branch
887 887 for cs in changesets:
888 888
889 889 if opts["ancestors"]:
890 890 if cs.branch not in branches and cs.parents and cs.parents[0].id:
891 891 ancestors[cs.branch] = (changesets[cs.parents[0].id - 1].branch,
892 892 cs.parents[0].id)
893 893 branches[cs.branch] = cs.id
894 894
895 895 # limit by branches
896 896 if opts["branches"] and (cs.branch or 'HEAD') not in opts["branches"]:
897 897 continue
898 898
899 899 if not off:
900 900 # Note: trailing spaces on several lines here are needed to have
901 901 # bug-for-bug compatibility with cvsps.
902 902 ui.write('---------------------\n')
903 903 ui.write(('PatchSet %d \n' % cs.id))
904 904 ui.write(('Date: %s\n' % util.datestr(cs.date,
905 905 '%Y/%m/%d %H:%M:%S %1%2')))
906 906 ui.write(('Author: %s\n' % cs.author))
907 907 ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
908 908 ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
909 909 ','.join(cs.tags) or '(none)')))
910 910 if cs.branchpoints:
911 911 ui.write(('Branchpoints: %s \n') %
912 912 ', '.join(sorted(cs.branchpoints)))
913 913 if opts["parents"] and cs.parents:
914 914 if len(cs.parents) > 1:
915 915 ui.write(('Parents: %s\n' %
916 916 (','.join([str(p.id) for p in cs.parents]))))
917 917 else:
918 918 ui.write(('Parent: %d\n' % cs.parents[0].id))
919 919
920 920 if opts["ancestors"]:
921 921 b = cs.branch
922 922 r = []
923 923 while b:
924 924 b, c = ancestors[b]
925 925 r.append('%s:%d:%d' % (b or "HEAD", c, branches[b]))
926 926 if r:
927 927 ui.write(('Ancestors: %s\n' % (','.join(r))))
928 928
929 929 ui.write(('Log:\n'))
930 930 ui.write('%s\n\n' % cs.comment)
931 931 ui.write(('Members: \n'))
932 932 for f in cs.entries:
933 933 fn = f.file
934 934 if fn.startswith(opts["prefix"]):
935 935 fn = fn[len(opts["prefix"]):]
936 936 ui.write('\t%s:%s->%s%s \n' % (
937 937 fn, '.'.join([str(x) for x in f.parent]) or 'INITIAL',
938 938 '.'.join([str(x) for x in f.revision]),
939 939 ['', '(DEAD)'][f.dead]))
940 940 ui.write('\n')
941 941
942 942 # have we seen the start tag?
943 943 if revisions and off:
944 944 if revisions[0] == str(cs.id) or \
945 945 revisions[0] in cs.tags:
946 946 off = False
947 947
948 948 # see if we reached the end tag
949 949 if len(revisions) > 1 and not off:
950 950 if revisions[1] == str(cs.id) or \
951 951 revisions[1] in cs.tags:
952 952 break
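Before the second file, a brief aside on the cache-naming code above (cvsps.py lines 163-175), since it contains the only change visible in this view: each component of the CVSROOT and module directory is reduced to runs of word characters and the pieces are joined so that distinct roots cannot collide. A rough Python 3 rendering with hypothetical values (the real code then places the name under ~/.hg.cvsps via os.path.join):

    import re

    root = b':pserver:user@server:/path'
    directory = b'mymodule'

    cachefile = root.split(b':') + [directory, b'cache']
    cachefile = [b'-'.join(re.findall(br'\w+', s)) for s in cachefile if s]
    print(b'.'.join(cachefile))
    # b'pserver.user-server.path.mymodule.cache'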
@@ -1,1356 +1,1356 @@ hgext/convert/subversion.py
1 1 # Subversion 1.4/1.5 Python API backend
2 2 #
3 3 # Copyright(C) 2007 Daniel Holth et al
4 4 from __future__ import absolute_import
5 5
6 6 import os
7 7 import re
8 8 import tempfile
9 9 import xml.dom.minidom
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial import (
13 13 encoding,
14 14 error,
15 15 pycompat,
16 16 util,
17 17 vfs as vfsmod,
18 18 )
19 19
20 20 from . import common
21 21
22 22 pickle = util.pickle
23 23 stringio = util.stringio
24 24 propertycache = util.propertycache
25 25 urlerr = util.urlerr
26 26 urlreq = util.urlreq
27 27
28 28 commandline = common.commandline
29 29 commit = common.commit
30 30 converter_sink = common.converter_sink
31 31 converter_source = common.converter_source
32 32 decodeargs = common.decodeargs
33 33 encodeargs = common.encodeargs
34 34 makedatetimestamp = common.makedatetimestamp
35 35 mapfile = common.mapfile
36 36 MissingTool = common.MissingTool
37 37 NoRepo = common.NoRepo
38 38
39 39 # Subversion stuff. Works best with very recent Python SVN bindings
40 40 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
41 41 # these bindings.
42 42
43 43 try:
44 44 import svn
45 45 import svn.client
46 46 import svn.core
47 47 import svn.ra
48 48 import svn.delta
49 49 from . import transport
50 50 import warnings
51 51 warnings.filterwarnings('ignore',
52 52 module='svn.core',
53 53 category=DeprecationWarning)
54 54 svn.core.SubversionException # trigger import to catch error
55 55
56 56 except ImportError:
57 57 svn = None
58 58
59 59 class SvnPathNotFound(Exception):
60 60 pass
61 61
62 62 def revsplit(rev):
63 63 """Parse a revision string and return (uuid, path, revnum).
64 64 >>> revsplit(b'svn:a2147622-4a9f-4db4-a8d3-13562ff547b2'
65 65 ... b'/proj%20B/mytrunk/mytrunk@1')
66 66 ('a2147622-4a9f-4db4-a8d3-13562ff547b2', '/proj%20B/mytrunk/mytrunk', 1)
67 67 >>> revsplit(b'svn:8af66a51-67f5-4354-b62c-98d67cc7be1d@1')
68 68 ('', '', 1)
69 69 >>> revsplit(b'@7')
70 70 ('', '', 7)
71 71 >>> revsplit(b'7')
72 72 ('', '', 0)
73 73 >>> revsplit(b'bad')
74 74 ('', '', 0)
75 75 """
76 76 parts = rev.rsplit('@', 1)
77 77 revnum = 0
78 78 if len(parts) > 1:
79 79 revnum = int(parts[1])
80 80 parts = parts[0].split('/', 1)
81 81 uuid = ''
82 82 mod = ''
83 83 if len(parts) > 1 and parts[0].startswith('svn:'):
84 84 uuid = parts[0][4:]
85 85 mod = '/' + parts[1]
86 86 return uuid, mod, revnum
87 87
88 88 def quote(s):
89 89 # As of svn 1.7, many svn calls expect "canonical" paths. In
90 90 # theory, we should call svn.core.*canonicalize() on all paths
91 91 # before passing them to the API. Instead, we assume the base url
92 92 # is canonical and copy the behaviour of svn URL encoding function
93 93 # so we can extend it safely with new components. The "safe"
94 94 # characters were taken from the "svn_uri__char_validity" table in
95 95 # libsvn_subr/path.c.
96 96 return urlreq.quote(s, "!$&'()*+,-./:=@_~")
97 97
98 98 def geturl(path):
99 99 try:
100 100 return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
101 101 except svn.core.SubversionException:
102 102 # svn.client.url_from_path() fails with local repositories
103 103 pass
104 104 if os.path.isdir(path):
105 105 path = os.path.normpath(os.path.abspath(path))
106 106 if pycompat.iswindows:
107 107 path = '/' + util.normpath(path)
108 108 # Module URL is later compared with the repository URL returned
109 109 # by svn API, which is UTF-8.
110 110 path = encoding.tolocal(path)
111 111 path = 'file://%s' % quote(path)
112 112 return svn.core.svn_path_canonicalize(path)
113 113
114 114 def optrev(number):
115 115 optrev = svn.core.svn_opt_revision_t()
116 116 optrev.kind = svn.core.svn_opt_revision_number
117 117 optrev.value.number = number
118 118 return optrev
119 119
120 120 class changedpath(object):
121 121 def __init__(self, p):
122 122 self.copyfrom_path = p.copyfrom_path
123 123 self.copyfrom_rev = p.copyfrom_rev
124 124 self.action = p.action
125 125
126 126 def get_log_child(fp, url, paths, start, end, limit=0,
127 127 discover_changed_paths=True, strict_node_history=False):
128 128 protocol = -1
129 129 def receiver(orig_paths, revnum, author, date, message, pool):
130 130 paths = {}
131 131 if orig_paths is not None:
132 132 for k, v in orig_paths.iteritems():
133 133 paths[k] = changedpath(v)
134 134 pickle.dump((paths, revnum, author, date, message),
135 135 fp, protocol)
136 136
137 137 try:
138 138 # Use an ra of our own so that our parent can consume
139 139 # our results without confusing the server.
140 140 t = transport.SvnRaTransport(url=url)
141 141 svn.ra.get_log(t.ra, paths, start, end, limit,
142 142 discover_changed_paths,
143 143 strict_node_history,
144 144 receiver)
145 145 except IOError:
146 146 # Caller may interrupt the iteration
147 147 pickle.dump(None, fp, protocol)
148 148 except Exception as inst:
149 149 pickle.dump(str(inst), fp, protocol)
150 150 else:
151 151 pickle.dump(None, fp, protocol)
152 152 fp.close()
153 153 # With a large history, the cleanup process goes crazy and suddenly
154 154 # consumes a *huge* amount of memory. The output file being closed,
155 155 # there is no need for clean termination.
156 156 os._exit(0)
157 157
158 158 def debugsvnlog(ui, **opts):
159 159 """Fetch SVN log in a subprocess and channel them back to parent to
160 160 avoid memory collection issues.
161 161 """
162 162 if svn is None:
163 163 raise error.Abort(_('debugsvnlog could not load Subversion python '
164 164 'bindings'))
165 165
166 166 args = decodeargs(ui.fin.read())
167 167 get_log_child(ui.fout, *args)
168 168
169 169 class logstream(object):
170 170 """Interruptible revision log iterator."""
171 171 def __init__(self, stdout):
172 172 self._stdout = stdout
173 173
174 174 def __iter__(self):
175 175 while True:
176 176 try:
177 177 entry = pickle.load(self._stdout)
178 178 except EOFError:
179 179 raise error.Abort(_('Mercurial failed to run itself, check'
180 180 ' hg executable is in PATH'))
181 181 try:
182 182 orig_paths, revnum, author, date, message = entry
183 183 except (TypeError, ValueError):
184 184 if entry is None:
185 185 break
186 186 raise error.Abort(_("log stream exception '%s'") % entry)
187 187 yield entry
188 188
189 189 def close(self):
190 190 if self._stdout:
191 191 self._stdout.close()
192 192 self._stdout = None
193 193
194 194 class directlogstream(list):
195 195 """Direct revision log iterator.
196 196 This can be used for debugging and development but it will probably leak
197 197 memory and is not suitable for real conversions."""
198 198 def __init__(self, url, paths, start, end, limit=0,
199 199 discover_changed_paths=True, strict_node_history=False):
200 200
201 201 def receiver(orig_paths, revnum, author, date, message, pool):
202 202 paths = {}
203 203 if orig_paths is not None:
204 204 for k, v in orig_paths.iteritems():
205 205 paths[k] = changedpath(v)
206 206 self.append((paths, revnum, author, date, message))
207 207
208 208 # Use an ra of our own so that our parent can consume
209 209 # our results without confusing the server.
210 210 t = transport.SvnRaTransport(url=url)
211 211 svn.ra.get_log(t.ra, paths, start, end, limit,
212 212 discover_changed_paths,
213 213 strict_node_history,
214 214 receiver)
215 215
216 216 def close(self):
217 217 pass
218 218
219 219 # Check to see if the given path is a local Subversion repo. Verify this by
220 220 # looking for several svn-specific files and directories in the given
221 221 # directory.
222 222 def filecheck(ui, path, proto):
223 223 for x in ('locks', 'hooks', 'format', 'db'):
224 224 if not os.path.exists(os.path.join(path, x)):
225 225 return False
226 226 return True
227 227
228 228 # Check to see if a given path is the root of an svn repo over http. We verify
229 229 # this by requesting a version-controlled URL we know can't exist and looking
230 230 # for the svn-specific "not found" XML.
231 231 def httpcheck(ui, path, proto):
232 232 try:
233 233 opener = urlreq.buildopener()
234 234 rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path), 'rb')
235 235 data = rsp.read()
236 236 except urlerr.httperror as inst:
237 237 if inst.code != 404:
238 238 # Except for 404 we cannot know for sure this is not an svn repo
239 239 ui.warn(_('svn: cannot probe remote repository, assume it could '
240 240 'be a subversion repository. Use --source-type if you '
241 241 'know better.\n'))
242 242 return True
243 243 data = inst.fp.read()
244 244 except Exception:
245 245 # Could be urlerr.urlerror if the URL is invalid or anything else.
246 246 return False
247 247 return '<m:human-readable errcode="160013">' in data
248 248
249 249 protomap = {'http': httpcheck,
250 250 'https': httpcheck,
251 251 'file': filecheck,
252 252 }
253 253 def issvnurl(ui, url):
254 254 try:
255 255 proto, path = url.split('://', 1)
256 256 if proto == 'file':
257 257 if (pycompat.iswindows and path[:1] == '/'
258 258 and path[1:2].isalpha() and path[2:6].lower() == '%3a/'):
259 259 path = path[:2] + ':/' + path[6:]
260 260 path = urlreq.url2pathname(path)
261 261 except ValueError:
262 262 proto = 'file'
263 263 path = os.path.abspath(url)
264 264 if proto == 'file':
265 265 path = util.pconvert(path)
266 266 check = protomap.get(proto, lambda *args: False)
267 267 while '/' in path:
268 268 if check(ui, path, proto):
269 269 return True
270 270 path = path.rsplit('/', 1)[0]
271 271 return False
272 272
273 273 # SVN conversion code stolen from bzr-svn and tailor
274 274 #
275 275 # Subversion looks like a versioned filesystem; branch structures
276 276 # are defined by convention and not enforced by the tool. First,
277 277 # we define the potential branches (modules) as "trunk" and "branches"
278 278 # children directories. Revisions are then identified by their
279 279 # module and revision number (and a repository identifier).
280 280 #
281 281 # The revision graph is really a tree (or a forest). By default, a
282 282 # revision parent is the previous revision in the same module. If the
283 283 # module directory is copied/moved from another module then the
284 284 # revision is the module root and its parent the source revision in
285 285 # the parent module. A revision has at most one parent.
286 286 #
287 287 class svn_source(converter_source):
288 288 def __init__(self, ui, repotype, url, revs=None):
289 289 super(svn_source, self).__init__(ui, repotype, url, revs=revs)
290 290
291 291 if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
292 292 (os.path.exists(url) and
293 293 os.path.exists(os.path.join(url, '.svn'))) or
294 294 issvnurl(ui, url)):
295 295 raise NoRepo(_("%s does not look like a Subversion repository")
296 296 % url)
297 297 if svn is None:
298 298 raise MissingTool(_('could not load Subversion python bindings'))
299 299
300 300 try:
301 301 version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
302 302 if version < (1, 4):
303 303 raise MissingTool(_('Subversion python bindings %d.%d found, '
304 304 '1.4 or later required') % version)
305 305 except AttributeError:
306 306 raise MissingTool(_('Subversion python bindings are too old, 1.4 '
307 307 'or later required'))
308 308
309 309 self.lastrevs = {}
310 310
311 311 latest = None
312 312 try:
313 313 # Support file://path@rev syntax. Useful e.g. to convert
314 314 # deleted branches.
315 315 at = url.rfind('@')
316 316 if at >= 0:
317 317 latest = int(url[at + 1:])
318 318 url = url[:at]
319 319 except ValueError:
320 320 pass
321 321 self.url = geturl(url)
322 322 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
323 323 try:
324 324 self.transport = transport.SvnRaTransport(url=self.url)
325 325 self.ra = self.transport.ra
326 326 self.ctx = self.transport.client
327 327 self.baseurl = svn.ra.get_repos_root(self.ra)
328 328 # Module is either empty or a repository path starting with
329 329 # a slash and not ending with a slash.
330 330 self.module = urlreq.unquote(self.url[len(self.baseurl):])
331 331 self.prevmodule = None
332 332 self.rootmodule = self.module
333 333 self.commits = {}
334 334 self.paths = {}
335 335 self.uuid = svn.ra.get_uuid(self.ra)
336 336 except svn.core.SubversionException:
337 337 ui.traceback()
338 338 svnversion = '%d.%d.%d' % (svn.core.SVN_VER_MAJOR,
339 339 svn.core.SVN_VER_MINOR,
340 340 svn.core.SVN_VER_MICRO)
341 341 raise NoRepo(_("%s does not look like a Subversion repository "
342 342 "to libsvn version %s")
343 343 % (self.url, svnversion))
344 344
345 345 if revs:
346 346 if len(revs) > 1:
347 347 raise error.Abort(_('subversion source does not support '
348 348 'specifying multiple revisions'))
349 349 try:
350 350 latest = int(revs[0])
351 351 except ValueError:
352 352 raise error.Abort(_('svn: revision %s is not an integer') %
353 353 revs[0])
354 354
355 355 trunkcfg = self.ui.config('convert', 'svn.trunk')
356 356 if trunkcfg is None:
357 357 trunkcfg = 'trunk'
358 358 self.trunkname = trunkcfg.strip('/')
359 359 self.startrev = self.ui.config('convert', 'svn.startrev')
360 360 try:
361 361 self.startrev = int(self.startrev)
362 362 if self.startrev < 0:
363 363 self.startrev = 0
364 364 except ValueError:
365 365 raise error.Abort(_('svn: start revision %s is not an integer')
366 366 % self.startrev)
367 367
368 368 try:
369 369 self.head = self.latest(self.module, latest)
370 370 except SvnPathNotFound:
371 371 self.head = None
372 372 if not self.head:
373 373 raise error.Abort(_('no revision found in module %s')
374 374 % self.module)
375 375 self.last_changed = self.revnum(self.head)
376 376
377 377 self._changescache = (None, None)
378 378
379 379 if os.path.exists(os.path.join(url, '.svn/entries')):
380 380 self.wc = url
381 381 else:
382 382 self.wc = None
383 383 self.convertfp = None
384 384
385 385 def setrevmap(self, revmap):
386 386 lastrevs = {}
387 387 for revid in revmap:
388 388 uuid, module, revnum = revsplit(revid)
389 389 lastrevnum = lastrevs.setdefault(module, revnum)
390 390 if revnum > lastrevnum:
391 391 lastrevs[module] = revnum
392 392 self.lastrevs = lastrevs
393 393
394 394 def exists(self, path, optrev):
395 395 try:
396 396 svn.client.ls(self.url.rstrip('/') + '/' + quote(path),
397 397 optrev, False, self.ctx)
398 398 return True
399 399 except svn.core.SubversionException:
400 400 return False
401 401
402 402 def getheads(self):
403 403
404 404 def isdir(path, revnum):
405 405 kind = self._checkpath(path, revnum)
406 406 return kind == svn.core.svn_node_dir
407 407
408 408 def getcfgpath(name, rev):
409 409 cfgpath = self.ui.config('convert', 'svn.' + name)
410 410 if cfgpath is not None and cfgpath.strip() == '':
411 411 return None
412 412 path = (cfgpath or name).strip('/')
413 413 if not self.exists(path, rev):
414 414 if self.module.endswith(path) and name == 'trunk':
415 415 # we are converting from inside this directory
416 416 return None
417 417 if cfgpath:
418 418 raise error.Abort(_('expected %s to be at %r, but not found'
419 419 ) % (name, path))
420 420 return None
421 421 self.ui.note(_('found %s at %r\n') % (name, path))
422 422 return path
423 423
424 424 rev = optrev(self.last_changed)
425 425 oldmodule = ''
426 426 trunk = getcfgpath('trunk', rev)
427 427 self.tags = getcfgpath('tags', rev)
428 428 branches = getcfgpath('branches', rev)
429 429
430 430 # If the project has a trunk or branches, we will extract heads
431 431 # from them. We keep the project root otherwise.
432 432 if trunk:
433 433 oldmodule = self.module or ''
434 434 self.module += '/' + trunk
435 435 self.head = self.latest(self.module, self.last_changed)
436 436 if not self.head:
437 437 raise error.Abort(_('no revision found in module %s')
438 438 % self.module)
439 439
440 440 # First head in the list is the module's head
441 441 self.heads = [self.head]
442 442 if self.tags is not None:
443 443 self.tags = '%s/%s' % (oldmodule, (self.tags or 'tags'))
444 444
445 445 # Check if branches bring a few more heads to the list
446 446 if branches:
447 447 rpath = self.url.strip('/')
448 448 branchnames = svn.client.ls(rpath + '/' + quote(branches),
449 449 rev, False, self.ctx)
450 450 for branch in sorted(branchnames):
451 451 module = '%s/%s/%s' % (oldmodule, branches, branch)
452 452 if not isdir(module, self.last_changed):
453 453 continue
454 454 brevid = self.latest(module, self.last_changed)
455 455 if not brevid:
456 456 self.ui.note(_('ignoring empty branch %s\n') % branch)
457 457 continue
458 458 self.ui.note(_('found branch %s at %d\n') %
459 459 (branch, self.revnum(brevid)))
460 460 self.heads.append(brevid)
461 461
462 462 if self.startrev and self.heads:
463 463 if len(self.heads) > 1:
464 464 raise error.Abort(_('svn: start revision is not supported '
465 465 'with more than one branch'))
466 466 revnum = self.revnum(self.heads[0])
467 467 if revnum < self.startrev:
468 468 raise error.Abort(
469 469 _('svn: no revision found after start revision %d')
470 470 % self.startrev)
471 471
472 472 return self.heads
473 473
474 474 def _getchanges(self, rev, full):
475 475 (paths, parents) = self.paths[rev]
476 476 copies = {}
477 477 if parents:
478 478 files, self.removed, copies = self.expandpaths(rev, paths, parents)
479 479 if full or not parents:
480 480 # Perform a full checkout on roots
481 481 uuid, module, revnum = revsplit(rev)
482 482 entries = svn.client.ls(self.baseurl + quote(module),
483 483 optrev(revnum), True, self.ctx)
484 484 files = [n for n, e in entries.iteritems()
485 485 if e.kind == svn.core.svn_node_file]
486 486 self.removed = set()
487 487
488 488 files.sort()
489 489 files = zip(files, [rev] * len(files))
490 490 return (files, copies)
491 491
492 492 def getchanges(self, rev, full):
493 493 # reuse cache from getchangedfiles
494 494 if self._changescache[0] == rev and not full:
495 495 (files, copies) = self._changescache[1]
496 496 else:
497 497 (files, copies) = self._getchanges(rev, full)
498 498 # caller caches the result, so free it here to release memory
499 499 del self.paths[rev]
500 500 return (files, copies, set())
501 501
502 502 def getchangedfiles(self, rev, i):
503 503 # called from filemap - cache computed values for reuse in getchanges
504 504 (files, copies) = self._getchanges(rev, False)
505 505 self._changescache = (rev, (files, copies))
506 506 return [f[0] for f in files]
507 507
508 508 def getcommit(self, rev):
509 509 if rev not in self.commits:
510 510 uuid, module, revnum = revsplit(rev)
511 511 self.module = module
512 512 self.reparent(module)
513 513 # We assume that:
514 514 # - requests for revisions after "stop" come from the
515 515 # revision graph backward traversal. Cache all of them
516 516 # down to stop, they will be used eventually.
517 517 # - requests for revisions before "stop" come to get
518 518 # isolated branches parents. Just fetch what is needed.
519 519 stop = self.lastrevs.get(module, 0)
520 520 if revnum < stop:
521 521 stop = revnum + 1
522 522 self._fetch_revisions(revnum, stop)
523 523 if rev not in self.commits:
524 524 raise error.Abort(_('svn: revision %s not found') % revnum)
525 525 revcommit = self.commits[rev]
526 526 # caller caches the result, so free it here to release memory
527 527 del self.commits[rev]
528 528 return revcommit
529 529
530 530 def checkrevformat(self, revstr, mapname='splicemap'):
531 531 """ fails if revision format does not match the correct format"""
532 532 if not re.match(r'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-'
533 533 r'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]'
534 534 r'{12,12}(.*)\@[0-9]+$',revstr):
535 535 raise error.Abort(_('%s entry %s is not a valid revision'
536 536 ' identifier') % (mapname, revstr))
537 537
538 538 def numcommits(self):
539 539 return int(self.head.rsplit('@', 1)[1]) - self.startrev
540 540
541 541 def gettags(self):
542 542 tags = {}
543 543 if self.tags is None:
544 544 return tags
545 545
546 546 # svn tags are just a convention, project branches left in a
547 547 # 'tags' directory. There is no other relationship than
548 548 # ancestry, which is expensive to discover and makes them hard
549 549 # to update incrementally. Worse, past revisions may be
550 550 # referenced by tags far away in the future, requiring a deep
551 551 # history traversal on every calculation. Current code
552 552 # performs a single backward traversal, tracking moves within
553 553 # the tags directory (tag renaming) and recording a new tag
554 554 # every time a project is copied from outside the tags
555 555 # directory. It also lists deleted tags, this behaviour may
556 556 # change in the future.
557 557 pendings = []
558 558 tagspath = self.tags
559 559 start = svn.ra.get_latest_revnum(self.ra)
560 560 stream = self._getlog([self.tags], start, self.startrev)
561 561 try:
562 562 for entry in stream:
563 563 origpaths, revnum, author, date, message = entry
564 564 if not origpaths:
565 565 origpaths = []
566 566 copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
567 567 in origpaths.iteritems() if e.copyfrom_path]
568 568 # Apply moves/copies from more specific to general
569 569 copies.sort(reverse=True)
570 570
571 571 srctagspath = tagspath
572 572 if copies and copies[-1][2] == tagspath:
573 573 # Track tags directory moves
574 574 srctagspath = copies.pop()[0]
575 575
576 576 for source, sourcerev, dest in copies:
577 577 if not dest.startswith(tagspath + '/'):
578 578 continue
579 579 for tag in pendings:
580 580 if tag[0].startswith(dest):
581 581 tagpath = source + tag[0][len(dest):]
582 582 tag[:2] = [tagpath, sourcerev]
583 583 break
584 584 else:
585 585 pendings.append([source, sourcerev, dest])
586 586
587 587 # Filter out tags with children coming from different
588 588 # parts of the repository like:
589 589 # /tags/tag.1 (from /trunk:10)
590 590 # /tags/tag.1/foo (from /branches/foo:12)
591 591 # Here /tags/tag.1 is discarded, as well as its children.
592 592 # It happens with tools like cvs2svn. Such tags cannot
593 593 # be represented in mercurial.
594 594 addeds = dict((p, e.copyfrom_path) for p, e
595 595 in origpaths.iteritems()
596 596 if e.action == 'A' and e.copyfrom_path)
597 597 badroots = set()
598 598 for destroot in addeds:
599 599 for source, sourcerev, dest in pendings:
600 600 if (not dest.startswith(destroot + '/')
601 601 or source.startswith(addeds[destroot] + '/')):
602 602 continue
603 603 badroots.add(destroot)
604 604 break
605 605
606 606 for badroot in badroots:
607 607 pendings = [p for p in pendings if p[2] != badroot
608 608 and not p[2].startswith(badroot + '/')]
609 609
610 610 # Tell tag renamings from tag creations
611 611 renamings = []
612 612 for source, sourcerev, dest in pendings:
613 613 tagname = dest.split('/')[-1]
614 614 if source.startswith(srctagspath):
615 615 renamings.append([source, sourcerev, tagname])
616 616 continue
617 617 if tagname in tags:
618 618 # Keep the latest tag value
619 619 continue
620 620 # From revision may be fake, get one with changes
621 621 try:
622 622 tagid = self.latest(source, sourcerev)
623 623 if tagid and tagname not in tags:
624 624 tags[tagname] = tagid
625 625 except SvnPathNotFound:
626 626 # It happens when we are following directories
627 627 # we assumed were copied with their parents
628 628 # but were really created in the tag
629 629 # directory.
630 630 pass
631 631 pendings = renamings
632 632 tagspath = srctagspath
633 633 finally:
634 634 stream.close()
635 635 return tags
636 636
637 637 def converted(self, rev, destrev):
638 638 if not self.wc:
639 639 return
640 640 if self.convertfp is None:
641 641 self.convertfp = open(os.path.join(self.wc, '.svn', 'hg-shamap'),
642 642 'ab')
643 643 self.convertfp.write(util.tonativeeol('%s %d\n'
644 644 % (destrev, self.revnum(rev))))
645 645 self.convertfp.flush()
646 646
647 647 def revid(self, revnum, module=None):
648 648 return 'svn:%s%s@%s' % (self.uuid, module or self.module, revnum)
649 649
650 650 def revnum(self, rev):
651 651 return int(rev.split('@')[-1])
652 652
653 653 def latest(self, path, stop=None):
654 654 """Find the latest revid affecting path, up to stop revision
655 655 number. If stop is None, default to repository latest
656 656 revision. It may return a revision in a different module,
657 657 since a branch may be moved without a change being
658 658 reported. Return None if computed module does not belong to
659 659 rootmodule subtree.
660 660 """
661 661 def findchanges(path, start, stop=None):
662 662 stream = self._getlog([path], start, stop or 1)
663 663 try:
664 664 for entry in stream:
665 665 paths, revnum, author, date, message = entry
666 666 if stop is None and paths:
667 667 # We do not know the latest changed revision,
668 668 # keep the first one with changed paths.
669 669 break
670 670 if revnum <= stop:
671 671 break
672 672
673 673 for p in paths:
674 674 if (not path.startswith(p) or
675 675 not paths[p].copyfrom_path):
676 676 continue
677 677 newpath = paths[p].copyfrom_path + path[len(p):]
678 678 self.ui.debug("branch renamed from %s to %s at %d\n" %
679 679 (path, newpath, revnum))
680 680 path = newpath
681 681 break
682 682 if not paths:
683 683 revnum = None
684 684 return revnum, path
685 685 finally:
686 686 stream.close()
687 687
688 688 if not path.startswith(self.rootmodule):
689 689 # Requests on foreign branches may be forbidden at server level
690 690 self.ui.debug('ignoring foreign branch %r\n' % path)
691 691 return None
692 692
693 693 if stop is None:
694 694 stop = svn.ra.get_latest_revnum(self.ra)
695 695 try:
696 696 prevmodule = self.reparent('')
697 697 dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
698 698 self.reparent(prevmodule)
699 699 except svn.core.SubversionException:
700 700 dirent = None
701 701 if not dirent:
702 702 raise SvnPathNotFound(_('%s not found up to revision %d')
703 703 % (path, stop))
704 704
705 705 # stat() gives us the previous revision on this line of
706 706 # development, but it might be in *another module*. Fetch the
707 707 # log and detect renames down to the latest revision.
708 708 revnum, realpath = findchanges(path, stop, dirent.created_rev)
709 709 if revnum is None:
710 710 # Tools like svnsync can create empty revisions when
711 711 # synchronizing only a subtree, for instance. Such empty
712 712 # revisions keep their original created_rev values even
713 713 # though all changes have disappeared, and can be
714 714 # returned by ra.stat(), at least when stating the root
715 715 # module. In that case, do not trust created_rev and scan
716 716 # the whole history.
717 717 revnum, realpath = findchanges(path, stop)
718 718 if revnum is None:
719 719 self.ui.debug('ignoring empty branch %r\n' % realpath)
720 720 return None
721 721
722 722 if not realpath.startswith(self.rootmodule):
723 723 self.ui.debug('ignoring foreign branch %r\n' % realpath)
724 724 return None
725 725 return self.revid(revnum, realpath)
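findchanges() follows branch renames by splicing the copyfrom_path of a changed ancestor onto the queried path. That rewrite step, isolated with made-up paths:

# Sketch of the rename-following rewrite, hypothetical paths:
path = '/branches/new/foo/bar.c'
p = '/branches/new'                  # changed path reported by the log
copyfrom_path = '/branches/old'      # where it was copied from
newpath = copyfrom_path + path[len(p):]
assert newpath == '/branches/old/foo/bar.c'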
726 726
727 727 def reparent(self, module):
728 728 """Reparent the svn transport and return the previous parent."""
729 729 if self.prevmodule == module:
730 730 return module
731 731 svnurl = self.baseurl + quote(module)
732 732 prevmodule = self.prevmodule
733 733 if prevmodule is None:
734 734 prevmodule = ''
735 735 self.ui.debug("reparent to %s\n" % svnurl)
736 736 svn.ra.reparent(self.ra, svnurl)
737 737 self.prevmodule = module
738 738 return prevmodule
739 739
740 740 def expandpaths(self, rev, paths, parents):
741 741 changed, removed = set(), set()
742 742 copies = {}
743 743
744 744 new_module, revnum = revsplit(rev)[1:]
745 745 if new_module != self.module:
746 746 self.module = new_module
747 747 self.reparent(self.module)
748 748
749 749 for i, (path, ent) in enumerate(paths):
750 750 self.ui.progress(_('scanning paths'), i, item=path,
751 751 total=len(paths), unit=_('paths'))
752 752 entrypath = self.getrelpath(path)
753 753
754 754 kind = self._checkpath(entrypath, revnum)
755 755 if kind == svn.core.svn_node_file:
756 756 changed.add(self.recode(entrypath))
757 757 if not ent.copyfrom_path or not parents:
758 758 continue
759 759 # Copy sources not in parent revisions cannot be
760 760 # represented, ignore their origin for now
761 761 pmodule, prevnum = revsplit(parents[0])[1:]
762 762 if ent.copyfrom_rev < prevnum:
763 763 continue
764 764 copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
765 765 if not copyfrom_path:
766 766 continue
767 767 self.ui.debug("copied to %s from %s@%s\n" %
768 768 (entrypath, copyfrom_path, ent.copyfrom_rev))
769 769 copies[self.recode(entrypath)] = self.recode(copyfrom_path)
770 770 elif kind == 0: # gone, but had better be a deleted *file*
771 771 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
772 772 pmodule, prevnum = revsplit(parents[0])[1:]
773 773 parentpath = pmodule + "/" + entrypath
774 774 fromkind = self._checkpath(entrypath, prevnum, pmodule)
775 775
776 776 if fromkind == svn.core.svn_node_file:
777 777 removed.add(self.recode(entrypath))
778 778 elif fromkind == svn.core.svn_node_dir:
779 779 oroot = parentpath.strip('/')
780 780 nroot = path.strip('/')
781 781 children = self._iterfiles(oroot, prevnum)
782 782 for childpath in children:
783 783 childpath = childpath.replace(oroot, nroot)
784 784 childpath = self.getrelpath("/" + childpath, pmodule)
785 785 if childpath:
786 786 removed.add(self.recode(childpath))
787 787 else:
788 788 self.ui.debug('unknown path in revision %d: %s\n' % \
789 789 (revnum, path))
790 790 elif kind == svn.core.svn_node_dir:
791 791 if ent.action == 'M':
792 792 # If the directory just had a prop change,
793 793 # then we shouldn't need to look for its children.
794 794 continue
795 795 if ent.action == 'R' and parents:
796 796 # If a directory is replacing a file, mark the previous
797 797 # file as deleted
798 798 pmodule, prevnum = revsplit(parents[0])[1:]
799 799 pkind = self._checkpath(entrypath, prevnum, pmodule)
800 800 if pkind == svn.core.svn_node_file:
801 801 removed.add(self.recode(entrypath))
802 802 elif pkind == svn.core.svn_node_dir:
803 803 # We do not know what files were kept or removed,
804 804 # mark them all as changed.
805 805 for childpath in self._iterfiles(pmodule, prevnum):
806 806 childpath = self.getrelpath("/" + childpath)
807 807 if childpath:
808 808 changed.add(self.recode(childpath))
809 809
810 810 for childpath in self._iterfiles(path, revnum):
811 811 childpath = self.getrelpath("/" + childpath)
812 812 if childpath:
813 813 changed.add(self.recode(childpath))
814 814
815 815 # Handle directory copies
816 816 if not ent.copyfrom_path or not parents:
817 817 continue
818 818 # Copy sources not in parent revisions cannot be
819 819 # represented, ignore their origin for now
820 820 pmodule, prevnum = revsplit(parents[0])[1:]
821 821 if ent.copyfrom_rev < prevnum:
822 822 continue
823 823 copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
824 824 if not copyfrompath:
825 825 continue
826 826 self.ui.debug("mark %s came from %s:%d\n"
827 827 % (path, copyfrompath, ent.copyfrom_rev))
828 828 children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
829 829 for childpath in children:
830 830 childpath = self.getrelpath("/" + childpath, pmodule)
831 831 if not childpath:
832 832 continue
833 833 copytopath = path + childpath[len(copyfrompath):]
834 834 copytopath = self.getrelpath(copytopath)
835 835 copies[self.recode(copytopath)] = self.recode(childpath)
836 836
837 837 self.ui.progress(_('scanning paths'), None)
838 838 changed.update(removed)
839 839 return (list(changed), removed, copies)
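When a deleted directory used to be a parent-side directory, the loop above rebuilds each child's old path by swapping the directory roots. That swap, isolated with made-up paths:

# Sketch of the root swap in the deleted-directory branch above:
oroot = 'trunk/olddir'               # parent-side root, stripped of '/'
nroot = 'trunk/newdir'               # current-side root
childpath = 'trunk/olddir/a/b.c'     # one file enumerated under oroot
assert childpath.replace(oroot, nroot) == 'trunk/newdir/a/b.c'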
840 840
841 841 def _fetch_revisions(self, from_revnum, to_revnum):
842 842 if from_revnum < to_revnum:
843 843 from_revnum, to_revnum = to_revnum, from_revnum
844 844
845 845 self.child_cset = None
846 846
847 847 def parselogentry(orig_paths, revnum, author, date, message):
848 848 """Return the parsed commit object or None, and True if
849 849 the revision is a branch root.
850 850 """
851 851 self.ui.debug("parsing revision %d (%d changes)\n" %
852 852 (revnum, len(orig_paths)))
853 853
854 854 branched = False
855 855 rev = self.revid(revnum)
856 856 # branch log might return entries for a parent we already have
857 857
858 858 if rev in self.commits or revnum < to_revnum:
859 859 return None, branched
860 860
861 861 parents = []
862 862 # check whether this revision is the start of a branch or part
863 863 # of a branch renaming
864 864 orig_paths = sorted(orig_paths.iteritems())
865 865 root_paths = [(p, e) for p, e in orig_paths
866 866 if self.module.startswith(p)]
867 867 if root_paths:
868 868 path, ent = root_paths[-1]
869 869 if ent.copyfrom_path:
870 870 branched = True
871 871 newpath = ent.copyfrom_path + self.module[len(path):]
872 872 # ent.copyfrom_rev may not be the actual last revision
873 873 previd = self.latest(newpath, ent.copyfrom_rev)
874 874 if previd is not None:
875 875 prevmodule, prevnum = revsplit(previd)[1:]
876 876 if prevnum >= self.startrev:
877 877 parents = [previd]
878 878 self.ui.note(
879 879 _('found parent of branch %s at %d: %s\n') %
880 880 (self.module, prevnum, prevmodule))
881 881 else:
882 882 self.ui.debug("no copyfrom path, don't know what to do.\n")
883 883
884 884 paths = []
885 885 # filter out unrelated paths
886 886 for path, ent in orig_paths:
887 887 if self.getrelpath(path) is None:
888 888 continue
889 889 paths.append((path, ent))
890 890
891 891 # Example SVN datetime. Includes microseconds.
892 892 # ISO-8601 conformant
893 893 # '2007-01-04T17:35:00.902377Z'
894 894 date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
895 895 if self.ui.configbool('convert', 'localtimezone'):
896 896 date = makedatetimestamp(date[0])
897 897
898 898 if message:
899 899 log = self.recode(message)
900 900 else:
901 901 log = ''
902 902
903 903 if author:
904 904 author = self.recode(author)
905 905 else:
906 906 author = ''
907 907
908 908 try:
909 909 branch = self.module.split("/")[-1]
910 910 if branch == self.trunkname:
911 911 branch = None
912 912 except IndexError:
913 913 branch = None
914 914
915 915 cset = commit(author=author,
916 916 date=util.datestr(date, '%Y-%m-%d %H:%M:%S %1%2'),
917 917 desc=log,
918 918 parents=parents,
919 919 branch=branch,
920 920 rev=rev)
921 921
922 922 self.commits[rev] = cset
923 923 # The parents list is *shared* among self.paths and the
924 924 # commit object. Both will be updated below.
925 925 self.paths[rev] = (paths, cset.parents)
926 926 if self.child_cset and not self.child_cset.parents:
927 927 self.child_cset.parents[:] = [rev]
928 928 self.child_cset = cset
929 929 return cset, branched
930 930
931 931 self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
932 932 (self.module, from_revnum, to_revnum))
933 933
934 934 try:
935 935 firstcset = None
936 936 lastonbranch = False
937 937 stream = self._getlog([self.module], from_revnum, to_revnum)
938 938 try:
939 939 for entry in stream:
940 940 paths, revnum, author, date, message = entry
941 941 if revnum < self.startrev:
942 942 lastonbranch = True
943 943 break
944 944 if not paths:
945 945 self.ui.debug('revision %d has no entries\n' % revnum)
946 946 # If we ever leave the loop on an empty
947 947 # revision, do not try to get a parent branch
948 948 lastonbranch = lastonbranch or revnum == 0
949 949 continue
950 950 cset, lastonbranch = parselogentry(paths, revnum, author,
951 951 date, message)
952 952 if cset:
953 953 firstcset = cset
954 954 if lastonbranch:
955 955 break
956 956 finally:
957 957 stream.close()
958 958
959 959 if not lastonbranch and firstcset and not firstcset.parents:
960 960 # The first revision of the sequence (the last fetched one)
961 961 # has invalid parents if not a branch root. Find the parent
962 962 # revision now, if any.
963 963 try:
964 964 firstrevnum = self.revnum(firstcset.rev)
965 965 if firstrevnum > 1:
966 966 latest = self.latest(self.module, firstrevnum - 1)
967 967 if latest:
968 968 firstcset.parents.append(latest)
969 969 except SvnPathNotFound:
970 970 pass
971 971 except svn.core.SubversionException as xxx_todo_changeme:
972 972 (inst, num) = xxx_todo_changeme.args
973 973 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
974 974 raise error.Abort(_('svn: branch has no revision %s')
975 975 % to_revnum)
976 976 raise
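parselogentry() above truncates the ISO-8601 timestamp before parsing. A quick check of that slice, using the example value from the comment:

# The datetime truncation performed before util.parsedate:
date = '2007-01-04T17:35:00.902377Z'       # example from the comment above
assert date[:19] == '2007-01-04T17:35:00'  # microseconds and 'Z' dropped
# date[:19] + ' UTC' is then parsed with the format '%Y-%m-%dT%H:%M:%S'.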
977 977
978 978 def getfile(self, file, rev):
979 979 # TODO: ra.get_file transmits the whole file instead of diffs.
980 980 if file in self.removed:
981 981 return None, None
982 982 mode = ''
983 983 try:
984 984 new_module, revnum = revsplit(rev)[1:]
985 985 if self.module != new_module:
986 986 self.module = new_module
987 987 self.reparent(self.module)
988 988 io = stringio()
989 989 info = svn.ra.get_file(self.ra, file, revnum, io)
990 990 data = io.getvalue()
991 991 # ra.get_file() seems to keep a reference on the input buffer
992 992 # preventing collection. Release it explicitly.
993 993 io.close()
994 994 if isinstance(info, list):
995 995 info = info[-1]
996 996 mode = ("svn:executable" in info) and 'x' or ''
997 997 mode = ("svn:special" in info) and 'l' or mode
998 998 except svn.core.SubversionException as e:
999 999 notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
1000 1000 svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
1001 1001 if e.apr_err in notfound: # File not found
1002 1002 return None, None
1003 1003 raise
1004 1004 if mode == 'l':
1005 1005 link_prefix = "link "
1006 1006 if data.startswith(link_prefix):
1007 1007 data = data[len(link_prefix):]
1008 1008 return data, mode
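Subversion stores svn:special symlinks as blobs prefixed with "link "; the stripping done above, isolated as a sketch:

# Sketch of the symlink payload convention, with a made-up target:
data, mode = 'link some/target', 'l'
link_prefix = 'link '
if mode == 'l' and data.startswith(link_prefix):
    data = data[len(link_prefix):]
assert data == 'some/target'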
1009 1009
1010 1010 def _iterfiles(self, path, revnum):
1011 1011 """Enumerate all files in path at revnum, recursively."""
1012 1012 path = path.strip('/')
1013 1013 pool = svn.core.Pool()
1014 1014 rpath = '/'.join([self.baseurl, quote(path)]).strip('/')
1015 1015 entries = svn.client.ls(rpath, optrev(revnum), True, self.ctx, pool)
1016 1016 if path:
1017 1017 path += '/'
1018 1018 return ((path + p) for p, e in entries.iteritems()
1019 1019 if e.kind == svn.core.svn_node_file)
1020 1020
1021 1021 def getrelpath(self, path, module=None):
1022 1022 if module is None:
1023 1023 module = self.module
1024 1024 # Given the repository url of this wc, say
1025 1025 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
1026 1026 # extract the "entry" portion (a relative path) from what
1027 1027 # svn log --xml says, i.e.
1028 1028 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
1029 1029 # that is to say "tests/PloneTestCase.py"
1030 1030 if path.startswith(module):
1031 1031 relative = path.rstrip('/')[len(module):]
1032 1032 if relative.startswith('/'):
1033 1033 return relative[1:]
1034 1034 elif relative == '':
1035 1035 return relative
1036 1036
1037 1037 # The path is outside our tracked tree...
1038 1038 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
1039 1039 return None
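The docstring example, replayed as the plain string operations getrelpath() performs:

# Replaying the comment's example:
module = '/CMFPlone/branches/Plone-2_0-branch'
path = '/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py'
relative = path.rstrip('/')[len(module):]
assert relative == '/tests/PloneTestCase.py'
assert relative[1:] == 'tests/PloneTestCase.py'   # what getrelpath returns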
1040 1040
1041 1041 def _checkpath(self, path, revnum, module=None):
1042 1042 if module is not None:
1043 1043 prevmodule = self.reparent('')
1044 1044 path = module + '/' + path
1045 1045 try:
1046 1046 # ra.check_path does not like leading slashes very much, it leads
1047 1047 # to PROPFIND subversion errors
1048 1048 return svn.ra.check_path(self.ra, path.strip('/'), revnum)
1049 1049 finally:
1050 1050 if module is not None:
1051 1051 self.reparent(prevmodule)
1052 1052
1053 1053 def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
1054 1054 strict_node_history=False):
1055 1055 # Normalize path names; svn >= 1.5 only wants paths relative to
1056 1056 # the supplied URL
1057 1057 relpaths = []
1058 1058 for p in paths:
1059 1059 if not p.startswith('/'):
1060 1060 p = self.module + '/' + p
1061 1061 relpaths.append(p.strip('/'))
1062 1062 args = [self.baseurl, relpaths, start, end, limit,
1063 1063 discover_changed_paths, strict_node_history]
1064 1064 # developer config: convert.svn.debugsvnlog
1065 1065 if not self.ui.configbool('convert', 'svn.debugsvnlog'):
1066 1066 return directlogstream(*args)
1067 1067 arg = encodeargs(args)
1068 1068 hgexe = util.hgexecutable()
1069 1069 cmd = '%s debugsvnlog' % util.shellquote(hgexe)
1070 1070 stdin, stdout = util.popen2(util.quotecommand(cmd))
1071 1071 stdin.write(arg)
1072 1072 try:
1073 1073 stdin.close()
1074 1074 except IOError:
1075 1075 raise error.Abort(_('Mercurial failed to run itself, check'
1076 1076 ' hg executable is in PATH'))
1077 1077 return logstream(stdout)
1078 1078
1079 1079 pre_revprop_change = '''#!/bin/sh
1080 1080
1081 1081 REPOS="$1"
1082 1082 REV="$2"
1083 1083 USER="$3"
1084 1084 PROPNAME="$4"
1085 1085 ACTION="$5"
1086 1086
1087 1087 if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
1088 1088 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
1089 1089 if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi
1090 1090
1091 1091 echo "Changing prohibited revision property" >&2
1092 1092 exit 1
1093 1093 '''
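The hook above whitelists exactly three (action, property) pairs and rejects everything else. The same table as a small Python sketch, with the pairs taken straight from the script:

# (ACTION, PROPNAME) pairs the pre-revprop-change hook exits 0 for:
allowed = {('M', 'svn:log'),
           ('A', 'hg:convert-branch'),
           ('A', 'hg:convert-rev')}
assert ('M', 'svn:log') in allowed
assert ('M', 'svn:date') not in allowed   # any other pair exits 1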
1094 1094
1095 1095 class svn_sink(converter_sink, commandline):
1096 1096 commit_re = re.compile(r'Committed revision (\d+).', re.M)
1097 1097 uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)
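commit_re above scrapes the new revision number out of svn's commit output. A quick check against made-up output text:

import re

# Sketch: the sample output below is illustrative, not captured from svn.
commit_re = re.compile(r'Committed revision (\d+).', re.M)
output = 'Sending foo.c\nTransmitting file data .\nCommitted revision 7.\n'
assert commit_re.search(output).group(1) == '7'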
1098 1098
1099 1099 def prerun(self):
1100 1100 if self.wc:
1101 1101 os.chdir(self.wc)
1102 1102
1103 1103 def postrun(self):
1104 1104 if self.wc:
1105 1105 os.chdir(self.cwd)
1106 1106
1107 1107 def join(self, name):
1108 1108 return os.path.join(self.wc, '.svn', name)
1109 1109
1110 1110 def revmapfile(self):
1111 1111 return self.join('hg-shamap')
1112 1112
1113 1113 def authorfile(self):
1114 1114 return self.join('hg-authormap')
1115 1115
1116 1116 def __init__(self, ui, repotype, path):
1117 1117
1118 1118 converter_sink.__init__(self, ui, repotype, path)
1119 1119 commandline.__init__(self, ui, 'svn')
1120 1120 self.delete = []
1121 1121 self.setexec = []
1122 1122 self.delexec = []
1123 1123 self.copies = []
1124 1124 self.wc = None
1125 1125 self.cwd = pycompat.getcwd()
1126 1126
1127 1127 created = False
1128 1128 if os.path.isfile(os.path.join(path, '.svn', 'entries')):
1129 1129 self.wc = os.path.realpath(path)
1130 1130 self.run0('update')
1131 1131 else:
1132 if not re.search(r'^(file|http|https|svn|svn\+ssh)\://', path):
1132 if not re.search(br'^(file|http|https|svn|svn\+ssh)\://', path):
1133 1133 path = os.path.realpath(path)
1134 1134 if os.path.isdir(os.path.dirname(path)):
1135 1135 if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
1136 1136 ui.status(_('initializing svn repository %r\n') %
1137 1137 os.path.basename(path))
1138 1138 commandline(ui, 'svnadmin').run0('create', path)
1139 1139 created = path
1140 1140 path = util.normpath(path)
1141 1141 if not path.startswith('/'):
1142 1142 path = '/' + path
1143 1143 path = 'file://' + path
1144 1144
1145 1145 wcpath = os.path.join(pycompat.getcwd(), os.path.basename(path) +
1146 1146 '-wc')
1147 1147 ui.status(_('initializing svn working copy %r\n')
1148 1148 % os.path.basename(wcpath))
1149 1149 self.run0('checkout', path, wcpath)
1150 1150
1151 1151 self.wc = wcpath
1152 1152 self.opener = vfsmod.vfs(self.wc)
1153 1153 self.wopener = vfsmod.vfs(self.wc)
1154 1154 self.childmap = mapfile(ui, self.join('hg-childmap'))
1155 1155 if util.checkexec(self.wc):
1156 1156 self.is_exec = util.isexec
1157 1157 else:
1158 1158 self.is_exec = None
1159 1159
1160 1160 if created:
1161 1161 hook = os.path.join(created, 'hooks', 'pre-revprop-change')
1162 1162 fp = open(hook, 'wb')
1163 1163 fp.write(pre_revprop_change)
1164 1164 fp.close()
1165 1165 util.setflags(hook, False, True)
1166 1166
1167 1167 output = self.run0('info')
1168 1168 self.uuid = self.uuid_re.search(output).group(1).strip()
1169 1169
1170 1170 def wjoin(self, *names):
1171 1171 return os.path.join(self.wc, *names)
1172 1172
1173 1173 @propertycache
1174 1174 def manifest(self):
1175 1175 # As of svn 1.7, the "add" command fails when receiving
1176 1176 # already tracked entries, so we have to track and filter them
1177 1177 # ourselves.
1178 1178 m = set()
1179 1179 output = self.run0('ls', recursive=True, xml=True)
1180 1180 doc = xml.dom.minidom.parseString(output)
1181 1181 for e in doc.getElementsByTagName('entry'):
1182 1182 for n in e.childNodes:
1183 1183 if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name':
1184 1184 continue
1185 1185 name = ''.join(c.data for c in n.childNodes
1186 1186 if c.nodeType == c.TEXT_NODE)
1187 1187 # Entries are compared with names coming from
1188 1188 # mercurial, so they are bytes with undefined encoding. Our
1189 1189 # best bet is to assume they are in local
1190 1190 # encoding. They will be passed to command line calls
1191 1191 # later anyway, so they had better be.
1192 1192 m.add(encoding.unitolocal(name))
1193 1193 break
1194 1194 return m
1195 1195
1196 1196 def putfile(self, filename, flags, data):
1197 1197 if 'l' in flags:
1198 1198 self.wopener.symlink(data, filename)
1199 1199 else:
1200 1200 try:
1201 1201 if os.path.islink(self.wjoin(filename)):
1202 1202 os.unlink(filename)
1203 1203 except OSError:
1204 1204 pass
1205 1205 self.wopener.write(filename, data)
1206 1206
1207 1207 if self.is_exec:
1208 1208 if self.is_exec(self.wjoin(filename)):
1209 1209 if 'x' not in flags:
1210 1210 self.delexec.append(filename)
1211 1211 else:
1212 1212 if 'x' in flags:
1213 1213 self.setexec.append(filename)
1214 1214 util.setflags(self.wjoin(filename), False, 'x' in flags)
1215 1215
1216 1216 def _copyfile(self, source, dest):
1217 1217 # SVN's copy command pukes if the destination file exists, but
1218 1218 # our copyfile method expects to record a copy that has
1219 1219 # already occurred. Cross the semantic gap.
1220 1220 wdest = self.wjoin(dest)
1221 1221 exists = os.path.lexists(wdest)
1222 1222 if exists:
1223 1223 fd, tempname = tempfile.mkstemp(
1224 1224 prefix='hg-copy-', dir=os.path.dirname(wdest))
1225 1225 os.close(fd)
1226 1226 os.unlink(tempname)
1227 1227 os.rename(wdest, tempname)
1228 1228 try:
1229 1229 self.run0('copy', source, dest)
1230 1230 finally:
1231 1231 self.manifest.add(dest)
1232 1232 if exists:
1233 1233 try:
1234 1234 os.unlink(wdest)
1235 1235 except OSError:
1236 1236 pass
1237 1237 os.rename(tempname, wdest)
1238 1238
1239 1239 def dirs_of(self, files):
1240 1240 dirs = set()
1241 1241 for f in files:
1242 1242 if os.path.isdir(self.wjoin(f)):
1243 1243 dirs.add(f)
1244 1244 i = len(f)
1245 1245 for i in iter(lambda: f.rfind('/', 0, i), -1):
1246 1246 dirs.add(f[:i])
1247 1247 return dirs
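The rfind() loop in dirs_of() enumerates every ancestor directory of a path. Isolated with one made-up path (the isdir check is skipped here):

# Sketch of the ancestor enumeration for a single path:
f = 'foo/bar/baz.c'
dirs = set()
i = len(f)
for i in iter(lambda: f.rfind('/', 0, i), -1):
    dirs.add(f[:i])
assert dirs == {'foo', 'foo/bar'}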
1248 1248
1249 1249 def add_dirs(self, files):
1250 1250 add_dirs = [d for d in sorted(self.dirs_of(files))
1251 1251 if d not in self.manifest]
1252 1252 if add_dirs:
1253 1253 self.manifest.update(add_dirs)
1254 1254 self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
1255 1255 return add_dirs
1256 1256
1257 1257 def add_files(self, files):
1258 1258 files = [f for f in files if f not in self.manifest]
1259 1259 if files:
1260 1260 self.manifest.update(files)
1261 1261 self.xargs(files, 'add', quiet=True)
1262 1262 return files
1263 1263
1264 1264 def addchild(self, parent, child):
1265 1265 self.childmap[parent] = child
1266 1266
1267 1267 def revid(self, rev):
1268 1268 return u"svn:%s@%s" % (self.uuid, rev)
1269 1269
1270 1270 def putcommit(self, files, copies, parents, commit, source, revmap, full,
1271 1271 cleanp2):
1272 1272 for parent in parents:
1273 1273 try:
1274 1274 return self.revid(self.childmap[parent])
1275 1275 except KeyError:
1276 1276 pass
1277 1277
1278 1278 # Apply changes to working copy
1279 1279 for f, v in files:
1280 1280 data, mode = source.getfile(f, v)
1281 1281 if data is None:
1282 1282 self.delete.append(f)
1283 1283 else:
1284 1284 self.putfile(f, mode, data)
1285 1285 if f in copies:
1286 1286 self.copies.append([copies[f], f])
1287 1287 if full:
1288 1288 self.delete.extend(sorted(self.manifest.difference(files)))
1289 1289 files = [f[0] for f in files]
1290 1290
1291 1291 entries = set(self.delete)
1292 1292 files = frozenset(files)
1293 1293 entries.update(self.add_dirs(files.difference(entries)))
1294 1294 if self.copies:
1295 1295 for s, d in self.copies:
1296 1296 self._copyfile(s, d)
1297 1297 self.copies = []
1298 1298 if self.delete:
1299 1299 self.xargs(self.delete, 'delete')
1300 1300 for f in self.delete:
1301 1301 self.manifest.remove(f)
1302 1302 self.delete = []
1303 1303 entries.update(self.add_files(files.difference(entries)))
1304 1304 if self.delexec:
1305 1305 self.xargs(self.delexec, 'propdel', 'svn:executable')
1306 1306 self.delexec = []
1307 1307 if self.setexec:
1308 1308 self.xargs(self.setexec, 'propset', 'svn:executable', '*')
1309 1309 self.setexec = []
1310 1310
1311 1311 fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
1312 1312 fp = os.fdopen(fd, pycompat.sysstr('wb'))
1313 1313 fp.write(util.tonativeeol(commit.desc))
1314 1314 fp.close()
1315 1315 try:
1316 1316 output = self.run0('commit',
1317 1317 username=util.shortuser(commit.author),
1318 1318 file=messagefile,
1319 1319 encoding='utf-8')
1320 1320 try:
1321 1321 rev = self.commit_re.search(output).group(1)
1322 1322 except AttributeError:
1323 1323 if parents and not files:
1324 1324 return parents[0]
1325 1325 self.ui.warn(_('unexpected svn output:\n'))
1326 1326 self.ui.warn(output)
1327 1327 raise error.Abort(_('unable to cope with svn output'))
1328 1328 if commit.rev:
1329 1329 self.run('propset', 'hg:convert-rev', commit.rev,
1330 1330 revprop=True, revision=rev)
1331 1331 if commit.branch and commit.branch != 'default':
1332 1332 self.run('propset', 'hg:convert-branch', commit.branch,
1333 1333 revprop=True, revision=rev)
1334 1334 for parent in parents:
1335 1335 self.addchild(parent, rev)
1336 1336 return self.revid(rev)
1337 1337 finally:
1338 1338 os.unlink(messagefile)
1339 1339
1340 1340 def puttags(self, tags):
1341 1341 self.ui.warn(_('writing Subversion tags is not yet implemented\n'))
1342 1342 return None, None
1343 1343
1344 1344 def hascommitfrommap(self, rev):
1345 1345 # We trust that revisions referenced in a map are still present
1346 1346 # TODO: implement something better if necessary and feasible
1347 1347 return True
1348 1348
1349 1349 def hascommitforsplicemap(self, rev):
1350 1350 # This is not correct as one can convert to an existing subversion
1351 1351 # repository and childmap would not list all revisions. Too bad.
1352 1352 if rev in self.childmap:
1353 1353 return True
1354 1354 raise error.Abort(_('splice map revision %s not found in subversion '
1355 1355 'child map (revision lookups are not implemented)')
1356 1356 % rev)
@@ -1,481 +1,481 b''
1 1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import json
12 12 import os
13 13 import re
14 14 import socket
15 15
16 16 from mercurial.i18n import _
17 17
18 18 from mercurial import (
19 19 error,
20 20 pathutil,
21 21 url as urlmod,
22 22 util,
23 23 vfs as vfsmod,
24 24 worker,
25 25 )
26 26
27 27 from ..largefiles import lfutil
28 28
29 29 # 64 bytes for SHA256
30 _lfsre = re.compile(r'\A[a-f0-9]{64}\Z')
30 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
31 31
32 32 class lfsvfs(vfsmod.vfs):
33 33 def join(self, path):
34 34 """split the path at first two characters, like: XX/XXXXX..."""
35 35 if not _lfsre.match(path):
36 36 raise error.ProgrammingError('unexpected lfs path: %s' % path)
37 37 return super(lfsvfs, self).join(path[0:2], path[2:])
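A small sketch of the two conventions at work here: the byte-string oid regex from the top of the file, and join()'s two-character sharding (the oid value is made up):

import re

_lfsre = re.compile(br'\A[a-f0-9]{64}\Z')   # same pattern as above
oid = b'0b' * 32                            # well-formed 64-hex-char oid
assert _lfsre.match(oid)
# join() shards it into 'XX/XXXX...':
assert (oid[0:2], oid[2:]) == (b'0b', b'0b' * 31)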
38 38
39 39 def walk(self, path=None, onerror=None):
40 40 """Yield (dirpath, [], oids) tuple for blobs under path
41 41
42 42 Oids only exist in the root of this vfs, so dirpath is always ''.
43 43 """
44 44 root = os.path.normpath(self.base)
45 45 # when dirpath == root, dirpath[prefixlen:] becomes empty
46 46 # because len(dirpath) < prefixlen.
47 47 prefixlen = len(pathutil.normasprefix(root))
48 48 oids = []
49 49
50 50 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
51 51 onerror=onerror):
52 52 dirpath = dirpath[prefixlen:]
53 53
54 54 # Silently skip unexpected files and directories
55 55 if len(dirpath) == 2:
56 56 oids.extend([dirpath + f for f in files
57 57 if _lfsre.match(dirpath + f)])
58 58
59 59 yield ('', [], oids)
60 60
61 61 class filewithprogress(object):
62 62 """a file-like object that supports __len__ and read.
63 63
64 64 Useful to provide progress information for how many bytes are read.
65 65 """
66 66
67 67 def __init__(self, fp, callback):
68 68 self._fp = fp
69 69 self._callback = callback # func(readsize)
70 70 fp.seek(0, os.SEEK_END)
71 71 self._len = fp.tell()
72 72 fp.seek(0)
73 73
74 74 def __len__(self):
75 75 return self._len
76 76
77 77 def read(self, size):
78 78 if self._fp is None:
79 79 return b''
80 80 data = self._fp.read(size)
81 81 if data:
82 82 if self._callback:
83 83 self._callback(len(data))
84 84 else:
85 85 self._fp.close()
86 86 self._fp = None
87 87 return data
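A minimal use of filewithprogress with an in-memory file, showing when the callback fires and when the source is closed (a sketch, assuming the class above is in scope):

import io

seen = []
fp = filewithprogress(io.BytesIO(b'abcdef'), seen.append)
assert len(fp) == 6
assert fp.read(4) == b'abcd' and seen == [4]
assert fp.read(4) == b'ef' and seen == [4, 2]
assert fp.read(4) == b''    # source exhausted and closed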
88 88
89 89 class local(object):
90 90 """Local blobstore for large file contents.
91 91
92 92 This blobstore is used both as a cache and as a staging area for large blobs
93 93 to be uploaded to the remote blobstore.
94 94 """
95 95
96 96 def __init__(self, repo):
97 97 fullpath = repo.svfs.join('lfs/objects')
98 98 self.vfs = lfsvfs(fullpath)
99 99 usercache = lfutil._usercachedir(repo.ui, 'lfs')
100 100 self.cachevfs = lfsvfs(usercache)
101 101 self.ui = repo.ui
102 102
103 103 def open(self, oid):
104 104 """Open a read-only file descriptor to the named blob, in either the
105 105 usercache or the local store."""
106 106 # The usercache is the most likely place to hold the file. Commit will
107 107 # write to both it and the local store, as will anything that downloads
108 108 # the blobs. However, things like clone without an update won't
109 109 # populate the local store. For an init + push of a local clone,
110 110 # the usercache is the only place it _could_ be. If not present, the
111 111 # missing file message here will indicate the local repo, not the usercache.
112 112 if self.cachevfs.exists(oid):
113 113 return self.cachevfs(oid, 'rb')
114 114
115 115 return self.vfs(oid, 'rb')
116 116
117 117 def download(self, oid, src):
118 118 """Read the blob from the remote source in chunks, verify the content,
119 119 and write to this local blobstore."""
120 120 sha256 = hashlib.sha256()
121 121
122 122 with self.vfs(oid, 'wb', atomictemp=True) as fp:
123 123 for chunk in util.filechunkiter(src, size=1048576):
124 124 fp.write(chunk)
125 125 sha256.update(chunk)
126 126
127 127 realoid = sha256.hexdigest()
128 128 if realoid != oid:
129 129 raise error.Abort(_('corrupt remote lfs object: %s') % oid)
130 130
131 131 # XXX: should we verify the content of the cache, and hardlink back to
132 132 # the local store on success, but truncate, write and link on failure?
133 133 if not self.cachevfs.exists(oid):
134 134 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
135 135 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
136 136
137 137 def write(self, oid, data):
138 138 """Write blob to local blobstore.
139 139
140 140 This should only be called from the filelog during a commit or similar.
141 141 As such, there is no need to verify the data. Imports from a remote
142 142 store must use ``download()`` instead."""
143 143 with self.vfs(oid, 'wb', atomictemp=True) as fp:
144 144 fp.write(data)
145 145
146 146 # XXX: should we verify the content of the cache, and hardlink back to
147 147 # the local store on success, but truncate, write and link on failure?
148 148 if not self.cachevfs.exists(oid):
149 149 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
150 150 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
151 151
152 152 def read(self, oid, verify=True):
153 153 """Read blob from local blobstore."""
154 154 if not self.vfs.exists(oid):
155 155 blob = self._read(self.cachevfs, oid, verify)
156 156
157 157 # Even if revlog will verify the content, it needs to be verified
158 158 # now before making the hardlink to avoid propagating corrupt blobs.
159 159 # Don't abort if corruption is detected, because `hg verify` will
160 160 # give more useful info about the corruption- simply don't add the
161 161 # hardlink.
162 162 if verify or hashlib.sha256(blob).hexdigest() == oid:
163 163 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
164 164 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
165 165 else:
166 166 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
167 167 blob = self._read(self.vfs, oid, verify)
168 168 return blob
169 169
170 170 def _read(self, vfs, oid, verify):
171 171 """Read blob (after verifying) from the given store"""
172 172 blob = vfs.read(oid)
173 173 if verify:
174 174 _verify(oid, blob)
175 175 return blob
176 176
177 177 def has(self, oid):
178 178 """Returns True if the local blobstore contains the requested blob,
179 179 False otherwise."""
180 180 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
181 181
182 182 class _gitlfsremote(object):
183 183
184 184 def __init__(self, repo, url):
185 185 ui = repo.ui
186 186 self.ui = ui
187 187 baseurl, authinfo = url.authinfo()
188 188 self.baseurl = baseurl.rstrip('/')
189 189 useragent = repo.ui.config('experimental', 'lfs.user-agent')
190 190 if not useragent:
191 191 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
192 192 self.urlopener = urlmod.opener(ui, authinfo, useragent)
193 193 self.retry = ui.configint('lfs', 'retry')
194 194
195 195 def writebatch(self, pointers, fromstore):
196 196 """Batch upload from local to remote blobstore."""
197 197 self._batch(_deduplicate(pointers), fromstore, 'upload')
198 198
199 199 def readbatch(self, pointers, tostore):
200 200 """Batch download from remote to local blostore."""
201 201 self._batch(_deduplicate(pointers), tostore, 'download')
202 202
203 203 def _batchrequest(self, pointers, action):
204 204 """Get metadata about objects pointed by pointers for given action
205 205
206 206 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
207 207 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
208 208 """
209 209 objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
210 210 requestdata = json.dumps({
211 211 'objects': objects,
212 212 'operation': action,
213 213 })
214 214 batchreq = util.urlreq.request('%s/objects/batch' % self.baseurl,
215 215 data=requestdata)
216 216 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
217 217 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
218 218 try:
219 219 rawjson = self.urlopener.open(batchreq).read()
220 220 except util.urlerr.httperror as ex:
221 221 raise LfsRemoteError(_('LFS HTTP error: %s (action=%s)')
222 222 % (ex, action))
223 223 try:
224 224 response = json.loads(rawjson)
225 225 except ValueError:
226 226 raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
227 227 % rawjson)
228 228 return response
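For reference, the request body built above has this shape (oid and size are made up; the response mirrors it, with an 'actions' map added per object, as _basictransfer below relies on):

import json

requestdata = json.dumps({
    'objects': [{'oid': 'deadbeef' * 8, 'size': 12}],
    'operation': 'download',
})
# A successful response decodes to something like:
# {'objects': [{'oid': 'deadbeef...', 'size': 12,
#               'actions': {'download': {'href': ..., 'header': {...}}}}]}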
229 229
230 230 def _checkforservererror(self, pointers, responses, action):
231 231 """Scans errors from objects
232 232
233 233 Raises LfsRemoteError if any objects have an error"""
234 234 for response in responses:
235 235 # The server should return 404 when objects cannot be found. Some
236 236 # server implementations (e.g. lfs-test-server) do not set "error"
237 237 # but just remove "download" from "actions". Treat that case
238 238 # the same as a 404 error.
239 239 notfound = (response.get('error', {}).get('code') == 404
240 240 or (action == 'download'
241 241 and action not in response.get('actions', [])))
242 242 if notfound:
243 243 ptrmap = {p.oid(): p for p in pointers}
244 244 p = ptrmap.get(response['oid'], None)
245 245 if p:
246 246 filename = getattr(p, 'filename', 'unknown')
247 247 raise LfsRemoteError(
248 248 _(('LFS server error. Remote object '
249 249 'for "%s" not found: %r')) % (filename, response))
250 250 else:
251 251 raise LfsRemoteError(
252 252 _('LFS server error. Unsolicited response for oid %s')
253 253 % response['oid'])
254 254 if 'error' in response:
255 255 raise LfsRemoteError(_('LFS server error: %r') % response)
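The two response shapes the scan above treats as "not found", replayed with made-up data:

# Both shapes satisfy the notfound condition used above:
action = 'download'
resp_404 = {'oid': 'a' * 64, 'error': {'code': 404}}
resp_noact = {'oid': 'a' * 64, 'actions': {}}   # 'download' not offered
for response in (resp_404, resp_noact):
    notfound = (response.get('error', {}).get('code') == 404
                or (action == 'download'
                    and action not in response.get('actions', [])))
    assert notfound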
256 256
257 257 def _extractobjects(self, response, pointers, action):
258 258 """extract objects from response of the batch API
259 259
260 260 response: parsed JSON object returned by batch API
261 261 return response['objects'] filtered by action
262 262 raise if any object has an error
263 263 """
264 264 # Scan errors from objects - fail early
265 265 objects = response.get('objects', [])
266 266 self._checkforservererror(pointers, objects, action)
267 267
268 268 # Filter objects with the given action. Practically, this skips uploading
269 269 # objects that already exist on the server.
270 270 filteredobjects = [o for o in objects if action in o.get('actions', [])]
271 271
272 272 return filteredobjects
273 273
274 274 def _basictransfer(self, obj, action, localstore):
275 275 """Download or upload a single object using basic transfer protocol
276 276
277 277 obj: dict, an object description returned by batch API
278 278 action: string, one of ['upload', 'download']
279 279 localstore: blobstore.local
280 280
281 281 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
282 282 basic-transfers.md
283 283 """
284 284 oid = str(obj['oid'])
285 285
286 286 href = str(obj['actions'][action].get('href'))
287 287 headers = obj['actions'][action].get('header', {}).items()
288 288
289 289 request = util.urlreq.request(href)
290 290 if action == 'upload':
291 291 # If uploading blobs, read data from local blobstore.
292 292 with localstore.open(oid) as fp:
293 293 _verifyfile(oid, fp)
294 294 request.data = filewithprogress(localstore.open(oid), None)
295 295 request.get_method = lambda: 'PUT'
296 296
297 297 for k, v in headers:
298 298 request.add_header(k, v)
299 299
300 300 response = b''
301 301 try:
302 302 req = self.urlopener.open(request)
303 303 if action == 'download':
304 304 # If downloading blobs, store downloaded data to local blobstore
305 305 localstore.download(oid, req)
306 306 else:
307 307 while True:
308 308 data = req.read(1048576)
309 309 if not data:
310 310 break
311 311 response += data
312 312 if response:
313 313 self.ui.debug('lfs %s response: %s' % (action, response))
314 314 except util.urlerr.httperror as ex:
315 315 if self.ui.debugflag:
316 316 self.ui.debug('%s: %s\n' % (oid, ex.read()))
317 317 raise LfsRemoteError(_('HTTP error: %s (oid=%s, action=%s)')
318 318 % (ex, oid, action))
319 319
320 320 def _batch(self, pointers, localstore, action):
321 321 if action not in ['upload', 'download']:
322 322 raise error.ProgrammingError('invalid Git-LFS action: %s' % action)
323 323
324 324 response = self._batchrequest(pointers, action)
325 325 objects = self._extractobjects(response, pointers, action)
326 326 total = sum(x.get('size', 0) for x in objects)
327 327 sizes = {}
328 328 for obj in objects:
329 329 sizes[obj.get('oid')] = obj.get('size', 0)
330 330 topic = {'upload': _('lfs uploading'),
331 331 'download': _('lfs downloading')}[action]
332 332 if len(objects) > 1:
333 333 self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
334 334 % (len(objects), util.bytecount(total)))
335 335 self.ui.progress(topic, 0, total=total)
336 336 def transfer(chunk):
337 337 for obj in chunk:
338 338 objsize = obj.get('size', 0)
339 339 if self.ui.verbose:
340 340 if action == 'download':
341 341 msg = _('lfs: downloading %s (%s)\n')
342 342 elif action == 'upload':
343 343 msg = _('lfs: uploading %s (%s)\n')
344 344 self.ui.note(msg % (obj.get('oid'),
345 345 util.bytecount(objsize)))
346 346 retry = self.retry
347 347 while True:
348 348 try:
349 349 self._basictransfer(obj, action, localstore)
350 350 yield 1, obj.get('oid')
351 351 break
352 352 except socket.error as ex:
353 353 if retry > 0:
354 354 self.ui.note(
355 355 _('lfs: failed: %r (remaining retry %d)\n')
356 356 % (ex, retry))
357 357 retry -= 1
358 358 continue
359 359 raise
360 360
361 361 # Until https multiplexing gets sorted out
362 362 if self.ui.configbool('experimental', 'lfs.worker-enable'):
363 363 oids = worker.worker(self.ui, 0.1, transfer, (),
364 364 sorted(objects, key=lambda o: o.get('oid')))
365 365 else:
366 366 oids = transfer(sorted(objects, key=lambda o: o.get('oid')))
367 367
368 368 processed = 0
369 369 blobs = 0
370 370 for _one, oid in oids:
371 371 processed += sizes[oid]
372 372 blobs += 1
373 373 self.ui.progress(topic, processed, total=total)
374 374 self.ui.note(_('lfs: processed: %s\n') % oid)
375 375 self.ui.progress(topic, pos=None, total=total)
376 376
377 377 if blobs > 0:
378 378 if action == 'upload':
379 379 self.ui.status(_('lfs: uploaded %d files (%s)\n')
380 380 % (blobs, util.bytecount(processed)))
381 381 # TODO: coalesce the download requests, and comment this in
382 382 #elif action == 'download':
383 383 # self.ui.status(_('lfs: downloaded %d files (%s)\n')
384 384 # % (blobs, util.bytecount(processed)))
385 385
386 386 def __del__(self):
387 387 # copied from mercurial/httppeer.py
388 388 urlopener = getattr(self, 'urlopener', None)
389 389 if urlopener:
390 390 for h in urlopener.handlers:
391 391 h.close()
392 392 getattr(h, "close_all", lambda : None)()
393 393
394 394 class _dummyremote(object):
395 395 """Dummy store storing blobs to temp directory."""
396 396
397 397 def __init__(self, repo, url):
398 398 fullpath = repo.vfs.join('lfs', url.path)
399 399 self.vfs = lfsvfs(fullpath)
400 400
401 401 def writebatch(self, pointers, fromstore):
402 402 for p in _deduplicate(pointers):
403 403 content = fromstore.read(p.oid(), verify=True)
404 404 with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
405 405 fp.write(content)
406 406
407 407 def readbatch(self, pointers, tostore):
408 408 for p in _deduplicate(pointers):
409 409 with self.vfs(p.oid(), 'rb') as fp:
410 410 tostore.download(p.oid(), fp)
411 411
412 412 class _nullremote(object):
413 413 """Null store storing blobs to /dev/null."""
414 414
415 415 def __init__(self, repo, url):
416 416 pass
417 417
418 418 def writebatch(self, pointers, fromstore):
419 419 pass
420 420
421 421 def readbatch(self, pointers, tostore):
422 422 pass
423 423
424 424 class _promptremote(object):
425 425 """Prompt user to set lfs.url when accessed."""
426 426
427 427 def __init__(self, repo, url):
428 428 pass
429 429
430 430 def writebatch(self, pointers, fromstore, ui=None):
431 431 self._prompt()
432 432
433 433 def readbatch(self, pointers, tostore, ui=None):
434 434 self._prompt()
435 435
436 436 def _prompt(self):
437 437 raise error.Abort(_('lfs.url needs to be configured'))
438 438
439 439 _storemap = {
440 440 'https': _gitlfsremote,
441 441 'http': _gitlfsremote,
442 442 'file': _dummyremote,
443 443 'null': _nullremote,
444 444 None: _promptremote,
445 445 }
446 446
447 447 def _deduplicate(pointers):
448 448 """Remove any duplicate oids that exist in the list"""
449 449 reduced = util.sortdict()
450 450 for p in pointers:
451 451 reduced[p.oid()] = p
452 452 return reduced.values()
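A sketch of the dedup semantics, with a plain dict standing in for util.sortdict and a hypothetical pointer class (the last pointer seen for an oid wins):

# fakepointer is a made-up stand-in for a real lfs pointer object.
class fakepointer(object):
    def __init__(self, oid, tag):
        self._oid, self.tag = oid, tag
    def oid(self):
        return self._oid

ptrs = [fakepointer('aa', 1), fakepointer('bb', 2), fakepointer('aa', 3)]
reduced = {}
for p in ptrs:
    reduced[p.oid()] = p
assert sorted((p.oid(), p.tag) for p in reduced.values()) == \
    [('aa', 3), ('bb', 2)]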
453 453
454 454 def _verify(oid, content):
455 455 realoid = hashlib.sha256(content).hexdigest()
456 456 if realoid != oid:
457 457 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
458 458 hint=_('run hg verify'))
459 459
460 460 def _verifyfile(oid, fp):
461 461 sha256 = hashlib.sha256()
462 462 while True:
463 463 data = fp.read(1024 * 1024)
464 464 if not data:
465 465 break
466 466 sha256.update(data)
467 467 realoid = sha256.hexdigest()
468 468 if realoid != oid:
469 469 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
470 470 hint=_('run hg verify'))
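The same chunked hashing, exercised against an in-memory blob as a standalone sketch:

import hashlib
import io

blob = b'x' * (3 * 1024 * 1024)
oid = hashlib.sha256(blob).hexdigest()
fp = io.BytesIO(blob)
sha256 = hashlib.sha256()
for chunk in iter(lambda: fp.read(1024 * 1024), b''):
    sha256.update(chunk)
assert sha256.hexdigest() == oid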
471 471
472 472 def remote(repo):
473 473 """remotestore factory. return a store in _storemap depending on config"""
474 474 url = util.url(repo.ui.config('lfs', 'url') or '')
475 475 scheme = url.scheme
476 476 if scheme not in _storemap:
477 477 raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
478 478 return _storemap[scheme](repo, url)
479 479
480 480 class LfsRemoteError(error.RevlogError):
481 481 pass