upstream/mercurial-mirror Commit - r22300:35ab037d

convert: introduce --full for converting all files...

Mads Kiilerich -

r22300:35ab037d default

parent child

hgext/convert/__init__.py

0 +10 0

              # convert.py Foreign SCM converter
              #
              # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              '''import revisions from foreign VCS repositories into Mercurial'''
              import convcmd
              import cvsps
              import subversion
              from mercurial import cmdutil, templatekw
              from mercurial.i18n import _
              # Command table of this extension. cmdutil.command(cmdtable) returns the
              # decorator used below to declare commands (presumably each decorated
              # function gets registered in cmdtable -- confirm in cmdutil).
              cmdtable = {}
              command = cmdutil.command(cmdtable)
              # NOTE(review): presumably marks the extension as tested with the
              # Mercurial release it is bundled with -- confirm.
              testedwith = 'internal'
              # Commands definition was moved elsewhere to ease demandload job.
              # 'hg convert' command declaration. The option table and the user-facing
              # docstring are defined here; the conversion itself is delegated to
              # convcmd.convert() at the end of the function.
              @command('convert',
                  [('', 'authors', '',
                    _('username mapping filename (DEPRECATED, use --authormap instead)'),
                    _('FILE')),
                  ('s', 'source-type', '', _('source repository type'), _('TYPE')),
                  ('d', 'dest-type', '', _('destination repository type'), _('TYPE')),
                  ('r', 'rev', '', _('import up to source revision REV'), _('REV')),
                  ('A', 'authormap', '', _('remap usernames using this file'), _('FILE')),
                  ('', 'filemap', '', _('remap file names using contents of file'),
                   _('FILE')),
+                 ('', 'full', None,
+                  _('apply filemap changes by converting all files again')),
                  ('', 'splicemap', '', _('splice synthesized history into place'),
                   _('FILE')),
                  ('', 'branchmap', '', _('change branch names while converting'),
                   _('FILE')),
                  ('', 'branchsort', None, _('try to sort changesets by branches')),
                  ('', 'datesort', None, _('try to sort changesets by date')),
                  ('', 'sourcesort', None, _('preserve source changesets order')),
                  ('', 'closesort', None, _('try to reorder closed revisions'))],
                 _('hg convert [OPTION]... SOURCE [DEST [REVMAP]]'),
                 norepo=True)
              def convert(ui, src, dest=None, revmapfile=None, **opts):
                  """convert a foreign SCM repository to a Mercurial one.
                  Accepted source formats [identifiers]:
                  - Mercurial [hg]
                  - CVS [cvs]
                  - Darcs [darcs]
                  - git [git]
                  - Subversion [svn]
                  - Monotone [mtn]
                  - GNU Arch [gnuarch]
                  - Bazaar [bzr]
                  - Perforce [p4]
                  Accepted destination formats [identifiers]:
                  - Mercurial [hg]
                  - Subversion [svn] (history on branches is not preserved)
                  If no revision is given, all revisions will be converted.
                  Otherwise, convert will only import up to the named revision
                  (given in a format understood by the source).
                  If no destination directory name is specified, it defaults to the
                  basename of the source with ``-hg`` appended. If the destination
                  repository doesn't exist, it will be created.
                  By default, all sources except Mercurial will use --branchsort.
                  Mercurial uses --sourcesort to preserve original revision numbers
                  order. Sort modes have the following effects:
                  --branchsort  convert from parent to child revision when possible,
                                which means branches are usually converted one after
                                the other. It generates more compact repositories.
                  --datesort    sort revisions by date. Converted repositories have
                                good-looking changelogs but are often an order of
                                magnitude larger than the same ones generated by
                                --branchsort.
                  --sourcesort  try to preserve source revisions order, only
                                supported by Mercurial sources.
                  --closesort   try to move closed revisions as close as possible
                                to parent branches, only supported by Mercurial
                                sources.
                  If ``REVMAP`` isn't given, it will be put in a default location
                  (``<dest>/.hg/shamap`` by default). The ``REVMAP`` is a simple
                  text file that maps each source commit ID to the destination ID
                  for that revision, like so::
                    <source ID> <destination ID>
                  If the file doesn't exist, it's automatically created. It's
                  updated on each commit copied, so :hg:`convert` can be interrupted
                  and can be run repeatedly to copy new commits.
                  The authormap is a simple text file that maps each source commit
                  author to a destination commit author. It is handy for source SCMs
                  that use unix logins to identify authors (e.g.: CVS). One line per
                  author mapping and the line format is::
                    source author = destination author
                  Empty lines and lines starting with a ``#`` are ignored.
                  The filemap is a file that allows filtering and remapping of files
                  and directories. Each line can contain one of the following
                  directives::
                    include path/to/file-or-dir
                    exclude path/to/file-or-dir
                    rename path/to/source path/to/destination
                  Comment lines start with ``#``. A specified path matches if it
                  equals the full relative name of a file or one of its parent
                  directories. The ``include`` or ``exclude`` directive with the
                  longest matching path applies, so line order does not matter.
                  The ``include`` directive causes a file, or all files under a
                  directory, to be included in the destination repository. The default
                  if there are no ``include`` statements is to include everything.
                  If there are any ``include`` statements, nothing else is included.
                  The ``exclude`` directive causes files or directories to
                  be omitted. The ``rename`` directive renames a file or directory if
                  it is converted. To rename from a subdirectory into the root of
                  the repository, use ``.`` as the path to rename to.
+                 ``--full`` will make sure the converted changesets contain exactly
+                 the right files with the right content. It will make a full
+                 conversion of all files, not just the ones that have
+                 changed. Files that already are correct will not be changed. This
+                 can be used to apply filemap changes when converting
+                 incrementally. This is currently only supported for Mercurial and
+                 Subversion.
                  The splicemap is a file that allows insertion of synthetic
                  history, letting you specify the parents of a revision. This is
                  useful if you want to e.g. give a Subversion merge two parents, or
                  graft two disconnected series of history together. Each entry
                  contains a key, followed by a space, followed by one or two
                  comma-separated values::
                    key parent1, parent2
                  The key is the revision ID in the source
                  revision control system whose parents should be modified (same
                  format as a key in .hg/shamap). The values are the revision IDs
                  (in either the source or destination revision control system) that
                  should be used as the new parents for that node. For example, if
                  you have merged "release-1.0" into "trunk", then you should
                  specify the revision on "trunk" as the first parent and the one on
                  the "release-1.0" branch as the second.
                  The branchmap is a file that allows you to rename a branch when it is
                  being brought in from whatever external repository. When used in
                  conjunction with a splicemap, it allows for a powerful combination
                  to help fix even the most badly mismanaged repositories and turn them
                  into nicely structured Mercurial repositories. The branchmap contains
                  lines of the form::
                    original_branch_name new_branch_name
                  where "original_branch_name" is the name of the branch in the
                  source repository, and "new_branch_name" is the name of the branch
                  is the destination repository. No whitespace is allowed in the
                  branch names. This can be used to (for instance) move code in one
                  repository from "default" to a named branch.
                  Mercurial Source
                  ################
                  The Mercurial source recognizes the following configuration
                  options, which you can set on the command line with ``--config``:
                  :convert.hg.ignoreerrors: ignore integrity errors when reading.
                      Use it to fix Mercurial repositories with missing revlogs, by
                      converting from and to Mercurial. Default is False.
                  :convert.hg.saverev: store original revision ID in changeset
                      (forces target IDs to change). It takes a boolean argument and
                      defaults to False.
                  :convert.hg.revs: revset specifying the source revisions to convert.
                  CVS Source
                  ##########
                  CVS source will use a sandbox (i.e. a checked-out copy) from CVS
                  to indicate the starting point of what will be converted. Direct
                  access to the repository files is not needed, unless of course the
                  repository is ``:local:``. The conversion uses the top level
                  directory in the sandbox to find the CVS repository, and then uses
                  CVS rlog commands to find files to convert. This means that unless
                  a filemap is given, all files under the starting directory will be
                  converted, and that any directory reorganization in the CVS
                  sandbox is ignored.
                  The following options can be used with ``--config``:
                  :convert.cvsps.cache: Set to False to disable remote log caching,
                      for testing and debugging purposes. Default is True.
                  :convert.cvsps.fuzz: Specify the maximum time (in seconds) that is
                      allowed between commits with identical user and log message in
                      a single changeset. When very large files were checked in as
                      part of a changeset then the default may not be long enough.
                      The default is 60.
                  :convert.cvsps.mergeto: Specify a regular expression to which
                      commit log messages are matched. If a match occurs, then the
                      conversion process will insert a dummy revision merging the
                      branch on which this log message occurs to the branch
                      indicated in the regex. Default is ``{{mergetobranch
                      ([-\\w]+)}}``
                  :convert.cvsps.mergefrom: Specify a regular expression to which
                      commit log messages are matched. If a match occurs, then the
                      conversion process will add the most recent revision on the
                      branch indicated in the regex as the second parent of the
                      changeset. Default is ``{{mergefrombranch ([-\\w]+)}}``
                  :convert.localtimezone: use local time (as determined by the TZ
                      environment variable) for changeset date/times. The default
                      is False (use UTC).
                  :hooks.cvslog: Specify a Python function to be called at the end of
                      gathering the CVS log. The function is passed a list with the
                      log entries, and can modify the entries in-place, or add or
                      delete them.
                  :hooks.cvschangesets: Specify a Python function to be called after
                      the changesets are calculated from the CVS log. The
                      function is passed a list with the changeset entries, and can
                      modify the changesets in-place, or add or delete them.
                  An additional "debugcvsps" Mercurial command allows the builtin
                  changeset merging code to be run without doing a conversion. Its
                  parameters and output are similar to that of cvsps 2.1. Please see
                  the command help for more details.
                  Subversion Source
                  #################
                  Subversion source detects classical trunk/branches/tags layouts.
                  By default, the supplied ``svn://repo/path/`` source URL is
                  converted as a single branch. If ``svn://repo/path/trunk`` exists
                  it replaces the default branch. If ``svn://repo/path/branches``
                  exists, its subdirectories are listed as possible branches. If
                  ``svn://repo/path/tags`` exists, it is looked for tags referencing
                  converted branches. Default ``trunk``, ``branches`` and ``tags``
                  values can be overridden with following options. Set them to paths
                  relative to the source URL, or leave them blank to disable auto
                  detection.
                  The following options can be set with ``--config``:
                  :convert.svn.branches: specify the directory containing branches.
                      The default is ``branches``.
                  :convert.svn.tags: specify the directory containing tags. The
                      default is ``tags``.
                  :convert.svn.trunk: specify the name of the trunk branch. The
                      default is ``trunk``.
                  :convert.localtimezone: use local time (as determined by the TZ
                      environment variable) for changeset date/times. The default
                      is False (use UTC).
                  Source history can be retrieved starting at a specific revision,
                  instead of being integrally converted. Only single branch
                  conversions are supported.
                  :convert.svn.startrev: specify start Subversion revision number.
                      The default is 0.
                  Perforce Source
                  ###############
                  The Perforce (P4) importer can be given a p4 depot path or a
                  client specification as source. It will convert all files in the
                  source to a flat Mercurial repository, ignoring labels, branches
                  and integrations. Note that when a depot path is given you then
                  usually should specify a target directory, because otherwise the
                  target may be named ``...-hg``.
                  It is possible to limit the amount of source history to be
                  converted by specifying an initial Perforce revision:
                  :convert.p4.startrev: specify initial Perforce revision (a
                      Perforce changelist number).
                  Mercurial Destination
                  #####################
                  The following options are supported:
                  :convert.hg.clonebranches: dispatch source branches in separate
                      clones. The default is False.
                  :convert.hg.tagsbranch: branch name for tag revisions, defaults to
                      ``default``.
                  :convert.hg.usebranchnames: preserve branch names. The default is
                      True.
                  """
                  # positional arguments and every parsed option (as keyword arguments)
                  # are forwarded unchanged to the implementation in convcmd
                  return convcmd.convert(ui, src, dest, revmapfile, **opts)
              # 'hg debugsvnlog' takes no options of its own (empty option table); the
              # call is forwarded as-is to subversion.debugsvnlog().
              @command('debugsvnlog', [], 'hg debugsvnlog', norepo=True)
              def debugsvnlog(ui, **opts):
                  return subversion.debugsvnlog(ui, **opts)
              # 'hg debugcvsps' command declaration: the option table is grouped by
              # origin (shared with cvsps-2.1, builtin-only, accepted-but-ignored) and
              # the function itself only forwards to cvsps.debugcvsps().
              @command('debugcvsps',
                  [
                  # Main options shared with cvsps-2.1
                  ('b', 'branches', [], _('only return changes on specified branches')),
                  ('p', 'prefix', '', _('prefix to remove from file names')),
                  ('r', 'revisions', [],
                   _('only return changes after or between specified tags')),
                  ('u', 'update-cache', None, _("update cvs log cache")),
                  ('x', 'new-cache', None, _("create new cvs log cache")),
                  ('z', 'fuzz', 60, _('set commit time fuzz in seconds')),
                  ('', 'root', '', _('specify cvsroot')),
                  # Options specific to builtin cvsps
                  ('', 'parents', '', _('show parent changesets')),
                  ('', 'ancestors', '', _('show current changeset in ancestor branches')),
                  # Options that are ignored for compatibility with cvsps-2.1
                  ('A', 'cvs-direct', None, _('ignored for compatibility')),
                  ],
                  _('hg debugcvsps [OPTION]... [PATH]...'),
                  norepo=True)
              def debugcvsps(ui, *args, **opts):
                  '''create changeset information from CVS
                  This command is intended as a debugging tool for the CVS to
                  Mercurial converter, and can be used as a direct replacement for
                  cvsps.
                  Hg debugcvsps reads the CVS rlog for current directory (or any
                  named directory) in the CVS repository, and converts the log to a
                  series of changesets based on matching commit log entries and
                  dates.'''
                  # positional PATH arguments and all options are passed through
                  return cvsps.debugcvsps(ui, *args, **opts)
              def kwconverted(ctx, name):
                  # Shared helper of the svn* template keywords: look up the
                  # 'convert_revision' extra of ctx and, for 'svn:' revisions, return
                  # the component selected by name. Anything else (non-svn revision,
                  # missing extra, unknown name) yields the raw value, '' when absent.
                  converted = ctx.extra().get('convert_revision', '')
                  if not converted.startswith('svn:'):
                      return converted
                  # subversion.revsplit() is indexed as (uuid, path, revnum)
                  if name == 'svnuuid':
                      return subversion.revsplit(converted)[0]
                  if name == 'svnpath':
                      return subversion.revsplit(converted)[1]
                  if name == 'svnrev':
                      return str(subversion.revsplit(converted)[2])
                  return converted
              # Template keyword function; only ctx is used, repo and extra keyword
              # arguments are accepted and ignored. The docstring is extracted by
              # hggettext (see i18nfunctions), so it is left untouched.
              def kwsvnrev(repo, ctx, **args):
                  """:svnrev: String. Converted subversion revision number."""
                  return kwconverted(ctx, 'svnrev')
              # Template keyword function; only ctx is used, repo and extra keyword
              # arguments are accepted and ignored. The docstring is extracted by
              # hggettext (see i18nfunctions), so it is left untouched.
              def kwsvnpath(repo, ctx, **args):
                  """:svnpath: String. Converted subversion revision project path."""
                  return kwconverted(ctx, 'svnpath')
              # Template keyword function; only ctx is used, repo and extra keyword
              # arguments are accepted and ignored. The docstring is extracted by
              # hggettext (see i18nfunctions), so it is left untouched.
              def kwsvnuuid(repo, ctx, **args):
                  """:svnuuid: String. Converted subversion revision repository identifier."""
                  return kwconverted(ctx, 'svnuuid')
              def extsetup(ui):
                  # Register the svn* template keywords in templatekw.keywords, in the
                  # order svnrev, svnpath, svnuuid. ui is not used.
                  registrations = (
                      ('svnrev', kwsvnrev),
                      ('svnpath', kwsvnpath),
                      ('svnuuid', kwsvnuuid),
                  )
                  for keyword, func in registrations:
                      templatekw.keywords[keyword] = func
              # tell hggettext to extract docstrings from these functions (the
              # template keyword functions registered in extsetup()):
              i18nfunctions = [kwsvnrev, kwsvnpath, kwsvnuuid]

hgext/convert/bzr.py

0 +3 -2

              # bzr.py - bzr support for the convert extension
              #
              #  Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              # This module is for handling 'bzr', that was formerly known as Bazaar-NG;
              # it cannot access 'bar' repositories, but they were never used very much
              import os
              from mercurial import demandimport
              # these do not work with demandimport, blacklist
              demandimport.ignore.extend([
                      'bzrlib.transactions',
                      'bzrlib.urlutils',
                      'ElementPath',
                  ])
              from mercurial.i18n import _
              from mercurial import util
              from common import NoRepo, commit, converter_source
              try:
                  # bazaar imports
                  # bzrlib is optional: a failed import is tolerated here and detected
                  # later, when bzr_source.__init__ probes the 'bzrdir' name and
                  # raises NoRepo if it is unbound.
                  from bzrlib import bzrdir, revision, errors
                  from bzrlib.revisionspec import RevisionSpec
              except ImportError:
                  pass
              # entry kinds bzr_source.getfile() returns data for; any other kind
              # (or an unknown path) is reported as (None, None)
              supportedkinds = ('file', 'symlink')
              class bzr_source(converter_source):
                  """Reads Bazaar repositories by using the Bazaar Python libraries"""
                  def __init__(self, ui, path, rev=None):
                      # Open the Bazaar repository at path as a conversion source.
                      # Raises NoRepo when path has no '.bzr' entry, when the bzrlib
                      # modules were not imported, or when bzrlib reports that no
                      # repository is present. The checks run in that order.
                      super(bzr_source, self).__init__(ui, path, rev=rev)
                      if not os.path.exists(os.path.join(path, '.bzr')):
                          raise NoRepo(_('%s does not look like a Bazaar repository')
                                       % path)
                      try:
                          # access bzrlib stuff
                          # (the module-level import swallows ImportError, so an
                          # unbound name here means bzrlib is not available)
                          bzrdir
                      except NameError:
                          raise NoRepo(_('Bazaar modules could not be loaded'))
                      # from here on the absolute path is used, including in the
                      # error message below
                      path = os.path.abspath(path)
                      self._checkrepotype(path)
                      try:
                          self.sourcerepo = bzrdir.BzrDir.open(path).open_repository()
                      except errors.NoRepositoryPresent:
                          raise NoRepo(_('%s does not look like a Bazaar repository')
                                       % path)
                      # revision id -> parent ids, filled by getcommit() and read by
                      # getchanges() / getchangedfiles()
                      self._parentids = {}
                  def _checkrepotype(self, path):
                      """Warn when path looks like a lightweight checkout.

                      The working tree and the branch are compared by the base of
                      their root transports; a mismatch triggers the warning. Any
                      failure only produces a note and never aborts the conversion.
                      """
                      # Lightweight checkouts detection is informational but probably
                      # fragile at API level. It should not terminate the conversion.
                      try:
                          from bzrlib import bzrdir
                          controldir = bzrdir.BzrDir.open_containing(path)[0]
                          try:
                              wtree = controldir.open_workingtree(
                                  recommend_upgrade=False)
                              branch = wtree.branch
                          except (errors.NoWorkingTree, errors.NotLocalUrl):
                              wtree = None
                              branch = controldir.open_branch()
                          if wtree is None:
                              # no working tree, nothing to compare
                              return
                          treebase = wtree.bzrdir.root_transport.base
                          branchbase = branch.bzrdir.root_transport.base
                          if treebase != branchbase:
                              self.ui.warn(_('warning: lightweight checkouts may cause '
                                             'conversion failures, try with a regular '
                                             'branch instead.\n'))
                      except Exception:
                          self.ui.note(_('bzr source type could not be determined\n'))
                  def before(self):
                      """Acquire a read lock on the source repository.

                      Before the conversion begins, acquire a read lock
                      for all the operations that might need it. Fortunately
                      read locks don't block other reads or writes to the
                      repository, so this shouldn't have any impact on the usage of
                      the source repository.

                      The alternative would be locking on every operation that
                      needs locks (there are currently two: getting the file and
                      getting the parent map) and releasing immediately after,
                      but this approach can take even 40% longer."""
                      self.sourcerepo.lock_read()
                  def after(self):
                      """Unlock the source repository (counterpart of before())."""
                      self.sourcerepo.unlock()
                  def _bzrbranches(self):
                      """Return the branches found by the source repository.

                      using=True is passed to avoid nested repositories (see
                      issue3254).
                      """
                      return self.sourcerepo.find_branches(using=True)
                  def getheads(self):
                      """Return the list of revision ids to start the conversion from.

                      Without self.rev, the last revision of every branch is returned,
                      sorted. With self.rev, the revision spec is resolved against
                      each branch; branches where it does not resolve are skipped and
                      the last successful match is used.

                      Raises util.Abort when self.rev resolves in no branch at all.
                      """
                      if not self.rev:
                          # _bzrbranches() passes using=True to avoid nested
                          # repositories (see issue3254)
                          heads = sorted([b.last_revision() for b in self._bzrbranches()])
                      else:
                          revid = None
                          for branch in self._bzrbranches():
                              try:
                                  r = RevisionSpec.from_string(self.rev)
                                  info = r.in_history(branch)
                              except errors.BzrError:
                                  # The spec does not resolve in this branch. Skip it:
                                  # falling through would read 'info' while it is
                                  # unbound (NameError on the first branch) or stale.
                                  continue
                              revid = info.rev_id
                          if revid is None:
                              raise util.Abort(_('%s is not a valid revision') % self.rev)
                          heads = [revid]
                      # Empty repositories return 'null:', which cannot be retrieved
                      heads = [h for h in heads if h != 'null:']
                      return heads
                  def getfile(self, name, rev):
                      """Return (data, mode) for file name in revision rev.

                      data is the symlink target for symlinks and the file content
                      otherwise; mode comes from self._modecache. (None, None) is
                      returned when the path is unknown in that revision or its kind
                      is not in supportedkinds.

                      Raises util.Abort for a symlink without a target.
                      """
                      tree = self.sourcerepo.revision_tree(rev)
                      fileid = tree.path2id(name.decode(self.encoding or 'utf-8'))
                      kind = None
                      if fileid is not None:
                          kind = tree.kind(fileid)
                      if kind not in supportedkinds:
                          # the file is not available anymore - was deleted
                          return None, None
                      mode = self._modecache[(name, rev)]
                      if kind != 'symlink':
                          return tree.get_file(fileid).read(), mode
                      linktarget = tree.get_symlink_target(fileid)
                      if linktarget is None:
                          raise util.Abort(_('%s.%s symlink has no target')
                                           % (name, rev))
                      return linktarget, mode
-                 def getchanges(self, version):
-                     # set up caches: modecache and revtree
+                 def getchanges(self, version, full):
+                     if full:
+                         raise util.Abort(_("convert from cvs do not support --full"))
                      self._modecache = {}
                      self._revtree = self.sourcerepo.revision_tree(version)
                      # get the parentids from the cache
                      parentids = self._parentids.pop(version)
                      # only diff against first parent id
                      prevtree = self.sourcerepo.revision_tree(parentids[0])
                      return self._gettreechanges(self._revtree, prevtree)
                  def getcommit(self, version):
                      """Build the commit object describing revision version.

                      As a side effect the parent ids of version are stored in
                      self._parentids; a revision without parents is cached with
                      revision.NULL_REVISION as its only parent. A branch nick of
                      'trunk' is reported as 'default'.
                      """
                      bzrrev = self.sourcerepo.get_revision(version)
                      # populate parent id cache
                      if bzrrev.parent_ids:
                          parents = self._filterghosts(bzrrev.parent_ids)
                          self._parentids[version] = parents
                      else:
                          parents = []
                          self._parentids[version] = (revision.NULL_REVISION,)
                      nick = bzrrev.properties.get('branch-nick', u'default')
                      branch = self.recode(nick)
                      if branch == 'trunk':
                          branch = 'default'
                      date = '%d %d' % (bzrrev.timestamp, -bzrrev.timezone)
                      author = self.recode(bzrrev.committer)
                      desc = self.recode(bzrrev.message)
                      return commit(parents=parents, date=date, author=author,
                                    desc=desc, branch=branch, rev=version)
                  def gettags(self):
                      """Return a dict mapping recoded tag names to revision ids.

                      Tags of all branches are merged, later branches overriding
                      earlier ones. As soon as one branch does not support tags an
                      empty dict is returned, discarding anything collected so far.
                      """
                      tags = {}
                      for branch in self._bzrbranches():
                          if not branch.supports_tags():
                              return {}
                          for tagname, revid in branch.tags.get_tag_dict().iteritems():
                              tags[self.recode(tagname)] = revid
                      return tags
                  def getchangedfiles(self, rev, i):
                      """Return the names of the files changed in rev against a parent.

                      i indexes the cached parent ids of rev; with i set to None the
                      comparison is made against the empty (null) revision. The mode
                      cache is reset as a side effect.
                      """
                      self._modecache = {}
                      curtree = self.sourcerepo.revision_tree(rev)
                      if i is None:
                          # no parent id, get the empty revision
                          parentid = revision.NULL_REVISION
                      else:
                          parentid = self._parentids[rev][i]
                      prevtree = self.sourcerepo.revision_tree(parentid)
                      changed = self._gettreechanges(curtree, prevtree)[0]
                      return [entry[0] for entry in changed]
                  def _gettreechanges(self, current, origin):
                      revid = current._revision_id
                      changes = []
                      renames = {}
                      seen = set()
                      # Process the entries by reverse lexicographic name order to
                      # handle nested renames correctly, most specific first.
                      curchanges = sorted(current.iter_changes(origin),
                                          key=lambda c: c[1][0] or c[1][1],
                                          reverse=True)
                      for (fileid, paths, changed_content, versioned, parent, name,
                          kind, executable) in curchanges:
                          if paths[0] == u'' or paths[1] == u'':
                              # ignore changes to tree root
                              continue
                          # bazaar tracks directories, mercurial does not, so
                          # we have to rename the directory contents
                          if kind[1] == 'directory':
                              if kind[0] not in (None, 'directory'):
                                  # Replacing 'something' with a directory, record it
                                  # so it can be removed.
                                  changes.append((self.recode(paths[0]), revid))
                              if kind[0] == 'directory' and None not in paths:
                                  renaming = paths[0] != paths[1]
                                  # neither an add nor an delete - a move
                                  # rename all directory contents manually
                                  subdir = origin.inventory.path2id(paths[0])
                                  # get all child-entries of the directory
                                  for name, entry in origin.inventory.iter_entries(subdir):
                                      # hg does not track directory renames
                                      if entry.kind == 'directory':
                                          continue
                                      frompath = self.recode(paths[0] + '/' + name)
                                      if frompath in seen:
                                          # Already handled by a more specific change entry
                                          # This is important when you have:
                                          # a => b
                                          # a/c => a/c
                                          # Here a/c must not be renamed into b/c
                                          continue
                                      seen.add(frompath)
                                      if not renaming:
                                          continue
                                      topath = self.recode(paths[1] + '/' + name)
                                      # register the files as changed
                                      changes.append((frompath, revid))
                                      changes.append((topath, revid))
                                      # add to mode cache
                                      mode = ((entry.executable and 'x')
                                              or (entry.kind == 'symlink' and 's')
                                              or '')
                                      self._modecache[(topath, revid)] = mode
                                      # register the change as move
                                      renames[topath] = frompath
                              # no further changes, go to the next change
                              continue
                          # we got unicode paths, need to convert them
                          path, topath = paths
                          if path is not None:
                              path = self.recode(path)
                          if topath is not None:
                              topath = self.recode(topath)
                          seen.add(path or topath)
                          if topath is None:
                              # file deleted
                              changes.append((path, revid))
                              continue
                          # renamed
                          if path and path != topath:
                              renames[topath] = path
                              changes.append((path, revid))
                          # populate the mode cache
                          kind, executable = [e[1] for e in (kind, executable)]
                          mode = ((executable and 'x') or (kind == 'symlink' and 'l')
                                  or '')
                          self._modecache[(topath, revid)] = mode
                          changes.append((topath, revid))
                      return changes, renames
                  def _filterghosts(self, ids):
                      """Filters out ghost revisions which hg does not support, see
                      <http://bazaar-vcs.org/GhostRevision>
                      """
                      parentmap = self.sourcerepo.get_parent_map(ids)
                      parents = tuple([parent for parent in ids if parent in parentmap])
                      return parents

hgext/convert/common.py

0 +6 -4

              # common.py - common code for the convert extension
              #
              #  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import base64, errno, subprocess, os, datetime, re
              import cPickle as pickle
              from mercurial import util
              from mercurial.i18n import _
              propertycache = util.propertycache
              def encodeargs(args):
                  def encodearg(s):
                      lines = base64.encodestring(s)
                      lines = [l.splitlines()[0] for l in lines]
                      return ''.join(lines)
                  s = pickle.dumps(args)
                  return encodearg(s)
              def decodeargs(s):
                  s = base64.decodestring(s)
                  return pickle.loads(s)
              class MissingTool(Exception):
                  """Raised by checktool() when a tool is missing and abort is false."""
              def checktool(exe, name=None, abort=True):
                  """Raise unless util.findexe() can locate the executable exe.

                  name is the label used in the error message (defaults to exe).
                  util.Abort is raised when abort is true, MissingTool otherwise.
                  """
                  if util.findexe(exe):
                      return
                  if abort:
                      exc = util.Abort
                  else:
                      exc = MissingTool
                  raise exc(_('cannot find required "%s" tool') % (name or exe))
              class NoRepo(Exception):
                  """Raised when a path is not a valid repository for a converter."""
              SKIPREV = 'SKIP'
              class commit(object):
                  """Plain record holding the metadata of one changeset.

                  author falls back to 'unknown' and date to '0 0' when empty.
                  extra defaults to a new empty dict for every instance; all other
                  arguments are stored unchanged.
                  """
                  def __init__(self, author, date, desc, parents, branch=None, rev=None,
                               extra=None, sortkey=None):
                      self.author = author or 'unknown'
                      self.date = date or '0 0'
                      self.desc = desc
                      self.parents = parents
                      self.branch = branch
                      self.rev = rev
                      # A literal {} default would be one dict shared by every commit
                      # created without an explicit extra, leaking entries between
                      # unrelated commits; build a fresh one per instance instead.
                      if extra is None:
                          extra = {}
                      self.extra = extra
                      self.sortkey = sortkey
              class converter_source(object):
                  """Conversion source interface"""
                  def __init__(self, ui, path=None, rev=None):
                      """Initialize conversion source (or raise NoRepo("message")
                      exception if path is not a valid repository)"""
                      self.ui = ui
                      self.path = path
                      self.rev = rev
                      # default encoding used by recode() for byte strings
                      self.encoding = 'utf-8'
                  def checkhexformat(self, revstr, mapname='splicemap'):
                      """ fails if revstr is not a 40 byte hex. mercurial and git both uses
                          such format for their revision numbering
                      """
                      # re.match anchors at the start, '$' at the end of revstr
                      if not re.match(r'[0-9a-fA-F]{40,40}$', revstr):
                          raise util.Abort(_('%s entry %s is not a valid revision'
                                             ' identifier') % (mapname, revstr))
                  def before(self):
                      """Hook that does nothing in this base class."""
                      pass
                  def after(self):
                      """Hook that does nothing in this base class."""
                      pass
                  def setrevmap(self, revmap):
                      """set the map of already-converted revisions"""
                      pass
                  def getheads(self):
                      """Return a list of this repository's heads"""
                      raise NotImplementedError
                  def getfile(self, name, rev):
                      """Return a pair (data, mode) where data is the file content
                      as a string and mode one of '', 'x' or 'l'. rev is the
                      identifier returned by a previous call to getchanges().
                      Data is None if file is missing/deleted in rev.
                      """
                      raise NotImplementedError
-                 def getchanges(self, version):
+                 def getchanges(self, version, full):
                      """Returns a tuple of (files, copies).
                      files is a sorted list of (filename, id) tuples for all files
                      changed between version and its first parent returned by
-                     getcommit(). id is the source revision id of the file.
+                     getcommit(). If full, all files in that revision is returned.
+                     id is the source revision id of the file.
                      copies is a dictionary of dest: source
                      """
                      raise NotImplementedError
                  def getcommit(self, version):
                      """Return the commit object for version"""
                      raise NotImplementedError
                  def gettags(self):
                      """Return the tags as a dictionary of name: revision
                      Tag names must be UTF-8 strings.
                      """
                      raise NotImplementedError
                  def recode(self, s, encoding=None):
                      """Return s as a UTF-8 encoded byte string.

                      unicode input is encoded directly. Anything else is decoded
                      with encoding (default: self.encoding, else 'utf-8'); if that
                      fails it is decoded as latin-1, and as a last resort decoded
                      with encoding using the 'replace' error handler.
                      """
                      if not encoding:
                          encoding = self.encoding or 'utf-8'
                      if isinstance(s, unicode):
                          return s.encode("utf-8")
                      try:
                          return s.decode(encoding).encode("utf-8")
                      except UnicodeError:
                          try:
                              return s.decode("latin-1").encode("utf-8")
                          except UnicodeError:
                              return s.decode(encoding, "replace").encode("utf-8")
                  def getchangedfiles(self, rev, i):
                      """Return the files changed by rev compared to parent[i].
                      i is an index selecting one of the parents of rev.  The return
                      value should be the list of files that are different in rev and
                      this parent.
                      If rev has no parents, i is None.
                      This function is only needed to support --filemap
                      """
                      raise NotImplementedError
                  def converted(self, rev, sinkrev):
                      '''Notify the source that a revision has been converted.'''
                      pass
                  def hasnativeorder(self):
                      """Return true if this source has a meaningful, native revision
                      order. For instance, Mercurial revisions are store sequentially
                      while there is no such global ordering with Darcs.
                      """
                      return False
                  def hasnativeclose(self):
                      """Return true if this source has ability to close branch.
                      """
                      return False
                  def lookuprev(self, rev):
                      """If rev is a meaningful revision reference in source, return
                      the referenced identifier in the same format used by getcommit().
                      return None otherwise.
                      """
                      return None
                  def getbookmarks(self):
                      """Return the bookmarks as a dictionary of name: revision
                      Bookmark names are to be UTF-8 strings.
                      """
                      return {}
                  def checkrevformat(self, revstr, mapname='splicemap'):
                      """revstr is a string that describes a revision in the given
                         source control system.  Return true if revstr has correct
                         format.
                      """
                      # the base implementation accepts every revstr
                      return True
              class converter_sink(object):
                  """Conversion sink (target) interface"""
                  def __init__(self, ui, path):
                      """Initialize conversion sink (or raise NoRepo("message")
                      exception if path is not a valid repository)
                      created is a list of paths to remove if a fatal error occurs
                      later"""
                      self.ui = ui
                      self.path = path
                      self.created = []
                  def revmapfile(self):
                      """Path to a file that will contain lines
                      source_rev_id sink_rev_id
                      mapping equivalent revision identifiers for each system."""
                      raise NotImplementedError
                  def authorfile(self):
                      """Path to a file that will contain lines
                      srcauthor=dstauthor
                      mapping equivalent authors identifiers for each system."""
                      # the base implementation provides no author file
                      return None
-                 def putcommit(self, files, copies, parents, commit, source, revmap):
+                 def putcommit(self, files, copies, parents, commit, source, revmap, full):
                      """Create a revision with all changed files listed in 'files'
                      and having listed parents. 'commit' is a commit object
                      containing at a minimum the author, date, and message for this
                      changeset.  'files' is a list of (path, version) tuples,
                      'copies' is a dictionary mapping destinations to sources,
                      'source' is the source repository, and 'revmap' is a mapfile
                      of source revisions to converted revisions. Only getfile() and
-                     lookuprev() should be called on 'source'.
+                     lookuprev() should be called on 'source'. 'full' means that 'files'
+                     is complete and all other files should be removed.
                      Note that the sink repository is not told to update itself to
                      a particular revision (or even what that revision would be)
                      before it receives the file data.
                      """
                      raise NotImplementedError
                  def puttags(self, tags):
                      """Put tags into sink.
                      tags: {tagname: sink_rev_id, ...} where tagname is an UTF-8 string.
                      Return a pair (tag_revision, tag_parent_revision), or (None, None)
                      if nothing was changed.
                      """
                      raise NotImplementedError
                  def setbranch(self, branch, pbranches):
                      """Set the current branch name. Called before the first putcommit
                      on the branch.
                      branch: branch name for subsequent commits
                      pbranches: (converted parent revision, parent branch) tuples"""
                      pass
                  def setfilemapmode(self, active):
                      """Tell the destination that we're using a filemap
                      Some converter_sources (svn in particular) can claim that a file
                      was changed in a revision, even if there was no change.  This method
                      tells the destination that we're using a filemap and that it should
                      filter empty revisions.
                      """
                      pass
                  def before(self):
                      """Hook that does nothing in this base class."""
                      pass
                  def after(self):
                      """Hook that does nothing in this base class."""
                      pass
                  def putbookmarks(self, bookmarks):
                      """Put bookmarks into sink.
                      bookmarks: {bookmarkname: sink_rev_id, ...}
                      where bookmarkname is an UTF-8 string.
                      """
                      pass
                  def hascommitfrommap(self, rev):
                      """Return False if a rev mentioned in a filemap is known to not be
                      present."""
                      raise NotImplementedError
                  def hascommitforsplicemap(self, rev):
                      """This method is for the special needs for splicemap handling and not
                      for general use. Returns True if the sink contains rev, aborts on some
                      special cases."""
                      raise NotImplementedError
              class commandline(object):
                  """Build and run shell command lines for one external tool.

                  self.command is the executable name. prerun() and postrun() do
                  nothing here; _dorun() calls them around spawning the process.
                  """
                  def __init__(self, ui, command):
                      self.ui, self.command = ui, command
                  def prerun(self):
                      """Called by _dorun() right before the process is spawned."""
                      pass
                  def postrun(self):
                      """Called by _dorun() after the spawn attempt, even on error."""
                      pass
                  def _cmdline(self, cmd, *args, **kwargs):
                      """Return the shell-quoted command line as a single string.

                      One-letter keyword names become '-k value'; longer names become
                      '--long-name=value' with underscores turned into dashes. A
                      value that cannot be concatenated to a string yields the bare
                      option. Unless ui.debugflag is set, stderr goes to os.devnull.
                      """
                      words = [self.command, cmd]
                      words.extend(args)
                      for opt, value in kwargs.iteritems():
                          short = len(opt) == 1
                          if short:
                              words.append('-' + opt)
                          else:
                              words.append('--' + opt.replace('_', '-'))
                          try:
                              if short:
                                  words.append('' + value)
                              else:
                                  words[-1] += '=' + value
                          except TypeError:
                              # non-string value: keep only the option itself
                              pass
                      quoted = [util.shellquote(word) for word in words]
                      if not self.ui.debugflag:
                          quoted += ['2>', os.devnull]
                      return ' '.join(quoted)
                  def _run(self, cmd, *args, **kwargs):
                      """Spawn the command with stdout piped; return the Popen object."""
                      def popen(cmdline):
                          return subprocess.Popen(cmdline, shell=True, bufsize=-1,
                                                  close_fds=util.closefds,
                                                  stdout=subprocess.PIPE)
                      return self._dorun(popen, cmd, *args, **kwargs)
                  def _run2(self, cmd, *args, **kwargs):
                      """Spawn the command through util.popen2 and return its result."""
                      return self._dorun(util.popen2, cmd, *args, **kwargs)
                  def _dorun(self, openfunc, cmd,  *args, **kwargs):
                      """Build the command line and hand it to openfunc.

                      prerun() runs first; postrun() always runs once openfunc has
                      been attempted.
                      """
                      line = self._cmdline(cmd, *args, **kwargs)
                      self.ui.debug('running: %s\n' % (line,))
                      self.prerun()
                      try:
                          return openfunc(line)
                      finally:
                          self.postrun()
                  def run(self, cmd, *args, **kwargs):
                      """Run the command; return (stdout as a string, return code)."""
                      proc = self._run(cmd, *args, **kwargs)
                      out = proc.communicate()[0]
                      self.ui.debug(out)
                      return out, proc.returncode
                  def runlines(self, cmd, *args, **kwargs):
                      """Run the command; return (list of stdout lines, return code)."""
                      proc = self._run(cmd, *args, **kwargs)
                      lines = proc.stdout.readlines()
                      proc.wait()
                      self.ui.debug(''.join(lines))
                      return lines, proc.returncode
                  def checkexit(self, status, output=''):
                      """Raise util.Abort if status is non-zero, warning with output."""
                      if not status:
                          return
                      if output:
                          self.ui.warn(_('%s error:\n') % self.command)
                          self.ui.warn(output)
                      msg = util.explainexit(status)[0]
                      raise util.Abort('%s %s' % (self.command, msg))
                  def run0(self, cmd, *args, **kwargs):
                      """Like run(), but abort on failure and return only the output."""
                      out, status = self.run(cmd, *args, **kwargs)
                      self.checkexit(status, out)
                      return out
                  def runlines0(self, cmd, *args, **kwargs):
                      """Like runlines(), but abort on failure; return only the lines."""
                      lines, status = self.runlines(cmd, *args, **kwargs)
                      self.checkexit(status, ''.join(lines))
                      return lines
                  @propertycache
                  def argmax(self):
                      """Upper bound used by _limit_arglist() for a command line."""
                      try:
                          argmax = os.sysconf("SC_ARG_MAX")
                      except (AttributeError, ValueError):
                          # POSIX requires at least 4096 bytes for ARG_MAX
                          argmax = 4096
                      # Windows shells impose their own limits on command line length,
                      # down to 2047 bytes for cmd.exe under Windows NT/2k and 2500 bytes
                      # for older 4nt.exe. See http://support.microsoft.com/kb/830473 for
                      # details about cmd.exe limitations.
                      # Since ARG_MAX is for command line _and_ environment, lower our limit
                      # (and make happy Windows shells while doing this).
                      return argmax // 2 - 1
                  def _limit_arglist(self, arglist, cmd, *args, **kwargs):
                      """Yield arglist in batches that fit within the argmax budget.

                      Each entry is charged len(entry) + 3. A batch always receives
                      at least one entry, even if that entry alone exceeds the budget.
                      """
                      limit = self.argmax - len(self._cmdline(cmd, *args, **kwargs))
                      used = 0
                      batch = []
                      for fn in arglist:
                          cost = len(fn) + 3
                          if batch and used + cost >= limit:
                              # current batch is full: emit it and start a new one
                              yield batch
                              batch = []
                              used = 0
                          batch.append(fn)
                          used += cost
                      if batch:
                          yield batch
                  def xargs(self, arglist, cmd, *args, **kwargs):
                      """Run cmd via run0() once per batch of arglist entries."""
                      for chunk in self._limit_arglist(arglist, cmd, *args, **kwargs):
                          self.run0(cmd, *(list(args) + chunk), **kwargs)
              class mapfile(dict):
                  def __init__(self, ui, path):
                      super(mapfile, self).__init__()
                      self.ui = ui
                      self.path = path
                      self.fp = None
                      self.order = []
                      self._read()
                  def _read(self):
                      if not self.path:
                          return
                      try:
                          fp = open(self.path, 'r')
                      except IOError, err:
                          if err.errno != errno.ENOENT:
                              raise
                          return
                      for i, line in enumerate(fp):
                          line = line.splitlines()[0].rstrip()
                          if not line:
                              # Ignore blank lines
                              continue
                          try:
                              key, value = line.rsplit(' ', 1)
                          except ValueError:
                              raise util.Abort(
                                  _('syntax error in %s(%d): key/value pair expected')
                                  % (self.path, i + 1))
                          if key not in self:
                              self.order.append(key)
                          super(mapfile, self).__setitem__(key, value)
                      fp.close()
                  def __setitem__(self, key, value):
                      if self.fp is None:
                          try:
                              self.fp = open(self.path, 'a')
                          except IOError, err:
                              raise util.Abort(_('could not open map file %r: %s') %
                                               (self.path, err.strerror))
                      self.fp.write('%s %s\n' % (key, value))
                      self.fp.flush()
                      super(mapfile, self).__setitem__(key, value)
                  def close(self):
                      if self.fp:
                          self.fp.close()
                          self.fp = None
              def makedatetimestamp(t):
                  """Like util.makedate() but for time t instead of current time

                  Returns (t, tz) where tz is the number of seconds by which UTC is
                  ahead of local time at timestamp t.
                  """
                  utc = datetime.datetime.utcfromtimestamp(t)
                  local = datetime.datetime.fromtimestamp(t)
                  offset = utc - local
                  return t, offset.days * 86400 + offset.seconds

hgext/convert/convcmd.py

0 +3 -3

              # convcmd - convert extension commands definition
              #
              # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from common import NoRepo, MissingTool, SKIPREV, mapfile
              from cvs import convert_cvs
              from darcs import darcs_source
              from git import convert_git
              from hg import mercurial_source, mercurial_sink
              from subversion import svn_source, svn_sink
              from monotone import monotone_source
              from gnuarch import gnuarch_source
              from bzr import bzr_source
              from p4 import p4_source
              import filemap
              import os, shutil, shlex
              from mercurial import hg, util, encoding
              from mercurial.i18n import _
              orig_encoding = 'ascii'
              def recode(s):
                  """Return s encoded in orig_encoding, replacing unencodable chars.

                  Input that is not unicode is first decoded as UTF-8.
                  """
                  if not isinstance(s, unicode):
                      s = s.decode('utf-8')
                  return s.encode(orig_encoding, 'replace')
              # Known source types as (type name, source class, sort mode name)
              # tuples. convertsource() tries them in list order and returns the
              # sort mode name together with the source it manages to create.
              source_converters = [
                  ('cvs', convert_cvs, 'branchsort'),
                  ('git', convert_git, 'branchsort'),
                  ('svn', svn_source, 'branchsort'),
                  ('hg', mercurial_source, 'sourcesort'),
                  ('darcs', darcs_source, 'branchsort'),
                  ('mtn', monotone_source, 'branchsort'),
                  ('gnuarch', gnuarch_source, 'branchsort'),
                  ('bzr', bzr_source, 'branchsort'),
                  ('p4', p4_source, 'branchsort'),
                  ]
              # Known destination types as (type name, sink class) tuples, tried
              # in list order by convertsink().
              sink_converters = [
                  ('hg', mercurial_sink),
                  ('svn', svn_sink),
                  ]
              def convertsource(ui, path, type, rev):
                  exceptions = []
                  if type and type not in [s[0] for s in source_converters]:
                      raise util.Abort(_('%s: invalid source repository type') % type)
                  for name, source, sortmode in source_converters:
                      try:
                          if not type or name == type:
                              return source(ui, path, rev), sortmode
                      except (NoRepo, MissingTool), inst:
                          exceptions.append(inst)
                  if not ui.quiet:
                      for inst in exceptions:
                          ui.write("%s\n" % inst)
                  raise util.Abort(_('%s: missing or unsupported repository') % path)
              def convertsink(ui, path, type):
                  if type and type not in [s[0] for s in sink_converters]:
                      raise util.Abort(_('%s: invalid destination repository type') % type)
                  for name, sink in sink_converters:
                      try:
                          if not type or name == type:
                              return sink(ui, path)
                      except NoRepo, inst:
                          ui.note(_("convert: %s\n") % inst)
                      except MissingTool, inst:
                          raise util.Abort('%s\n' % inst)
                  raise util.Abort(_('%s: unknown repository type') % path)
              class progresssource(object):
                  """Source wrapper reporting 'getting files' progress on each getfile().

                  filecount is passed to ui.progress() as the total; retrieved counts
                  the getfile() calls made so far.
                  """
                  def __init__(self, ui, source, filecount):
                      self.ui, self.source = ui, source
                      self.filecount = filecount
                      self.retrieved = 0
                  def getfile(self, file, rev):
                      """Bump the counter, report progress, then delegate to the source."""
                      self.retrieved += 1
                      self.ui.progress(_('getting files'), self.retrieved,
                                       item=file, total=self.filecount)
                      return self.source.getfile(file, rev)
                  def lookuprev(self, rev):
                      """Delegate to the wrapped source."""
                      return self.source.lookuprev(rev)
                  def close(self):
                      """Clear the 'getting files' progress topic."""
                      self.ui.progress(_('getting files'), None)
              class converter(object):
                  def __init__(self, ui, source, dest, revmapfile, opts):
                      """Set up conversion state for source and dest.

                      Loads the revision map from revmapfile, the author mappings
                      (the sink's own author file first, then opts['authormap']),
                      the splicemap and the branchmap named in opts.
                      """
                      self.ui = ui
                      self.source, self.dest = source, dest
                      self.opts = opts
                      self.commitcache = {}
                      self.authors = {}
                      self.authorfile = None
                      # Persistent record of converted revisions: source revision ID
                      # -> target revision ID (both strings). Incremental conversions
                      # rely on it.
                      self.map = mapfile(ui, revmapfile)
                      # The author map kept by the destination, if any, is read first
                      destauthors = self.dest.authorfile()
                      if destauthors and os.path.exists(destauthors):
                          self.readauthormap(destauthors)
                      # ... and then extended/overridden by the requested author map
                      usermap = opts.get('authormap')
                      if usermap:
                          self.readauthormap(usermap)
                          self.authorfile = self.dest.authorfile()
                      self.splicemap = self.parsesplicemap(opts.get('splicemap'))
                      self.branchmap = mapfile(ui, opts.get('branchmap'))
                  def parsesplicemap(self, path):
                      """ check and validate the splicemap format and
                          return a child/parents dictionary.
                          Format checking has two parts.
                          1. generic format which is same across all source types
                          2. specific format checking which may be different for
                             different source type.  This logic is implemented in
                             checkrevformat function in source files like
                             hg.py, subversion.py etc.

                          Each non-blank line holds 'child parent1[,parent2]'. The
                          result maps child to a list of one or two parents (a parent
                          repeated twice is stored once). An empty path yields {}.
                          Raises util.Abort on a malformed line or when the file
                          cannot be opened or read.
                      """
                      if not path:
                          return {}
                      m = {}
                      try:
                          fp = open(path, 'r')
                          # always release the file handle, including when a bad
                          # line aborts the parse
                          try:
                              for i, line in enumerate(fp):
                                  line = line.splitlines()[0].rstrip()
                                  if not line:
                                      # Ignore blank lines
                                      continue
                                  # split line on whitespace and commas, honouring
                                  # shell-style quoting
                                  lex = shlex.shlex(line, posix=True)
                                  lex.whitespace_split = True
                                  lex.whitespace += ','
                                  line = list(lex)
                                  # check number of parents
                                  if not (2 <= len(line) <= 3):
                                      raise util.Abort(_('syntax error in %s(%d): child parent1'
                                                         '[,parent2] expected') % (path, i + 1))
                                  for part in line:
                                      self.source.checkrevformat(part)
                                  # p1 and p2 are lists; p2 is empty when only one
                                  # parent is given
                                  child, p1, p2 = line[0], line[1:2], line[2:]
                                  if p1 == p2:
                                      m[child] = p1
                                  else:
                                      m[child] = p1 + p2
                          finally:
                              fp.close()
                      # if file does not exist or error reading, exit
                      except IOError:
                          raise util.Abort(_('splicemap file not found or error reading %s:')
                                             % path)
                      return m
                  def walktree(self, heads):
                      '''Return a mapping that identifies the uncommitted parents of every
                      uncommitted changeset.

                      Starting from heads, ancestors are visited first-in first-out.
                      A revision found in self.map is skipped when it maps to SKIPREV
                      or when the destination reports its mapped revision as present.
                      heads itself is not modified.'''
                      # work on a copy so the caller's list of heads is not drained
                      visit = list(heads)
                      known = set()
                      parents = {}
                      while visit:
                          n = visit.pop(0)
                          if n in known:
                              continue
                          if n in self.map:
                              m = self.map[n]
                              if m == SKIPREV or self.dest.hascommitfrommap(m):
                                  continue
                          known.add(n)
                          self.ui.progress(_('scanning'), len(known), unit=_('revisions'))
                          commit = self.cachecommit(n)
                          parents[n] = []
                          for p in commit.parents:
                              parents[n].append(p)
                              visit.append(p)
                      self.ui.progress(_('scanning'), None)
                      return parents
                  def mergesplicemap(self, parents, splicemap):
                      """Apply the splicemap overrides to the 'parents' graph in place.

                      For each child listed in 'splicemap' (visited in sorted order):

                      - if the child is not in 'parents', nothing is merged; a warning
                        is printed unless the destination already has it
                      - otherwise its entry in 'parents' is replaced by the splicemap
                        parents that the destination does not already have

                      Aborts when one of those remaining parents is not in 'parents'.
                      """
                      for child in sorted(splicemap):
                          if child in parents:
                              waitfor = []
                              for parent in splicemap[child]:
                                  mapped = self.map.get(parent, parent)
                                  if self.dest.hascommitforsplicemap(mapped):
                                      # Already in dest, nothing to wait for.
                                      continue
                                  if parent not in parents:
                                      # Neither in dest nor part of this run, not good
                                      raise util.Abort(_('unknown splice map parent: %s')
                                                       % parent)
                                  waitfor.append(parent)
                              parents[child] = waitfor
                              continue
                          # Could be in source but not converted during this run
                          mapped = self.map.get(child, child)
                          if not self.dest.hascommitforsplicemap(mapped):
                              self.ui.warn(_('splice map revision %s is not being '
                                             'converted, ignoring\n') % child)
                  def toposort(self, parents, sortmode):
                      '''Return an ordering such that every uncommitted changeset is
                      preceded by all its uncommitted ancestors.

                      'parents' maps each revision to convert to the list of its
                      parents. 'sortmode' selects how the next revision is picked among
                      the ones that are eligible at the same time and must be one of
                      'branchsort', 'datesort', 'sourcesort' or 'closesort'; any other
                      value aborts. Also aborts when a cycle is detected or when the
                      result does not contain as many revisions as 'parents'.'''
                      def mapchildren(parents):
                          """Return a (children, roots) tuple where 'children' maps parent
                          revision identifiers to children ones, and 'roots' is the list of
                          revisions without parents. 'parents' must be a mapping of revision
                          identifier to its parents ones.
                          """
                          visit = sorted(parents)
                          seen = set()
                          children = {}
                          roots = []
                          while visit:
                              n = visit.pop(0)
                              if n in seen:
                                  continue
                              seen.add(n)
                              # Ensure that nodes without parents are present in the
                              # 'children' mapping.
                              children.setdefault(n, [])
                              hasparent = False
                              for p in parents[n]:
                                  # Only parents missing from self.map make 'n' a
                                  # non-root and get visited in turn
                                  if p not in self.map:
                                      visit.append(p)
                                      hasparent = True
                                  children.setdefault(p, []).append(n)
                              if not hasparent:
                                  roots.append(n)
                          return children, roots
                      # Sort functions are supposed to take a list of revisions which
                      # can be converted immediately and pick one
                      def makebranchsorter():
                          """If the previously converted revision has a child in the
                          eligible revisions list, pick it. Return the list head
                          otherwise. Branch sort attempts to minimize branch
                          switching, which is harmful for Mercurial backend
                          compression.
                          """
                          # One-element list so that picknext() can rebind the value
                          prev = [None]
                          def picknext(nodes):
                              next = nodes[0]
                              for n in nodes:
                                  if prev[0] in parents[n]:
                                      next = n
                                      break
                              prev[0] = next
                              return next
                          return picknext
                      def makesourcesorter():
                          """Source specific sort."""
                          # Smallest 'sortkey' of the cached commits goes first
                          keyfn = lambda n: self.commitcache[n].sortkey
                          def picknext(nodes):
                              return sorted(nodes, key=keyfn)[0]
                          return picknext
                      def makeclosesorter():
                          """Close order sort."""
                          # False sorts before True: commits having 'close' in their
                          # extra are picked first, then 'sortkey' decides
                          keyfn = lambda n: ('close' not in self.commitcache[n].extra,
                                             self.commitcache[n].sortkey)
                          def picknext(nodes):
                              return sorted(nodes, key=keyfn)[0]
                          return picknext
                      def makedatesorter():
                          """Sort revisions by date."""
                          # Cache of parsed dates, keyed by revision
                          dates = {}
                          def getdate(n):
                              if n not in dates:
                                  dates[n] = util.parsedate(self.commitcache[n].date)
                              return dates[n]
                          def picknext(nodes):
                              return min([(getdate(n), n) for n in nodes])[1]
                          return picknext
                      if sortmode == 'branchsort':
                          picknext = makebranchsorter()
                      elif sortmode == 'datesort':
                          picknext = makedatesorter()
                      elif sortmode == 'sourcesort':
                          picknext = makesourcesorter()
                      elif sortmode == 'closesort':
                          picknext = makeclosesorter()
                      else:
                          raise util.Abort(_('unknown sort mode: %s') % sortmode)
                      children, actives = mapchildren(parents)
                      # 's' is the resulting order. 'pendings' maps a revision to the
                      # parents which still have to be emitted before it, and to None
                      # once the revision has been made eligible.
                      s = []
                      pendings = {}
                      while actives:
                          n = picknext(actives)
                          actives.remove(n)
                          s.append(n)
                          # Update dependents list
                          for c in children.get(n, []):
                              if c not in pendings:
                                  pendings[c] = [p for p in parents[c] if p not in self.map]
                              try:
                                  pendings[c].remove(n)
                              except ValueError:
                                  # 'n' is not among the parents 'c' is waiting for
                                  raise util.Abort(_('cycle detected between %s and %s')
                                                     % (recode(c), recode(n)))
                              if not pendings[c]:
                                  # Parents are converted, node is eligible
                                  actives.insert(0, c)
                                  pendings[c] = None
                      # Every revision of 'parents' must have been emitted
                      if len(s) != len(parents):
                          raise util.Abort(_("not all revisions were sorted"))
                      return s
                  def writeauthormap(self):
                      """Write self.authors to self.authorfile, one "source=dest" per line.

                      Does nothing when self.authorfile is empty or None. The file is
                      opened in 'w+' mode, so any previous content is replaced.
                      """
                      authorfile = self.authorfile
                      if not authorfile:
                          return
                      self.ui.status(_('writing author map file %s\n') % authorfile)
                      ofile = open(authorfile, 'w+')
                      try:
                          for author in self.authors:
                              ofile.write("%s=%s\n" % (author, self.authors[author]))
                      finally:
                          # Release the handle even when a write fails
                          ofile.close()
                  def readauthormap(self, authorfile):
                      """Load author mappings from 'authorfile' into self.authors.

                      Each meaningful line has the form "source=dest"; both sides are
                      stripped of surrounding whitespace. Blank lines and lines
                      starting with '#' are skipped. Lines without '=' are reported
                      with a warning and ignored.

                      A mapping which differs from the one already in self.authors
                      replaces it, and the change is reported through ui.status().
                      """
                      afile = open(authorfile, 'r')
                      try:
                          for line in afile:
                              line = line.strip()
                              if not line or line.startswith('#'):
                                  continue
                              try:
                                  srcauthor, dstauthor = line.split('=', 1)
                              except ValueError:
                                  msg = _('ignoring bad line in author map file %s: %s\n')
                                  self.ui.warn(msg % (authorfile, line.rstrip()))
                                  continue
                              srcauthor = srcauthor.strip()
                              dstauthor = dstauthor.strip()
                              previous = self.authors.get(srcauthor)
                              if previous in (None, dstauthor):
                                  msg = _('mapping author %s to %s\n')
                                  self.ui.debug(msg % (srcauthor, dstauthor))
                              else:
                                  msg = _('overriding mapping for author %s, was %s, '
                                          'will be %s\n')
                                  self.ui.status(msg % (srcauthor, previous, dstauthor))
                              # Store the mapping in both cases, so that the override
                              # announced above really takes effect
                              self.authors[srcauthor] = dstauthor
                      finally:
                          # Release the handle even when reading or reporting fails
                          afile.close()
                  def cachecommit(self, rev):
                      """Fetch commit 'rev' from the source, remap it and cache it.

                      The author is translated through self.authors and the branch
                      through self.branchmap, both falling back to the original value.
                      The commit is stored in self.commitcache[rev] and returned.
                      """
                      info = self.source.getcommit(rev)
                      info.author = self.authors.get(info.author, info.author)
                      # A branch of None means the commit comes from the default
                      # branch of the source and goes to the default branch of the
                      # destination. Looking up the literal "None" string lets the
                      # user map "None" to another default branch.
                      branchkey = str(info.branch)
                      info.branch = self.branchmap.get(branchkey, info.branch)
                      self.commitcache[rev] = info
                      return info
                  def copy(self, rev):
                      # Convert source revision 'rev' into the destination and record
                      # the resulting identifier in self.map[rev]. The commit must
                      # already be in self.commitcache.
                      commit = self.commitcache[rev]
-                     changes = self.source.getchanges(rev)
+                     full = self.opts.get('full')
+                     changes = self.source.getchanges(rev, full)
                      # A string is not a change list: it is either SKIPREV or another
                      # source revision whose mapping is reused for 'rev'
                      if isinstance(changes, basestring):
                          if changes == SKIPREV:
                              dest = SKIPREV
                          else:
                              dest = self.map[changes]
                          self.map[rev] = dest
                          return
                      files, copies = changes
                      # (destination id, branch) pair for every parent of the commit
                      pbranches = []
                      if commit.parents:
                          for prev in commit.parents:
                              if prev not in self.commitcache:
                                  self.cachecommit(prev)
                              pbranches.append((self.map[prev],
                                                self.commitcache[prev].branch))
                      self.dest.setbranch(commit.branch, pbranches)
                      # A splicemap entry replaces the natural parents; its revisions
                      # are translated through self.map when present there, and used
                      # unchanged otherwise
                      try:
                          parents = self.splicemap[rev]
                          self.ui.status(_('spliced in %s as parents of %s\n') %
                                         (parents, rev))
                          parents = [self.map.get(p, p) for p in parents]
                      except KeyError:
                          parents = [b[0] for b in pbranches]
                      source = progresssource(self.ui, self.source, len(files))
                      newnode = self.dest.putcommit(files, copies, parents, commit,
-                                                   source, self.map)
+                                                   source, self.map, full)
                      source.close()
                      self.source.converted(rev, newnode)
                      self.map[rev] = newnode
                  def convert(self, sortmode):
                      """Run the whole conversion, ordering revisions with 'sortmode'.

                      Scans the source from its heads, merges the splicemap, sorts
                      the revisions and copies them one by one. Tags and bookmarks
                      pointing at converted revisions are then written to the
                      destination and the author map is saved. cleanup() is called
                      in every case, including on error.
                      """
                      try:
                          self.source.before()
                          self.dest.before()
                          self.source.setrevmap(self.map)
                          self.ui.status(_("scanning source...\n"))
                          heads = self.source.getheads()
                          parents = self.walktree(heads)
                          self.mergesplicemap(parents, self.splicemap)
                          self.ui.status(_("sorting...\n"))
                          t = self.toposort(parents, sortmode)
                          # 'num' counts down the revisions left after the current one
                          num = len(t)
                          # 'c' stays None when 't' is empty, which disables the tag
                          # and bookmark updates below
                          c = None
                          self.ui.status(_("converting...\n"))
                          for i, c in enumerate(t):
                              num -= 1
                              desc = self.commitcache[c].desc
                              if "\n" in desc:
                                  desc = desc.splitlines()[0]
                              # convert log message to local encoding without using
                              # tolocal() because the encoding.encoding convert()
                              # uses is 'utf-8'
                              self.ui.status("%d %s\n" % (num, recode(desc)))
                              self.ui.note(_("source: %s\n") % recode(c))
                              self.ui.progress(_('converting'), i, unit=_('revisions'),
                                               total=len(t))
                              self.copy(c)
                          self.ui.progress(_('converting'), None)
                          tags = self.source.gettags()
                          # Keep only the tags whose revision is in self.map and is not
                          # mapped to SKIPREV, translated to destination identifiers
                          ctags = {}
                          for k in tags:
                              v = tags[k]
                              if self.map.get(v, SKIPREV) != SKIPREV:
                                  ctags[k] = self.map[v]
                          if c and ctags:
                              nrev, tagsparent = self.dest.puttags(ctags)
                              if nrev and tagsparent:
                                  # write another hash correspondence to override the previous
                                  # one so we don't end up with extra tag heads
                                  tagsparents = [e for e in self.map.iteritems()
                                                 if e[1] == tagsparent]
                                  if tagsparents:
                                      self.map[tagsparents[0][0]] = nrev
                          bookmarks = self.source.getbookmarks()
                          # Same filtering and translation as for the tags above
                          cbookmarks = {}
                          for k in bookmarks:
                              v = bookmarks[k]
                              if self.map.get(v, SKIPREV) != SKIPREV:
                                  cbookmarks[k] = self.map[v]
                          if c and cbookmarks:
                              self.dest.putbookmarks(cbookmarks)
                          self.writeauthormap()
                      finally:
                          self.cleanup()
                  def cleanup(self):
                      """Finalize the destination, the source and the revision map.

                      Calls self.dest.after(), self.source.after() and
                      self.map.close(), in that order. Each step runs even when an
                      earlier one raised; the exception still propagates once the
                      remaining steps have run.
                      """
                      try:
                          try:
                              self.dest.after()
                          finally:
                              self.source.after()
                      finally:
                          # Always close the map, even when an after() hook failed
                          self.map.close()
              def convert(ui, src, dest=None, revmapfile=None, **opts):
                  """Convert the repository at 'src' into 'dest'.

                  'dest' defaults to hg.defaultdest(src) + "-hg" and 'revmapfile' to
                  the one proposed by the sink. 'opts' holds the command options read
                  here: authormap/authors, dest_type, source_type, rev, filemap and
                  the sort mode flags. The whole 'opts' dict is also handed to the
                  converter.

                  The current encoding.encoding is saved in the orig_encoding global
                  and replaced by 'UTF-8'. Aborts when several sort modes are given
                  or when the source does not support the selected one.
                  """
                  global orig_encoding
                  orig_encoding = encoding.encoding
                  encoding.encoding = 'UTF-8'

                  # support --authors as an alias for --authormap
                  if not opts.get('authormap'):
                      opts['authormap'] = opts.get('authors')

                  if not dest:
                      dest = hg.defaultdest(src) + "-hg"
                      ui.status(_("assuming destination %s\n") % dest)

                  destc = convertsink(ui, dest, opts.get('dest_type'))

                  try:
                      srcc, defaultsort = convertsource(ui, src, opts.get('source_type'),
                                                        opts.get('rev'))
                  except Exception:
                      # Remove what the sink created before reporting the failure
                      for created in destc.created:
                          shutil.rmtree(created, True)
                      raise

                  # At most one sort flag may be set; none means the source default
                  requested = []
                  for mode in ('branchsort', 'datesort', 'sourcesort', 'closesort'):
                      if opts.get(mode):
                          requested.append(mode)
                  if len(requested) > 1:
                      raise util.Abort(_('more than one sort mode specified'))
                  if requested:
                      sortmode = requested[0]
                  else:
                      sortmode = defaultsort

                  if sortmode == 'sourcesort' and not srcc.hasnativeorder():
                      raise util.Abort(_('--sourcesort is not supported by this data source'))
                  if sortmode == 'closesort' and not srcc.hasnativeclose():
                      raise util.Abort(_('--closesort is not supported by this data source'))

                  fmap = opts.get('filemap')
                  if fmap:
                      # Wrap the source so that the file map is applied to it
                      srcc = filemap.filemap_source(ui, srcc, fmap)
                      destc.setfilemapmode(True)

                  if not revmapfile:
                      revmapfile = destc.revmapfile()

                  converter(ui, srcc, destc, revmapfile, opts).convert(sortmode)

hgext/convert/cvs.py

0 +3 -1

              # cvs.py: CVS conversion code inspired by hg-cvs-import and git-cvsimport
              #
              #  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import os, re, socket, errno
              from cStringIO import StringIO
              from mercurial import encoding, util
              from mercurial.i18n import _
              from common import NoRepo, commit, converter_source, checktool
              from common import makedatetimestamp
              import cvsps
              class convert_cvs(converter_source):
                  def __init__(self, ui, path, rev=None):
                      """Set up a CVS source for the checkout at 'path' and connect.

                      Raises NoRepo when 'path' has no "CVS" entry. The root and the
                      repository are read from the "Root" and "Repository" files found
                      there, dropping the last character of each.
                      """
                      super(convert_cvs, self).__init__(ui, path, rev=rev)

                      cvs = os.path.join(path, "CVS")
                      if not os.path.exists(cvs):
                          raise NoRepo(_("%s does not look like a CVS checkout") % path)
                      checktool('cvs')

                      # State filled in later by _parse()
                      self.changeset = None
                      self.files = {}
                      self.tags = {}
                      self.lastbranch = {}

                      self.socket = None
                      rootfile = os.path.join(cvs, "Root")
                      repofile = os.path.join(cvs, "Repository")
                      self.cvsroot = open(rootfile).read()[:-1]
                      self.cvsrepo = open(repofile).read()[:-1]
                      self.encoding = encoding.encoding

                      self._connect()
                  def _parse(self):
                      """Build the changeset data once and keep it on the instance.

                      Fills self.changeset, self.files, self.tags, self.lastbranch and
                      self.heads from the cvsps log. Returns immediately when
                      self.changeset is already set. When self.rev is given it must be
                      an integer patchset number, and changesets with a greater id are
                      not loaded.
                      """
                      if self.changeset is not None:
                          return
                      self.changeset = {}
                      maxrev = 0
                      if self.rev:
                          # TODO: handle tags
                          try:
                              # patchset number?
                              maxrev = int(self.rev)
                          except ValueError:
                              raise util.Abort(_('revision %s is not a patchset number')
                                               % self.rev)
                      # Remember the current directory, restored in the finally below
                      d = os.getcwd()
                      try:
                          os.chdir(self.path)
                          id = None
                          cache = 'update'
                          if not self.ui.configbool('convert', 'cvsps.cache', True):
                              cache = None
                          db = cvsps.createlog(self.ui, cache=cache)
                          db = cvsps.createchangeset(self.ui, db,
                              fuzz=int(self.ui.config('convert', 'cvsps.fuzz', 60)),
                              mergeto=self.ui.config('convert', 'cvsps.mergeto', None),
                              mergefrom=self.ui.config('convert', 'cvsps.mergefrom', None))
                          for cs in db:
                              # Stop at the first changeset past the requested one
                              if maxrev and cs.id > maxrev:
                                  break
                              id = str(cs.id)
                              cs.author = self.recode(cs.author)
                              # The last changeset seen for a branch ends up as its head
                              self.lastbranch[cs.branch] = id
                              cs.comment = self.recode(cs.comment)
                              if self.ui.configbool('convert', 'localtimezone'):
                                  cs.date = makedatetimestamp(cs.date[0])
                              date = util.datestr(cs.date, '%Y-%m-%d %H:%M:%S %1%2')
                              self.tags.update(dict.fromkeys(cs.tags, id))
                              # Map each file name to its dotted revision string,
                              # suffixed with '(DEAD)' when the entry is dead
                              files = {}
                              for f in cs.entries:
                                  files[f.file] = "%s%s" % ('.'.join([str(x)
                                                                      for x in f.revision]),
                                                            ['', '(DEAD)'][f.dead])
                              # add current commit to set
                              c = commit(author=cs.author, date=date,
                                         parents=[str(p.id) for p in cs.parents],
                                         desc=cs.comment, branch=cs.branch or '')
                              self.changeset[id] = c
                              self.files[id] = files
                          self.heads = self.lastbranch.values()
                      finally:
                          os.chdir(d)
                  def _connect(self):
                      # Open the connection described by self.cvsroot and do the
                      # initial handshake. Three connection types are handled:
                      # "pserver" (direct socket), "local" and "rsh" (both spawn
                      # "cvs server", the latter through $CVS_RSH or ssh).
                      # Sets self.writep, self.readp and self.realroot.
                      root = self.cvsroot
                      conntype = None
                      user, host = None, None
                      cmd = ['cvs', 'server']
                      self.ui.status(_("connecting to %s\n") % root)
                      if root.startswith(":pserver:"):
                          root = root[9:]
                          # groups: user, password, server, port, root path
                          m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
                                       root)
                          if m:
                              conntype = "pserver"
                              user, passw, serv, port, root = m.groups()
                              if not user:
                                  user = "anonymous"
                              if not port:
                                  port = 2401
                              else:
                                  port = int(port)
                              # The two root spellings looked up in ~/.cvspass,
                              # without and with the port number
                              format0 = ":pserver:%s@%s:%s" % (user, serv, root)
                              format1 = ":pserver:%s@%s:%d%s" % (user, serv, port, root)
                              if not passw:
                                  # Fallback used when no matching entry is found below
                                  passw = "A"
                                  cvspass = os.path.expanduser("~/.cvspass")
                                  try:
                                      pf = open(cvspass)
                                      for line in pf.read().splitlines():
                                          part1, part2 = line.split(' ', 1)
                                          # /1 :pserver:user@example.com:2401/cvsroot/foo
                                          # Ah<Z
                                          if part1 == '/1':
                                              part1, part2 = part2.split(' ', 1)
                                              format = format1
                                          # :pserver:user@example.com:/cvsroot/foo Ah<Z
                                          else:
                                              format = format0
                                          if part1 == format:
                                              passw = part2
                                              break
                                      pf.close()
                                  except IOError, inst:
                                      # A missing ~/.cvspass is fine, anything else is
                                      # re-raised with the file name filled in
                                      if inst.errno != errno.ENOENT:
                                          if not getattr(inst, 'filename', None):
                                              inst.filename = cvspass
                                          raise
                              sck = socket.socket()
                              sck.connect((serv, port))
                              sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
                                                  "END AUTH REQUEST", ""]))
                              if sck.recv(128) != "I LOVE YOU\n":
                                  raise util.Abort(_("CVS pserver authentication failed"))
                              # The same file object is used for both directions
                              self.writep = self.readp = sck.makefile('r+')
                      if not conntype and root.startswith(":local:"):
                          conntype = "local"
                          root = root[7:]
                      if not conntype:
                          # :ext:user@host/home/user/path/to/cvsroot
                          if root.startswith(":ext:"):
                              root = root[5:]
                          m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
                          # Do not mistake a Windows path like "c:\foo\bar" for a
                          # connection string
                          if os.path.isdir(root) or not m:
                              conntype = "local"
                          else:
                              conntype = "rsh"
                              user, host, root = m.group(1), m.group(2), m.group(3)
                      if conntype != "pserver":
                          if conntype == "rsh":
                              rsh = os.environ.get("CVS_RSH") or "ssh"
                              if user:
                                  cmd = [rsh, '-l', user, host] + cmd
                              else:
                                  cmd = [rsh, host] + cmd
                          # popen2 does not support argument lists under Windows
                          cmd = [util.shellquote(arg) for arg in cmd]
                          cmd = util.quotecommand(' '.join(cmd))
                          self.writep, self.readp = util.popen2(cmd)
                      self.realroot = root
                      # Handshake: announce the root and the responses understood
                      # here, then ask the server which requests it accepts
                      self.writep.write("Root %s\n" % root)
                      self.writep.write("Valid-responses ok error Valid-requests Mode"
                                        " M Mbinary E Checked-in Created Updated"
                                        " Merged Removed\n")
                      self.writep.write("valid-requests\n")
                      self.writep.flush()
                      r = self.readp.readline()
                      if not r.startswith("Valid-requests"):
                          raise util.Abort(_('unexpected response from CVS server '
                                             '(expected "Valid-requests", but got %r)')
                                           % r)
                      if "UseUnchanged" in r:
                          self.writep.write("UseUnchanged\n")
                          self.writep.flush()
                          # The line read here is not used afterwards
                          r = self.readp.readline()
                  def getheads(self):
                      """Return self.heads, parsing the CVS log first if needed."""
                      self._parse()
                      heads = self.heads
                      return heads
                  def getfile(self, name, rev):
                      """Return a (data, mode) pair for file 'name' at revision 'rev'.

                      Returns (None, None) when 'rev' ends with "(DEAD)". Otherwise the
                      file is checked out through the server connection; 'mode' is
                      "x" when the mode line sent by the server contains "x" and ""
                      otherwise. Aborts on a malformed or unknown response.
                      """
                      def chunkedread(fp, count):
                          # file-objects returned by socket.makefile() do not handle
                          # large read() requests very well.
                          chunksize = 65536
                          output = StringIO()
                          while count > 0:
                              data = fp.read(min(count, chunksize))
                              if not data:
                                  raise util.Abort(_("%d bytes missing from remote file")
                                                   % count)
                              count -= len(data)
                              output.write(data)
                          return output.getvalue()
                      self._parse()
                      if rev.endswith("(DEAD)"):
                          return None, None
                      # Send one "Argument" line per checkout option, the file path
                      # last, then issue the "co" request
                      args = ("-N -P -kk -r %s --" % rev).split()
                      args.append(self.cvsrepo + '/' + name)
                      for x in args:
                          self.writep.write("Argument %s\n" % x)
                      self.writep.write("Directory .\n%s\nco\n" % self.realroot)
                      self.writep.flush()
                      data = ""
                      mode = None
                      # Read response lines until the final "ok"
                      while True:
                          line = self.readp.readline()
                          if line.startswith("Created ") or line.startswith("Updated "):
                              self.readp.readline() # path
                              self.readp.readline() # entries
                              mode = self.readp.readline()[:-1]
                              count = int(self.readp.readline()[:-1])
                              data = chunkedread(self.readp, count)
                          elif line.startswith(" "):
                              data += line[1:]
                          elif line.startswith("M "):
                              pass
                          elif line.startswith("Mbinary "):
                              count = int(self.readp.readline()[:-1])
                              data = chunkedread(self.readp, count)
                          else:
                              if line == "ok\n":
                                  # No "Created"/"Updated" response was seen
                                  if mode is None:
                                      raise util.Abort(_('malformed response from CVS'))
                                  return (data, "x" in mode and "x" or "")
                              elif line.startswith("E "):
                                  self.ui.warn(_("cvs server: %s\n") % line[2:])
                              elif line.startswith("Remove"):
                                  # Skip the line following the "Remove..." response
                                  self.readp.readline()
                              else:
                                  raise util.Abort(_("unknown CVS response: %s") % line)
-                 def getchanges(self, rev):
+                 def getchanges(self, rev, full):
                      # Return a (files, copies) pair for changeset 'rev': the sorted
                      # (name, revision) items of self.files[rev] and an empty copies
                      # dict. A true 'full' aborts.
+                     if full:
+                         raise util.Abort(_("convert from cvs do not support --full"))
                      self._parse()
                      return sorted(self.files[rev].iteritems()), {}
                  def getcommit(self, rev):
                      """Return the commit stored in self.changeset for 'rev'."""
                      self._parse()
                      changesets = self.changeset
                      return changesets[rev]
                  def gettags(self):
                      """Return self.tags, parsing the CVS log first if needed."""
                      self._parse()
                      tags = self.tags
                      return tags
                  def getchangedfiles(self, rev, i):
                      """Return the sorted file names of changeset 'rev'.

                      'i' is not used by this source.
                      """
                      self._parse()
                      names = sorted(self.files[rev])
                      return names

hgext/convert/darcs.py

0 +3 -1

              # darcs.py - darcs support for the convert extension
              #
              #  Copyright 2007-2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from common import NoRepo, checktool, commandline, commit, converter_source
              from mercurial.i18n import _
              from mercurial import util
              import os, shutil, tempfile, re, errno
              # The naming drift of ElementTree is fun!
              try:
                  from xml.etree.cElementTree import ElementTree, XMLParser
              except ImportError:
                  try:
                      from xml.etree.ElementTree import ElementTree, XMLParser
                  except ImportError:
                      try:
                          from elementtree.cElementTree import ElementTree, XMLParser
                      except ImportError:
                          try:
                              from elementtree.ElementTree import ElementTree, XMLParser
                          except ImportError:
                              pass
              class darcs_source(converter_source, commandline):
                  def __init__(self, ui, path, rev=None):
                      '''Set up a darcs source for the repository at path.

                      Raises NoRepo when path has no _darcs directory or uses an
                      unsupported repository format, and util.Abort when darcs is
                      older than 2.1 or no ElementTree implementation was imported.
                      '''
                      converter_source.__init__(self, ui, path, rev=rev)
                      commandline.__init__(self, ui, 'darcs')
                      # check for _darcs, ElementTree so that we can easily skip
                      # test-convert-darcs if ElementTree is not around
                      if not os.path.exists(os.path.join(path, '_darcs')):
                          raise NoRepo(_("%s does not look like a darcs repository") % path)
                      checktool('darcs')
                      version = self.run0('--version').splitlines()[0].strip()
                      # Compare major.minor numerically: a plain string comparison
                      # would rank e.g. '10.0' below '2.1'. Fall back to the string
                      # comparison when the output does not start with a version.
                      m = re.match(r'(\d+)\.(\d+)', version)
                      if m:
                          tooold = (int(m.group(1)), int(m.group(2))) < (2, 1)
                      else:
                          tooold = version < '2.1'
                      if tooold:
                          raise util.Abort(_('darcs version 2.1 or newer needed (found %r)') %
                                           version)
                      if "ElementTree" not in globals():
                          raise util.Abort(_("Python ElementTree module is not available"))
                      self.path = os.path.realpath(path)
                      self.lastrev = None
                      self.changes = {}
                      self.parents = {}
                      self.tags = {}
                      # Check darcs repository format
                      format = self.format()
                      if format:
                          if format in ('darcs-1.0', 'hashed'):
                              raise NoRepo(_("%s repository format is unsupported, "
                                             "please upgrade") % format)
                      else:
                          # warnings are written verbatim, so terminate the line
                          self.ui.warn(_('failed to detect repository format!\n'))
                  def before(self):
                      '''Create a scratch darcs repository and index the source patches.

                      Fills self.changes (hash -> patch element), self.tags and
                      self.parents. Each patch is recorded as the sole parent of the
                      patch listed just before it; self.parents[None] holds the first
                      patch listed and the last one gets an empty parent list.
                      '''
                      prefix = 'convert-%s-' % os.path.basename(self.path)
                      self.tmppath = tempfile.mkdtemp(prefix=prefix)
                      out, ret = self.run('init', repodir=self.tmppath)
                      self.checkexit(ret)
                      root = self.xml('changes', xml_output=True, summary=True,
                                      repodir=self.path)
                      pendingtag = None
                      prev = None
                      for patch in root.findall('patch'):
                          patchhash = patch.get('hash')
                          title = patch.findtext('name', '')
                          if title.startswith('TAG '):
                              # remember the tag; it is attached to the next patch
                              pendingtag = title[4:].strip()
                          elif pendingtag is not None:
                              self.tags[pendingtag] = patchhash
                              pendingtag = None
                          self.changes[patchhash] = patch
                          self.parents[prev] = [patchhash]
                          prev = patchhash
                      self.parents[prev] = []
                  def after(self):
                      '''Delete the directory at self.tmppath, ignoring removal errors.'''
                      scratch = self.tmppath
                      self.ui.debug('cleaning up %s\n' % scratch)
                      shutil.rmtree(scratch, ignore_errors=True)
                  def recode(self, s, encoding=None):
                      '''Recode s via the parent class, encoding unicode input first.'''
                      text = s
                      if isinstance(text, unicode):
                          # XMLParser hands back unicode objects for anything it cannot
                          # encode into ASCII; turn them back into str so the parent
                          # recode() applies its normal conversion.
                          text = text.encode('latin-1')
                      return super(darcs_source, self).recode(text, encoding)
                  def xml(self, cmd, **kwargs):
                      '''Run a darcs command and return the root element of its XML output.

                      The exit status is passed to checkexit() once the output has
                      been parsed.
                      '''
                      # NOTE: darcs is currently encoding agnostic and will print
                      # patch metadata byte-for-byte, even in the XML changelog.
                      doc = ElementTree()
                      # Decoding as latin-1 is as liberal as we can be, yet etree
                      # still raises if the changelog holds non-printable characters.
                      latin1parser = XMLParser(encoding='latin-1')
                      proc = self._run(cmd, **kwargs)
                      doc.parse(proc.stdout, parser=latin1parser)
                      proc.wait()
                      self.checkexit(proc.returncode)
                      return doc.getroot()
                  def format(self):
                      '''Return the repository format reported by "darcs show repo".

                      The comma-separated values of the "Format:" line are stripped,
                      sorted and re-joined with commas; None is returned when the
                      output has no such line.
                      '''
                      out, ret = self.run('show', 'repo', no_files=True,
                                          repodir=self.path)
                      self.checkexit(ret)
                      found = re.search(r'^\s*Format:\s*(.*)$', out, re.MULTILINE)
                      if found:
                          names = [part.strip() for part in found.group(1).split(',')]
                          names.sort()
                          return ','.join(names)
                      return None
                  def manifest(self):
                      '''List the files "darcs show files" reports for the scratch repo.

                      The first two characters of every output line are dropped and
                      lines left empty by that are skipped.
                      '''
                      out, ret = self.run('show', 'files', no_directories=True,
                                          repodir=self.tmppath)
                      self.checkexit(ret)
                      stripped = [line[2:] for line in out.split('\n')]
                      return [path for path in stripped if path]
                  def getheads(self):
                      '''Return the list stored under the None key of self.parents.'''
                      heads = self.parents[None]
                      return heads
                  def getcommit(self, rev):
                      '''Build the commit object for the patch stored under rev.'''
                      patch = self.changes[rev]
                      when = util.strdate(patch.get('local_date'),
                                          '%a %b %d %H:%M:%S %Z %Y')
                      title = patch.findtext('name')
                      comment = patch.findtext('comment', '')
                      message = title + '\n' + comment
                      # etree can return unicode objects for name, comment, and author,
                      # so recode() is used to ensure str objects are emitted.
                      author = self.recode(patch.get('author'))
                      datestr = util.datestr(when, '%Y-%m-%d %H:%M:%S %1%2')
                      desc = self.recode(message).strip()
                      return commit(author=author, date=datestr, desc=desc,
                                    parents=self.parents[rev])
                  def pull(self, rev):
                      '''Pull the patch with hash rev into the scratch repository.

                      When the pull fails, a failure that does not mention conflicts
                      is passed to checkexit(); in either case the scratch repository
                      is then reverted.
                      '''
                      out, ret = self.run('pull', self.path, all=True,
                                          match='hash %s' % rev,
                                          no_test=True, no_posthook=True,
                                          external_merge='/bin/false',
                                          repodir=self.tmppath)
                      if not ret:
                          return
                      if 'We have conflicts in' not in out:
                          self.checkexit(ret, out)
                      out, ret = self.run('revert', all=True, repodir=self.tmppath)
                      self.checkexit(ret, out)
-                 def getchanges(self, rev):
+                 def getchanges(self, rev, full):
+                     if full:
+                         raise util.Abort(_("convert from darcs do not support --full"))
                      copies = {}
                      changes = []
                      man = None
                      for elt in self.changes[rev].find('summary').getchildren():
                          if elt.tag in ('add_directory', 'remove_directory'):
                              continue
                          if elt.tag == 'move':
                              if man is None:
                                  man = self.manifest()
                              source, dest = elt.get('from'), elt.get('to')
                              if source in man:
                                  # File move
                                  changes.append((source, rev))
                                  changes.append((dest, rev))
                                  copies[dest] = source
                              else:
                                  # Directory move, deduce file moves from manifest
                                  source = source + '/'
                                  for f in man:
                                      if not f.startswith(source):
                                          continue
                                      fdest = dest + '/' + f[len(source):]
                                      changes.append((f, rev))
                                      changes.append((fdest, rev))
                                      copies[fdest] = f
                          else:
                              changes.append((elt.text.strip(), rev))
                      self.pull(rev)
                      self.lastrev = rev
                      return sorted(changes), copies
                  def getfile(self, name, rev):
                      if rev != self.lastrev:
                          raise util.Abort(_('internal calling inconsistency'))
                      path = os.path.join(self.tmppath, name)
                      try:
                          data = util.readfile(path)
                          mode = os.lstat(path).st_mode
                      except IOError, inst:
                          if inst.errno == errno.ENOENT:
                              return None, None
                          raise
                      mode = (mode & 0111) and 'x' or ''
                      return data, mode
                  def gettags(self):
                      '''Return the tag name -> patch hash mapping filled by before().'''
                      tags = self.tags
                      return tags

hgext/convert/filemap.py

0 +2 -2

              # Copyright 2007 Bryan O'Sullivan <bos@serpentine.com>
              # Copyright 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import posixpath
              import shlex
              from mercurial.i18n import _
              from mercurial import util, error
              from common import SKIPREV, converter_source
              def rpairs(path):
                  '''Yield (prefix, suffix) splits of path at each '/', longest prefix first.
                  The final pair always has '.' as the prefix and the whole path as
                  the suffix. No leading, trailing or double '/', please.
                  >>> for x in rpairs('foo/bar/baz'): print x
                  ('foo/bar/baz', '')
                  ('foo/bar', 'baz')
                  ('foo', 'bar/baz')
                  ('.', 'foo/bar/baz')
                  '''
                  cut = len(path)
                  while True:
                      yield path[:cut], path[cut + 1:]
                      # move to the previous separator, if there is one
                      cut = path.rfind('/', 0, cut)
                      if cut == -1:
                          break
                  yield '.', path
              def normalize(path):
                  '''Normalize path with posixpath.normpath, passing None through.
                  posixpath.normpath gives a cross-platform path format but does not
                  handle None input, hence this wrapper.'''
                  if path is not None:
                      return posixpath.normpath(path)
                  return None
              class filemapper(object):
                  '''Map and filter filenames when importing.
                  A name can be mapped to itself, a new name, or None (omit from new
                  repository).'''
                  def __init__(self, ui, path=None):
                      self.ui = ui
                      self.include = {}
                      self.exclude = {}
                      self.rename = {}
                      if path:
                          if self.parse(path):
                              raise util.Abort(_('errors in filemap'))
                  def parse(self, path):
                      errs = 0
                      def check(name, mapping, listname):
                          if not name:
                              self.ui.warn(_('%s:%d: path to %s is missing\n') %
                                           (lex.infile, lex.lineno, listname))
                              return 1
                          if name in mapping:
                              self.ui.warn(_('%s:%d: %r already in %s list\n') %
                                           (lex.infile, lex.lineno, name, listname))
                              return 1
                          if (name.startswith('/') or
                              name.endswith('/') or
                              '//' in name):
                              self.ui.warn(_('%s:%d: superfluous / in %s %r\n') %
                                           (lex.infile, lex.lineno, listname, name))
                              return 1
                          return 0
                      lex = shlex.shlex(open(path), path, True)
                      lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'
                      cmd = lex.get_token()
                      while cmd:
                          if cmd == 'include':
                              name = normalize(lex.get_token())
                              errs += check(name, self.exclude, 'exclude')
                              self.include[name] = name
                          elif cmd == 'exclude':
                              name = normalize(lex.get_token())
                              errs += check(name, self.include, 'include')
                              errs += check(name, self.rename, 'rename')
                              self.exclude[name] = name
                          elif cmd == 'rename':
                              src = normalize(lex.get_token())
                              dest = normalize(lex.get_token())
                              errs += check(src, self.exclude, 'exclude')
                              self.rename[src] = dest
                          elif cmd == 'source':
                              errs += self.parse(normalize(lex.get_token()))
                          else:
                              self.ui.warn(_('%s:%d: unknown directive %r\n') %
                                           (lex.infile, lex.lineno, cmd))
                              errs += 1
                          cmd = lex.get_token()
                      return errs
                  def lookup(self, name, mapping):
                      '''Find the longest prefix of name that is a key of mapping.

                      Returns (mapping[prefix], prefix, suffix) for the first match
                      produced by rpairs(), or ('', normalized name, '') when no
                      prefix is present in mapping.
                      '''
                      name = normalize(name)
                      for pre, suf in rpairs(name):
                          try:
                              hit = mapping[pre]
                          except KeyError:
                              continue
                          return hit, pre, suf
                      return '', name, ''
                  def __call__(self, name):
                      '''Return the mapped name for name, or None when it is filtered out.

                      A name is dropped when an exclude matches and there are no
                      includes, or when the matching include is not longer than the
                      matching exclude. Surviving names go through the rename map.
                      '''
                      inc = name
                      if self.include:
                          inc = self.lookup(name, self.include)[0]
                      exc = ''
                      if self.exclude:
                          exc = self.lookup(name, self.exclude)[0]
                      if (exc and not self.include) or len(exc) >= len(inc):
                          return None
                      newpre, pre, suf = self.lookup(name, self.rename)
                      if not newpre:
                          # no rename applies
                          return name
                      if newpre == '.':
                          return suf
                      if not suf:
                          return newpre
                      if newpre.endswith('/'):
                          return newpre + suf
                      return newpre + '/' + suf
                  def active(self):
                      return bool(self.include or self.exclude or self.rename)
              # This class does two additional things compared to a regular source:
              #
              # - Filter and rename files.  This is mostly wrapped by the filemapper
              #   class above. We hide the original filename in the revision that is
              #   returned by getchanges to be able to find things later in getfile.
              #
              # - Return only revisions that matter for the files we're interested in.
              #   This involves rewriting the parents of the original revision to
              #   create a graph that is restricted to those revisions.
              #
              #   This set of revisions includes not only revisions that directly
              #   touch files we're interested in, but also merges that merge two
              #   or more interesting revisions.
              class filemap_source(converter_source):
                  def __init__(self, ui, baseconverter, filemap):
                      '''Wrap baseconverter and filter its files through filemap.'''
                      super(filemap_source, self).__init__(ui)
                      self.base = baseconverter
                      self.filemapper = filemapper(ui, filemap)
                      # commit objects handed out by getcommit(), keyed by revision
                      self.commits = {}
                      # parentmap: if a revision rev has parent p in the original
                      # revision graph, rev will have parent self.parentmap[p] in the
                      # restricted graph.
                      # wantedancestors: self.wantedancestors[rev] is the set of all
                      # ancestors of rev that are in the restricted graph.
                      self.parentmap, self.wantedancestors = {}, {}
                      self.convertedorder = None
                      self._rebuilt = False
                      self.origparents, self.children, self.seenchildren = {}, {}, {}
                  def before(self):
                      '''Forward the before() hook to the wrapped source.'''
                      wrapped = self.base
                      wrapped.before()
                  def after(self):
                      '''Forward the after() hook to the wrapped source.'''
                      wrapped = self.base
                      wrapped.after()
                  def setrevmap(self, revmap):
                      '''Rebuild parentmap and convertedorder from an existing revmap.

                      The revmap is then handed to the wrapped source, whose
                      setrevmap() result is returned.
                      '''
                      # rebuild our state to make things restartable
                      #
                      # To avoid calling getcommit for every revision that has already
                      # been converted, we rebuild only the parentmap, delaying the
                      # rebuild of wantedancestors until we need it (i.e. until a
                      # merge).
                      #
                      # We assume the order argument lists the revisions in
                      # topological order, so that we can infer which revisions were
                      # wanted by previous runs.
                      # An empty revmap leaves nothing to rebuild later on.
                      self._rebuilt = not revmap
                      # seen maps a sink revision to the first source revision that
                      # produced it; SKIPREV is pre-seeded so it never counts as wanted.
                      seen = {SKIPREV: SKIPREV}
                      # shared placeholder; rebuild() clears and recomputes these sets
                      dummyset = set()
                      converted = []
                      for rev in revmap.order:
                          mapped = revmap[rev]
                          # first source revision mapped to this sink revision
                          wanted = mapped not in seen
                          if wanted:
                              seen[mapped] = rev
                              self.parentmap[rev] = rev
                          else:
                              self.parentmap[rev] = seen[mapped]
                          self.wantedancestors[rev] = dummyset
                          arg = seen[mapped]
                          if arg == SKIPREV:
                              arg = None
                          converted.append((rev, wanted, arg))
                      self.convertedorder = converted
                      return self.base.setrevmap(revmap)
                  def rebuild(self):
                      '''Recompute parentmap and wantedancestors from convertedorder.

                      Only the first call does any work; later calls return at once.
                      Always returns True.
                      '''
                      if self._rebuilt:
                          return True
                      self._rebuilt = True
                      self.parentmap.clear()
                      self.wantedancestors.clear()
                      self.seenchildren.clear()
                      # First pass: fetch the original parents of every revision not
                      # seen yet (getcommit also counts children) and count one extra
                      # child for each non-None arg.
                      for rev, wanted, arg in self.convertedorder:
                          if rev not in self.origparents:
                              try:
                                  self.origparents[rev] = self.getcommit(rev).parents
                              except error.RepoLookupError:
                                  self.ui.debug("unknown revmap source: %s\n" % rev)
                                  continue
                          if arg is not None:
                              self.children[arg] = self.children.get(arg, 0) + 1
                      # Second pass: replay the wanted / not wanted decisions in the
                      # recorded order.
                      for rev, wanted, arg in self.convertedorder:
                          try:
                              parents = self.origparents[rev]
                          except KeyError:
                              continue # unknown revmap source
                          if wanted:
                              self.mark_wanted(rev, parents)
                          else:
                              self.mark_not_wanted(rev, arg)
                          self._discard(arg, *parents)
                      return True
                  def getheads(self):
                      '''Return the heads reported by the wrapped source.'''
                      heads = self.base.getheads()
                      return heads
                  def getcommit(self, rev):
                      '''Fetch rev from the wrapped source, cache it and count children.

                      The commit object is kept in self.commits so that its parents
                      can be rewritten later on; every parent it lists gets one more
                      child in self.children.
                      '''
                      ctx = self.base.getcommit(rev)
                      self.commits[rev] = ctx
                      counts = self.children
                      for parent in ctx.parents:
                          counts[parent] = counts.get(parent, 0) + 1
                      return ctx
                  def _cachedcommit(self, rev):
                      '''Return the cached commit for rev, else ask the wrapped source.

                      Unlike getcommit(), a cache miss neither stores the commit nor
                      touches the children counters.
                      '''
                      try:
                          cached = self.commits[rev]
                      except KeyError:
                          cached = self.base.getcommit(rev)
                      return cached
                  def _discard(self, *revs):
                      '''Note that one more child of each rev in revs has been handled.

                      Once the number of handled children equals the number recorded
                      in self.children, the bookkeeping kept for that revision is
                      dropped. None entries are ignored.
                      '''
                      for r in revs:
                          if r is None:
                              continue
                          seen = self.seenchildren.get(r, 0) + 1
                          self.seenchildren[r] = seen
                          if seen != self.children[r]:
                              continue
                          self.wantedancestors.pop(r, None)
                          self.parentmap.pop(r, None)
                          del self.seenchildren[r]
                          if self._rebuilt:
                              del self.children[r]
                  def wanted(self, rev, i):
                      '''Return True if we're directly interested in rev.

                      i is an index selecting one of the parents of rev (None if rev
                      has no parents). getchangedfiles gives us the files that differ
                      between rev and that parent; rev is interesting as soon as the
                      filemapper keeps one of them. util.Abort is raised when the
                      wrapped source does not implement getchangedfiles.
                      '''
                      try:
                          changed = self.base.getchangedfiles(rev, i)
                      except NotImplementedError:
                          raise util.Abort(_("source repository doesn't support --filemap"))
                      interesting = False
                      for fname in changed:
                          if self.filemapper(fname):
                              interesting = True
                              break
                      return interesting
                  def mark_not_wanted(self, rev, p):
                      '''Record rev as not interesting, inheriting its state from p.'''
                      if p is not None:
                          # Reuse the data from our parent.
                          self.parentmap[rev] = self.parentmap[p]
                          self.wantedancestors[rev] = self.wantedancestors[p]
                          return
                      # A root revision. Use SKIPREV to indicate that it doesn't map
                      # to any revision in the restricted graph. Put SKIPREV in the
                      # set of wanted ancestors to simplify code elsewhere.
                      self.parentmap[rev] = SKIPREV
                      self.wantedancestors[rev] = set([SKIPREV])
                  def mark_wanted(self, rev, parents):
                      '''Mark rev as wanted and update the data structures.

                      A warning is printed for every parent that has no entry in
                      self.wantedancestors.
                      '''
                      # rev will be in the restricted graph, so children of rev in
                      # the original graph should still have rev as a parent in the
                      # restricted graph.
                      self.parentmap[rev] = rev
                      # The set of wanted ancestors of rev is rev itself plus the
                      # union of the sets of wanted ancestors of its parents.
                      ancestors = set([rev])
                      for p in parents:
                          if p not in self.wantedancestors:
                              self.ui.warn(_('warning: %s parent %s is missing\n') %
                                           (rev, p))
                              continue
                          ancestors.update(self.wantedancestors[p])
                      self.wantedancestors[rev] = ancestors
-                 def getchanges(self, rev):
+                 def getchanges(self, rev, full):
                      # Decide whether rev belongs in the restricted graph.
                      #
                      # For a wanted revision, returns (files, copies): files is a
                      # sorted list of (newname, (origname, origrev)) pairs and copies
                      # maps new destination names to new source names. For an unwanted
                      # revision, returns self.parentmap[rev] instead of a pair.
                      # full is passed through to the wrapped source's getchanges().
                      parents = self.commits[rev].parents
                      if len(parents) > 1:
                          self.rebuild()
                      # To decide whether we're interested in rev we:
                      #
                      # - calculate what parents rev will have if it turns out we're
                      #   interested in it.  If it's going to have more than 1 parent,
                      #   we're interested in it.
                      #
                      # - otherwise, we'll compare it with the single parent we found.
                      #   If any of the files we're interested in is different in the
                      #   two revisions, we're interested in rev.
                      # A parent p is interesting if its mapped version (self.parentmap[p]):
                      # - is not SKIPREV
                      # - is still not in the list of parents (we don't want duplicates)
                      # - is not an ancestor of the mapped versions of the other parents or
                      #   there is no parent in the same branch than the current revision.
                      mparents = []
                      knownparents = set()
                      branch = self.commits[rev].branch
                      hasbranchparent = False
                      for i, p1 in enumerate(parents):
                          mp1 = self.parentmap[p1]
                          if mp1 == SKIPREV or mp1 in knownparents:
                              continue
                          isancestor = util.any(p2 for p2 in parents
                                                if p1 != p2 and mp1 != self.parentmap[p2]
                                                and mp1 in self.wantedancestors[p2])
                          if not isancestor and not hasbranchparent and len(parents) > 1:
                              # This could be expensive, avoid unnecessary calls.
                              if self._cachedcommit(p1).branch == branch:
                                  hasbranchparent = True
                          # (original parent, mapped parent, parent index, isancestor)
                          mparents.append((p1, mp1, i, isancestor))
                          knownparents.add(mp1)
                      # Discard parents ancestors of other parents if there is a
                      # non-ancestor one on the same branch than current revision.
                      if hasbranchparent:
                          mparents = [p for p in mparents if not p[3]]
                      # wp is the index of the parent that wanted() compares rev against
                      wp = None
                      if mparents:
                          wp = max(p[2] for p in mparents)
                          mparents = [p[1] for p in mparents]
                      elif parents:
                          wp = 0
                      self.origparents[rev] = parents
                      closed = False
                      if 'close' in self.commits[rev].extra:
                          # A branch closing revision is only useful if one of its
                          # parents belong to the branch being closed
                          pbranches = [self._cachedcommit(p).branch for p in mparents]
                          if branch in pbranches:
                              closed = True
                      if len(mparents) < 2 and not closed and not self.wanted(rev, wp):
                          # We don't want this revision.
                          # Update our state and tell the convert process to map this
                          # revision to the same revision its parent was mapped to.
                          p = None
                          if parents:
                              p = parents[wp]
                          self.mark_not_wanted(rev, p)
                          self.convertedorder.append((rev, False, p))
                          self._discard(*parents)
                          return self.parentmap[rev]
                      # We want this revision.
                      # Rewrite the parents of the commit object
                      self.commits[rev].parents = mparents
                      self.mark_wanted(rev, parents)
                      self.convertedorder.append((rev, True, None))
                      self._discard(*parents)
                      # Get the real changes and do the filtering/mapping. To be
                      # able to get the files later on in getfile, we hide the
                      # original filename in the rev part of the return value.
-                     changes, copies = self.base.getchanges(rev)
+                     changes, copies = self.base.getchanges(rev, full)
                      files = {}
                      for f, r in changes:
                          newf = self.filemapper(f)
                          # When several source files map to the same new name, a
                          # renamed file replaces an earlier entry, while a file that
                          # keeps its name never replaces one.
                          if newf and (newf != f or newf not in files):
                              files[newf] = (f, r)
                      files = sorted(files.items())
                      # Keep a copy record only when both ends survive the filemap.
                      ncopies = {}
                      for c in copies:
                          newc = self.filemapper(c)
                          if newc:
                              newsource = self.filemapper(copies[c])
                              if newsource:
                                  ncopies[newc] = newsource
                      return files, ncopies
                  def getfile(self, name, rev):
                      '''Fetch a file from the wrapped source under its original name.

                      rev is the (original name, original revision) pair produced by
                      getchanges(); the mapped name argument is not used.
                      '''
                      origname, origrev = rev
                      wrapped = self.base
                      return wrapped.getfile(origname, origrev)
                  def gettags(self):
                      '''Return the tags reported by the wrapped source.'''
                      tags = self.base.gettags()
                      return tags
                  def hasnativeorder(self):
                      '''Return the wrapped source's hasnativeorder() answer.'''
                      answer = self.base.hasnativeorder()
                      return answer
                  def lookuprev(self, rev):
                      '''Look rev up in the wrapped source and return its result.'''
                      found = self.base.lookuprev(rev)
                      return found
                  def getbookmarks(self):
                      '''Return the bookmarks reported by the wrapped source.'''
                      marks = self.base.getbookmarks()
                      return marks
                  def converted(self, rev, sinkrev):
                      '''Forward the converted() notification to the wrapped source.'''
                      wrapped = self.base
                      wrapped.converted(rev, sinkrev)

hgext/convert/git.py

0 +3 -1

              # git.py - git support for the convert extension
              #
              #  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import os
              import subprocess
              from mercurial import util, config
              from mercurial.node import hex, nullid
              from mercurial.i18n import _
              from common import NoRepo, commit, converter_source, checktool
              class submodule(object):
                  '''Plain record holding a submodule's path, node and url.'''
                  def __init__(self, path, node, url):
                      self.path, self.node, self.url = path, node, url
                  def hgsub(self):
                      '''Return the text "PATH = [git]URL" for this submodule.'''
                      fields = (self.path, self.url)
                      return "%s = [git]%s" % fields
                  def hgsubstate(self):
                      '''Return the text "NODE PATH" for this submodule.'''
                      fields = (self.node, self.path)
                      return "%s %s" % fields
              class convert_git(converter_source):
                  # Windows does not support GIT_DIR= construct while other systems
                  # cannot remove environment variable. Just assume none have
                  # both issues.
                  if util.safehasattr(os, 'unsetenv'):
                      def gitopen(self, s, err=None):
                          """Run shell command s with GIT_DIR pointing at self.path.

                          Returns a readable pipe: the child's stdout from
                          util.popen3 when err is subprocess.PIPE, the pipe from
                          popen_with_stderr when err is subprocess.STDOUT, and
                          util.popen(s, 'rb') otherwise. The previous GIT_DIR
                          value (or its absence) is restored before returning.
                          """
                          saved = os.environ.get('GIT_DIR')
                          os.environ['GIT_DIR'] = self.path
                          try:
                              if err == subprocess.PIPE:
                                  childin, childout, childerr = util.popen3(s)
                                  return childout
                              if err == subprocess.STDOUT:
                                  return self.popen_with_stderr(s)
                              return util.popen(s, 'rb')
                          finally:
                              # put the environment back exactly as it was found
                              if saved is not None:
                                  os.environ['GIT_DIR'] = saved
                              else:
                                  del os.environ['GIT_DIR']
                      def gitpipe(self, s):
                          """Start s with GIT_DIR=self.path; return util.popen3's result.

                          GIT_DIR is overridden only while the child is spawned;
                          the prior value, or its absence, is restored afterwards
                          even if spawning fails.
                          """
                          saved = os.environ.get('GIT_DIR')
                          os.environ['GIT_DIR'] = self.path
                          try:
                              pipes = util.popen3(s)
                          finally:
                              if saved is not None:
                                  os.environ['GIT_DIR'] = saved
                              else:
                                  del os.environ['GIT_DIR']
                          return pipes
                  else:
                      def gitopen(self, s, err=None):
                          """Run shell command s and return a readable pipe.

                          GIT_DIR is supplied through a 'GIT_DIR=<path> ' prefix
                          on the command line. With err == subprocess.PIPE the
                          stdout member of util.popen3 is returned; with
                          err == subprocess.STDOUT the command is handed to
                          popen_with_stderr as is (no prefix is added on that
                          path); otherwise util.popen(..., 'rb') is used.
                          """
                          if err == subprocess.PIPE:
                              pipes = util.popen3('GIT_DIR=%s %s' % (self.path, s))
                              childin, childout, childerr = pipes
                              return childout
                          if err == subprocess.STDOUT:
                              return self.popen_with_stderr(s)
                          return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb')
                      def gitpipe(self, s):
                          """Return util.popen3() for s prefixed with 'GIT_DIR=<self.path> '."""
                          return util.popen3('GIT_DIR=%s %s' % (self.path, s))
                  def popen_with_stderr(self, s):
                      """Run s through the shell and return the child's stdout pipe.

                      stderr is redirected into stdout, so both streams are read
                      from the returned file object. The environment is inherited
                      (env=None).
                      """
                      proc = subprocess.Popen(s, shell=True, bufsize=-1,
                                              close_fds=util.closefds,
                                              universal_newlines=False,
                                              env=None,
                                              stdin=subprocess.PIPE,
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.STDOUT)
                      return proc.stdout
                  def gitread(self, s):
                      """Run s via gitopen and return (output, close status).

                      The second element is whatever the pipe's close() returns;
                      callers in this class treat a true value as failure.
                      """
                      pipe = self.gitopen(s)
                      output = pipe.read()
                      status = pipe.close()
                      return output, status
                  def __init__(self, ui, path, rev=None):
                      """Validate path as a git repository and open the object pipe.

                      path may be a work tree (its '.git' subdirectory is then
                      used) or a git directory itself. Raises NoRepo when no
                      'objects' entry exists below it. On success a
                      'git cat-file --batch' process is started and its pipes are
                      kept in self.catfilepipe.
                      """
                      super(convert_git, self).__init__(ui, path, rev=rev)
                      dotgit = path + "/.git"
                      if os.path.isdir(dotgit):
                          path = dotgit
                      objects = path + "/objects"
                      if not os.path.exists(objects):
                          raise NoRepo(_("%s does not look like a Git repository") % path)
                      checktool('git', 'git')
                      self.path = path
                      self.submodules = []
                      self.catfilepipe = self.gitpipe('git cat-file --batch')
                  def after(self):
                      """Close every pipe held in self.catfilepipe."""
                      for pipe in self.catfilepipe:
                          pipe.close()
                  def getheads(self):
                      """Return the list of head revisions to convert.

                      With self.rev set, the single revision it resolves to is
                      returned (the output minus its last character); otherwise
                      one entry per line printed for all branches and remotes.
                      Raises util.Abort when the git command reports failure.
                      """
                      if self.rev:
                          output, status = self.gitread("git rev-parse --verify %s"
                                                        % self.rev)
                          found = [output[:-1]]
                      else:
                          output, status = self.gitread(
                              'git rev-parse --branches --remotes')
                          found = output.splitlines()
                      if status:
                          raise util.Abort(_('cannot retrieve git heads'))
                      return found
                  def catfile(self, rev, type):
                      """Return the content of object rev read from the batch pipe.

                      rev is written to the first pipe in self.catfilepipe and the
                      answer is read from the second one: a header line whose
                      second field must equal type and whose third field is the
                      byte count, followed by the data and one trailing byte.

                      Raises IOError for the null revision and util.Abort when the
                      object type differs or fewer bytes than announced arrive.
                      """
                      if rev == hex(nullid):
                          raise IOError
                      request, reply = self.catfilepipe[0], self.catfilepipe[1]
                      request.write(rev+'\n')
                      request.flush()
                      header = reply.readline().split()
                      if header[1] != type:
                          raise util.Abort(_('cannot read %r object at %s') % (type, rev))
                      expected = int(header[2])
                      payload = reply.read(expected)
                      if len(payload) < expected:
                          raise util.Abort(_('cannot read %r object at %s: unexpected size')
                                           % (type, rev))
                      # consume the newline that follows the object data
                      reply.read(1)
                      return payload
                  def getfile(self, name, rev):
                      """Return (data, mode) for file name at blob rev.

                      The null revision yields (None, None). '.hgsub' and
                      '.hgsubstate' are synthesized from the submodules returned
                      by submoditer(), one line each, with an empty mode. Any
                      other name is read as a blob and its mode is taken from
                      self.modecache.
                      """
                      if rev == hex(nullid):
                          return None, None
                      if name == '.hgsub':
                          lines = [m.hgsub() for m in self.submoditer()]
                          return '\n'.join(lines), ''
                      if name == '.hgsubstate':
                          lines = [m.hgsubstate() for m in self.submoditer()]
                          return '\n'.join(lines), ''
                      data = self.catfile(rev, "blob")
                      return data, self.modecache[(name, rev)]
                  def submoditer(self):
                      """Yield the submodules sorted by path, skipping null nodes."""
                      absent = hex(nullid)
                      ordered = sorted(self.submodules, key=lambda sub: sub.path)
                      for sub in ordered:
                          if sub.node == absent:
                              continue
                          yield sub
                  def parsegitmodules(self, content):
                      """Rebuild self.submodules from the text of a .gitmodules file.

                      Expected layout, one section per submodule:

                          [submodule "sub"]
                          <TAB>path = sub
                          <TAB>url = git://giturl

                      Sections lacking either 'url' or 'path' are ignored. Each
                      submodule is created with an empty node.
                      """
                      self.submodules = []
                      parsed = config.config()
                      # The entries are indented with tabs, which the config
                      # parser cannot handle, so drop every tab first.
                      parsed.parse('.gitmodules', content.replace('\t',''))
                      for name in parsed.sections():
                          section = parsed[name]
                          if 'url' not in section or 'path' not in section:
                              continue
                          self.submodules.append(
                              submodule(section['path'], '', section['url']))
                  def retrievegitmodules(self, version):
                      """Load the submodules of commit version into self.submodules.

                      Reads '.gitmodules' as of version (util.Abort if that
                      fails), parses it, then resolves each submodule path to a
                      node with 'git rev-parse'. A submodule whose path cannot be
                      resolved keeps the node set by parsegitmodules.
                      """
                      content, status = self.gitread("git show %s:.gitmodules" % version)
                      if status:
                          raise util.Abort(_('cannot read submodules config file in %s') %
                                           version)
                      self.parsegitmodules(content)
                      for sub in self.submodules:
                          output, status = self.gitread("git rev-parse %s:%s"
                                                        % (version, sub.path))
                          if not status:
                              sub.node = output.strip()
-                 def getchanges(self, version):
+                 def getchanges(self, version, full):
+                     if full:
+                         raise util.Abort(_("convert from git do not support --full"))
                      # Collect the files touched by commit `version`.
                      #
                      # Returns (changes, {}): changes is a list of
                      # (filename, blob hash) pairs; the second element is always
                      # an empty dict here. As a side effect self.modecache is
                      # rebuilt, mapping (filename, hash) to 'x', 'l' or '', and
                      # self.submodules is refreshed when submodules are involved.
                      # Raises util.Abort if the diff-tree pipe reports failure.
                      self.modecache = {}
                      fh = self.gitopen("git diff-tree -z --root -m -r %s" % version)
                      changes = []
                      seen = set()
                      entry = None
                      subexists = False
                      subdeleted = False
                      # The NUL separated output alternates between a record
                      # starting with ':' and the file name it belongs to.
                      for l in fh.read().split('\x00'):
                          if not entry:
                              if not l.startswith(':'):
                                  continue
                              entry = l
                              continue
                          f = l
                          # only the first record seen for a file name is used
                          if f not in seen:
                              seen.add(f)
                              # fields used below: [0] ':' + first mode,
                              # [1] second mode, [3] second hash, [4] status
                              entry = entry.split()
                              h = entry[3]
                              p = (entry[1] == "100755")
                              s = (entry[1] == "120000")
                              if f == '.gitmodules':
                                  subexists = True
                                  if entry[4] == 'D':
                                      subdeleted = True
                                      changes.append(('.hgsub', hex(nullid)))
                                  else:
                                      changes.append(('.hgsub', ''))
                              # mode 160000 on either side: recorded only as
                              # "submodules exist", not as a file change
                              elif entry[1] == '160000' or entry[0] == ':160000':
                                  subexists = True
                              else:
                                  self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
                                  changes.append((f, h))
                          entry = None
                      if fh.close():
                          raise util.Abort(_('cannot read changes in %s') % version)
                      if subexists:
                          if subdeleted:
                              changes.append(('.hgsubstate', hex(nullid)))
                          else:
                              # refresh self.submodules so getfile() can render
                              # .hgsub / .hgsubstate for this commit
                              self.retrievegitmodules(version)
                              changes.append(('.hgsubstate', ''))
                      return (changes, {})
                  def getcommit(self, version):
                      """Build a commit object for git commit version.

                      The raw commit is read through catfile(): header lines up to
                      the first blank line, then the message. 'parent' headers
                      become the parents list; 'author' and 'committer' headers
                      are split into identity, timestamp and timezone. Whichever
                      of these two headers comes last supplies the timestamp and
                      timezone used for the date. When the committer differs from
                      the author a 'committer: ...' line is appended to the
                      message.

                      The date is returned as '<timestamp> <offset>' where offset
                      is the timezone in seconds with the sign flipped relative
                      to git's '+HHMM' notation.
                      """
                      c = self.catfile(version, "commit") # read the raw commit object
                      end = c.find("\n\n")
                      message = c[end + 2:]
                      message = self.recode(message)
                      l = c[:end].splitlines()
                      parents = []
                      author = committer = None
                      # the first header line is skipped
                      for e in l[1:]:
                          n, v = e.split(" ", 1)
                          if n == "author":
                              p = v.split()
                              tm, tz = p[-2:]
                              author = " ".join(p[:-2])
                              # a bare '<email>' identity loses its brackets;
                              # startswith() also copes with an empty identity
                              if author.startswith("<"):
                                  author = author[1:-1]
                              author = self.recode(author)
                          if n == "committer":
                              p = v.split()
                              tm, tz = p[-2:]
                              committer = " ".join(p[:-2])
                              if committer.startswith("<"):
                                  committer = committer[1:-1]
                              committer = self.recode(committer)
                          if n == "parent":
                              parents.append(v)
                      if committer and committer != author:
                          message += "\ncommitter: %s\n" % committer
                      # tz looks like '+HHMM' / '-HHMM': sign, hours, minutes
                      tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
                      # hours AND minutes must both be converted to seconds,
                      # otherwise zones such as +0530 come out wrong
                      tz = -int(tzs) * (int(tzh) * 3600 + int(tzm) * 60)
                      date = tm + " " + str(tz)
                      c = commit(parents=parents, date=date, author=author, desc=message,
                                 rev=version)
                      return c
                  def gettags(self):
                      """Return a dict mapping tag names to nodes.

                      The refs are listed with 'git ls-remote --tags' (stderr
                      merged into the output). A ref ending in '^{}' supplies the
                      node for the tag name without that suffix, and the plain
                      entry of such a tag is dropped in its favour. Raises
                      util.Abort on an 'error:' / 'fatal:' line or when the
                      command reports failure.
                      """
                      prefix = 'refs/tags/'
                      fh = self.gitopen('git ls-remote --tags "%s"' % self.path,
                                        err=subprocess.STDOUT)
                      # Pass 1: every tag ref, annotated or not
                      allrefs = {}
                      for line in fh:
                          line = line.strip()
                          if line.startswith("error:") or line.startswith("fatal:"):
                              raise util.Abort(_('cannot read tags from %s') % self.path)
                          node, ref = line.split(None, 1)
                          if ref.startswith(prefix):
                              allrefs[ref[len(prefix):]] = node
                      if fh.close():
                          raise util.Abort(_('cannot read tags from %s') % self.path)
                      # Pass 2: prefer the '^{}' entry over the plain one
                      tags = {}
                      for name in allrefs:
                          if name.endswith('^{}'):
                              tags[name[:-3]] = allrefs[name]
                          elif name + '^{}' not in allrefs:
                              tags[name] = allrefs[name]
                      return tags
                  def getchangedfiles(self, version, i):
                      """Return the names of the files changed by commit version.

                      With i None the names are taken from the tab separated
                      lines of 'git diff-tree --root -m -r'; otherwise the commit
                      is compared against its parent number i + 1 using
                      --name-only. Raises util.Abort when the command fails.
                      """
                      if i is None:
                          fh = self.gitopen("git diff-tree --root -m -r %s" % version)
                          changes = []
                          for line in fh:
                              # lines without a tab carry no file name
                              if "\t" in line:
                                  meta, fname = line[:-1].split("\t")
                                  changes.append(fname)
                      else:
                          fh = self.gitopen('git diff-tree --name-only --root -r %s '
                                            '"%s^%s" --' % (version, version, i + 1))
                          changes = [line.rstrip('\n') for line in fh]
                      if fh.close():
                          raise util.Abort(_('cannot read changes in %s') % version)
                      return changes
                  def getbookmarks(self):
                      """Return a dict mapping bookmark names to revisions.

                      Two listings are consulted: 'git ls-remote --heads origin',
                      whose names get a 'remote/' prefix, and 'git show-ref',
                      whose names are used as is. Only refs under 'refs/heads/'
                      are kept. Any failure while reading one listing is ignored,
                      so the result is best effort.
                      """
                      headprefix = 'refs/heads/'
                      skip = len(headprefix)
                      # bookmark name prefix -> command listing the refs
                      commands = { 'remote/': 'git ls-remote --heads origin',
                                   '': 'git show-ref'}
                      found = {}
                      for namespace in commands:
                          try:
                              listing = self.gitopen(commands[namespace],
                                                     err=subprocess.PIPE)
                              for line in listing:
                                  rev, ref = line.strip().split(None, 1)
                                  if ref.startswith(headprefix):
                                      found['%s%s' % (namespace, ref[skip:])] = rev
                          except Exception:
                              # deliberately ignored: a listing that cannot be
                              # read simply contributes no bookmarks
                              pass
                      return found
                  def checkrevformat(self, revstr, mapname='splicemap'):
                      """Validate revstr as a git revision (a 40 byte hex string).

                      The check itself is delegated to self.checkhexformat, which
                      also receives mapname (presumably only to name the map file
                      in its error message; confirm against checkhexformat).
                      """
                      self.checkhexformat(revstr, mapname)

hgext/convert/gnuarch.py

0 +3 -1

              # gnuarch.py - GNU Arch support for the convert extension
              #
              #  Copyright 2008, 2009 Aleix Conchillo Flaque <aleix@member.fsf.org>
              #  and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from common import NoRepo, commandline, commit, converter_source
              from mercurial.i18n import _
              from mercurial import encoding, util
              import os, shutil, tempfile, stat
              from email.Parser import Parser
              class gnuarch_source(converter_source, commandline):
                  class gnuarch_rev(object):
                      """Mutable record of what is known about one revision."""
                      def __init__(self, rev):
                          self.rev = rev
                          # metadata, empty until a cat-log has been parsed
                          self.summary = self.author = ''
                          self.date = self.continuationof = None
                          # file level changes, each kind in its own container
                          self.add_files, self.mod_files, self.del_files = [], [], []
                          # renames, mapping source to destination
                          self.ren_files, self.ren_dirs = {}, {}
                  def __init__(self, ui, path, rev=None):
                      """Check that path is a GNU Arch tree and pick the tool to use.

                      Raises NoRepo when path has no '{arch}' entry and util.Abort
                      when neither 'baz' nor 'tla' can be found ('baz' is tried
                      first). All bookkeeping attributes start out empty; they
                      are filled in by before().
                      """
                      super(gnuarch_source, self).__init__(ui, path, rev=rev)
                      if not os.path.exists(os.path.join(path, '{arch}')):
                          raise NoRepo(_("%s does not look like a GNU Arch repository")
                                       % path)
                      # Could use checktool, but we want to check for baz or tla.
                      self.execmd = None
                      for tool in ('baz', 'tla'):
                          if util.findexe(tool):
                              self.execmd = tool
                              break
                      else:
                          raise util.Abort(_('cannot find a GNU Arch tool'))
                      commandline.__init__(self, ui, self.execmd)
                      self.path = os.path.realpath(path)
                      self.tmppath = None
                      self.treeversion = None
                      self.lastrev = None
                      self.changes = {}
                      self.parents = {}
                      self.tags = {}
                      self.catlogparser = Parser()
                      self.encoding = encoding.encoding
                      self.archives = []
                  def before(self):
                      """Query the archive tool and build the revision graph.

                      Fills in self.archives, self.treeversion and self.tmppath,
                      then walks the revisions of the tree version, creating a
                      gnuarch_rev in self.changes and an entry in self.parents for
                      each one. Every revision is appended as a parent of the
                      revision listed just before it; the first one listed ends up
                      in self.parents[None]. The walk follows continuation-of
                      headers into other tree versions and stops at an
                      unregistered archive or a 'base-0' revision.
                      """
                      # Get registered archives
                      self.archives = [i.rstrip('\n')
                                       for i in self.runlines0('archives', '-n')]
                      # tla and baz take the tree directory differently
                      if self.execmd == 'tla':
                          output = self.run0('tree-version', self.path)
                      else:
                          output = self.run0('tree-version', '-d', self.path)
                      self.treeversion = output.strip()
                      # Get name of temporary directory
                      version = self.treeversion.split('/')
                      self.tmppath = os.path.join(tempfile.gettempdir(),
                                                  'hg-%s' % version[1])
                      # Generate parents dictionary
                      self.parents[None] = []
                      treeversion = self.treeversion
                      child = None
                      while treeversion:
                          self.ui.status(_('analyzing tree version %s...\n') % treeversion)
                          # the archive name is the part before the first '/'
                          archive = treeversion.split('/')[0]
                          if archive not in self.archives:
                              self.ui.status(_('tree analysis stopped because it points to '
                                               'an unregistered archive %s...\n') % archive)
                              break
                          # Get the complete list of revisions for that tree version
                          output, status = self.runlines('revisions', '-r', '-f', treeversion)
                          self.checkexit(status, 'failed retrieving revisions for %s'
                                         % treeversion)
                          # No new iteration unless a revision has a continuation-of header
                          treeversion = None
                          for l in output:
                              rev = l.strip()
                              self.changes[rev] = self.gnuarch_rev(rev)
                              self.parents[rev] = []
                              # Read author, date and summary
                              catlog, status = self.run('cat-log', '-d', self.path, rev)
                              # fall back to the archive's copy of the log
                              if status:
                                  catlog  = self.run0('cat-archive-log', rev)
                              self._parsecatlog(catlog, rev)
                              # Populate the parents map
                              self.parents[child].append(rev)
                              # Keep track of the current revision as the child of the next
                              # revision scanned
                              child = rev
                              # Check if we have to follow the usual incremental history
                              # or if we have to 'jump' to a different treeversion given
                              # by the continuation-of header.
                              if self.changes[rev].continuationof:
                                  # drop the last '--' separated component to get
                                  # the tree version to continue with
                                  treeversion = '--'.join(
                                      self.changes[rev].continuationof.split('--')[:-1])
                                  break
                              # If we reached a base-0 revision w/o any continuation-of
                              # header, it means the tree history ends here.
                              if rev[-6:] == 'base-0':
                                  break
                  def after(self):
                      """Remove the temporary tree at self.tmppath, ignoring errors."""
                      self.ui.debug('cleaning up %s\n' % self.tmppath)
                      shutil.rmtree(self.tmppath, ignore_errors=True)
                  def getheads(self):
                      """Return the revisions stored under the None key of self.parents."""
                      return self.parents[None]
                  def getfile(self, name, rev):
                      """Return (data, mode) of name in the temporary tree.

                      rev must be the revision last passed to getchanges(),
                      otherwise util.Abort is raised. A name that does not exist
                      in the tree yields (None, None).
                      """
                      if rev != self.lastrev:
                          raise util.Abort(_('internal calling inconsistency'))
                      target = os.path.join(self.tmppath, name)
                      if os.path.lexists(target):
                          return self._getfile(name, rev)
                      return None, None
-                 def getchanges(self, rev):
+                 def getchanges(self, rev, full):
+                     if full:
+                         raise util.Abort(_("convert from arch do not support --full"))
                      # Bring the temporary tree to `rev` and report what changed.
                      #
                      # Returns (changes, copies): changes is a sorted list of
                      # unique (filename, rev) pairs, copies maps a rename
                      # destination to its source. self.lastrev is set to rev so
                      # that getfile() can verify it is asked about this revision.
                      self._update(rev)
                      changes = []
                      copies = {}
                      for f in self.changes[rev].add_files:
                          changes.append((f, rev))
                      for f in self.changes[rev].mod_files:
                          changes.append((f, rev))
                      for f in self.changes[rev].del_files:
                          changes.append((f, rev))
                      # a renamed file touches both its old and its new name
                      for src in self.changes[rev].ren_files:
                          to = self.changes[rev].ren_files[src]
                          changes.append((src, rev))
                          changes.append((to, rev))
                          copies[to] = src
                      # a renamed directory is expanded into its files
                      for src in self.changes[rev].ren_dirs:
                          to = self.changes[rev].ren_dirs[src]
                          chgs, cps = self._rendirchanges(src, to)
                          changes += [(f, rev) for f in chgs]
                          copies.update(cps)
                      self.lastrev = rev
                      return sorted(set(changes)), copies
                  def getcommit(self, rev):
                      """Return a commit built from the data recorded for rev.

                      Author, date and summary come from self.changes[rev], the
                      parents from self.parents[rev].
                      """
                      info = self.changes[rev]
                      return commit(author=info.author,
                                    date=info.date,
                                    desc=info.summary,
                                    parents=self.parents[rev],
                                    rev=rev)
                  def gettags(self):
                      """Return self.tags (initialised to an empty dict in __init__)."""
                      return self.tags
                  def _execute(self, cmd, *args, **kwargs):
                      """Run '<tool> cmd args...' via os.system and return its result.

                      Every argument is shell quoted and both stdout and stderr of
                      the command are sent to os.devnull. kwargs is accepted but
                      not used.
                      """
                      argv = [self.execmd, cmd]
                      argv.extend(args)
                      quoted = [util.shellquote(arg) for arg in argv]
                      quoted.extend(['>', os.devnull, '2>', os.devnull])
                      cmdline = util.quotecommand(' '.join(quoted))
                      self.ui.debug(cmdline, '\n')
                      return os.system(cmdline)
                  def _update(self, rev):
                      self.ui.debug('applying revision %s...\n' % rev)
                      changeset, status = self.runlines('replay', '-d', self.tmppath,
                                                            rev)
                      if status:
                          # Something went wrong while merging (baz or tla
                          # issue?), get latest revision and try from there
                          shutil.rmtree(self.tmppath, ignore_errors=True)
                          self._obtainrevision(rev)
                      else:
                          old_rev = self.parents[rev][0]
                          self.ui.debug('computing changeset between %s and %s...\n'
                                        % (old_rev, rev))
                          self._parsechangeset(changeset, rev)
                  def _getfile(self, name, rev):
                      mode = os.lstat(os.path.join(self.tmppath, name)).st_mode
                      if stat.S_ISLNK(mode):
                          data = os.readlink(os.path.join(self.tmppath, name))
                          mode = mode and 'l' or ''
                      else:
                          data = open(os.path.join(self.tmppath, name), 'rb').read()
                          mode = (mode & 0111) and 'x' or ''
                      return data, mode
                  def _exclude(self, name):
                      exclude = ['{arch}', '.arch-ids', '.arch-inventory']
                      for exc in exclude:
                          if name.find(exc) != -1:
                              return True
                      return False
                  def _readcontents(self, path):
                      files = []
                      contents = os.listdir(path)
                      while len(contents) > 0:
                          c = contents.pop()
                          p = os.path.join(path, c)
                          # os.walk could be used, but here we avoid internal GNU
                          # Arch files and directories, thus saving a lot time.
                          if not self._exclude(p):
                              if os.path.isdir(p):
                                  contents += [os.path.join(c, f) for f in os.listdir(p)]
                              else:
                                  files.append(c)
                      return files
                  def _rendirchanges(self, src, dest):
                      changes = []
                      copies = {}
                      files = self._readcontents(os.path.join(self.tmppath, dest))
                      for f in files:
                          s = os.path.join(src, f)
                          d = os.path.join(dest, f)
                          changes.append(s)
                          changes.append(d)
                          copies[d] = s
                      return changes, copies
                  def _obtainrevision(self, rev):
                      self.ui.debug('obtaining revision %s...\n' % rev)
                      output = self._execute('get', rev, self.tmppath)
                      self.checkexit(output)
                      self.ui.debug('analyzing revision %s...\n' % rev)
                      files = self._readcontents(self.tmppath)
                      self.changes[rev].add_files += files
                  def _stripbasepath(self, path):
                      if path.startswith('./'):
                          return path[2:]
                      return path
                  def _parsecatlog(self, data, rev):
                      """Fill self.changes[rev] from the cat-log text in data.

                      Sets date (from 'Standard-date'), author (from 'Creator'),
                      summary ('Summary' and the log body joined by a blank line)
                      and, when present, continuationof (from 'Continuation-of').
                      Any failure along the way is reported as util.Abort.
                      """
                      try:
                          catlog = self.catlogparser.parsestr(data)
                          # Commit date
                          when = util.strdate(catlog['Standard-date'],
                                              '%Y-%m-%d %H:%M:%S')
                          when = util.datestr(when)
                          record = self.changes[rev]
                          record.date = when
                          # Commit author
                          record.author = self.recode(catlog['Creator'])
                          # Commit description
                          pieces = (catlog['Summary'], catlog.get_payload())
                          record.summary = '\n\n'.join(pieces)
                          record.summary = self.recode(record.summary)
                          # Commit revision origin when dealing with a branch or tag
                          if 'Continuation-of' in catlog:
                              origin = self.recode(catlog['Continuation-of'])
                              self.changes[rev].continuationof = origin
                      except Exception:
                          raise util.Abort(_('could not parse cat-log of %s') % rev)
                  def _parsechangeset(self, data, rev):
                      for l in data:
                          l = l.strip()
                          # Added file (ignore added directory)
                          if l.startswith('A') and not l.startswith('A/'):
                              file = self._stripbasepath(l[1:].strip())
                              if not self._exclude(file):
                                  self.changes[rev].add_files.append(file)
                          # Deleted file (ignore deleted directory)
                          elif l.startswith('D') and not l.startswith('D/'):
                              file = self._stripbasepath(l[1:].strip())
                              if not self._exclude(file):
                                  self.changes[rev].del_files.append(file)
                          # Modified binary file
                          elif l.startswith('Mb'):
                              file = self._stripbasepath(l[2:].strip())
                              if not self._exclude(file):
                                  self.changes[rev].mod_files.append(file)
                          # Modified link
                          elif l.startswith('M->'):
                              file = self._stripbasepath(l[3:].strip())
                              if not self._exclude(file):
                                  self.changes[rev].mod_files.append(file)
                          # Modified file
                          elif l.startswith('M'):
                              file = self._stripbasepath(l[1:].strip())
                              if not self._exclude(file):
                                  self.changes[rev].mod_files.append(file)
                          # Renamed file (or link)
                          elif l.startswith('=>'):
                              files = l[2:].strip().split(' ')
                              if len(files) == 1:
                                  files = l[2:].strip().split('\t')
                              src = self._stripbasepath(files[0])
                              dst = self._stripbasepath(files[1])
                              if not self._exclude(src) and not self._exclude(dst):
                                  self.changes[rev].ren_files[src] = dst
                          # Conversion from file to link or from link to file (modified)
                          elif l.startswith('ch'):
                              file = self._stripbasepath(l[2:].strip())
                              if not self._exclude(file):
                                  self.changes[rev].mod_files.append(file)
                          # Renamed directory
                          elif l.startswith('/>'):
                              dirs = l[2:].strip().split(' ')
                              if len(dirs) == 1:
                                  dirs = l[2:].strip().split('\t')
                              src = self._stripbasepath(dirs[0])
                              dst = self._stripbasepath(dirs[1])
                              if not self._exclude(src) and not self._exclude(dst):
                                  self.changes[rev].ren_dirs[src] = dst

hgext/convert/hg.py

0 +14 -8

              # hg.py - hg backend for convert extension
              #
              #  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              # Notes for hg->hg conversion:
              #
              # * Old versions of Mercurial didn't trim the whitespace from the ends
              #   of commit messages, but new versions do.  Changesets created by
              #   those older versions, then converted, may thus have different
              #   hashes for changesets that are otherwise identical.
              #
              # * Using "--config convert.hg.saverev=true" will make the source
              #   identifier to be stored in the converted revision. This will cause
              #   the converted revision to have a different identity than the
              #   source.
              import os, time, cStringIO
              from mercurial.i18n import _
              from mercurial.node import bin, hex, nullid
              from mercurial import hg, util, context, bookmarks, error, scmutil
              from common import NoRepo, commit, converter_source, converter_sink
              import re
              sha1re = re.compile(r'\b[0-9a-f]{6,40}\b')
              class mercurial_sink(converter_sink):
                  def __init__(self, ui, path):
                      """Open the destination repository at path, creating it if needed.

                      A non-empty existing directory is opened as a repository; anything
                      else is initialized as a new repository and path is appended to
                      self.created.  Raises NoRepo when the repository cannot be opened
                      or created, or when it is not local.
                      """
                      converter_sink.__init__(self, ui, path)
                      getbool = ui.configbool
                      self.branchnames = getbool('convert', 'hg.usebranchnames', True)
                      self.clonebranches = getbool('convert', 'hg.clonebranches', False)
                      self.tagsbranch = ui.config('convert', 'hg.tagsbranch', 'default')
                      self.lastbranch = None
                      populated = os.path.isdir(path) and len(os.listdir(path)) > 0
                      if not populated:
                          # nothing usable at path yet: create a fresh repository
                          try:
                              ui.status(_('initializing destination %s repository\n') % path)
                              self.repo = hg.repository(self.ui, path, create=True)
                              if not self.repo.local():
                                  raise NoRepo(_('%s is not a local Mercurial repository')
                                               % path)
                              self.created.append(path)
                          except error.RepoError:
                              ui.traceback()
                              raise NoRepo(_("could not create hg repository %s as sink")
                                           % path)
                      else:
                          # reuse whatever repository already lives at path
                          try:
                              self.repo = hg.repository(self.ui, path)
                              if not self.repo.local():
                                  raise NoRepo(_('%s is not a local Mercurial repository')
                                               % path)
                          except error.RepoError, err:
                              ui.traceback()
                              raise NoRepo(err.args[0])
                      # locks are only taken in before() and dropped in after()
                      self.lock = None
                      self.wlock = None
                      self.filemapmode = False
                  def before(self):
                      """Take the repository wlock and then its lock, keeping both on self."""
                      self.ui.debug('run hg sink pre-conversion action\n')
                      repo = self.repo
                      self.wlock = repo.wlock()
                      self.lock = repo.lock()
                  def after(self):
                      """Release the lock and then the wlock, skipping any that is unset."""
                      self.ui.debug('run hg sink post-conversion action\n')
                      for attr in ('lock', 'wlock'):
                          held = getattr(self, attr)
                          if held:
                              held.release()
                  def revmapfile(self):
                      """Return the "shamap" path as resolved by self.repo.join()."""
                      shamap = self.repo.join("shamap")
                      return shamap
                  def authorfile(self):
                      """Return the "authormap" path as resolved by self.repo.join()."""
                      authormap = self.repo.join("authormap")
                      return authormap
                  def setbranch(self, branch, pbranches):
                      """Switch to the repository of branch and pull missing parents.

                      Does nothing unless self.clonebranches is set.  Each entry of
                      pbranches is indexed as (revision, branch name); an empty branch
                      name is treated as 'default'.  When branch differs from the
                      previously selected one, the repository under
                      os.path.join(self.path, branch) is opened (created if opening
                      fails).  Parent revisions unknown to that repository are then
                      pulled from the repository of their own branch.
                      """
                      if not self.clonebranches:
                          return
                      switching = (branch != self.lastbranch)
                      self.lastbranch = branch
                      branch = branch or 'default'
                      pbranches = [(b[0], b[1] or 'default') for b in pbranches]
                      branchpath = os.path.join(self.path, branch)
                      if switching:
                          # drop the locks of the old repository before swapping it out
                          self.after()
                          try:
                              self.repo = hg.repository(self.ui, branchpath)
                          except Exception:
                              self.repo = hg.repository(self.ui, branchpath, create=True)
                          self.before()
                      # pbranches may bring revisions from other branches (merge parents)
                      # Make sure we have them, or pull them.
                      missings = {}
                      for b in pbranches:
                          try:
                              self.repo.lookup(b[0])
                          except Exception:
                              missings.setdefault(b[1], []).append(b[0])
                      if missings:
                          self.after()
                          for pbranch, heads in sorted(missings.iteritems()):
                              pbranchpath = os.path.join(self.path, pbranch)
                              prepo = hg.peer(self.ui, {}, pbranchpath)
                              self.ui.note(_('pulling from %s into %s\n') % (pbranch, branch))
                              self.repo.pull(prepo, [prepo.lookup(h) for h in heads])
                          self.before()
                  def _rewritetags(self, source, revmap, data):
                      """Return data with each leading revision id mapped through revmap.

                      Every "<rev> <rest>" line has <rev> resolved with
                      source.lookuprev() and translated via revmap.  Lines without a
                      space, or whose revision has no truthy mapping, are dropped.
                      """
                      out = cStringIO.StringIO()
                      for entry in data.splitlines():
                          fields = entry.split(' ', 1)
                          if len(fields) == 2:
                              mapped = revmap.get(source.lookuprev(fields[0]))
                              if mapped:
                                  out.write('%s %s\n' % (mapped, fields[1]))
                      return out.getvalue()
-                 def putcommit(self, files, copies, parents, commit, source, revmap):
+                 def putcommit(self, files, copies, parents, commit, source, revmap, full):
                      # Commit one changeset to self.repo through context.memctx and
                      # return the hex id of the resulting tip, or the single parent when
                      # filemap mode rolls the commit back as empty.
                      # files is converted with dict(); copies is queried with .get(f).
                      files = dict(files)
                      def getfilectx(repo, memctx, f):
                          # Returns None when f has no entry in files or the source has no
                          # data for it.
-                         v = files[f]
+                         try:
+                             v = files[f]
+                         except KeyError:
+                             return None
                          data, mode = source.getfile(f, v)
                          if data is None:
                              return None
                          if f == '.hgtags':
                              data = self._rewritetags(source, revmap, data)
                          return context.memfilectx(self.repo, f, data, 'l' in mode,
                                                    'x' in mode, copies.get(f))
                      # drop duplicate parents while keeping their order
                      pl = []
                      for p in parents:
                          if p not in pl:
                              pl.append(p)
                      parents = pl
                      nparents = len(parents)
                      # remember the parent manifest node for the empty-revision check below
                      if self.filemapmode and nparents == 1:
                          m1node = self.repo.changelog.read(bin(parents[0]))[0]
                          parent = parents[0]
                      # the check runs twice so that an empty list is padded to two nullids
                      if len(parents) < 2:
                          parents.append(nullid)
                      if len(parents) < 2:
                          parents.append(nullid)
                      p2 = parents.pop(0)
                      # rewrite hash-like words of the description that resolve to a
                      # converted revision, keeping the length of the original word
                      text = commit.desc
                      sha1s = re.findall(sha1re, text)
                      for sha1 in sha1s:
                          oldrev = source.lookuprev(sha1)
                          newrev = revmap.get(oldrev)
                          if newrev is not None:
                              text = text.replace(sha1, newrev[:len(sha1)])
                      extra = commit.extra.copy()
                      # remap revision references stored in extra through revmap
                      for label in ('source', 'transplant_source', 'rebase_source'):
                          node = extra.get(label)
                          if node is None:
                              continue
                          # Only transplant stores its reference in binary
                          if label == 'transplant_source':
                              node = hex(node)
                          newrev = revmap.get(node)
                          if newrev is not None:
                              if label == 'transplant_source':
                                  newrev = bin(newrev)
                              extra[label] = newrev
                      if self.branchnames and commit.branch:
                          extra['branch'] = commit.branch
                      if commit.rev:
                          extra['convert_revision'] = commit.rev
                      # one commit per remaining parent: the previous result becomes p1 and
                      # follow-up commits use the octopus fixup message
                      while parents:
                          p1 = p2
                          p2 = parents.pop(0)
-                         ctx = context.memctx(self.repo, (p1, p2), text, files.keys(),
+                         fileset = set(files)
+                         if full:
+                             fileset.update(self.repo[p1], self.repo[p2])
+                         ctx = context.memctx(self.repo, (p1, p2), text, fileset,
                                               getfilectx, commit.author, commit.date, extra)
                          self.repo.commitctx(ctx)
                          text = "(octopus merge fixup)\n"
                          p2 = hex(self.repo.changelog.tip())
                      # in filemap mode, undo a commit whose manifest equals its parent's,
                      # unless the source commit carries 'close' in its extra
                      if self.filemapmode and nparents == 1:
                          man = self.repo.manifest
                          mnode = self.repo.changelog.read(bin(p2))[0]
                          closed = 'close' in commit.extra
                          if not closed and not man.cmp(m1node, man.revision(mnode)):
                              self.ui.status(_("filtering out empty revision\n"))
                              self.repo.rollback(force=True)
                              return parent
                      return p2
                  def puttags(self, tags):
                      """Commit a regenerated .hgtags on top of self.tagsbranch.

                      tags maps tag names to the value written in front of each name.
                      Returns (None, None) when the generated lines equal the union of
                      .hgtags lines found on all branch heads, or when no tag name is
                      new; otherwise returns the hex ids of the new tip and of the
                      parent the commit was made on.
                      """
                      try:
                          tagparent = self.repo[self.tagsbranch].node()
                      except error.RepoError:
                          tagparent = nullid
                      # gather every .hgtags line present on any branch head
                      seen = set()
                      for branch, heads in self.repo.branchmap().iteritems():
                          for head in heads:
                              headctx = self.repo[head]
                              if '.hgtags' in headctx:
                                  seen.update(headctx['.hgtags'].data().splitlines(True))
                      oldlines = sorted(seen)
                      newlines = ["%s %s\n" % (tags[tag], tag) for tag in tags]
                      newlines.sort()
                      # if the old and new tags match, then there is nothing to update
                      if newlines == oldlines:
                          return None, None
                      oldtags = set()
                      for line in oldlines:
                          parts = line.strip().split(' ', 1)
                          if len(parts) == 2:
                              oldtags.add(parts[1])
                      newtags = set()
                      for line in newlines:
                          parts = line.strip().split(' ', 1)
                          if len(parts) == 2 and parts[1] not in oldtags:
                              newtags.add(parts[1].strip())
                      if not newtags:
                          return None, None
                      data = "".join(newlines)
                      def getfilectx(repo, memctx, f):
                          return context.memfilectx(repo, f, data, False, False, None)
                      self.ui.status(_("updating tags\n"))
                      now = int(time.mktime(time.gmtime()))
                      date = "%s 0" % now
                      extra = {'branch': self.tagsbranch}
                      ctx = context.memctx(self.repo, (tagparent, None), "update tags",
                                           [".hgtags"], getfilectx, "convert-repo", date,
                                           extra)
                      self.repo.commitctx(ctx)
                      newtip = self.repo.changelog.tip()
                      return hex(newtip), hex(tagparent)
                  def setfilemapmode(self, active):
                      """Store active as self.filemapmode, the flag putcommit() consults."""
                      self.filemapmode = active
                  def putbookmarks(self, updatedbookmark):
                      """Store the given bookmarks in the repository and write them out.

                      updatedbookmark maps bookmark names to hex node ids; each id is
                      converted with bin() before being stored.  An empty mapping is a
                      no-op.
                      """
                      if len(updatedbookmark) == 0:
                          return
                      self.ui.status(_("updating bookmarks\n"))
                      marks = self.repo._bookmarks
                      for name in updatedbookmark:
                          hexnode = updatedbookmark[name]
                          marks[name] = bin(hexnode)
                      marks.write()
                  def hascommitfrommap(self, rev):
                      """Return True if rev is in the repository, else self.clonebranches."""
                      if rev in self.repo:
                          return True
                      # the exact semantics of clonebranches is unclear so we can't say no
                      return self.clonebranches
                  def hascommitforsplicemap(self, rev):
                      """Return whether rev is in the repository.

                      Raises util.Abort when rev is absent and self.clonebranches is set.
                      """
                      present = rev in self.repo
                      if not present and self.clonebranches:
                          raise util.Abort(_('revision %s not found in destination '
                                             'repository (lookups with clonebranches=true '
                                             'are not implemented)') % rev)
                      return present
              class mercurial_source(converter_source):
                  def __init__(self, ui, path, rev=None):
                      """Open path as a local Mercurial source repository.

                      Reads convert.hg.ignoreerrors, convert.hg.saverev,
                      convert.hg.startrev and convert.hg.revs, and derives self.keep (a
                      node predicate) and self._heads from them.  Raises NoRepo when
                      path is not a local repository, and util.Abort for an unknown
                      hg.startrev or when hg.revs is combined with hg.startrev or rev.
                      """
                      converter_source.__init__(self, ui, path, rev)
                      self.ignoreerrors = ui.configbool('convert', 'hg.ignoreerrors', False)
                      self.ignored = set()
                      self.saverev = ui.configbool('convert', 'hg.saverev', False)
                      try:
                          self.repo = hg.repository(self.ui, path)
                          # try to provoke an exception if this isn't really a hg
                          # repo, but some other bogus compatible-looking url
                          if not self.repo.local():
                              raise error.RepoError
                      except error.RepoError:
                          ui.traceback()
                          raise NoRepo(_("%s is not a local Mercurial repository") % path)
                      self.lastrev = None
                      self.lastctx = None
                      self._changescache = None, None
                      self.convertfp = None
                      startnode = ui.config('convert', 'hg.startrev')
                      hgrevs = ui.config('convert', 'hg.revs')
                      if hgrevs is not None:
                          # an explicit revision set excludes the other two selectors
                          if rev or startnode is not None:
                              raise util.Abort(_('hg.revs cannot be combined with '
                                                 'hg.startrev or --rev'))
                          selected = set()
                          parentnodes = set()
                          for r in scmutil.revrange(self.repo, [hgrevs]):
                              ctx = self.repo[r]
                              selected.add(ctx.node())
                              for p in ctx.parents():
                                  parentnodes.add(p.node())
                          self.keep = selected.__contains__
                          # heads are the selected nodes that are nobody's parent
                          self._heads = selected - parentnodes
                          return
                      if startnode is None:
                          self.keep = util.always
                      else:
                          # Restrict converted revisions to startrev descendants
                          try:
                              startnode = self.repo.lookup(startnode)
                          except error.RepoError:
                              raise util.Abort(_('%s is not a valid start revision')
                                               % startnode)
                          cl = self.repo.changelog
                          startrev = cl.rev(startnode)
                          wanted = set([startnode])
                          for r in cl.descendants([startrev]):
                              wanted.add(cl.node(r))
                          self.keep = wanted.__contains__
                      if rev:
                          self._heads = [self.repo[rev].node()]
                      else:
                          self._heads = self.repo.heads()
                  def changectx(self, rev):
                      """Return self.repo[rev], reusing the context of the previous call
                      when rev is unchanged."""
                      if self.lastrev == rev:
                          return self.lastctx
                      ctx = self.repo[rev]
                      self.lastctx = ctx
                      self.lastrev = rev
                      return ctx
                  def parents(self, ctx):
                      """Return the truthy parents of ctx whose node passes self.keep."""
                      kept = []
                      for p in ctx.parents():
                          if p and self.keep(p.node()):
                              kept.append(p)
                      return kept
                  def getheads(self):
                      """Return the hex form of every node in self._heads that passes
                      self.keep."""
                      heads = []
                      for h in self._heads:
                          if self.keep(h):
                              heads.append(hex(h))
                      return heads
                  def getfile(self, name, rev):
                      """Return (data, flags) of file name at rev.

                      Returns (None, None) when the lookup raises error.LookupError.
                      """
                      try:
                          fctx = self.changectx(rev)[name]
                          result = fctx.data(), fctx.flags()
                      except error.LookupError:
                          result = None, None
                      return result
-                 def getchanges(self, rev):
+                 def getchanges(self, rev, full):
                      # Return (changes, copies) for rev: changes is a sorted list of
                      # (filename, rev) pairs that skips self.ignored, copies is the
                      # result of getcopies().
                      ctx = self.changectx(rev)
                      parents = self.parents(ctx)
                      # with full set, or for a revision without kept parents, every file
                      # of the manifest is listed
-                     if not parents:
+                     if full or not parents:
                          files = copyfiles = ctx.manifest()
-                     else:
+                     if parents:
                          # reuse the status cached by getchangedfiles() for this rev if any
                          if self._changescache[0] == rev:
                              m, a, r = self._changescache[1]
                          else:
                              m, a, r = self.repo.status(parents[0].node(), ctx.node())[:3]
-                         files = m + a + r
+                         if not full:
+                             files = m + a + r
                          # copies are only looked up for modified and added files
                          copyfiles = m + a
                      # getcopies() is also run for roots and before filtering so missing
                      # revlogs are detected early
                      copies = self.getcopies(ctx, parents, copyfiles)
                      changes = [(f, rev) for f in files if f not in self.ignored]
                      changes.sort()
                      return changes, copies
                  def getcopies(self, ctx, parents, files):
                      """Return a dict mapping copied files to their copy source.

                      Only names from files are examined.  Ignored names and ignored
                      sources are skipped, as are sources absent from every context in
                      parents.  An error.LookupError is re-raised unless
                      self.ignoreerrors is set, in which case the name is added to
                      self.ignored and a warning is printed.
                      """
                      result = {}
                      skipped = self.ignored
                      for fname in files:
                          if fname in skipped:
                              continue
                          try:
                              origin, _copynode = ctx.filectx(fname).renamed()
                              if origin in skipped:
                                  continue
                              # Ignore copy sources not in parent revisions
                              for pctx in parents:
                                  if origin in pctx:
                                      result[fname] = origin
                                      break
                          except TypeError:
                              # presumably renamed() gave a non-pair for files that are
                              # not copies -- TODO confirm
                              pass
                          except error.LookupError, e:
                              if not self.ignoreerrors:
                                  raise
                              skipped.add(fname)
                              self.ui.warn(_('ignoring: %s\n') % e)
                      return result
                  def getcommit(self, rev):
                      """Build a commit object describing rev.

                      The rev field is only filled in when self.saverev is set; parents
                      are the hex ids of the kept parents.
                      """
                      ctx = self.changectx(rev)
                      crev = None
                      if self.saverev:
                          crev = rev
                      parenthexes = []
                      for p in self.parents(ctx):
                          parenthexes.append(p.hex())
                      stamp = util.datestr(ctx.date(), '%Y-%m-%d %H:%M:%S %1%2')
                      return commit(author=ctx.user(), date=stamp,
                                    desc=ctx.description(), rev=crev,
                                    parents=parenthexes, branch=ctx.branch(),
                                    extra=ctx.extra(), sortkey=ctx.rev())
                  def gettags(self):
                      """Return {tag name: hex node} for global tags whose node passes
                      self.keep."""
                      # This will get written to .hgtags, filter non global tags out.
                      result = {}
                      for entry in self.repo.tagslist():
                          if self.repo.tagtype(entry[0]) != 'global':
                              continue
                          name, node = entry
                          if self.keep(node):
                              result[name] = hex(node)
                      return result
                  def getchangedfiles(self, rev, i):
                      """Return the files of rev that differ from its i-th kept parent.

                      Without kept parents and with i None, all manifest files are
                      reported.  Names in self.ignored are filtered out.  When the
                      comparison is against parent 0 the three filtered lists are
                      stored in self._changescache together with rev.
                      """
                      ctx = self.changectx(rev)
                      parents = self.parents(ctx)
                      if parents or i is not None:
                          i = i or 0
                          raw = self.repo.status(parents[i].node(), ctx.node())[:3]
                      else:
                          i = 0
                          raw = [], ctx.manifest().keys(), []
                      changes = []
                      for group in raw:
                          changes.append([f for f in group if f not in self.ignored])
                      if i == 0:
                          self._changescache = (rev, changes)
                      return changes[0] + changes[1] + changes[2]
                  def converted(self, rev, destrev):
                      """Append a "destrev rev" line to the source repository's shamap
                      file, opening it in append mode on first use, and flush it."""
                      fp = self.convertfp
                      if fp is None:
                          fp = open(self.repo.join('shamap'), 'a')
                          self.convertfp = fp
                      fp.write('%s %s\n' % (destrev, rev))
                      fp.flush()
                  def before(self):
                      """Emit a debug message; nothing else is done for this source."""
                      note = 'run hg source pre-conversion action\n'
                      self.ui.debug(note)
                  def after(self):
                      """Emit a debug message; nothing else is done for this source."""
                      note = 'run hg source post-conversion action\n'
                      self.ui.debug(note)
                  def hasnativeorder(self):
                      """Return True unconditionally for the Mercurial source."""
                      answer = True
                      return answer
                  def hasnativeclose(self):
                      """Return True unconditionally for the Mercurial source."""
                      answer = True
                      return answer
                  def lookuprev(self, rev):
                      """Return the hex node that rev resolves to in the source
                      repository, or None when the lookup raises error.RepoError."""
                      try:
                          node = self.repo.lookup(rev)
                          return hex(node)
                      except error.RepoError:
                          return None
                  def getbookmarks(self):
                      """Return bookmarks.listbookmarks() for the source repository."""
                      marks = bookmarks.listbookmarks(self.repo)
                      return marks
                  def checkrevformat(self, revstr, mapname='splicemap'):
                      """Check revstr with self.checkhexformat().

                      Mercurial revision strings are expected to be 40 hex characters;
                      mapname is passed through to the checker.
                      """
                      self.checkhexformat(revstr, mapname)

hgext/convert/monotone.py

0 +3 -1

              # monotone.py - monotone support for the convert extension
              #
              #  Copyright 2008, 2009 Mikkel Fahnoe Jorgensen <mikkel@dvide.com> and
              #  others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import os, re
              from mercurial import util
              from common import NoRepo, commit, converter_source, checktool
              from common import commandline
              from mercurial.i18n import _
              class monotone_source(converter_source, commandline):
                  def __init__(self, ui, path=None, rev=None):
                      """Set up a monotone source for path.

                      path is accepted when it contains a '_MTN' entry or when it is a
                      file whose first 16 bytes are the SQLite 3 header; otherwise
                      NoRepo is raised.  The regular expressions used to parse mtn
                      output are compiled here and the manifest cache is reset.
                      """
                      converter_source.__init__(self, ui, path, rev)
                      commandline.__init__(self, ui, 'mtn')
                      self.ui = ui
                      self.path = path
                      self.automatestdio = False
                      self.rev = rev
                      norepo = NoRepo(_("%s does not look like a monotone repository")
                                      % path)
                      if not os.path.exists(os.path.join(path, '_MTN')):
                          # Could be a monotone repository (SQLite db file)
                          try:
                              f = file(path, 'rb')
                              try:
                                  header = f.read(16)
                              finally:
                                  # release the handle even when the read itself fails
                                  f.close()
                          except IOError:
                              header = ''
                          if header != 'SQLite format 3\x00':
                              raise norepo
                      # regular expressions for parsing monotone output
                      space    = r'\s*'
                      name     = r'\s+"((?:\\"|[^"])*)"\s*'
                      value    = name
                      revision = r'\s+\[(\w+)\]\s*'
                      lines    = r'(?:.|\n)+'
                      self.dir_re      = re.compile(space + "dir" + name)
                      self.file_re     = re.compile(space + "file" + name +
                                                    "content" + revision)
                      self.add_file_re = re.compile(space + "add_file" + name +
                                                    "content" + revision)
                      self.patch_re    = re.compile(space + "patch" + name +
                                                    "from" + revision + "to" + revision)
                      self.rename_re   = re.compile(space + "rename" + name + "to" + name)
                      self.delete_re   = re.compile(space + "delete" + name)
                      self.tag_re      = re.compile(space + "tag" + name + "revision" +
                                                    revision)
                      self.cert_re     = re.compile(lines + space + "name" + name +
                                                    "value" + value)
                      attr = space + "file" + lines + space + "attr" + space
                      self.attr_execute_re = re.compile(attr  + '"mtn:execute"' +
                                                        space + '"true"')
                      # cached data
                      self.manifest_rev = None
                      self.manifest = None
                      self.files = None
                      self.dirs  = None
                      # abort=False: the tool check is not allowed to abort here
                      checktool('mtn', abort=False)
                  def mtnrun(self, *args, **kwargs):
                      """Run an mtn automate command, through the stdio channel when
                      self.automatestdio is set and as a single invocation otherwise."""
                      runner = self.mtnrunsingle
                      if self.automatestdio:
                          runner = self.mtnrunstdio
                      return runner(*args, **kwargs)
                  def mtnrunsingle(self, *args, **kwargs):
                      """Run 'automate' with the given arguments through self.run0(),
                      forcing the 'd' keyword to self.path."""
                      kwargs.update(d=self.path)
                      return self.run0('automate', *args, **kwargs)
                  def mtnrunstdio(self, *args, **kwargs):
                      """Send one command over the automate stdio channel and return its
                      output.

                      Keyword arguments are encoded as an 'o' ... 'e' option block and
                      positional arguments as an 'l' ... 'e' block; every item is
                      written as "<length>:<text>".  The encoded command is written to
                      self.mtnwritefp and the reply is read with
                      mtnstdioreadcommandoutput().
                      """
                      # Prepare the command in automate stdio format
                      command = []
                      for k, v in kwargs.iteritems():
                          command.append("%s:%s" % (len(k), k))
                          # a falsy value encodes an option that has no argument
                          if v:
                              command.append("%s:%s" % (len(v), v))
                      if command:
                          command.insert(0, 'o')
                          command.append('e')
                      command.append('l')
                      for arg in args:
                          # append the item as a whole; "+=" would extend the list
                          # one character at a time
                          command.append("%s:%s" % (len(arg), arg))
                      command.append('e')
                      command = ''.join(command)
                      self.ui.debug("mtn: sending '%s'\n" % command)
                      self.mtnwritefp.write(command)
                      self.mtnwritefp.flush()
                      return self.mtnstdioreadcommandoutput(command)
                  def mtnstdioreadpacket(self):
                      """Read one "<commandnbr>:<stream>:<size>:<payload>" packet from
                      self.mtnreadfp.

                      Returns (commandnbr, stream, length, payload) where commandnbr is
                      the raw text before the first colon, stream is a single
                      character and length is the parsed payload size.  Raises
                      util.Abort when the input ends early, a divider is missing, the
                      stream type is unknown, the size is not a number or the payload
                      is shorter than announced.
                      """
                      read = None
                      commandnbr = ''
                      while read != ':':
                          read = self.mtnreadfp.read(1)
                          if not read:
                              raise util.Abort(_('bad mtn packet - no end of commandnbr'))
                          commandnbr += read
                      # drop the trailing ':' collected by the loop
                      commandnbr = commandnbr[:-1]
                      stream = self.mtnreadfp.read(1)
                      if stream not in 'mewptl':
                          raise util.Abort(_('bad mtn packet - bad stream type %s') % stream)
                      read = self.mtnreadfp.read(1)
                      if read != ':':
                          raise util.Abort(_('bad mtn packet - no divider before size'))
                      read = None
                      lengthstr = ''
                      while read != ':':
                          read = self.mtnreadfp.read(1)
                          if not read:
                              raise util.Abort(_('bad mtn packet - no end of packet size'))
                          lengthstr += read
                      try:
                          length = long(lengthstr[:-1])
                      except (TypeError, ValueError):
                          # long() reports a malformed literal with ValueError
                          raise util.Abort(_('bad mtn packet - bad packet size %s')
                              % lengthstr)
                      read = self.mtnreadfp.read(length)
                      if len(read) != length:
                          raise util.Abort(_("bad mtn packet - unable to read full packet "
                              "read %s of %s") % (len(read), length))
                      return (commandnbr, stream, length, read)
                  def mtnstdioreadcommandoutput(self, command):
                      """Collect the main-stream output of command from the stdio channel.

                      Packets are read until an 'l' packet arrives; its payload must be
                      '0' or util.Abort is raised.  'e'/'w' packets are shown as
                      warnings, 'p' packets are sent to the debug output and 'm'
                      packets are concatenated into the return value.
                      """
                      chunks = []
                      while True:
                          commandnbr, stream, length, output = self.mtnstdioreadpacket()
                          self.ui.debug('mtn: read packet %s:%s:%s\n' %
                              (commandnbr, stream, length))
                          if stream == 'l':
                              # End of command
                              if output == '0':
                                  break
                              raise util.Abort(_("mtn command '%s' returned %s") %
                                  (command, output))
                          if stream in 'ew':
                              # Error, warning output
                              self.ui.warn(_('%s error:\n') % self.command)
                              self.ui.warn(output)
                              continue
                          if stream == 'p':
                              # Progress messages
                              self.ui.debug('mtn: ' + output)
                              continue
                          if stream == 'm':
                              # Main stream - command output
                              chunks.append(output)
                      return ''.join(chunks)
                  def mtnloadmanifest(self, rev):
                      """Load the manifest of rev into self.files and self.dirs.

                      Does nothing when rev is already the cached manifest revision.
                      self.files maps file names to (content id, attr) where attr is
                      "x" for entries matching the execute-attribute pattern and ""
                      otherwise; self.dirs maps directory names to True.
                      """
                      if self.manifest_rev == rev:
                          return
                      self.manifest = self.mtnrun("get_manifest_of", rev).split("\n\n")
                      self.manifest_rev = rev
                      files = self.files = {}
                      dirs = self.dirs = {}
                      for entry in self.manifest:
                          fmatch = self.file_re.match(entry)
                          if fmatch:
                              fname = fmatch.group(1)
                              node = fmatch.group(2)
                              flags = ""
                              if self.attr_execute_re.match(entry):
                                  flags += "x"
                              files[fname] = (node, flags)
                          dmatch = self.dir_re.match(entry)
                          if dmatch:
                              dirs[dmatch.group(1)] = True
                  def mtnisfile(self, name, rev):
                      """Return whether name is a file in the manifest of rev."""
                      self.mtnloadmanifest(rev)
                      # a non-file could be a directory or a deleted or renamed file
                      known = name in self.files
                      return known
                  def mtnisdir(self, name, rev):
                      """Return True when name is a directory in the manifest of rev."""
                      self.mtnloadmanifest(rev)
                      known = self.dirs
                      return name in known
                  def mtngetcerts(self, rev):
                      """Return the certs attached to rev as a name -> value dictionary.

                      "author", "date", "changelog" and "branch" are always present and
                      default to "<missing>". Quoting inside the values is undone, and
                      the date loses any subsecond part and gains a " UTC" suffix.
                      """
                      certs = dict.fromkeys(("author", "date", "changelog", "branch"),
                                            "<missing>")
                      # The stanza separator depends on the monotone version:
                      # mtn < 0.45:
                      #   key "test@selenic.com"
                      # mtn >= 0.45:
                      #   key [ff58a7ffb771907c4ff68995eada1c4da068d328]
                      stanzas = re.split('\n\n      key ["\[]', self.mtnrun("certs", rev))
                      for stanza in stanzas:
                          found = self.cert_re.match(stanza)
                          if not found:
                              continue
                          name, value = found.groups()
                          # unquote '"' first, then the backslash itself
                          certs[name] = value.replace(r'\"', '"').replace(r'\\', '\\')
                      # Monotone may have subsecond dates: 2005-02-05T09:39:12.364306
                      # and all times are stored in UTC
                      whole = certs["date"].split('.')[0]
                      certs["date"] = whole + " UTC"
                      return certs
                  # implement the converter_source interface:
                  def getheads(self):
                      """Return the revisions to convert from.

                      That is the requested revision when one was given, otherwise
                      every line printed by the "leaves" command.
                      """
                      if self.rev:
                          return [self.rev]
                      leaves = self.mtnrun("leaves")
                      return leaves.splitlines()
-                 def getchanges(self, rev):
+                 def getchanges(self, rev, full):
+                     if full:
+                         raise util.Abort(_("convert from monotone do not support --full"))
                      # Build the change list of rev from the "get_revision" output.
                      # Returns (files, copies): files is a list of (name, rev) pairs
                      # and copies maps a destination name to the name it came from.
                      revision = self.mtnrun("get_revision", rev).split("\n\n")
                      files = {}
                      # names that the directory-rename pass below must leave alone
                      ignoremove = {}
                      # (fromdir, todir) pairs, processed after all stanzas are read
                      renameddirs = []
                      copies = {}
                      for e in revision:
                          # each stanza is tried against every pattern in turn
                          m = self.add_file_re.match(e)
                          if m:
                              files[m.group(1)] = rev
                              ignoremove[m.group(1)] = rev
                          m = self.patch_re.match(e)
                          if m:
                              files[m.group(1)] = rev
                          # Delete/rename is handled later when the convert engine
                          # discovers an IOError exception from getfile,
                          # but only if we add the "from" file to the list of changes.
                          m = self.delete_re.match(e)
                          if m:
                              files[m.group(1)] = rev
                          m = self.rename_re.match(e)
                          if m:
                              toname = m.group(2)
                              fromname = m.group(1)
                              # the manifest of rev tells a file rename from a
                              # directory rename
                              if self.mtnisfile(toname, rev):
                                  ignoremove[toname] = 1
                                  copies[toname] = fromname
                                  files[toname] = rev
                                  files[fromname] = rev
                              elif self.mtnisdir(toname, rev):
                                  renameddirs.append((fromname, toname))
                      # Directory renames can be handled only once we have recorded
                      # all new files
                      for fromdir, todir in renameddirs:
                          renamed = {}
                          # self.files is the manifest loaded by the mtnisfile/mtnisdir
                          # calls above
                          for tofile in self.files:
                              if tofile in ignoremove:
                                  continue
                              if tofile.startswith(todir + '/'):
                                  # same relative path, rooted at the old directory
                                  renamed[tofile] = fromdir + tofile[len(todir):]
                                  # Avoid chained moves like:
                                  # d1(/a) => d3/d1(/a)
                                  # d2 => d3
                                  ignoremove[tofile] = 1
                          for tofile, fromfile in renamed.items():
                              self.ui.debug (_("copying file in renamed directory "
                                               "from '%s' to '%s'")
                                             % (fromfile, tofile), '\n')
                              files[tofile] = rev
                              copies[tofile] = fromfile
                          # also list the old names as changed in this revision
                          for fromfile in renamed.values():
                              files[fromfile] = rev
                      return (files.items(), copies)
                  def getfile(self, name, rev):
                      """Return (data, attr) for file name at revision rev.

                      (None, None) is returned when name is not a file in the manifest
                      of rev or when "get_file_of" fails for any reason. attr is the
                      flag string recorded in the manifest, "" when there is none.
                      """
                      missing = (None, None)
                      if not self.mtnisfile(name, rev):
                          return missing
                      try:
                          contents = self.mtnrun("get_file_of", name, r=rev)
                      except Exception:
                          return missing
                      self.mtnloadmanifest(rev)
                      _node, flags = self.files.get(name, (None, ""))
                      return contents, flags
                  def getcommit(self, rev):
                      """Build the commit object describing revision rev.

                      Author, date, description and branch come from the certs of the
                      revision and the parents from the "parents" command. The commit
                      is flagged as closing its branch when a "suspend" cert names the
                      same branch as the "branch" cert.
                      """
                      certs = self.mtngetcerts(rev)
                      extra = {}
                      if certs.get('suspend') == certs["branch"]:
                          extra['close'] = '1'
                      author = certs["author"]
                      stamp = util.strdate(certs["date"], "%Y-%m-%dT%H:%M:%S")
                      date = util.datestr(stamp)
                      desc = certs["changelog"]
                      parents = self.mtnrun("parents", rev).splitlines()
                      return commit(author=author, date=date, desc=desc, rev=rev,
                                    parents=parents, branch=certs["branch"],
                                    extra=extra)
                  def gettags(self):
                      """Return a dictionary mapping each tag name to its revision.

                      The "tags" output is split on blank lines; stanzas that do not
                      match the tag pattern are ignored.
                      """
                      stanzas = self.mtnrun("tags").split("\n\n")
                      found = [self.tag_re.match(stanza) for stanza in stanzas]
                      return dict([(m.group(1), m.group(2)) for m in found if m])
                  def getchangedfiles(self, rev, i):
                      """Always raise NotImplementedError.

                      This function is only needed to support --filemap, which this
                      source does not support.
                      """
                      raise NotImplementedError
                  def before(self):
                      """Start the "automate stdio" process when mtn is new enough.

                      The automate interface version is queried first and util.Abort
                      is raised when it cannot be read as a number. From version 12.0
                      on, the long-running process is launched and its header block is
                      consumed up to the first empty line; util.Abort is raised when
                      the first header is not 'format-version: 2' or when the stream
                      ends early. Older versions only produce a debug message.
                      """
                      # Check if we have a new enough version to use automate stdio
                      try:
                          version = float(self.mtnrunsingle("interface_version"))
                      except Exception:
                          raise util.Abort(_("unable to determine mtn automate interface "
                              "version"))
                      if not version >= 12.0:
                          self.ui.debug("mtn automate version %s - not using automate stdio "
                              "(automate >= 12.0 - mtn >= 0.46 is needed)\n" % version)
                          return
                      self.automatestdio = True
                      self.ui.debug("mtn automate version %s - using automate stdio\n" %
                          version)
                      # launch the long-running automate stdio process
                      self.mtnwritefp, self.mtnreadfp = self._run2('automate', 'stdio',
                          '-d', self.path)
                      # read the headers
                      headers = self.mtnreadfp
                      line = headers.readline()
                      if line != 'format-version: 2\n':
                          raise util.Abort(_('mtn automate stdio header unexpected: %s')
                              % line)
                      while line != '\n':
                          line = headers.readline()
                          if not line:
                              raise util.Abort(_("failed to reach end of mtn automate "
                                  "stdio headers"))
                  def after(self):
                      """Close both pipes of the automate stdio process, if it is in use.

                      The write side is closed before the read side and each attribute
                      is reset to None once its pipe is closed.
                      """
                      if not self.automatestdio:
                          return
                      for attr in ('mtnwritefp', 'mtnreadfp'):
                          getattr(self, attr).close()
                          setattr(self, attr, None)

hgext/convert/p4.py

0 +3 -1

              # Perforce source for convert extension.
              #
              # Copyright 2009, Frank Kingswood <frank@kingswood-consulting.co.uk>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from mercurial import util
              from mercurial.i18n import _
              from common import commit, converter_source, checktool, NoRepo
              import marshal
              import re
              def loaditer(f):
                  """Yield the dictionary objects generated by p4.

                  Objects are unmarshalled from f one after the other. Iteration stops
                  at the first false object or when the end of the data is reached.
                  """
                  try:
                      record = marshal.load(f)
                      while record:
                          yield record
                          record = marshal.load(f)
                  except EOFError:
                      pass
              class p4_source(converter_source):
                  # Conversion source reading submitted changelists through the "p4"
                  # command line client. All changelists are collected up front by
                  # _parse(); the remaining methods mostly serve the cached results.
                  def __init__(self, ui, path, rev=None):
                      super(p4_source, self).__init__(ui, path, rev=rev)
                      # a path containing "/" is only accepted in depot syntax ("//...");
                      # a path without "/" is treated as a client name by _parse()
                      if "/" in path and not path.startswith('//'):
                          raise NoRepo(_('%s does not look like a P4 repository') % path)
                      checktool('p4', abort=False)
                      # change numbers seen so far; turned into a sorted list by _parse()
                      self.p4changes = {}
                      # replaced by a one-element list in _parse() once a change is read
                      self.heads = {}
                      # change number -> commit object
                      self.changeset = {}
                      # change number -> list of (local name, file revision)
                      self.files = {}
                      self.tags = {}
                      self.lastbranch = {}
                      self.parent = {}
                      self.encoding = "latin_1"
                      self.depotname = {}           # mapping from local name to depot name
                      # splits a p4 file type into base type modifiers (group 1), the
                      # base type (group 2) and "+" modifiers (group 3)
                      self.re_type = re.compile(
                          "([a-z]+)?(text|binary|symlink|apple|resource|unicode|utf\d+)"
                          "(\+\w+)?$")
                      # expanded keywords, e.g. "$Id: ... $"; getfile() collapses them
                      self.re_keywords = re.compile(
                          r"\$(Id|Header|Date|DateTime|Change|File|Revision|Author)"
                          r":[^$\n]*\$")
                      # reduced keyword set used for files carrying the "ko" flag
                      self.re_keywords_old = re.compile("\$(Id|Header):[^$\n]*\$")
                      self._parse(ui, path)
                  def _parse_view(self, path):
                      "Read changes affecting the path"
                      cmd = 'p4 -G changes -s submitted %s' % util.shellquote(path)
                      stdout = util.popen(cmd, mode='rb')
                      for d in loaditer(stdout):
                          c = d.get("change", None)
                          if c:
                              self.p4changes[c] = True
                  def _parse(self, ui, path):
                      "Prepare list of P4 filenames and revisions to import"
                      ui.status(_('reading p4 views\n'))
                      # read client spec or view
                      # views maps a depot path prefix to the local path prefix that
                      # replaces it
                      if "/" in path:
                          self._parse_view(path)
                          if path.startswith("//") and path.endswith("/..."):
                              views = {path[:-3]:""}
                          else:
                              views = {"//": ""}
                      else:
                          cmd = 'p4 -G client -o %s' % util.shellquote(path)
                          clientspec = marshal.load(util.popen(cmd, mode='rb'))
                          views = {}
                          for client in clientspec:
                              # only the "View..." entries of the client spec are used;
                              # each value holds a depot side and a client side
                              if client.startswith("View"):
                                  sview, cview = clientspec[client].split()
                                  self._parse_view(sview)
                                  if sview.endswith("...") and cview.endswith("..."):
                                      sview = sview[:-3]
                                      cview = cview[:-3]
                                  # drop the leading "//" and the first path component
                                  # of the client side
                                  cview = cview[2:]
                                  cview = cview[cview.find("/") + 1:]
                                  views[sview] = cview
                      # list of changes that affect our source files
                      self.p4changes = self.p4changes.keys()
                      self.p4changes.sort(key=int)
                      # list with depot pathnames, longest first
                      vieworder = views.keys()
                      vieworder.sort(key=len, reverse=True)
                      # handle revision limiting
                      # keep changes from convert.p4.startrev (when set) up to self.rev
                      # (when set), both bounds inclusive
                      startrev = self.ui.config('convert', 'p4.startrev', default=0)
                      self.p4changes = [x for x in self.p4changes
                                        if ((not startrev or int(x) >= int(startrev)) and
                                            (not self.rev or int(x) <= int(self.rev)))]
                      # now read the full changelists to get the list of file revisions
                      ui.status(_('collecting p4 changelists\n'))
                      # history is linear: each change gets the previous one as parent
                      lastid = None
                      for change in self.p4changes:
                          cmd = "p4 -G describe -s %s" % change
                          stdout = util.popen(cmd, mode='rb')
                          d = marshal.load(stdout)
                          desc = self.recode(d.get("desc", ""))
                          shortdesc = desc.split("\n", 1)[0]
                          # repr() escapes the first line; [1:-1] strips its quotes
                          t = '%s %s' % (d["change"], repr(shortdesc)[1:-1])
                          ui.status(util.ellipsis(t, 80) + '\n')
                          if lastid:
                              parents = [lastid]
                          else:
                              parents = []
                          date = (int(d["time"]), 0)     # timezone not set
                          c = commit(author=self.recode(d["user"]),
                                     date=util.datestr(date, '%Y-%m-%d %H:%M:%S %1%2'),
                                     parents=parents, desc=desc, branch='',
                                     extra={"p4": change})
                          files = []
                          i = 0
                          # files are listed under numbered keys: depotFile0/rev0, ...
                          while ("depotFile%d" % i) in d and ("rev%d" % i) in d:
                              oldname = d["depotFile%d" % i]
                              filename = None
                              # the first matching view wins; vieworder tries the
                              # longest depot prefix first
                              for v in vieworder:
                                  if oldname.startswith(v):
                                      filename = views[v] + oldname[len(v):]
                                      break
                              # files that map to no view (or to an empty name) are
                              # left out
                              if filename:
                                  files.append((filename, d["rev%d" % i]))
                                  self.depotname[filename] = oldname
                              i += 1
                          self.changeset[change] = c
                          self.files[change] = files
                          lastid = change
                      if lastid:
                          self.heads = [lastid]
                  def getheads(self):
                      # the last change read by _parse(), if any
                      return self.heads
                  def getfile(self, name, rev):
                      # Fetch one file revision with "p4 print" and return
                      # (contents, mode), or (None, None) when it cannot be converted.
                      cmd = 'p4 -G print %s' \
                          % util.shellquote("%s#%s" % (self.depotname[name], rev))
                      stdout = util.popen(cmd, mode='rb')
                      mode = None
                      contents = ""
                      keywords = None
                      for d in loaditer(stdout):
                          code = d["code"]
                          data = d.get("data")
                          if code == "error":
                              raise IOError(d["generic"], data)
                          elif code == "stat":
                              if d.get("action") == "purge":
                                  return None, None
                              p4type = self.re_type.match(d["type"])
                              if p4type:
                                  mode = ""
                                  # modifiers may precede the base type or follow it
                                  # after a "+"
                                  flags = (p4type.group(1) or "") + (p4type.group(3) or "")
                                  if "x" in flags:
                                      mode = "x"
                                  if p4type.group(2) == "symlink":
                                      mode = "l"
                                  if "ko" in flags:
                                      keywords = self.re_keywords_old
                                  elif "k" in flags:
                                      keywords = self.re_keywords
                          elif code == "text" or code == "binary":
                              contents += data
                      # no stat record with a recognized type was seen
                      if mode is None:
                          return None, None
                      if keywords:
                          # collapse expanded keywords back to their "$Name$" form
                          contents = keywords.sub("$\\1$", contents)
                      # a symlink target is returned without its trailing newline
                      if mode == "l" and contents.endswith("\n"):
                          contents = contents[:-1]
                      return contents, mode
-                 def getchanges(self, rev):
+                 def getchanges(self, rev, full):
+                     if full:
+                         raise util.Abort(_("convert from p4 do not support --full"))
                      # the file list cached by _parse(); copies are never reported
                      return self.files[rev], {}
                  def getcommit(self, rev):
                      return self.changeset[rev]
                  def gettags(self):
                      return self.tags
                  def getchangedfiles(self, rev, i):
                      # sorted local names of the files touched by change rev
                      return sorted([x[0] for x in self.files[rev]])

hgext/convert/subversion.py

0 +10 -8

              # Subversion 1.4/1.5 Python API backend
              #
              # Copyright(C) 2007 Daniel Holth et al
              import os, re, sys, tempfile, urllib, urllib2
              import xml.dom.minidom
              import cPickle as pickle
              from mercurial import strutil, scmutil, util, encoding
              from mercurial.i18n import _
              propertycache = util.propertycache
              # Subversion stuff. Works best with very recent Python SVN bindings
              # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
              # these bindings.
              from cStringIO import StringIO
              from common import NoRepo, MissingTool, commit, encodeargs, decodeargs
              from common import commandline, converter_source, converter_sink, mapfile
              from common import makedatetimestamp
              try:
                  from svn.core import SubversionException, Pool
                  import svn
                  import svn.client
                  import svn.core
                  import svn.ra
                  import svn.delta
                  import transport
                  import warnings
                  warnings.filterwarnings('ignore',
                          module='svn.core',
                          category=DeprecationWarning)
              except ImportError:
                  svn = None
              class SvnPathNotFound(Exception):
                  """Exception type named after a path missing from the repository."""
              def revsplit(rev):
                  """Split a revision string into a (uuid, path, revnum) tuple.

                  The number after the last '@' is the revision, 0 when there is no
                  '@'. The uuid and path are only filled in when the part before the
                  first '/' starts with 'svn:'.

                  >>> revsplit('svn:a2147622-4a9f-4db4-a8d3-13562ff547b2'
                  ...          '/proj%20B/mytrunk/mytrunk@1')
                  ('a2147622-4a9f-4db4-a8d3-13562ff547b2', '/proj%20B/mytrunk/mytrunk', 1)
                  >>> revsplit('svn:8af66a51-67f5-4354-b62c-98d67cc7be1d@1')
                  ('', '', 1)
                  >>> revsplit('@7')
                  ('', '', 7)
                  >>> revsplit('7')
                  ('', '', 0)
                  >>> revsplit('bad')
                  ('', '', 0)
                  """
                  pieces = rev.rsplit('@', 1)
                  if len(pieces) > 1:
                      revnum = int(pieces[1])
                  else:
                      revnum = 0
                  pieces = pieces[0].split('/', 1)
                  if len(pieces) > 1 and pieces[0].startswith('svn:'):
                      return pieces[0][4:], '/' + pieces[1], revnum
                  return '', '', revnum
              def quote(s):
                  """Percent-encode s, leaving the characters svn considers safe alone."""
                  # As of svn 1.7, many svn calls expect "canonical" paths. In
                  # theory, we should call svn.core.*canonicalize() on all paths
                  # before passing them to the API.  Instead, we assume the base url
                  # is canonical and copy the behaviour of svn URL encoding function
                  # so we can extend it safely with new components. The "safe"
                  # characters were taken from the "svn_uri__char_validity" table in
                  # libsvn_subr/path.c.
                  safe = "!$&'()*+,-./:=@_~"
                  return urllib.quote(s, safe)
              def geturl(path):
                  """Return the canonical URL for path.

                  The svn client is asked first. When it raises SubversionException
                  and path is an existing directory, a file:// URL is built from the
                  absolute path instead. Any other value is only canonicalized.
                  """
                  try:
                      return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
                  except SubversionException:
                      # svn.client.url_from_path() fails with local repositories
                      pass
                  if os.path.isdir(path):
                      localpath = os.path.normpath(os.path.abspath(path))
                      if os.name == 'nt':
                          localpath = '/' + util.normpath(localpath)
                      # Module URL is later compared with the repository URL returned
                      # by svn API, which is UTF-8.
                      path = 'file://%s' % quote(encoding.tolocal(localpath))
                  return svn.core.svn_path_canonicalize(path)
              def optrev(number):
                  """Return an svn_opt_revision_t set to the revision number given."""
                  revision = svn.core.svn_opt_revision_t()
                  revision.kind = svn.core.svn_opt_revision_number
                  revision.value.number = number
                  return revision
              class changedpath(object):
                  """Plain copy of the copyfrom_path, copyfrom_rev and action of p."""
                  def __init__(self, p):
                      for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
                          setattr(self, field, getattr(p, field))
              def get_log_child(fp, url, paths, start, end, limit=0,
                                discover_changed_paths=True, strict_node_history=False):
                  protocol = -1
                  def receiver(orig_paths, revnum, author, date, message, pool):
                      paths = {}
                      if orig_paths is not None:
                          for k, v in orig_paths.iteritems():
                              paths[k] = changedpath(v)
                      pickle.dump((paths, revnum, author, date, message),
                                  fp, protocol)
                  try:
                      # Use an ra of our own so that our parent can consume
                      # our results without confusing the server.
                      t = transport.SvnRaTransport(url=url)
                      svn.ra.get_log(t.ra, paths, start, end, limit,
                                     discover_changed_paths,
                                     strict_node_history,
                                     receiver)
                  except IOError:
                      # Caller may interrupt the iteration
                      pickle.dump(None, fp, protocol)
                  except Exception, inst:
                      pickle.dump(str(inst), fp, protocol)
                  else:
                      pickle.dump(None, fp, protocol)
                  fp.close()
                  # With large history, cleanup process goes crazy and suddenly
                  # consumes *huge* amount of memory. The output file being closed,
                  # there is no need for clean termination.
                  os._exit(0)
              def debugsvnlog(ui, **opts):
                  """Fetch SVN log in a subprocess and channel them back to parent to
                  avoid memory collection issues.
                  """
                  if svn is None:
                      raise util.Abort(_('debugsvnlog could not load Subversion python '
                                         'bindings'))
                  # arguments arrive encoded on stdin, pickled results leave on stdout
                  for stream in (sys.stdin, sys.stdout):
                      util.setbinary(stream)
                  logargs = decodeargs(sys.stdin.read())
                  get_log_child(sys.stdout, *logargs)
              class logstream(object):
                  """Interruptible revision log iterator."""
                  def __init__(self, stdout):
                      self._stdout = stdout
                  def __iter__(self):
                      """Yield each five-item log entry unpickled from the stream.

                      A None object ends the iteration. Anything else that cannot be
                      unpacked into five items, and a stream that ends without the
                      None marker, raise util.Abort.
                      """
                      while True:
                          try:
                              record = pickle.load(self._stdout)
                          except EOFError:
                              raise util.Abort(_('Mercurial failed to run itself, check'
                                                 ' hg executable is in PATH'))
                          if record is None:
                              return
                          try:
                              # only the shape is checked; the record is passed on whole
                              _paths, _revnum, _author, _date, _message = record
                          except (TypeError, ValueError):
                              raise util.Abort(_("log stream exception '%s'") % record)
                          yield record
                  def close(self):
                      """Close the underlying stream; later calls do nothing."""
                      if not self._stdout:
                          return
                      self._stdout.close()
                      self._stdout = None
              class directlogstream(list):
                  """Direct revision log iterator.
                  This can be used for debugging and development but it will probably leak
                  memory and is not suitable for real conversions."""
                  def __init__(self, url, paths, start, end, limit=0,
                                discover_changed_paths=True, strict_node_history=False):
                      # every log entry is appended to this list as a
                      # (paths, revnum, author, date, message) tuple
                      def receiver(orig_paths, revnum, author, date, message, pool):
                          changed = {}
                          if orig_paths is not None:
                              changed = dict([(name, changedpath(entry))
                                              for name, entry in orig_paths.iteritems()])
                          self.append((changed, revnum, author, date, message))
                      # Use an ra of our own so that our parent can consume
                      # our results without confusing the server.
                      t = transport.SvnRaTransport(url=url)
                      svn.ra.get_log(t.ra, paths, start, end, limit,
                                     discover_changed_paths,
                                     strict_node_history,
                                     receiver)
                  def close(self):
                      """Do nothing; present so callers can close any log stream."""
              # Check to see if the given path is a local Subversion repo. Verify this by
              # looking for several svn-specific files and directories in the given
              # directory.
              def filecheck(ui, path, proto):
                  """Return True when path holds every entry of a local svn repository.

                  The entries looked for are 'locks', 'hooks', 'format' and 'db'; ui
                  and proto are not used.
                  """
                  for marker in ('locks', 'hooks', 'format', 'db'):
                      if os.path.exists(os.path.join(path, marker)):
                          continue
                      return False
                  return True
              # Check to see if a given path is the root of an svn repo over http. We verify
              # this by requesting a version-controlled URL we know can't exist and looking
              # for the svn-specific "not found" XML.
              def httpcheck(ui, path, proto):
                  try:
                      opener = urllib2.build_opener()
                      rsp = opener.open('%s://%s/!svn/ver/0/.svn' % (proto, path))
                      data = rsp.read()
                  except urllib2.HTTPError, inst:
                      if inst.code != 404:
                          # Except for 404 we cannot know for sure this is not an svn repo
                          ui.warn(_('svn: cannot probe remote repository, assume it could '
                                    'be a subversion repository. Use --source-type if you '
                                    'know better.\n'))
                          return True
                      data = inst.fp.read()
                  except Exception:
                      # Could be urllib2.URLError if the URL is invalid or anything else.
                      return False
                  return '<m:human-readable errcode="160013">' in data
              # probe function to use for each URL scheme that can be checked
              protomap = dict(http=httpcheck, https=httpcheck, file=filecheck)
              def issvnurl(ui, url):
                  """Return True when url or one of its parent paths passes the probe.

                  A value without '://' is treated as a local path. The probe is picked
                  from protomap by protocol and tried on the path, then on each parent
                  obtained by dropping the last '/' component. Protocols without a
                  probe always give False.
                  """
                  try:
                      proto, path = url.split('://', 1)
                      if proto == 'file':
                          # on Windows, turn an encoded drive ("/c%3a/...") into "/c:/..."
                          encodeddrive = (os.name == 'nt' and path[:1] == '/'
                                          and path[1:2].isalpha()
                                          and path[2:6].lower() == '%3a/')
                          if encodeddrive:
                              path = path[:2] + ':/' + path[6:]
                          path = urllib.url2pathname(path)
                  except ValueError:
                      proto = 'file'
                      path = os.path.abspath(url)
                  if proto == 'file':
                      path = util.pconvert(path)
                  probe = protomap.get(proto)
                  if probe is None:
                      return False
                  while '/' in path:
                      if probe(ui, path, proto):
                          return True
                      path = path.rsplit('/', 1)[0]
                  return False
              # SVN conversion code stolen from bzr-svn and tailor
              #
              # Subversion looks like a versioned filesystem; branch structures
              # are defined by convention and are not enforced by the tool. First,
              # we define the potential branches (modules) as "trunk" and "branches"
              # children directories. Revisions are then identified by their
              # module and revision number (and a repository identifier).
              #
              # The revision graph is really a tree (or a forest). By default, a
              # revision parent is the previous revision in the same module. If the
              # module directory is copied/moved from another module then the
              # revision is the module root and its parent the source revision in
              # the parent module. A revision has at most one parent.
              #
              class svn_source(converter_source):
                  def __init__(self, ui, url, rev=None):
                      """Set up a conversion source for the Subversion repository at url.

                      url may end in '@REV' to pin the latest revision considered; an
                      explicit rev argument takes precedence over that suffix.

                      Raises NoRepo when url does not look like a Subversion
                      repository or the transport cannot be opened, MissingTool when
                      the Python bindings are missing or older than 1.4, and
                      util.Abort when rev or the convert.svn.startrev setting is not
                      an integer, or when no revision is found in the module.
                      """
                      super(svn_source, self).__init__(ui, url, rev=rev)
                      # Accept known URL schemes, a local path containing a .svn
                      # directory, or anything issvnurl() recognizes.
                      if not (url.startswith('svn://') or url.startswith('svn+ssh://') or
                              (os.path.exists(url) and
                               os.path.exists(os.path.join(url, '.svn'))) or
                              issvnurl(ui, url)):
                          raise NoRepo(_("%s does not look like a Subversion repository")
                                       % url)
                      if svn is None:
                          raise MissingTool(_('could not load Subversion python bindings'))
                      try:
                          version = svn.core.SVN_VER_MAJOR, svn.core.SVN_VER_MINOR
                          if version < (1, 4):
                              raise MissingTool(_('Subversion python bindings %d.%d found, '
                                                  '1.4 or later required') % version)
                      except AttributeError:
                          # Bindings without the version constants are reported as
                          # too old.
                          raise MissingTool(_('Subversion python bindings are too old, 1.4 '
                                              'or later required'))
                      self.lastrevs = {}
                      latest = None
                      try:
                          # Support file://path@rev syntax. Useful e.g. to convert
                          # deleted branches.
                          at = url.rfind('@')
                          if at >= 0:
                              latest = int(url[at + 1:])
                              url = url[:at]
                      except ValueError:
                          # The text after the last '@' is not a number: leave url
                          # untouched.
                          pass
                      self.url = geturl(url)
                      self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
                      try:
                          self.transport = transport.SvnRaTransport(url=self.url)
                          self.ra = self.transport.ra
                          self.ctx = self.transport.client
                          self.baseurl = svn.ra.get_repos_root(self.ra)
                          # Module is either empty or a repository path starting with
                          # a slash and not ending with a slash.
                          self.module = urllib.unquote(self.url[len(self.baseurl):])
                          self.prevmodule = None
                          self.rootmodule = self.module
                          self.commits = {}
                          self.paths = {}
                          self.uuid = svn.ra.get_uuid(self.ra)
                      except SubversionException:
                          ui.traceback()
                          raise NoRepo(_("%s does not look like a Subversion repository")
                                       % self.url)
                      if rev:
                          # An explicit revision overrides any '@REV' suffix parsed
                          # from the url above.
                          try:
                              latest = int(rev)
                          except ValueError:
                              raise util.Abort(_('svn: revision %s is not an integer') % rev)
                      self.trunkname = self.ui.config('convert', 'svn.trunk',
                                                      'trunk').strip('/')
                      self.startrev = self.ui.config('convert', 'svn.startrev', default=0)
                      try:
                          self.startrev = int(self.startrev)
                          # Negative start revisions are clamped to 0.
                          if self.startrev < 0:
                              self.startrev = 0
                      except ValueError:
                          raise util.Abort(_('svn: start revision %s is not an integer')
                                           % self.startrev)
                      try:
                          self.head = self.latest(self.module, latest)
                      except SvnPathNotFound:
                          self.head = None
                      if not self.head:
                          raise util.Abort(_('no revision found in module %s')
                                           % self.module)
                      self.last_changed = self.revnum(self.head)
                      # (rev, (files, copies)) cache filled by getchangedfiles().
                      self._changescache = (None, None)
                      # self.wc is only set for a local path holding .svn/entries;
                      # converted() uses it to locate the hg-shamap file.
                      if os.path.exists(os.path.join(url, '.svn/entries')):
                          self.wc = url
                      else:
                          self.wc = None
                      self.convertfp = None
                  def setrevmap(self, revmap):
                      """Record, per module, the highest revision number found in revmap.

                      The result replaces self.lastrevs, a dict mapping each module
                      to the greatest revision number among the revmap keys.
                      """
                      highest = {}
                      for revid in revmap.iterkeys():
                          module, revnum = revsplit(revid)[1:]
                          # Keep the first value seen, then only larger ones.
                          if module not in highest or revnum > highest[module]:
                              highest[module] = revnum
                      self.lastrevs = highest
                  def exists(self, path, optrev):
                      """Return True if path, relative to self.url, can be listed at optrev.

                      A SubversionException raised by the listing is reported as
                      False instead of propagating.
                      """
                      try:
                          target = '%s/%s' % (self.url.rstrip('/'), quote(path))
                          svn.client.ls(target, optrev, False, self.ctx)
                      except SubversionException:
                          return False
                      else:
                          return True
                  def getheads(self):
                      """Return the list of head revision ids to convert.

                      The trunk, tags and branches locations come from the
                      convert.svn.* settings (falling back to the setting name
                      itself). The first head is the head of the module, which is
                      moved into trunk when a trunk is found; one more head is added
                      for every non-empty directory found under branches. As side
                      effects self.module, self.head, self.tags and self.heads are
                      updated.

                      Raises util.Abort when an explicitly configured location is
                      missing, when the module has no revision, or when
                      convert.svn.startrev is used with several heads or lies after
                      the head revision.
                      """
                      def isdir(path, revnum):
                          # True when path is a directory at revnum.
                          kind = self._checkpath(path, revnum)
                          return kind == svn.core.svn_node_dir
                      def getcfgpath(name, rev):
                          # Resolve the location of 'trunk', 'tags' or 'branches'.
                          # Returns the slash-stripped path if it exists at rev, else
                          # None. A setting made only of whitespace disables lookup.
                          cfgpath = self.ui.config('convert', 'svn.' + name)
                          if cfgpath is not None and cfgpath.strip() == '':
                              return None
                          path = (cfgpath or name).strip('/')
                          if not self.exists(path, rev):
                              if self.module.endswith(path) and name == 'trunk':
                                  # we are converting from inside this directory
                                  return None
                              if cfgpath:
                                  # Only an explicitly configured path is an error;
                                  # a missing default location is silently skipped.
                                  raise util.Abort(_('expected %s to be at %r, but not found')
                                               % (name, path))
                              return None
                          self.ui.note(_('found %s at %r\n') % (name, path))
                          return path
                      rev = optrev(self.last_changed)
                      oldmodule = ''
                      trunk = getcfgpath('trunk', rev)
                      self.tags = getcfgpath('tags', rev)
                      branches = getcfgpath('branches', rev)
                      # If the project has a trunk or branches, we will extract heads
                      # from them. We keep the project root otherwise.
                      if trunk:
                          # Remember the module as it was before descending into
                          # trunk: tags and branches paths below are built from it.
                          oldmodule = self.module or ''
                          self.module += '/' + trunk
                          self.head = self.latest(self.module, self.last_changed)
                          if not self.head:
                              raise util.Abort(_('no revision found in module %s')
                                               % self.module)
                      # First head in the list is the module's head
                      self.heads = [self.head]
                      if self.tags is not None:
                          self.tags = '%s/%s' % (oldmodule , (self.tags or 'tags'))
                      # Check if branches bring a few more heads to the list
                      if branches:
                          rpath = self.url.strip('/')
                          branchnames = svn.client.ls(rpath + '/' + quote(branches),
                                                      rev, False, self.ctx)
                          for branch in sorted(branchnames):
                              module = '%s/%s/%s' % (oldmodule, branches, branch)
                              # Entries under branches that are not directories are
                              # skipped.
                              if not isdir(module, self.last_changed):
                                  continue
                              brevid = self.latest(module, self.last_changed)
                              if not brevid:
                                  self.ui.note(_('ignoring empty branch %s\n') % branch)
                                  continue
                              self.ui.note(_('found branch %s at %d\n') %
                                           (branch, self.revnum(brevid)))
                              self.heads.append(brevid)
                      if self.startrev and self.heads:
                          if len(self.heads) > 1:
                              raise util.Abort(_('svn: start revision is not supported '
                                                 'with more than one branch'))
                          revnum = self.revnum(self.heads[0])
                          if revnum < self.startrev:
                              raise util.Abort(
                                  _('svn: no revision found after start revision %d')
                                               % self.startrev)
                      return self.heads
-                 def _getchanges(self, rev):
+                 def _getchanges(self, rev, full):
                      # Build the (files, copies) pair for rev from the cached log
                      # paths in self.paths[rev]. files is a sorted list of
                      # (path, rev) tuples and copies maps destinations to sources.
                      # self.removed is set as a side effect. When full is true, or
                      # rev has no parents, every file listed by svn.client.ls for
                      # the module at that revision is reported instead of only the
                      # paths named in the log.
                      (paths, parents) = self.paths[rev]
+                     copies = {}
                      if parents:
                          files, self.removed, copies = self.expandpaths(rev, paths, parents)
-                     else:
+                     if full or not parents:
                          # Perform a full checkout on roots
                          uuid, module, revnum = revsplit(rev)
                          entries = svn.client.ls(self.baseurl + quote(module),
                                                  optrev(revnum), True, self.ctx)
                          files = [n for n, e in entries.iteritems()
                                   if e.kind == svn.core.svn_node_file]
-                         copies = {}
                          self.removed = set()
                      files.sort()
                      files = zip(files, [rev] * len(files))
                      return (files, copies)
-                 def getchanges(self, rev):
+                 def getchanges(self, rev, full):
                      # Return (files, copies) for rev. The pair cached by
                      # getchangedfiles() is reused only when it was computed for
                      # this same rev and full is false; otherwise it is recomputed
                      # and the log paths kept in self.paths[rev] are dropped.
                      # reuse cache from getchangedfiles
-                     if self._changescache[0] == rev:
+                     if self._changescache[0] == rev and not full:
                          (files, copies) = self._changescache[1]
                      else:
-                         (files, copies) = self._getchanges(rev)
+                         (files, copies) = self._getchanges(rev, full)
                          # caller caches the result, so free it here to release memory
                          del self.paths[rev]
                      return (files, copies)
                  def getchangedfiles(self, rev, i):
                      # Return the names of the files changed in rev. The argument i
                      # is not used here. The computation is never a "full" one, and
                      # its result is stored in self._changescache for getchanges().
                      # called from filemap - cache computed values for reuse in getchanges
-                     (files, copies) = self._getchanges(rev)
+                     (files, copies) = self._getchanges(rev, False)
                      self._changescache = (rev, (files, copies))
                      return [f[0] for f in files]
                  def getcommit(self, rev):
                      """Return the commit object for rev and forget it locally.

                      When rev is not cached yet, the revision log of its module is
                      fetched first. Raises util.Abort if rev is still unknown after
                      the fetch.
                      """
                      if rev not in self.commits:
                          module, revnum = revsplit(rev)[1:]
                          self.module = module
                          self.reparent(module)
                          # Two kinds of requests are expected:
                          # - revisions after the last converted one, coming from the
                          # backward traversal of the revision graph: fetch and cache
                          # everything down to that point, it will be needed anyway.
                          # - revisions before it, asked for as parents of isolated
                          # branches: fetch that single revision only.
                          lastconverted = self.lastrevs.get(module, 0)
                          if revnum < lastconverted:
                              stop = revnum + 1
                          else:
                              stop = lastconverted
                          self._fetch_revisions(revnum, stop)
                          if rev not in self.commits:
                              raise util.Abort(_('svn: revision %s not found') % revnum)
                      # The caller keeps its own cache, so drop our reference to
                      # release memory.
                      return self.commits.pop(rev)
                  def checkrevformat(self, revstr, mapname='splicemap'):
                      """Abort unless revstr looks like a Subversion revision identifier.

                      The expected form is 'svn:<uuid><path>@<revnum>', where <uuid>
                      is written as 8-4-4-4-12 lowercase hexadecimal digits. mapname
                      is only used in the error message. Raises util.Abort on
                      mismatch and returns None otherwise.
                      """
                      # Every piece is a raw string so that no regex escape depends
                      # on Python's own string-literal escape processing.
                      if not re.match(r'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-'
                                      r'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]'
                                      r'{12,12}(.*)@[0-9]+$', revstr):
                          raise util.Abort(_('%s entry %s is not a valid revision'
                                             ' identifier') % (mapname, revstr))
                  def gettags(self):
                      """Return a dict mapping tag names to the revision ids they tag.

                      An empty dict is returned when no tags directory is in use
                      (self.tags is None). The log of the tags directory is read
                      backward, from the latest repository revision down to
                      self.startrev; for a given tag name the first value found
                      (the most recent one) is kept.
                      """
                      tags = {}
                      if self.tags is None:
                          return tags
                      # svn tags are just a convention, project branches left in a
                      # 'tags' directory. There is no other relationship than
                      # ancestry, which is expensive to discover and makes them hard
                      # to update incrementally.  Worse, past revisions may be
                      # referenced by tags far away in the future, requiring a deep
                      # history traversal on every calculation.  Current code
                      # performs a single backward traversal, tracking moves within
                      # the tags directory (tag renaming) and recording a new tag
                      # every time a project is copied from outside the tags
                      # directory. It also lists deleted tags, this behaviour may
                      # change in the future.
                      pendings = []
                      tagspath = self.tags
                      start = svn.ra.get_latest_revnum(self.ra)
                      stream = self._getlog([self.tags], start, self.startrev)
                      try:
                          for entry in stream:
                              origpaths, revnum, author, date, message = entry
                              if not origpaths:
                                  # Must be a mapping: iteritems() is called on it
                                  # below, which a list does not provide.
                                  origpaths = {}
                              copies = [(e.copyfrom_path, e.copyfrom_rev, p) for p, e
                                        in origpaths.iteritems() if e.copyfrom_path]
                              # Apply moves/copies from more specific to general
                              copies.sort(reverse=True)
                              srctagspath = tagspath
                              if copies and copies[-1][2] == tagspath:
                                  # Track tags directory moves
                                  srctagspath = copies.pop()[0]
                              for source, sourcerev, dest in copies:
                                  if not dest.startswith(tagspath + '/'):
                                      continue
                                  for tag in pendings:
                                      if tag[0].startswith(dest):
                                          # A pending tag came from this copy's
                                          # destination: rebase it onto the source.
                                          tagpath = source + tag[0][len(dest):]
                                          tag[:2] = [tagpath, sourcerev]
                                          break
                                  else:
                                      pendings.append([source, sourcerev, dest])
                              # Filter out tags with children coming from different
                              # parts of the repository like:
                              # /tags/tag.1 (from /trunk:10)
                              # /tags/tag.1/foo (from /branches/foo:12)
                              # Here /tags/tag.1 is discarded as well as its
                              # children. It happens with tools like cvs2svn. Such
                              # tags cannot be represented in mercurial.
                              addeds = dict((p, e.copyfrom_path) for p, e
                                            in origpaths.iteritems()
                                            if e.action == 'A' and e.copyfrom_path)
                              badroots = set()
                              for destroot in addeds:
                                  for source, sourcerev, dest in pendings:
                                      if (not dest.startswith(destroot + '/')
                                          or source.startswith(addeds[destroot] + '/')):
                                          continue
                                      badroots.add(destroot)
                                      break
                              for badroot in badroots:
                                  pendings = [p for p in pendings if p[2] != badroot
                                              and not p[2].startswith(badroot + '/')]
                              # Tell tag renamings from tag creations
                              renamings = []
                              for source, sourcerev, dest in pendings:
                                  tagname = dest.split('/')[-1]
                                  if source.startswith(srctagspath):
                                      renamings.append([source, sourcerev, tagname])
                                      continue
                                  if tagname in tags:
                                      # Keep the latest tag value
                                      continue
                                  # From revision may be fake, get one with changes
                                  try:
                                      tagid = self.latest(source, sourcerev)
                                      if tagid and tagname not in tags:
                                          tags[tagname] = tagid
                                  except SvnPathNotFound:
                                      # It happens when we are following directories
                                      # we assumed were copied with their parents
                                      # but were really created in the tag
                                      # directory.
                                      pass
                              # Only renamings stay pending for older log entries.
                              pendings = renamings
                              tagspath = srctagspath
                      finally:
                          stream.close()
                      return tags
                  def converted(self, rev, destrev):
                      """Append a '<destrev> <revnum>' line to the hg-shamap file.

                      Nothing happens unless self.wc is set. The file lives in the
                      '.svn' directory under self.wc; it is opened in append mode
                      on first use and kept in self.convertfp for later calls.
                      """
                      if self.wc:
                          fp = self.convertfp
                          if fp is None:
                              shamap = os.path.join(self.wc, '.svn', 'hg-shamap')
                              fp = open(shamap, 'a')
                              self.convertfp = fp
                          fp.write('%s %d\n' % (destrev, self.revnum(rev)))
                          # Flush after every record.
                          fp.flush()
                  def revid(self, revnum, module=None):
                      """Build the revision id 'svn:<uuid><module>@<revnum>'.

                      self.module is used when module is None or empty.
                      """
                      if not module:
                          module = self.module
                      return 'svn:%s%s@%s' % (self.uuid, module, revnum)
                  def revnum(self, rev):
                      """Return, as an int, the text that follows the last '@' in rev."""
                      number = rev.rsplit('@', 1)[-1]
                      return int(number)
                  def latest(self, path, stop=None):
                      """Find the latest revid affecting path, up to stop revision
                      number. If stop is None, default to repository latest
                      revision. It may return a revision in a different module,
                      since a branch may be moved without a change being
                      reported. Return None if computed module does not belong to
                      rootmodule subtree.

                      Raises SvnPathNotFound when path cannot be stat'ed at the
                      stop revision.
                      """
                      def findchanges(path, start, stop=None):
                          # Read the log of path backward from start, down to stop
                          # (or revision 1), and return (revnum, path) for the entry
                          # the scan ended on. path is rewritten whenever an entry
                          # shows it was copied from another location. revnum is
                          # None when that last entry has no changed paths.
                          stream = self._getlog([path], start, stop or 1)
                          try:
                              for entry in stream:
                                  paths, revnum, author, date, message = entry
                                  if stop is None and paths:
                                      # We do not know the latest changed revision,
                                      # keep the first one with changed paths.
                                      break
                                  # NOTE(review): when stop is None this compares an
                                  # int with None, which is never true on Python 2 -
                                  # confirm before porting.
                                  if revnum <= stop:
                                      break
                                  for p in paths:
                                      # Only a copied entry whose path is a prefix
                                      # of path denotes a rename of the branch.
                                      if (not path.startswith(p) or
                                          not paths[p].copyfrom_path):
                                          continue
                                      newpath = paths[p].copyfrom_path + path[len(p):]
                                      self.ui.debug("branch renamed from %s to %s at %d\n" %
                                                    (path, newpath, revnum))
                                      path = newpath
                                      break
                              # NOTE(review): paths and revnum are unbound here if
                              # the stream yields no entry - confirm that cannot
                              # happen.
                              if not paths:
                                  revnum = None
                              return revnum, path
                          finally:
                              stream.close()
                      if not path.startswith(self.rootmodule):
                          # Requests on foreign branches may be forbidden at server level
                          self.ui.debug('ignoring foreign branch %r\n' % path)
                          return None
                      if stop is None:
                          stop = svn.ra.get_latest_revnum(self.ra)
                      try:
                          # Reparent to '' around the stat call (path is passed
                          # with its slashes stripped), then restore the previous
                          # parent.
                          prevmodule = self.reparent('')
                          dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
                          self.reparent(prevmodule)
                      except SubversionException:
                          dirent = None
                      if not dirent:
                          raise SvnPathNotFound(_('%s not found up to revision %d')
                                                % (path, stop))
                      # stat() gives us the previous revision on this line of
                      # development, but it might be in *another module*. Fetch the
                      # log and detect renames down to the latest revision.
                      revnum, realpath = findchanges(path, stop, dirent.created_rev)
                      if revnum is None:
                          # Tools like svnsync can create empty revision, when
                          # synchronizing only a subtree for instance. These empty
                          # revisions created_rev still have their original values
                          # despite all changes having disappeared and can be
                          # returned by ra.stat(), at least when stating the root
                          # module. In that case, do not trust created_rev and scan
                          # the whole history.
                          revnum, realpath = findchanges(path, stop)
                          if revnum is None:
                              self.ui.debug('ignoring empty branch %r\n' % realpath)
                              return None
                      if not realpath.startswith(self.rootmodule):
                          self.ui.debug('ignoring foreign branch %r\n' % realpath)
                          return None
                      return self.revid(revnum, realpath)
                  def reparent(self, module):
                      """Point the svn transport at module and return the former parent.

                      When the transport is already parented on module nothing is
                      done and module itself is returned. A former parent of None is
                      reported as ''.
                      """
                      former = self.prevmodule
                      if former == module:
                          return module
                      target = self.baseurl + quote(module)
                      self.ui.debug("reparent to %s\n" % target)
                      svn.ra.reparent(self.ra, target)
                      self.prevmodule = module
                      if former is None:
                          return ''
                      return former
                  def expandpaths(self, rev, paths, parents):
                      """Expand the log paths of rev into per-file change information.

                      paths is a sequence of (path, entry) pairs and parents the list
                      of parent revision ids; only parents[0] is consulted. Returns
                      (changed, removed, copies): changed is a list of file names
                      that includes the removed ones, removed is a set of file
                      names, and copies maps copy destinations to their sources.
                      File names pass through self.recode().
                      """
                      changed, removed = set(), set()
                      copies = {}
                      new_module, revnum = revsplit(rev)[1:]
                      if new_module != self.module:
                          self.module = new_module
                          self.reparent(self.module)
                      for i, (path, ent) in enumerate(paths):
                          self.ui.progress(_('scanning paths'), i, item=path,
                                           total=len(paths))
                          entrypath = self.getrelpath(path)
                          kind = self._checkpath(entrypath, revnum)
                          if kind == svn.core.svn_node_file:
                              changed.add(self.recode(entrypath))
                              if not ent.copyfrom_path or not parents:
                                  continue
                              # Copy sources not in parent revisions cannot be
                              # represented, ignore their origin for now
                              pmodule, prevnum = revsplit(parents[0])[1:]
                              if ent.copyfrom_rev < prevnum:
                                  continue
                              copyfrom_path = self.getrelpath(ent.copyfrom_path, pmodule)
                              if not copyfrom_path:
                                  continue
                              self.ui.debug("copied to %s from %s@%s\n" %
                                            (entrypath, copyfrom_path, ent.copyfrom_rev))
                              copies[self.recode(entrypath)] = self.recode(copyfrom_path)
                          elif kind == 0: # gone, but had better be a deleted *file*
                              self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
                              pmodule, prevnum = revsplit(parents[0])[1:]
                              parentpath = pmodule + "/" + entrypath
                              # Look the path up in the parent revision to learn
                              # whether a file or a whole directory disappeared.
                              fromkind = self._checkpath(entrypath, prevnum, pmodule)
                              if fromkind == svn.core.svn_node_file:
                                  removed.add(self.recode(entrypath))
                              elif fromkind == svn.core.svn_node_dir:
                                  # A directory went away: every file it held in the
                                  # parent revision is reported as removed.
                                  oroot = parentpath.strip('/')
                                  nroot = path.strip('/')
                                  children = self._iterfiles(oroot, prevnum)
                                  for childpath in children:
                                      childpath = childpath.replace(oroot, nroot)
                                      childpath = self.getrelpath("/" + childpath, pmodule)
                                      if childpath:
                                          removed.add(self.recode(childpath))
                              else:
                                  self.ui.debug('unknown path in revision %d: %s\n' % \
                                                (revnum, path))
                          elif kind == svn.core.svn_node_dir:
                              if ent.action == 'M':
                                  # If the directory just had a prop change,
                                  # then we shouldn't need to look for its children.
                                  continue
                              if ent.action == 'R' and parents:
                                  # If a directory is replacing a file, mark the previous
                                  # file as deleted
                                  pmodule, prevnum = revsplit(parents[0])[1:]
                                  pkind = self._checkpath(entrypath, prevnum, pmodule)
                                  if pkind == svn.core.svn_node_file:
                                      removed.add(self.recode(entrypath))
                                  elif pkind == svn.core.svn_node_dir:
                                      # We do not know what files were kept or removed,
                                      # mark them all as changed.
                                      for childpath in self._iterfiles(pmodule, prevnum):
                                          childpath = self.getrelpath("/" + childpath)
                                          if childpath:
                                              changed.add(self.recode(childpath))
                              # Every file found under the directory at this
                              # revision is reported as changed.
                              for childpath in self._iterfiles(path, revnum):
                                  childpath = self.getrelpath("/" + childpath)
                                  if childpath:
                                      changed.add(self.recode(childpath))
                              # Handle directory copies
                              if not ent.copyfrom_path or not parents:
                                  continue
                              # Copy sources not in parent revisions cannot be
                              # represented, ignore their origin for now
                              pmodule, prevnum = revsplit(parents[0])[1:]
                              if ent.copyfrom_rev < prevnum:
                                  continue
                              copyfrompath = self.getrelpath(ent.copyfrom_path, pmodule)
                              if not copyfrompath:
                                  continue
                              self.ui.debug("mark %s came from %s:%d\n"
                                            % (path, copyfrompath, ent.copyfrom_rev))
                              # Record one copy per file of the source directory,
                              # mapping each destination file to its source file.
                              children = self._iterfiles(ent.copyfrom_path, ent.copyfrom_rev)
                              for childpath in children:
                                  childpath = self.getrelpath("/" + childpath, pmodule)
                                  if not childpath:
                                      continue
                                  copytopath = path + childpath[len(copyfrompath):]
                                  copytopath = self.getrelpath(copytopath)
                                  copies[self.recode(copytopath)] = self.recode(childpath)
                      # Passing None as the position ends the progress display.
                      self.ui.progress(_('scanning paths'), None)
                      changed.update(removed)
                      return (list(changed), removed, copies)
                  def _fetch_revisions(self, from_revnum, to_revnum):
                      if from_revnum < to_revnum:
                          from_revnum, to_revnum = to_revnum, from_revnum
                      self.child_cset = None
                      def parselogentry(orig_paths, revnum, author, date, message):
                          """Return the parsed commit object or None, and True if
                          the revision is a branch root.
                          """
                          self.ui.debug("parsing revision %d (%d changes)\n" %
                                        (revnum, len(orig_paths)))
                          branched = False
                          rev = self.revid(revnum)
                          # branch log might return entries for a parent we already have
                          if rev in self.commits or revnum < to_revnum:
                              return None, branched
                          parents = []
                          # check whether this revision is the start of a branch or part
                          # of a branch renaming
                          orig_paths = sorted(orig_paths.iteritems())
                          root_paths = [(p, e) for p, e in orig_paths
                                        if self.module.startswith(p)]
                          if root_paths:
                              path, ent = root_paths[-1]
                              if ent.copyfrom_path:
                                  branched = True
                                  newpath = ent.copyfrom_path + self.module[len(path):]
                                  # ent.copyfrom_rev may not be the actual last revision
                                  previd = self.latest(newpath, ent.copyfrom_rev)
                                  if previd is not None:
                                      prevmodule, prevnum = revsplit(previd)[1:]
                                      if prevnum >= self.startrev:
                                          parents = [previd]
                                          self.ui.note(
                                              _('found parent of branch %s at %d: %s\n') %
                                              (self.module, prevnum, prevmodule))
                              else:
                                  self.ui.debug("no copyfrom path, don't know what to do.\n")
                          paths = []
                          # filter out unrelated paths
                          for path, ent in orig_paths:
                              if self.getrelpath(path) is None:
                                  continue
                              paths.append((path, ent))
                          # Example SVN datetime. Includes microseconds.
                          # ISO-8601 conformant
                          # '2007-01-04T17:35:00.902377Z'
                          date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
                          if self.ui.configbool('convert', 'localtimezone'):
                              date = makedatetimestamp(date[0])
                          log = message and self.recode(message) or ''
                          author = author and self.recode(author) or ''
                          try:
                              branch = self.module.split("/")[-1]
                              if branch == self.trunkname:
                                  branch = None
                          except IndexError:
                              branch = None
                          cset = commit(author=author,
                                        date=util.datestr(date, '%Y-%m-%d %H:%M:%S %1%2'),
                                        desc=log,
                                        parents=parents,
                                        branch=branch,
                                        rev=rev)
                          self.commits[rev] = cset
                          # The parents list is *shared* among self.paths and the
                          # commit object. Both will be updated below.
                          self.paths[rev] = (paths, cset.parents)
                          if self.child_cset and not self.child_cset.parents:
                              self.child_cset.parents[:] = [rev]
                          self.child_cset = cset
                          return cset, branched
                      self.ui.note(_('fetching revision log for "%s" from %d to %d\n') %
                                   (self.module, from_revnum, to_revnum))
                      try:
                          firstcset = None
                          lastonbranch = False
                          stream = self._getlog([self.module], from_revnum, to_revnum)
                          try:
                              for entry in stream:
                                  paths, revnum, author, date, message = entry
                                  if revnum < self.startrev:
                                      lastonbranch = True
                                      break
                                  if not paths:
                                      self.ui.debug('revision %d has no entries\n' % revnum)
                                      # If we ever leave the loop on an empty
                                      # revision, do not try to get a parent branch
                                      lastonbranch = lastonbranch or revnum == 0
                                      continue
                                  cset, lastonbranch = parselogentry(paths, revnum, author,
                                                                     date, message)
                                  if cset:
                                      firstcset = cset
                                  if lastonbranch:
                                      break
                          finally:
                              stream.close()
                          if not lastonbranch and firstcset and not firstcset.parents:
                              # The first revision of the sequence (the last fetched one)
                              # has invalid parents if not a branch root. Find the parent
                              # revision now, if any.
                              try:
                                  firstrevnum = self.revnum(firstcset.rev)
                                  if firstrevnum > 1:
                                      latest = self.latest(self.module, firstrevnum - 1)
                                      if latest:
                                          firstcset.parents.append(latest)
                              except SvnPathNotFound:
                                  pass
                      except SubversionException, (inst, num):
                          if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
                              raise util.Abort(_('svn: branch has no revision %s')
                                               % to_revnum)
                          raise
                  def getfile(self, file, rev):
                      # TODO: ra.get_file transmits the whole file instead of diffs.
                      if file in self.removed:
                          return None, None
                      mode = ''
                      try:
                          new_module, revnum = revsplit(rev)[1:]
                          if self.module != new_module:
                              self.module = new_module
                              self.reparent(self.module)
                          io = StringIO()
                          info = svn.ra.get_file(self.ra, file, revnum, io)
                          data = io.getvalue()
                          # ra.get_file() seems to keep a reference on the input buffer
                          # preventing collection. Release it explicitly.
                          io.close()
                          if isinstance(info, list):
                              info = info[-1]
                          mode = ("svn:executable" in info) and 'x' or ''
                          mode = ("svn:special" in info) and 'l' or mode
                      except SubversionException, e:
                          notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
                              svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
                          if e.apr_err in notfound: # File not found
                              return None, None
                          raise
                      if mode == 'l':
                          link_prefix = "link "
                          if data.startswith(link_prefix):
                              data = data[len(link_prefix):]
                      return data, mode
                  def _iterfiles(self, path, revnum):
                      """Enumerate all files in path at revnum, recursively.
                      Returns a generator of names made of path (stripped of slashes)
                      followed by each listing key whose entry kind is svn_node_file.
                      """
                      prefix = path.strip('/')
                      pool = Pool()
                      url = '/'.join([self.baseurl, quote(prefix)]).strip('/')
                      listing = svn.client.ls(url, optrev(revnum), True, self.ctx, pool)
                      if prefix:
                          prefix += '/'
                      return ((prefix + name) for name, ent in listing.iteritems()
                              if ent.kind == svn.core.svn_node_file)
                  def getrelpath(self, path, module=None):
                      """Return path relative to module, or None when it lies outside.
                      module defaults to self.module.  A path equal to module (trailing
                      slashes of path ignored) gives ''.  Paths outside module are
                      reported through self.ui.debug().
                      """
                      if module is None:
                          module = self.module
                      # Given the repository url of this wc, say
                      #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
                      # extract the "entry" portion (a relative path) from what
                      # svn log --xml says, i.e.
                      #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
                      # that is to say "tests/PloneTestCase.py"
                      if path.startswith(module):
                          tail = path.rstrip('/')[len(module):]
                          if not tail:
                              # path is the module itself
                              return tail
                          if tail[0] == '/':
                              return tail[1:]
                          # otherwise module is only a textual prefix of a sibling name
                      # The path is outside our tracked tree...
                      self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
                      return None
                  def _checkpath(self, path, revnum, module=None):
                      """Return svn.ra.check_path() for path at revnum.
                      When module is given, path is taken relative to it: the session
                      is reparented to '' for the duration of the check and reparented
                      back to the previous value afterwards, even on error.
                      """
                      switched = module is not None
                      if switched:
                          prevmodule = self.reparent('')
                          path = module + '/' + path
                      try:
                          # ra.check_path does not like leading slashes very much, it leads
                          # to PROPFIND subversion errors
                          return svn.ra.check_path(self.ra, path.strip('/'), revnum)
                      finally:
                          if switched:
                              self.reparent(prevmodule)
                  def _getlog(self, paths, start, end, limit=0, discover_changed_paths=True,
                              strict_node_history=False):
                      """Return a stream of log entries for paths from start to end.
                      Unless convert.svn.debugsvnlog is disabled, the log is produced by
                      a child 'hg debugsvnlog' process fed with the encoded arguments;
                      otherwise directlogstream() is used.  Raises util.Abort when the
                      child's stdin cannot be closed.
                      """
                      def normalize(p):
                          # Normalize path names, svn >= 1.5 only wants paths relative to
                          # supplied URL
                          if not p.startswith('/'):
                              p = self.module + '/' + p
                          return p.strip('/')
                      args = [self.baseurl, [normalize(p) for p in paths], start, end,
                              limit, discover_changed_paths, strict_node_history]
                      # undocumented feature: debugsvnlog can be disabled
                      if self.ui.configbool('convert', 'svn.debugsvnlog', True):
                          arg = encodeargs(args)
                          cmd = '%s debugsvnlog' % util.shellquote(util.hgexecutable())
                          stdin, stdout = util.popen2(util.quotecommand(cmd))
                          stdin.write(arg)
                          try:
                              stdin.close()
                          except IOError:
                              raise util.Abort(_('Mercurial failed to run itself, check'
                                                 ' hg executable is in PATH'))
                          return logstream(stdout)
                      return directlogstream(*args)
              # Shell script that svn_sink.__init__ writes to hooks/pre-revprop-change of
              # a repository it has just created.  It exits 0 (change allowed) only for
              # modifying svn:log or adding hg:convert-branch / hg:convert-rev; any other
              # revision property change gets a message on stderr and exit status 1.
              pre_revprop_change = '''#!/bin/sh
              REPOS="$1"
              REV="$2"
              USER="$3"
              PROPNAME="$4"
              ACTION="$5"
              if [ "$ACTION" = "M" -a "$PROPNAME" = "svn:log" ]; then exit 0; fi
              if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-branch" ]; then exit 0; fi
              if [ "$ACTION" = "A" -a "$PROPNAME" = "hg:convert-rev" ]; then exit 0; fi
              echo "Changing prohibited revision property" >&2
              exit 1
              '''
              class svn_sink(converter_sink, commandline):
                  """Conversion sink that commits into Subversion through the 'svn'
                  command line client, using a local working copy."""
                  # Picks the new revision number out of the output of 'commit'.
                  commit_re = re.compile(r'Committed revision (\d+).', re.M)
                  # Picks the repository UUID out of the output of 'info'.
                  uuid_re = re.compile(r'Repository UUID:\s*(\S+)', re.M)
                  def prerun(self):
                      """Change the process directory to the working copy, if one is set."""
                      if not self.wc:
                          return
                      os.chdir(self.wc)
                  def postrun(self):
                      """Change back to the directory saved in self.cwd, if a working
                      copy is set."""
                      if not self.wc:
                          return
                      os.chdir(self.cwd)
                  def join(self, name):
                      """Return the path of name inside the working copy's .svn directory."""
                      svndir = os.path.join(self.wc, '.svn')
                      return os.path.join(svndir, name)
                  def revmapfile(self):
                      """Return the location of the revision map, 'hg-shamap', as given
                      by self.join()."""
                      name = 'hg-shamap'
                      return self.join(name)
                  def authorfile(self):
                      """Return the location of the author map, 'hg-authormap', as given
                      by self.join()."""
                      name = 'hg-authormap'
                      return self.join(name)
                  def __init__(self, ui, path):
                      """Prepare a Subversion working copy to convert into.
                      path may be an existing working copy (it contains .svn/entries), a
                      repository URL, or a local path.  A local path whose parent
                      directory exists is turned into a file:// URL, and a repository
                      is created there first when no db/fs-type file is found.  Except
                      for an existing working copy, the target is checked out into
                      '<basename of path>-wc' under the current directory.
                      """
                      converter_sink.__init__(self, ui, path)
                      commandline.__init__(self, ui, 'svn')
                      # Pending operations collected by putfile()/putcommit() and
                      # flushed when the changeset is committed.
                      self.delete = []
                      self.setexec = []
                      self.delexec = []
                      self.copies = []
                      self.wc = None
                      # Directory that postrun() changes back to.
                      self.cwd = os.getcwd()
                      # False, or the path of the repository created below.
                      created = False
                      if os.path.isfile(os.path.join(path, '.svn', 'entries')):
                          # path already is a working copy: bring it up to date
                          self.wc = os.path.realpath(path)
                          self.run0('update')
                      else:
                          # Anything that does not start with a known URL scheme is
                          # handled as a local filesystem path.
                          if not re.search(r'^(file|http|https|svn|svn\+ssh)\://', path):
                              path = os.path.realpath(path)
                              if os.path.isdir(os.path.dirname(path)):
                                  if not os.path.exists(os.path.join(path, 'db', 'fs-type')):
                                      ui.status(_('initializing svn repository %r\n') %
                                                os.path.basename(path))
                                      commandline(ui, 'svnadmin').run0('create', path)
                                      created = path
                                  # Build a file:// URL out of the local path
                                  path = util.normpath(path)
                                  if not path.startswith('/'):
                                      path = '/' + path
                                  path = 'file://' + path
                          wcpath = os.path.join(os.getcwd(), os.path.basename(path) + '-wc')
                          ui.status(_('initializing svn working copy %r\n')
                                    % os.path.basename(wcpath))
                          self.run0('checkout', path, wcpath)
                          self.wc = wcpath
                      self.opener = scmutil.opener(self.wc)
                      self.wopener = scmutil.opener(self.wc)
                      self.childmap = mapfile(ui, self.join('hg-childmap'))
                      # util.isexec when util.checkexec() is true for the working
                      # copy, None otherwise (putfile() then skips exec handling).
                      self.is_exec = util.checkexec(self.wc) and util.isexec or None
                      if created:
                          # Install the pre-revprop-change hook in the repository that
                          # was just created.
                          hook = os.path.join(created, 'hooks', 'pre-revprop-change')
                          fp = open(hook, 'w')
                          fp.write(pre_revprop_change)
                          fp.close()
                          # presumably marks the hook executable - confirm against
                          # util.setflags(path, islink, isexec)
                          util.setflags(hook, False, True)
                      # Remember the repository UUID; revid() builds ids from it.
                      output = self.run0('info')
                      self.uuid = self.uuid_re.search(output).group(1).strip()
                  def wjoin(self, *names):
                      """Join names onto the working copy directory."""
                      parts = (self.wc,) + names
                      return os.path.join(*parts)
                  @propertycache
                  def manifest(self):
                      """Set of entry names parsed from the XML output of
                      self.run0('ls', recursive=True, xml=True)."""
                      # As of svn 1.7, the "add" command fails when receiving
                      # already tracked entries, so we have to track and filter them
                      # ourselves.
                      listing = self.run0('ls', recursive=True, xml=True)
                      dom = xml.dom.minidom.parseString(listing)
                      tracked = set()
                      for entry in dom.getElementsByTagName('entry'):
                          for child in entry.childNodes:
                              if (child.nodeType == child.ELEMENT_NODE
                                  and child.tagName == 'name'):
                                  text = ''.join(t.data for t in child.childNodes
                                                 if t.nodeType == t.TEXT_NODE)
                                  # Entries are compared with names coming from
                                  # mercurial, so bytes with undefined encoding. Our
                                  # best bet is to assume they are in local
                                  # encoding. They will be passed to command line calls
                                  # later anyway, so they better be.
                                  tracked.add(encoding.tolocal(text.encode('utf-8')))
                                  # only the first <name> child of an entry is used
                                  break
                      return tracked
                  def putfile(self, filename, flags, data):
                      """Write data to filename in the working copy according to flags.
                      With 'l' in flags a symlink pointing to data is created.  Otherwise
                      data is written as a regular file, replacing a symlink found at
                      that place, and when exec bits are supported the file is queued
                      in self.setexec or self.delexec if its svn:executable property
                      has to change to match 'x' in flags.
                      """
                      if 'l' in flags:
                          self.wopener.symlink(data, filename)
                      else:
                          wpath = self.wjoin(filename)
                          try:
                              # Remove the very path that was tested: filename is
                              # relative to the working copy, not to the current
                              # directory, so unlinking the bare name left the
                              # symlink in place and the write went through it.
                              if os.path.islink(wpath):
                                  os.unlink(wpath)
                          except OSError:
                              # best effort, the write below reports real problems
                              pass
                          self.wopener.write(filename, data)
                          if self.is_exec:
                              if self.is_exec(wpath):
                                  if 'x' not in flags:
                                      self.delexec.append(filename)
                              else:
                                  if 'x' in flags:
                                      self.setexec.append(filename)
                              util.setflags(wpath, False, 'x' in flags)
                  def _copyfile(self, source, dest):
                      """Run 'copy' from source to dest although dest may already exist
                      in the working copy, keeping the existing content of dest."""
                      # SVN's copy command pukes if the destination file exists, but
                      # our copyfile method expects to record a copy that has
                      # already occurred.  Cross the semantic gap.
                      wdest = self.wjoin(dest)
                      exists = os.path.lexists(wdest)
                      if exists:
                          # Move the existing file out of the way.  mkstemp() is only
                          # used to pick an unused name in the same directory: the
                          # file it creates is removed again before the rename.
                          fd, tempname = tempfile.mkstemp(
                              prefix='hg-copy-', dir=os.path.dirname(wdest))
                          os.close(fd)
                          os.unlink(tempname)
                          os.rename(wdest, tempname)
                      try:
                          self.run0('copy', source, dest)
                      finally:
                          # Runs whether or not the copy succeeded: dest is recorded
                          # as tracked and the content moved aside is put back.
                          self.manifest.add(dest)
                          if exists:
                              try:
                                  # drop whatever the copy left at the destination
                                  os.unlink(wdest)
                              except OSError:
                                  pass
                              os.rename(tempname, wdest)
                  def dirs_of(self, files):
                      """Return the set of directories related to files: every leading
                      directory of each name, plus the names that are themselves
                      directories in the working copy."""
                      found = set()
                      for name in files:
                          if os.path.isdir(self.wjoin(name)):
                              found.add(name)
                          found.update(name[:pos] for pos in strutil.rfindall(name, '/'))
                      return found
                  def add_dirs(self, files):
                      """Add the directories of files that are missing from the manifest.
                      Returns the sorted list of directories that were added, after
                      recording them in the manifest and passing them to 'add'.
                      """
                      missing = []
                      for d in sorted(self.dirs_of(files)):
                          if d not in self.manifest:
                              missing.append(d)
                      if not missing:
                          return missing
                      self.manifest.update(missing)
                      self.xargs(missing, 'add', non_recursive=True, quiet=True)
                      return missing
                  def add_files(self, files):
                      """Add the files that are missing from the manifest.
                      Returns the list of files that were added (in the order of
                      files), after recording them in the manifest and passing them
                      to 'add'.
                      """
                      fresh = []
                      for f in files:
                          if f not in self.manifest:
                              fresh.append(f)
                      if not fresh:
                          return fresh
                      self.manifest.update(fresh)
                      self.xargs(fresh, 'add', quiet=True)
                      return fresh
                  def addchild(self, parent, child):
                      """Record child as the converted child of parent in the child map."""
                      mapping = self.childmap
                      mapping[parent] = child
                  def revid(self, rev):
                      """Build the revision id for rev: 'svn:<repository uuid>@<rev>'."""
                      fields = (self.uuid, rev)
                      return u"svn:%s@%s" % fields
-                 def putcommit(self, files, copies, parents, commit, source, revmap):
+                 def putcommit(self, files, copies, parents, commit, source, revmap, full):
                      for parent in parents:
                          try:
                              return self.revid(self.childmap[parent])
                          except KeyError:
                              pass
                      # Apply changes to working copy
                      for f, v in files:
                          data, mode = source.getfile(f, v)
                          if data is None:
                              self.delete.append(f)
                          else:
                              self.putfile(f, mode, data)
                              if f in copies:
                                  self.copies.append([copies[f], f])
+                     if full:
+                         self.delete.extend(sorted(self.manifest.difference(files)))
                      files = [f[0] for f in files]
                      entries = set(self.delete)
                      files = frozenset(files)
                      entries.update(self.add_dirs(files.difference(entries)))
                      if self.copies:
                          for s, d in self.copies:
                              self._copyfile(s, d)
                          self.copies = []
                      if self.delete:
                          self.xargs(self.delete, 'delete')
                          for f in self.delete:
                              self.manifest.remove(f)
                          self.delete = []
                      entries.update(self.add_files(files.difference(entries)))
                      if self.delexec:
                          self.xargs(self.delexec, 'propdel', 'svn:executable')
                          self.delexec = []
                      if self.setexec:
                          self.xargs(self.setexec, 'propset', 'svn:executable', '*')
                          self.setexec = []
                      fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')
                      fp = os.fdopen(fd, 'w')
                      fp.write(commit.desc)
                      fp.close()
                      try:
                          output = self.run0('commit',
                                             username=util.shortuser(commit.author),
                                             file=messagefile,
                                             encoding='utf-8')
                          try:
                              rev = self.commit_re.search(output).group(1)
                          except AttributeError:
                              if not files:
                                  return parents[0]
                              self.ui.warn(_('unexpected svn output:\n'))
                              self.ui.warn(output)
                              raise util.Abort(_('unable to cope with svn output'))
                          if commit.rev:
                              self.run('propset', 'hg:convert-rev', commit.rev,
                                       revprop=True, revision=rev)
                          if commit.branch and commit.branch != 'default':
                              self.run('propset', 'hg:convert-branch', commit.branch,
                                       revprop=True, revision=rev)
                          for parent in parents:
                              self.addchild(parent, rev)
                          return self.revid(rev)
                      finally:
                          os.unlink(messagefile)
                  def puttags(self, tags):
                      """Warn that tags cannot be written and return (None, None)."""
                      msg = _('writing Subversion tags is not yet implemented\n')
                      self.ui.warn(msg)
                      return None, None
                  def hascommitfrommap(self, rev):
                      """Tell whether rev, taken from a revision map, exists here.
                      Always True: revisions referenced in a map are trusted to still
                      be present.
                      """
                      # TODO: implement something better if necessary and feasible
                      return True
                  def hascommitforsplicemap(self, rev):
                      """Return True when rev is in the child map, abort otherwise."""
                      # This is not correct as one can convert to an existing subversion
                      # repository and childmap would not list all revisions. Too bad.
                      if rev not in self.childmap:
                          raise util.Abort(_('splice map revision %s not found in subversion '
                                             'child map (revision lookups are not implemented)')
                                           % rev)
                      return True

tests/test-convert-hg-sink.t

0 +13 0

                $ cat >> $HGRCPATH <<EOF
                > [extensions]
                > convert=
                > [convert]
                > hg.saverev=False
                > EOF
                $ hg init orig
                $ cd orig
                $ echo foo > foo
                $ echo bar > bar
                $ hg ci -qAm 'add foo and bar'
                $ hg rm foo
                $ hg ci -m 'remove foo'
                $ mkdir foo
                $ echo file > foo/file
                $ hg ci -qAm 'add foo/file'
                $ hg tag some-tag
                $ hg tag -l local-tag
                $ hg log
                changeset:   3:593cbf6fb2b4
                tag:         local-tag
                tag:         tip
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                summary:     Added tag some-tag for changeset ad681a868e44
                changeset:   2:ad681a868e44
                tag:         some-tag
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                summary:     add foo/file
                changeset:   1:cbba8ecc03b7
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                summary:     remove foo
                changeset:   0:327daa9251fa
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                summary:     add foo and bar
                $ cd ..
                $ hg convert orig new 2>&1 | grep -v 'subversion python bindings could not be loaded'
                initializing destination new repository
                scanning source...
                sorting...
                converting...
                3 add foo and bar
                2 remove foo
                1 add foo/file
                0 Added tag some-tag for changeset ad681a868e44
                $ cd new
                $ hg out ../orig
                comparing with ../orig
                searching for changes
                no changes found
                [1]
              dirstate should be empty:
                $ hg debugstate
                $ hg parents -q
                $ hg up -C
                3 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ hg copy bar baz
              put something in the dirstate:
                $ hg debugstate > debugstate
                $ grep baz debugstate
                a   0         -1 unset               baz
                copy: bar -> baz
              add a new revision in the original repo
                $ cd ../orig
                $ echo baz > baz
                $ hg ci -qAm 'add baz'
                $ cd ..
                $ hg convert orig new 2>&1 | grep -v 'subversion python bindings could not be loaded'
                scanning source...
                sorting...
                converting...
                0 add baz
                $ cd new
                $ hg out ../orig
                comparing with ../orig
                searching for changes
                no changes found
                [1]
              dirstate should be the same (no output below):
                $ hg debugstate > new-debugstate
                $ diff debugstate new-debugstate
              no copies
                $ hg up -C
                1 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ hg debugrename baz
                baz not renamed
                $ cd ..
              test tag rewriting
                $ cat > filemap <<EOF
                > exclude foo
                > EOF
                $ hg convert --filemap filemap orig new-filemap 2>&1 | grep -v 'subversion python bindings could not be loaded'
                initializing destination new-filemap repository
                scanning source...
                sorting...
                converting...
                4 add foo and bar
                3 remove foo
                2 add foo/file
                1 Added tag some-tag for changeset ad681a868e44
                0 add baz
                $ cd new-filemap
                $ hg tags
                tip                                2:3c74706b1ff8
                some-tag                           0:ba8636729451
                $ cd ..
              Test cases for hg-hg roundtrip
              Helper
                $ glog()
                > {
                >     hg log -G --template '{rev} {node|short} "{desc}" files: {files}\n' $*
                > }
              Create a tricky source repo
                $ hg init source
                $ cd source
                $ echo 0 > 0
                $ hg ci -Aqm '0: add 0'
                $ echo a > a
                $ mkdir dir
                $ echo b > dir/b
                $ hg ci -qAm '1: add a and dir/b'
                $ echo c > dir/c
                $ hg ci -qAm '2: add dir/c'
                $ hg copy a e
                $ echo b >> b
                $ hg ci -qAm '3: copy a to e, change b'
                $ hg up -qr -3
                $ echo a >> a
                $ hg ci -qAm '4: change a'
                $ hg merge
                merging a and e to e
                2 files updated, 1 files merged, 0 files removed, 0 files unresolved
                (branch merge, don't forget to commit)
                $ hg copy b dir/d
                $ hg ci -qAm '5: merge 2 and 3, copy b to dir/d'
                $ echo a >> a
                $ hg ci -qAm '6: change a'
                $ hg mani
                0
                a
                b
                dir/b
                dir/c
                dir/d
                e
                $ glog
                @  6 0613c8e59a3d "6: change a" files: a
                |
                o    5 717e9b37cdb7 "5: merge 2 and 3, copy b to dir/d" files: dir/d e
                |\
                | o  4 86a55cb968d5 "4: change a" files: a
                | |
                o |  3 0e6e235919dd "3: copy a to e, change b" files: b e
                | |
                o |  2 0394b0d5e4f7 "2: add dir/c" files: dir/c
                |/
                o  1 333546584845 "1: add a and dir/b" files: a dir/b
                |
                o  0 d1a24e2ebd23 "0: add 0" files: 0
                $ cd ..
              Convert excluding rev 0 and dir/ (and thus rev2):
                $ cat << EOF > filemap
                > exclude dir
                > EOF
                $ hg convert --filemap filemap source dest --config convert.hg.revs=1::
                initializing destination dest repository
                scanning source...
                sorting...
                converting...
                5 1: add a and dir/b
                4 2: add dir/c
                3 3: copy a to e, change b
                2 4: change a
                1 5: merge 2 and 3, copy b to dir/d
                0 6: change a
              Verify that conversion skipped rev 2:
                $ glog -R dest
                o  4 78814e84a217 "6: change a" files: a
                |
                o    3 f7cff662c5e5 "5: merge 2 and 3, copy b to dir/d" files: e
                |\
                | o  2 ab40a95b0072 "4: change a" files: a
                | |
                o |  1 bd51f17597bf "3: copy a to e, change b" files: b e
                |/
                o  0 a4a1dae0fe35 "1: add a and dir/b" files: 0 a
              Verify mapping correct in both directions:
                $ cat source/.hg/shamap
                a4a1dae0fe3514cefd9b8541b7abbc8f44f946d5 333546584845f70c4cfecb992341aaef0e708166
                bd51f17597bf32268e68a560b206898c3960cda2 0e6e235919dd8e9285ba8eb5adf703af9ad99378
                ab40a95b00725307e79c2fd271000aa8af9759f4 86a55cb968d51770cba2a1630d6cc637b574580a
                f7cff662c5e581e6f3f1a85ffdd2bcb35825f6ba 717e9b37cdb7eb9917ca8e30aa3f986e6d5b177d
                78814e84a217894517c2de392b903ed05e6871a4 0613c8e59a3ddb9789072ef52f1ed13496489bb4
                $ cat dest/.hg/shamap
                333546584845f70c4cfecb992341aaef0e708166 a4a1dae0fe3514cefd9b8541b7abbc8f44f946d5
                0394b0d5e4f761ced559fd0bbdc6afc16cb3f7d1 a4a1dae0fe3514cefd9b8541b7abbc8f44f946d5
                0e6e235919dd8e9285ba8eb5adf703af9ad99378 bd51f17597bf32268e68a560b206898c3960cda2
                86a55cb968d51770cba2a1630d6cc637b574580a ab40a95b00725307e79c2fd271000aa8af9759f4
                717e9b37cdb7eb9917ca8e30aa3f986e6d5b177d f7cff662c5e581e6f3f1a85ffdd2bcb35825f6ba
                0613c8e59a3ddb9789072ef52f1ed13496489bb4 78814e84a217894517c2de392b903ed05e6871a4
              Verify meta data converted correctly:
                $ hg -R dest log -r 1 --debug -p --git
                changeset:   1:bd51f17597bf32268e68a560b206898c3960cda2
                phase:       draft
                parent:      0:a4a1dae0fe3514cefd9b8541b7abbc8f44f946d5
                parent:      -1:0000000000000000000000000000000000000000
                manifest:    1:040c72ed9b101773c24ac314776bfc846943781f
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                files+:      b e
                extra:       branch=default
                description:
                3: copy a to e, change b
                diff --git a/b b/b
                new file mode 100644
                --- /dev/null
                +++ b/b
@@ -0,0 +1,1 @@
                +b
                diff --git a/a b/e
                copy from a
                copy to e
              Verify files included and excluded correctly:
                $ hg -R dest manifest -r tip
                0
                a
                b
                e
              Make changes in dest and convert back:
                $ hg -R dest up -q
                $ echo dest > dest/dest
                $ hg -R dest ci -Aqm 'change in dest'
                $ hg -R dest tip
                changeset:   5:a2e0e3cc6d1d
                tag:         tip
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                summary:     change in dest
              (converting merges back after using a filemap will probably cause chaos so we
              exclude merges.)
                $ hg convert dest source --config convert.hg.revs='!merge()'
                scanning source...
                sorting...
                converting...
                0 change in dest
              Verify the conversion back:
                $ hg -R source log --debug -r tip
                changeset:   7:e6d364a69ff1248b2099e603b0c145504cade6f0
                tag:         tip
                phase:       draft
                parent:      6:0613c8e59a3ddb9789072ef52f1ed13496489bb4
                parent:      -1:0000000000000000000000000000000000000000
                manifest:    7:aa3e9542f3b76d4f1f1b2e9c7ce9dbb48b6a95ec
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                files+:      dest
                extra:       branch=default
                description:
                change in dest
              Files that had been excluded are still present:
                $ hg -R source manifest -r tip
                0
                a
                b
                dest
                dir/b
                dir/c
                dir/d
                e
              More source changes
                $ cd source
                $ echo 1 >> a
                $ hg ci -m '8: source first branch'
                created new head
                $ hg up -qr -2
                $ echo 2 >> a
                $ hg ci -m '9: source second branch'
                $ hg merge -q --tool internal:local
                $ hg ci -m '10: source merge'
                $ echo >> a
                $ hg ci -m '11: source change'
                $ hg mani
                0
                a
                b
                dest
                dir/b
                dir/c
                dir/d
                e
                $ glog -r 6:
                @  11 0c8927d1f7f4 "11: source change" files: a
                |
                o    10 9ccb7ee8d261 "10: source merge" files: a
                |\
                | o  9 f131b1518dba "9: source second branch" files: a
                | |
                o |  8 669cf0e74b50 "8: source first branch" files: a
                | |
                | o  7 e6d364a69ff1 "change in dest" files: dest
                |/
                o  6 0613c8e59a3d "6: change a" files: a
                |
                $ cd ..
                $ hg convert --filemap filemap source dest --config convert.hg.revs=3:
                scanning source...
                sorting...
                converting...
                3 8: source first branch
                2 9: source second branch
                1 10: source merge
                0 11: source change
                $ glog -R dest
                o  9 8432d597b263 "11: source change" files: a
                |
                o    8 632ffacdcd6f "10: source merge" files: a
                |\
                | o  7 049cfee90ee6 "9: source second branch" files: a
                | |
                o |  6 9b6845e036e5 "8: source first branch" files: a
                | |
                | @  5 a2e0e3cc6d1d "change in dest" files: dest
                |/
                o  4 78814e84a217 "6: change a" files: a
                |
                o    3 f7cff662c5e5 "5: merge 2 and 3, copy b to dir/d" files: e
                |\
                | o  2 ab40a95b0072 "4: change a" files: a
                | |
                o |  1 bd51f17597bf "3: copy a to e, change b" files: b e
                |/
                o  0 a4a1dae0fe35 "1: add a and dir/b" files: 0 a
                $ cd ..
              Two way tests
                $ hg init 0
                $ echo f > 0/f
                $ echo a > 0/a-only
                $ echo b > 0/b-only
                $ hg -R 0 ci -Aqm0
                $ cat << EOF > filemap-a
                > exclude b-only
                > EOF
                $ cat << EOF > filemap-b
                > exclude a-only
                > EOF
                $ hg convert --filemap filemap-a 0 a
                initializing destination a repository
                scanning source...
                sorting...
                converting...
                0 0
                $ hg -R a up -q
                $ echo a > a/f
                $ hg -R a ci -ma
                $ hg convert --filemap filemap-b 0 b
                initializing destination b repository
                scanning source...
                sorting...
                converting...
                0 0
                $ hg -R b up -q
                $ echo b > b/f
                $ hg -R b ci -mb
                $ tail */.hg/shamap
                ==> 0/.hg/shamap <==
                86f3f774ffb682bffb5dc3c1d3b3da637cb9a0d6 8a028c7c77f6c7bd6d63bc3f02ca9f779eabf16a
                dd9f218eb91fb857f2a62fe023e1d64a4e7812fe 8a028c7c77f6c7bd6d63bc3f02ca9f779eabf16a
                ==> a/.hg/shamap <==
                8a028c7c77f6c7bd6d63bc3f02ca9f779eabf16a 86f3f774ffb682bffb5dc3c1d3b3da637cb9a0d6
                ==> b/.hg/shamap <==
                8a028c7c77f6c7bd6d63bc3f02ca9f779eabf16a dd9f218eb91fb857f2a62fe023e1d64a4e7812fe
                $ hg convert a 0
                scanning source...
                sorting...
                converting...
                0 a
                $ hg convert b 0
                scanning source...
                sorting...
                converting...
                0 b
                $ hg -R 0 log -G
                o  changeset:   2:637fbbbe96b6
                |  tag:         tip
                |  parent:      0:8a028c7c77f6
                |  user:        test
                |  date:        Thu Jan 01 00:00:00 1970 +0000
                |  summary:     b
                |
                | o  changeset:   1:ec7b9c96e692
                |/   user:        test
                |    date:        Thu Jan 01 00:00:00 1970 +0000
                |    summary:     a
                |
                @  changeset:   0:8a028c7c77f6
                   user:        test
                   date:        Thu Jan 01 00:00:00 1970 +0000
                   summary:     0
                $ hg convert --filemap filemap-b 0 a --config convert.hg.revs=1::
                scanning source...
                sorting...
                converting...
                $ hg -R 0 up -r1
                1 files updated, 0 files merged, 0 files removed, 0 files unresolved
                $ echo f >> 0/f
                $ hg -R 0 ci -mx
                $ hg convert --filemap filemap-b 0 a --config convert.hg.revs=1::
                scanning source...
                sorting...
                converting...
                0 x
                $ hg -R a log -G -T '{rev} {desc|firstline} ({files})\n'
                o  2 x (f)
                |
                @  1 a (f)
                |
                o  0 0 (a-only f)
                $ hg -R a mani -r tip
                a-only
                f
              An additional round, demonstrating that unchanged files don't get converted
                $ echo f >> 0/f
                $ echo f >> 0/a-only
                $ hg -R 0 ci -m "extra f+a-only change"
                $ hg convert --filemap filemap-b 0 a --config convert.hg.revs=1::
                scanning source...
                sorting...
                converting...
                0 extra f+a-only change
                $ hg -R a log -G -T '{rev} {desc|firstline} ({files})\n'
                o  3 extra f+a-only change (f)
                |
                o  2 x (f)
                |
                @  1 a (f)
                |
                o  0 0 (a-only f)
              Conversion after rollback
                $ hg -R a rollback -f
                repository tip rolled back to revision 2 (undo commit)
                $ hg convert --filemap filemap-b 0 a --config convert.hg.revs=1::
                scanning source...
                sorting...
                converting...
                0 extra f+a-only change
                $ hg -R a log -G -T '{rev} {desc|firstline} ({files})\n'
                o  3 extra f+a-only change (f)
                |
                o  2 x (f)
                |
                @  1 a (f)
                |
                o  0 0 (a-only f)
+             Convert with --full adds and removes files that didn't change
+               $ echo f >> 0/f
+               $ hg -R 0 ci -m "f"
+               $ hg convert --filemap filemap-b --full 0 a --config convert.hg.revs=1::
+               scanning source...
+               sorting...
+               converting...
+f
+               $ hg -R a status --change tip
+               M f
+               A b-only
+               R a-only

tests/test-convert-svn-sink.t

0 +25 0

              #require svn13
                $ svnupanddisplay()
                > {
                >     (
                >        cd $1;
                >        svn up -q;
                >        svn st -v | sed 's/  */ /g' | sort
                >        limit=''
                >        if [ $2 -gt 0 ]; then
                >            limit="--limit=$2"
                >        fi
                >        svn log --xml -v $limit | python "$TESTDIR/svnxml.py"
                >     )
                > }
                $ cat >> $HGRCPATH <<EOF
                > [extensions]
                > convert =
                > EOF
                $ hg init a
              Add
                $ echo a > a/a
                $ mkdir -p a/d1/d2
                $ echo b > a/d1/d2/b
                $ hg --cwd a ci -d '0 0' -A -m 'add a file'
                adding a
                adding d1/d2/b
              Modify
                $ "$TESTDIR/svn-safe-append.py" a a/a
                $ hg --cwd a ci -d '1 0' -m 'modify a file'
                $ hg --cwd a tip -q
                1:e0e2b8a9156b
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn repository 'a-hg'
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 add a file
 modify a file
                $ svnupanddisplay a-hg-wc 2
 1 test d1
 1 test d1/d2 (glob)
 1 test d1/d2/b (glob)
 2 test .
 2 test a
                revision: 2
                author: test
                msg: modify a file
                 M /a
                revision: 1
                author: test
                msg: add a file
                 A /a
                 A /d1
                 A /d1/d2
                 A /d1/d2/b
                $ ls a a-hg-wc
                a:
                a
                d1
                a-hg-wc:
                a
                d1
                $ cmp a/a a-hg-wc/a
              Rename
                $ hg --cwd a mv a b
                $ hg --cwd a ci -d '2 0' -m 'rename a file'
                $ hg --cwd a tip -q
                2:eb5169441d43
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 rename a file
                $ svnupanddisplay a-hg-wc 1
 1 test d1
 1 test d1/d2 (glob)
 1 test d1/d2/b (glob)
 3 test .
 3 test b
                revision: 3
                author: test
                msg: rename a file
                 D /a
                 A /b (from /a@2)
                $ ls a a-hg-wc
                a:
                b
                d1
                a-hg-wc:
                b
                d1
              Copy
                $ hg --cwd a cp b c
                $ hg --cwd a ci -d '3 0' -m 'copy a file'
                $ hg --cwd a tip -q
                3:60effef6ab48
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 copy a file
                $ svnupanddisplay a-hg-wc 1
 1 test d1
 1 test d1/d2 (glob)
 1 test d1/d2/b (glob)
 3 test b
 4 test .
 4 test c
                revision: 4
                author: test
                msg: copy a file
                 A /c (from /b@3)
                $ ls a a-hg-wc
                a:
                b
                c
                d1
                a-hg-wc:
                b
                c
                d1
                $ hg --cwd a rm b
              Remove
                $ hg --cwd a ci -d '4 0' -m 'remove a file'
                $ hg --cwd a tip -q
                4:87bbe3013fb6
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 remove a file
                $ svnupanddisplay a-hg-wc 1
 1 test d1
 1 test d1/d2 (glob)
 1 test d1/d2/b (glob)
 4 test c
 5 test .
                revision: 5
                author: test
                msg: remove a file
                 D /b
                $ ls a a-hg-wc
                a:
                c
                d1
                a-hg-wc:
                c
                d1
              Executable
              #if execbit
                $ chmod +x a/c
              #else
                $ echo fake >> a/c
              #endif
                $ hg --cwd a ci -d '5 0' -m 'make a file executable'
              #if execbit
                $ hg --cwd a tip -q
 :ff42e473c340
              #else
                $ hg --cwd a tip -q
 :817a700c8cf1
              #endif
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 make a file executable
                $ svnupanddisplay a-hg-wc 1
 1 test d1
 1 test d1/d2 (glob)
 1 test d1/d2/b (glob)
 6 test .
 6 test c
                revision: 6
                author: test
                msg: make a file executable
                 M /c
              #if execbit
                $ test -x a-hg-wc/c
              #endif
              #if symlink
              Symlinks
                $ ln -s a/missing a/link
                $ hg --cwd a commit -Am 'add symlink'
                adding link
                $ hg --cwd a mv link newlink
                $ hg --cwd a commit -m 'move symlink'
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 add symlink
 move symlink
                $ svnupanddisplay a-hg-wc 1
 1 test d1
 1 test d1/d2
 1 test d1/d2/b
 6 test c
 8 test .
 8 test newlink
                revision: 8
                author: test
                msg: move symlink
                 D /link
                 A /newlink (from /link@7)
              #endif
+             Convert with --full adds and removes files that didn't change
+               $ touch a/f
+               $ hg -R a ci -Aqmf
+               $ echo "rename c d" > filemap
+               $ hg convert -d svn a --filemap filemap --full
+               assuming destination a-hg
+               initializing svn working copy 'a-hg-wc'
+               scanning source...
+               sorting...
+               converting...
+f
+               $ svnupanddisplay a-hg-wc 1
+9 test .
+9 test d
+9 test f
+               revision: 9
+               author: test
+               msg: f
+                D /c
+                A /d
+                D /d1
+                A /f
+                D /newlink
                $ rm -rf a a-hg a-hg-wc
              Executable in new directory
                $ hg init a
                $ mkdir a/d1
                $ echo a > a/d1/a
-             #if execbit
                $ chmod +x a/d1/a
-             #else
                $ echo fake >> a/d1/a
-             #endif
-               $ hg --cwd a ci -d '0 0' -A -m 'add executable file in new directory'
                adding d1/a
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn repository 'a-hg'
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 add executable file in new directory
                $ svnupanddisplay a-hg-wc 1
 1 test .
 1 test d1
 1 test d1/a (glob)
                revision: 1
                author: test
                msg: add executable file in new directory
                 A /d1
                 A /d1/a
-             #if execbit
                $ test -x a-hg-wc/d1/a
-             #endif
              Copy to new directory
                $ mkdir a/d2
                $ hg --cwd a cp d1/a d2/a
-               $ hg --cwd a ci -d '1 0' -A -m 'copy file to new directory'
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 copy file to new directory
                $ svnupanddisplay a-hg-wc 1
 1 test d1
 1 test d1/a (glob)
 2 test .
 2 test d2
 2 test d2/a (glob)
                revision: 2
                author: test
                msg: copy file to new directory
                 A /d2
                 A /d2/a (from /d1/a@1)
              Branchy history
                $ hg init b
                $ echo base > b/b
-               $ hg --cwd b ci -d '0 0' -Ambase
                adding b
                $ "$TESTDIR/svn-safe-append.py" left-1 b/b
                $ echo left-1 > b/left-1
-               $ hg --cwd b ci -d '1 0' -Amleft-1
                adding left-1
                $ "$TESTDIR/svn-safe-append.py" left-2 b/b
                $ echo left-2 > b/left-2
-               $ hg --cwd b ci -d '2 0' -Amleft-2
                adding left-2
                $ hg --cwd b up 0
 files updated, 0 files merged, 2 files removed, 0 files unresolved
                $ "$TESTDIR/svn-safe-append.py" right-1 b/b
                $ echo right-1 > b/right-1
-               $ hg --cwd b ci -d '3 0' -Amright-1
                adding right-1
                created new head
                $ "$TESTDIR/svn-safe-append.py" right-2 b/b
                $ echo right-2 > b/right-2
-               $ hg --cwd b ci -d '4 0' -Amright-2
                adding right-2
                $ hg --cwd b up -C 2
 files updated, 0 files merged, 2 files removed, 0 files unresolved
                $ hg --cwd b merge
                merging b
                warning: conflicts during merge.
-               merging b incomplete! (edit conflicts, then use 'hg resolve --mark')
 files updated, 0 files merged, 0 files removed, 1 files unresolved
-               use 'hg resolve' to retry unresolved file merges or 'hg update -C .' to abandon
                [1]
                $ hg --cwd b revert -r 2 b
                $ hg --cwd b resolve -m b
                (no more unresolved files)
-               $ hg --cwd b ci -d '5 0' -m 'merge'
              Expect 4 changes
                $ hg convert -d svn b
                assuming destination b-hg
                initializing svn repository 'b-hg'
                initializing svn working copy 'b-hg-wc'
                scanning source...
                sorting...
                converting...
 base
 left-1
 left-2
 right-1
 right-2
 merge
                $ svnupanddisplay b-hg-wc 0
 2 test left-1
 3 test b
 3 test left-2
 4 test .
 4 test right-1
 4 test right-2
                revision: 4
                author: test
                msg: merge
                 A /right-1
                 A /right-2
                revision: 3
                author: test
                msg: left-2
                 M /b
                 A /left-2
                revision: 2
                author: test
                msg: left-1
                 M /b
                 A /left-1
                revision: 1
                author: test
                msg: base
                 A /b
              Tags are not supported, but must not break conversion
                $ rm -rf a a-hg a-hg-wc
                $ hg init a
                $ echo a > a/a
-               $ hg --cwd a ci -d '0 0' -A -m 'Add file a'
                adding a
-               $ hg --cwd a tag -d '1 0' -m 'Tagged as v1.0' v1.0
                $ hg convert -d svn a
                assuming destination a-hg
                initializing svn repository 'a-hg'
                initializing svn working copy 'a-hg-wc'
                scanning source...
                sorting...
                converting...
 Add file a
 Tagged as v1.0
                writing Subversion tags is not yet implemented
                $ svnupanddisplay a-hg-wc 2
 1 test a
 2 test .
 2 test .hgtags
                revision: 2
                author: test
                msg: Tagged as v1.0
                 A /.hgtags
                revision: 1
                author: test
                msg: Add file a
                 A /a
                $ rm -rf a a-hg a-hg-wc

tests/test-convert-svn-source.t

0 +21 0

              #require svn svn-bindings
                $ cat >> $HGRCPATH <<EOF
                > [extensions]
                > convert =
                > [convert]
                > svn.trunk = mytrunk
                > EOF
                $ svnadmin create svn-repo
                $ SVNREPOPATH=`pwd`/svn-repo
              #if windows
                $ SVNREPOURL=file:///`python -c "import urllib, sys; sys.stdout.write(urllib.quote(sys.argv[1]))" "$SVNREPOPATH"`
              #else
                $ SVNREPOURL=file://`python -c "import urllib, sys; sys.stdout.write(urllib.quote(sys.argv[1]))" "$SVNREPOPATH"`
              #endif
                $ INVALIDREVISIONID=svn:x2147622-4a9f-4db4-a8d3-13562ff547b2/proj%20B/mytrunk@1
                $ VALIDREVISIONID=svn:a2147622-4a9f-4db4-a8d3-13562ff547b2/proj%20B/mytrunk/mytrunk@1
              Now test that it works with trunk/tags layout, but no branches yet.
              Initial svn import
                $ mkdir projB
                $ cd projB
                $ mkdir mytrunk
                $ mkdir tags
                $ cd ..
                $ svn import -m "init projB" projB "$SVNREPOURL/proj%20B" | sort
                Adding         projB/mytrunk (glob)
                Adding         projB/tags (glob)
                Committed revision 1.
              Update svn repository
                $ svn co "$SVNREPOURL/proj%20B/mytrunk" B
                Checked out revision 1.
                $ cd B
                $ echo hello > 'letter .txt'
                $ svn add 'letter .txt'
                A         letter .txt
                $ svn ci -m hello
                Adding         letter .txt
                Transmitting file data .
                Committed revision 2.
                $ "$TESTDIR/svn-safe-append.py" world 'letter .txt'
                $ svn ci -m world
                Sending        letter .txt
                Transmitting file data .
                Committed revision 3.
                $ svn copy -m "tag v0.1" "$SVNREPOURL/proj%20B/mytrunk" "$SVNREPOURL/proj%20B/tags/v0.1"
                Committed revision 4.
                $ "$TESTDIR/svn-safe-append.py" 'nice day today!' 'letter .txt'
                $ svn ci -m "nice day"
                Sending        letter .txt
                Transmitting file data .
                Committed revision 5.
                $ cd ..
              Convert to hg once and also test localtimezone option
              NOTE: This doesn't check all time zones -- it merely determines that
              the configuration option is taking effect.
              An arbitrary (U.S.) time zone is used here.  TZ=US/Hawaii is selected
              since it does not use DST (unlike other U.S. time zones) and is always
              a fixed difference from UTC.
                $ TZ=US/Hawaii hg convert --config convert.localtimezone=True "$SVNREPOURL/proj%20B" B-hg
                initializing destination B-hg repository
                scanning source...
                sorting...
                converting...
 init projB
 hello
 world
 nice day
                updating tags
              Update svn repository again
                $ cd B
                $ "$TESTDIR/svn-safe-append.py" "see second letter" 'letter .txt'
                $ echo "nice to meet you" > letter2.txt
                $ svn add letter2.txt
                A         letter2.txt
                $ svn ci -m "second letter"
                Sending        letter .txt
                Adding         letter2.txt
                Transmitting file data ..
                Committed revision 6.
                $ svn copy -m "tag v0.2" "$SVNREPOURL/proj%20B/mytrunk" "$SVNREPOURL/proj%20B/tags/v0.2"
                Committed revision 7.
                $ "$TESTDIR/svn-safe-append.py" "blah-blah-blah" letter2.txt
                $ svn ci -m "work in progress"
                Sending        letter2.txt
                Transmitting file data .
                Committed revision 8.
                $ cd ..
                $ hg convert -s svn "$SVNREPOURL/proj%20B/non-existent-path" dest
                initializing destination dest repository
                abort: no revision found in module /proj B/non-existent-path
                [255]
              ########################################
              Test incremental conversion
                $ TZ=US/Hawaii hg convert --config convert.localtimezone=True "$SVNREPOURL/proj%20B" B-hg
                scanning source...
                sorting...
                converting...
 second letter
 work in progress
                updating tags
                $ cd B-hg
                $ hg log -G --template '{rev} {desc|firstline} date: {date|date} files: {files}\n'
                o  7 update tags date: * +0000 files: .hgtags (glob)
                |
                o  6 work in progress date: * -1000 files: letter2.txt (glob)
                |
                o  5 second letter date: * -1000 files: letter .txt letter2.txt (glob)
                |
                o  4 update tags date: * +0000 files: .hgtags (glob)
                |
                o  3 nice day date: * -1000 files: letter .txt (glob)
                |
                o  2 world date: * -1000 files: letter .txt (glob)
                |
                o  1 hello date: * -1000 files: letter .txt (glob)
                |
                o  0 init projB date: * -1000 files: (glob)
                $ hg tags -q
                tip
                v0.2
                v0.1
                $ cd ..
              Test filemap
                $ echo 'include letter2.txt' > filemap
                $ hg convert --filemap filemap "$SVNREPOURL/proj%20B/mytrunk" fmap
                initializing destination fmap repository
                scanning source...
                sorting...
                converting...
 init projB
 hello
 world
 nice day
 second letter
 work in progress
                $ hg -R fmap branch -q
                default
                $ hg log -G -R fmap --template '{rev} {desc|firstline} files: {files}\n'
                o  1 work in progress files: letter2.txt
                |
                o  0 second letter files: letter2.txt
+             Convert with --full adds and removes files that didn't change
+               $ cd B
+               $ echo >> "letter .txt"
+               $ svn ci -m 'nothing'
+               Sending        letter .txt
+               Transmitting file data .
+               Committed revision 9.
+               $ cd ..
+               $ echo 'rename letter2.txt letter3.txt' > filemap
+               $ hg convert --filemap filemap --full "$SVNREPOURL/proj%20B/mytrunk" fmap
+               scanning source...
+               sorting...
+               converting...
+nothing
+               $ hg -R fmap st --change tip
+               A letter .txt
+               A letter3.txt
+               R letter2.txt
              test invalid splicemap1
                $ cat > splicemap <<EOF
-               > $INVALIDREVISIONID $VALIDREVISIONID
                > EOF
-               $ hg convert --splicemap splicemap "$SVNREPOURL/proj%20B/mytrunk" smap
-               initializing destination smap repository
-               abort: splicemap entry svn:x2147622-4a9f-4db4-a8d3-13562ff547b2/proj%20B/mytrunk@1 is not a valid revision identifier
                [255]
-             Test stop revision
-               $ hg convert --rev 1 "$SVNREPOURL/proj%20B/mytrunk" stoprev
-               initializing destination stoprev repository
-               scanning source...
-               sorting...
-               converting...
-init projB
-               $ hg -R stoprev branch -q
                default
-             Check convert_revision extra-records.
-             This is also the only place testing more than one extra field in a revision.
-               $ cd stoprev
-               $ hg tip --debug | grep extra
-               extra:       branch=default
-               extra:       convert_revision=svn:........-....-....-....-............/proj B/mytrunk@1 (re)
-               $ cd ..
-             Test converting empty heads (issue3347).
-             Also tests getting logs directly without debugsvnlog.
-               $ svnadmin create svn-empty
-               $ svnadmin load -q svn-empty < "$TESTDIR/svn/empty.svndump"
-               $ hg --config convert.svn.trunk= --config convert.svn.debugsvnlog=0 convert svn-empty
-               assuming destination svn-empty-hg
-               initializing destination svn-empty-hg repository
-               scanning source...
-               sorting...
-               converting...
-init projA
-adddir
-               $ hg --config convert.svn.trunk= convert "$SVNREPOURL/../svn-empty/trunk"
-               assuming destination trunk-hg
-               initializing destination trunk-hg repository
-               scanning source...
-               sorting...
-               converting...
-init projA
-adddir

tests/test-convert.t

0 +8 0

                $ cat >> $HGRCPATH <<EOF
                > [extensions]
                > convert=
                > [convert]
                > hg.saverev=False
                > EOF
                $ hg help convert
                hg convert [OPTION]... SOURCE [DEST [REVMAP]]
                convert a foreign SCM repository to a Mercurial one.
                    Accepted source formats [identifiers]:
                    - Mercurial [hg]
                    - CVS [cvs]
                    - Darcs [darcs]
                    - git [git]
                    - Subversion [svn]
                    - Monotone [mtn]
                    - GNU Arch [gnuarch]
                    - Bazaar [bzr]
                    - Perforce [p4]
                    Accepted destination formats [identifiers]:
                    - Mercurial [hg]
                    - Subversion [svn] (history on branches is not preserved)
                    If no revision is given, all revisions will be converted. Otherwise,
                    convert will only import up to the named revision (given in a format
                    understood by the source).
                    If no destination directory name is specified, it defaults to the basename
                    of the source with "-hg" appended. If the destination repository doesn't
                    exist, it will be created.
                    By default, all sources except Mercurial will use --branchsort. Mercurial
                    uses --sourcesort to preserve original revision numbers order. Sort modes
                    have the following effects:
                    --branchsort  convert from parent to child revision when possible, which
                                  means branches are usually converted one after the other.
                                  It generates more compact repositories.
                    --datesort    sort revisions by date. Converted repositories have good-
                                  looking changelogs but are often an order of magnitude
                                  larger than the same ones generated by --branchsort.
                    --sourcesort  try to preserve source revisions order, only supported by
                                  Mercurial sources.
                    --closesort   try to move closed revisions as close as possible to parent
                                  branches, only supported by Mercurial sources.
                    If "REVMAP" isn't given, it will be put in a default location
                    ("<dest>/.hg/shamap" by default). The "REVMAP" is a simple text file that
                    maps each source commit ID to the destination ID for that revision, like
                    so:
                      <source ID> <destination ID>
                    If the file doesn't exist, it's automatically created. It's updated on
                    each commit copied, so "hg convert" can be interrupted and can be run
                    repeatedly to copy new commits.
                    The authormap is a simple text file that maps each source commit author to
                    a destination commit author. It is handy for source SCMs that use unix
                    logins to identify authors (e.g.: CVS). One line per author mapping and
                    the line format is:
                      source author = destination author
                    Empty lines and lines starting with a "#" are ignored.
                    The filemap is a file that allows filtering and remapping of files and
                    directories. Each line can contain one of the following directives:
                      include path/to/file-or-dir
                      exclude path/to/file-or-dir
                      rename path/to/source path/to/destination
                    Comment lines start with "#". A specified path matches if it equals the
                    full relative name of a file or one of its parent directories. The
                    "include" or "exclude" directive with the longest matching path applies,
                    so line order does not matter.
                    The "include" directive causes a file, or all files under a directory, to
                    be included in the destination repository. The default if there are no
                    "include" statements is to include everything. If there are any "include"
                    statements, nothing else is included. The "exclude" directive causes files
                    or directories to be omitted. The "rename" directive renames a file or
                    directory if it is converted. To rename from a subdirectory into the root
                    of the repository, use "." as the path to rename to.
+                   "--full" will make sure the converted changesets contain exactly the right
+                   files with the right content. It will make a full conversion of all files,
+                   not just the ones that have changed. Files that already are correct will
+                   not be changed. This can be used to apply filemap changes when converting
+                   incrementally. This is currently only supported for Mercurial and
+                   Subversion.
                    The splicemap is a file that allows insertion of synthetic history,
                    letting you specify the parents of a revision. This is useful if you want
                    to e.g. give a Subversion merge two parents, or graft two disconnected
                    series of history together. Each entry contains a key, followed by a
                    space, followed by one or two comma-separated values:
                      key parent1, parent2
                    The key is the revision ID in the source revision control system whose
                    parents should be modified (same format as a key in .hg/shamap). The
                    values are the revision IDs (in either the source or destination revision
                    control system) that should be used as the new parents for that node. For
                    example, if you have merged "release-1.0" into "trunk", then you should
                    specify the revision on "trunk" as the first parent and the one on the
                    "release-1.0" branch as the second.
                    The branchmap is a file that allows you to rename a branch when it is
                    being brought in from whatever external repository. When used in
                    conjunction with a splicemap, it allows for a powerful combination to help
                    fix even the most badly mismanaged repositories and turn them into nicely
                    structured Mercurial repositories. The branchmap contains lines of the
                    form:
                      original_branch_name new_branch_name
                    where "original_branch_name" is the name of the branch in the source
                    repository, and "new_branch_name" is the name of the branch in the
                    destination repository. No whitespace is allowed in the branch names. This
                    can be used to (for instance) move code in one repository from "default"
                    to a named branch.
                    Mercurial Source
                    ################
                    The Mercurial source recognizes the following configuration options, which
                    you can set on the command line with "--config":
                    convert.hg.ignoreerrors
                                  ignore integrity errors when reading. Use it to fix
                                  Mercurial repositories with missing revlogs, by converting
                                  from and to Mercurial. Default is False.
                    convert.hg.saverev
                                  store original revision ID in changeset (forces target IDs
                                  to change). It takes a boolean argument and defaults to
                                  False.
                    convert.hg.revs
                                  revset specifying the source revisions to convert.
                    CVS Source
                    ##########
                    CVS source will use a sandbox (i.e. a checked-out copy) from CVS to
                    indicate the starting point of what will be converted. Direct access to
                    the repository files is not needed, unless of course the repository is
                    ":local:". The conversion uses the top level directory in the sandbox to
                    find the CVS repository, and then uses CVS rlog commands to find files to
                    convert. This means that unless a filemap is given, all files under the
                    starting directory will be converted, and that any directory
                    reorganization in the CVS sandbox is ignored.
                    The following options can be used with "--config":
                    convert.cvsps.cache
                                  Set to False to disable remote log caching, for testing and
                                  debugging purposes. Default is True.
                    convert.cvsps.fuzz
                                  Specify the maximum time (in seconds) that is allowed
                                  between commits with identical user and log message in a
                                  single changeset. When very large files were checked in as
                                  part of a changeset then the default may not be long enough.
                                  The default is 60.
                    convert.cvsps.mergeto
                                  Specify a regular expression to which commit log messages
                                  are matched. If a match occurs, then the conversion process
                                  will insert a dummy revision merging the branch on which
                                  this log message occurs to the branch indicated in the
                                  regex. Default is "{{mergetobranch ([-\w]+)}}"
                    convert.cvsps.mergefrom
                                  Specify a regular expression to which commit log messages
                                  are matched. If a match occurs, then the conversion process
                                  will add the most recent revision on the branch indicated in
                                  the regex as the second parent of the changeset. Default is
                                  "{{mergefrombranch ([-\w]+)}}"
                    convert.localtimezone
                                  use local time (as determined by the TZ environment
                                  variable) for changeset date/times. The default is False
                                  (use UTC).
                    hooks.cvslog  Specify a Python function to be called at the end of
                                  gathering the CVS log. The function is passed a list with
                                  the log entries, and can modify the entries in-place, or add
                                  or delete them.
                    hooks.cvschangesets
                                  Specify a Python function to be called after the changesets
                                  are calculated from the CVS log. The function is passed a
                                  list with the changeset entries, and can modify the
                                  changesets in-place, or add or delete them.
                    An additional "debugcvsps" Mercurial command allows the builtin changeset
                    merging code to be run without doing a conversion. Its parameters and
                    output are similar to that of cvsps 2.1. Please see the command help for
                    more details.
                    Subversion Source
                    #################
                    Subversion source detects classical trunk/branches/tags layouts. By
                    default, the supplied "svn://repo/path/" source URL is converted as a
                    single branch. If "svn://repo/path/trunk" exists it replaces the default
                    branch. If "svn://repo/path/branches" exists, its subdirectories are
                    listed as possible branches. If "svn://repo/path/tags" exists, it is
                    searched for tags referencing converted branches. Default "trunk",
                    "branches" and "tags" values can be overridden with the following options. Set
                    them to paths relative to the source URL, or leave them blank to disable
                    auto detection.
                    The following options can be set with "--config":
                    convert.svn.branches
                                  specify the directory containing branches. The default is
                                  "branches".
                    convert.svn.tags
                                  specify the directory containing tags. The default is
                                  "tags".
                    convert.svn.trunk
                                  specify the name of the trunk branch. The default is
                                  "trunk".
                    convert.localtimezone
                                  use local time (as determined by the TZ environment
                                  variable) for changeset date/times. The default is False
                                  (use UTC).
                    Source history can be retrieved starting at a specific revision, instead
                    of being integrally converted. Only single branch conversions are
                    supported.
                    convert.svn.startrev
                                  specify start Subversion revision number. The default is 0.
                    Perforce Source
                    ###############
                    The Perforce (P4) importer can be given a p4 depot path or a client
                    specification as source. It will convert all files in the source to a flat
                    Mercurial repository, ignoring labels, branches and integrations. Note
                    that when a depot path is given you then usually should specify a target
                    directory, because otherwise the target may be named "...-hg".
                    It is possible to limit the amount of source history to be converted by
                    specifying an initial Perforce revision:
                    convert.p4.startrev
                                  specify initial Perforce revision (a Perforce changelist
                                  number).
                    Mercurial Destination
                    #####################
                    The following options are supported:
                    convert.hg.clonebranches
                                  dispatch source branches in separate clones. The default is
                                  False.
                    convert.hg.tagsbranch
                                  branch name for tag revisions, defaults to "default".
                    convert.hg.usebranchnames
                                  preserve branch names. The default is True.
                options:
                 -s --source-type TYPE source repository type
                 -d --dest-type TYPE   destination repository type
                 -r --rev REV          import up to source revision REV
                 -A --authormap FILE   remap usernames using this file
                    --filemap FILE     remap file names using contents of file
+                   --full             apply filemap changes by converting all files again
                    --splicemap FILE   splice synthesized history into place
                    --branchmap FILE   change branch names while converting
                    --branchsort       try to sort changesets by branches
                    --datesort         try to sort changesets by date
                    --sourcesort       preserve source changesets order
                    --closesort        try to reorder closed revisions
                (some details hidden, use --verbose to show complete help)
                $ hg init a
                $ cd a
                $ echo a > a
                $ hg ci -d'0 0' -Ama
                adding a
                $ hg cp a b
                $ hg ci -d'1 0' -mb
                $ hg rm a
                $ hg ci -d'2 0' -mc
                $ hg mv b a
                $ hg ci -d'3 0' -md
                $ echo a >> a
                $ hg ci -d'4 0' -me
                $ cd ..
                $ hg convert a 2>&1 | grep -v 'subversion python bindings could not be loaded'
                assuming destination a-hg
                initializing destination a-hg repository
                scanning source...
                sorting...
                converting...
                4 a
                3 b
                2 c
                1 d
                0 e
                $ hg --cwd a-hg pull ../a
                pulling from ../a
                searching for changes
                no changes found
              conversion to existing file should fail
                $ touch bogusfile
                $ hg convert a bogusfile
                initializing destination bogusfile repository
                abort: cannot create new bundle repository
                [255]
              #if unix-permissions no-root
              conversion to dir without permissions should fail
                $ mkdir bogusdir
                $ chmod 000 bogusdir
                $ hg convert a bogusdir
                abort: Permission denied: 'bogusdir'
                [255]
              user permissions should succeed
                $ chmod 700 bogusdir
                $ hg convert a bogusdir
                initializing destination bogusdir repository
                scanning source...
                sorting...
                converting...
                4 a
                3 b
                2 c
                1 d
                0 e
              #endif
              test pre and post conversion actions
                $ echo 'include b' > filemap
                $ hg convert --debug --filemap filemap a partialb | \
                >     grep 'run hg'
                run hg source pre-conversion action
                run hg sink pre-conversion action
                run hg sink post-conversion action
                run hg source post-conversion action
              converting empty dir should fail "nicely"
                $ mkdir emptydir
              override $PATH to ensure p4 not visible; use $PYTHON in case we're
              running from a devel copy, not a temp installation
                $ PATH="$BINDIR" $PYTHON "$BINDIR"/hg convert emptydir
                assuming destination emptydir-hg
                initializing destination emptydir-hg repository
                emptydir does not look like a CVS checkout
                emptydir does not look like a Git repository
                emptydir does not look like a Subversion repository
                emptydir is not a local Mercurial repository
                emptydir does not look like a darcs repository
                emptydir does not look like a monotone repository
                emptydir does not look like a GNU Arch repository
                emptydir does not look like a Bazaar repository
                cannot find required "p4" tool
                abort: emptydir: missing or unsupported repository
                [255]
              convert with imaginary source type
                $ hg convert --source-type foo a a-foo
                initializing destination a-foo repository
                abort: foo: invalid source repository type
                [255]
              convert with imaginary sink type
                $ hg convert --dest-type foo a a-foo
                abort: foo: invalid destination repository type
                [255]
              testing: convert must not produce duplicate entries in fncache
                $ hg convert a b
                initializing destination b repository
                scanning source...
                sorting...
                converting...
                4 a
                3 b
                2 c
                1 d
                0 e
              contents of fncache file:
                $ cat b/.hg/store/fncache | sort
                data/a.i
                data/b.i
              test bogus URL
                $ hg convert -q bzr+ssh://foobar@selenic.com/baz baz
                abort: bzr+ssh://foobar@selenic.com/baz: missing or unsupported repository
                [255]
              test revset converted() lookup
                $ hg --config convert.hg.saverev=True convert a c
                initializing destination c repository
                scanning source...
                sorting...
                converting...
                4 a
                3 b
                2 c
                1 d
                0 e
                $ echo f > c/f
                $ hg -R c ci -d'0 0' -Amf
                adding f
                created new head
                $ hg -R c log -r "converted(09d945a62ce6)"
                changeset:   1:98c3dd46a874
                user:        test
                date:        Thu Jan 01 00:00:01 1970 +0000
                summary:     b
                $ hg -R c log -r "converted()"
                changeset:   0:31ed57b2037c
                user:        test
                date:        Thu Jan 01 00:00:00 1970 +0000
                summary:     a
                changeset:   1:98c3dd46a874
                user:        test
                date:        Thu Jan 01 00:00:01 1970 +0000
                summary:     b
                changeset:   2:3b9ca06ef716
                user:        test
                date:        Thu Jan 01 00:00:02 1970 +0000
                summary:     c
                changeset:   3:4e0debd37cf2
                user:        test
                date:        Thu Jan 01 00:00:03 1970 +0000
                summary:     d
                changeset:   4:9de3bc9349c5
                user:        test
                date:        Thu Jan 01 00:00:04 1970 +0000
                summary:     e

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages