upstream/mercurial-mirror Files · hgext/convert/bzr.py

convert: add config option to control storing original revision...

convert: add config option to control storing original revision

common.commit.__init__ sets saverev=True by default. The side effect of this is that the hg sink will always set the "convert_revision" extras key to the commit being converted. This patch adds a config option to disable this behavior.

While most consumers will want "convert_revision" to be a) written b) with the exact Git commit that was converted, some have use cases that prefer otherwise.

In my case, I am performing significant rewrites of a Git repository *before* it is fed into `hg convert`. I have to do this because `hg convert` does not easily support the kind of transform I desire, even with extensions. (For the curious, I am "linearizing" the history of a GitHub repo by removing merge commits which add little value to the final history. It isn't easy to do this during `hg convert` because of Mercurial's file copy/rename metadata requirements.)

In my scenario, my pre-convert transform stores a "convert_revision" key in the Git commit object containing the original Git commit ID. I want this original Git commit ID carried forward to Mercurial. By disabling the setting of this extra during `hg convert` and copying the value from the Git commit object, I can have the final "convert_revision" extra key contain the original Git commit ID.

An added test verifies this exact scenario.

This feature could likely be implemented for other VCS sources. But until someone needs the feature, I'm inclined to hold off implementing.

Kevin Bullock - - Load All Authors

File last commit:

r29612:a234b32b stable


                r30661:ced0d686

default

Download file

             bzr.py
        
                    297 lines
            
             | 11.3 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / hgext / convert / bzr.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # bzr.py - bzr support for the convert extension

      #

      #  Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      # This module is for handling 'bzr', that was formerly known as Bazaar-NG;

      # it cannot access 'bar' repositories, but they were never used very much

      from __future__ import absolute_import

      import os

      from mercurial.i18n import _

      from mercurial import (

          demandimport,

          error

      )

      from . import common

      # These modules do not work when loaded lazily through demandimport,
      # so their names are appended to demandimport's ignore list before the
      # bzrlib imports below are attempted.

      demandimport.ignore.extend([

              'bzrlib.transactions',

              'bzrlib.urlutils',

              'ElementPath',

          ])

      # Optional dependency: when bzrlib cannot be imported the aliases below
      # stay unbound, which bzr_source.__init__ detects via NameError.
      try:
          # bazaar imports
          import bzrlib.bzrdir
          import bzrlib.errors
          import bzrlib.revision
          # RevisionSpec is used as an attribute of this module
          # (revisionspec.RevisionSpec.from_string), so only the module itself
          # can appear in an import statement.
          import bzrlib.revisionspec
          bzrdir = bzrlib.bzrdir
          errors = bzrlib.errors
          revision = bzrlib.revision
          revisionspec = bzrlib.revisionspec
          # Touch the attribute so that a lazily imported module is really
          # loaded while the ImportError guard is still active.
          revisionspec.RevisionSpec
      except ImportError:
          pass

      # tree entry kinds that can be converted; anything else is reported as
      # a missing file by getfile()
      supportedkinds = ('file', 'symlink')

      class bzr_source(common.converter_source):

          """Reads Bazaar repositories by using the Bazaar Python libraries"""

          def __init__(self, ui, path, revs=None):
              """Open the Bazaar repository at ``path`` as a conversion source.

              Raises common.NoRepo when ``path`` has no '.bzr' entry, when the
              bzrlib modules could not be loaded, or when bzrlib reports that
              no repository is present at the location.
              """
              super(bzr_source, self).__init__(ui, path, revs=revs)

              marker = os.path.join(path, '.bzr')
              if not os.path.exists(marker):
                  message = _('%s does not look like a Bazaar repository')
                  raise common.NoRepo(message % path)

              try:
                  # the name is unbound when the module-level import failed
                  bzrdir
              except NameError:
                  raise common.NoRepo(_('Bazaar modules could not be loaded'))

              abspath = os.path.abspath(path)
              self._checkrepotype(abspath)
              try:
                  controldir = bzrdir.BzrDir.open(abspath)
                  self.sourcerepo = controldir.open_repository()
              except errors.NoRepositoryPresent:
                  message = _('%s does not look like a Bazaar repository')
                  raise common.NoRepo(message % abspath)
              # revision id -> parent ids, filled by getcommit()
              self._parentids = {}

          def _checkrepotype(self, path):
              """Warn when ``path`` looks like a lightweight checkout.

              The probe is informational only: any exception raised while
              inspecting the location is reported through ui.note() and never
              terminates the conversion.
              """
              try:
                  bdir = bzrdir.BzrDir.open_containing(path)[0]
                  try:
                      wtree = bdir.open_workingtree(recommend_upgrade=False)
                      branch = wtree.branch
                  except (errors.NoWorkingTree, errors.NotLocalUrl):
                      wtree = None
                      branch = bdir.open_branch()
                  if wtree is None:
                      return
                  # a working tree whose control directory lives somewhere
                  # else than its branch is treated as a lightweight checkout
                  treebase = wtree.bzrdir.root_transport.base
                  branchbase = branch.bzrdir.root_transport.base
                  if treebase != branchbase:
                      self.ui.warn(_('warning: lightweight checkouts may cause '
                                     'conversion failures, try with a regular '
                                     'branch instead.\n'))
              except Exception:
                  # detection is probably fragile at API level, stay quiet
                  self.ui.note(_('bzr source type could not be determined\n'))

          def before(self):
              """Take a read lock on the source repository before converting.

              The lock is held for the whole conversion and released by
              after(). Read locks don't block other reads or writes to the
              repository, so this shouldn't have any impact on the usage of
              the source repository.

              Locking around every single operation that needs it (getting a
              file and getting the parent map) and releasing immediately
              after would be the alternative, but that approach can take
              even 40% longer.
              """
              repo = self.sourcerepo
              repo.lock_read()

          def after(self):
              """Release the repository lock taken by before()."""
              repo = self.sourcerepo
              repo.unlock()

          def _bzrbranches(self):
              """Return the branches found in the source repository."""
              repo = self.sourcerepo
              # using=True avoids nested repositories (see issue3254)
              return repo.find_branches(using=True)

          def getheads(self):
              """Return the list of head revision ids to convert.

              Without requested revisions, the last revision of every branch
              returned by _bzrbranches() is used. Otherwise self.revs[0] is
              resolved against each branch and the revision id found in the
              last branch that knows it becomes the only head.

              Raises error.Abort when no branch can resolve self.revs[0].
              """
              if not self.revs:
                  # _bzrbranches() sets using=True to avoid nested
                  # repositories (see issue3254)
                  heads = sorted(b.last_revision() for b in self._bzrbranches())
              else:
                  spec = self.revs[0]
                  revid = None
                  for branch in self._bzrbranches():
                      try:
                          r = revisionspec.RevisionSpec.from_string(spec)
                          info = r.in_history(branch)
                      except errors.BzrError:
                          # Not resolvable in this branch: try the next one.
                          # Falling through would read an unbound (first
                          # branch) or stale (later branches) ``info``.
                          continue
                      revid = info.rev_id
                  if revid is None:
                      raise error.Abort(_('%s is not a valid revision')
                                        % spec)
                  heads = [revid]
              # Empty repositories return 'null:', which cannot be retrieved
              heads = [h for h in heads if h != 'null:']
              return heads

          def getfile(self, name, rev):
              """Return (data, mode) for ``name`` as of revision ``rev``.

              (None, None) is returned when the path has no file id in the
              revision tree or its kind is not listed in supportedkinds. For
              a symlink the data is the link target. The mode is looked up
              in self._modecache under the key (name, rev).

              Raises error.Abort for a symlink without a target.
              """
              revtree = self.sourcerepo.revision_tree(rev)
              fileid = revtree.path2id(name.decode(self.encoding or 'utf-8'))
              if fileid is None:
                  kind = None
              else:
                  kind = revtree.kind(fileid)
              if kind not in supportedkinds:
                  # the file is not available anymore - was deleted
                  return None, None
              mode = self._modecache[(name, rev)]
              if kind != 'symlink':
                  return revtree.get_file(fileid).read(), mode
              target = revtree.get_symlink_target(fileid)
              if target is None:
                  raise error.Abort(_('%s.%s symlink has no target')
                                    % (name, rev))
              return target, mode

          def getchanges(self, version, full):
              """Return the changes introduced by revision ``version``.

              The result is a 3-tuple: the list of changed (path, revid)
              pairs, the rename mapping (destination -> source), both as
              computed by _gettreechanges(), and an empty set.

              The parent ids cached by getcommit() for ``version`` are
              consumed (popped) by this call, and self._modecache is reset.

              Raises error.Abort when ``full`` is requested.
              """
              if full:
                  raise error.Abort(_("convert from bzr does not support --full"))
              self._modecache = {}
              self._revtree = self.sourcerepo.revision_tree(version)
              # get the parentids from the cache
              parentids = self._parentids.pop(version)
              # only diff against first parent id; when every parent was
              # filtered out as a ghost, diff against the empty revision just
              # like getcommit() arranges for parentless revisions
              if parentids:
                  baseid = parentids[0]
              else:
                  baseid = revision.NULL_REVISION
              prevtree = self.sourcerepo.revision_tree(baseid)
              files, copies = self._gettreechanges(self._revtree, prevtree)
              return files, copies, set()

          def getcommit(self, version):
              """Build the common.commit object describing ``version``.

              As a side effect the parent ids of the revision are stored in
              self._parentids: the ghost-filtered parents, or a 1-tuple
              holding revision.NULL_REVISION when the revision has none.
              """
              rev = self.sourcerepo.get_revision(version)
              # populate parent id cache
              if rev.parent_ids:
                  parents = self._filterghosts(rev.parent_ids)
                  self._parentids[version] = parents
              else:
                  parents = []
                  self._parentids[version] = (revision.NULL_REVISION,)

              nick = rev.properties.get('branch-nick', u'default')
              branch = self.recode(nick)
              if branch == 'trunk':
                  # the 'trunk' nick is mapped onto the 'default' branch name
                  branch = 'default'
              date = '%d %d' % (rev.timestamp, -rev.timezone)
              author = self.recode(rev.committer)
              desc = self.recode(rev.message)
              return common.commit(parents=parents,
                                   date=date,
                                   author=author,
                                   desc=desc,
                                   branch=branch,
                                   rev=version)

          def gettags(self):
              """Return a dict mapping recoded tag names to revision ids.

              Tags are gathered from every branch returned by
              _bzrbranches(). A branch without tag support is skipped, so it
              no longer throws away the tags already collected from other
              branches.
              """
              bytetags = {}
              for branch in self._bzrbranches():
                  if not branch.supports_tags():
                      continue
                  tagdict = branch.tags.get_tag_dict()
                  for name, rev in tagdict.iteritems():
                      bytetags[self.recode(name)] = rev
              return bytetags

          def getchangedfiles(self, rev, i):
              """Return the paths changed between ``rev`` and one parent.

              ``i`` indexes the parent ids cached for ``rev``; None selects
              the empty revision. self._modecache is reset by this call.
              """
              self._modecache = {}
              curtree = self.sourcerepo.revision_tree(rev)
              if i is None:
                  # no parent id, get the empty revision
                  parentid = revision.NULL_REVISION
              else:
                  parentid = self._parentids[rev][i]

              prevtree = self.sourcerepo.revision_tree(parentid)
              touched = self._gettreechanges(curtree, prevtree)[0]
              return [entry[0] for entry in touched]

          def _gettreechanges(self, current, origin):
              """Compute the file changes between two revision trees.

              Returns (changes, renames): ``changes`` is a list of
              (path, revid) pairs, one per touched path, and ``renames`` maps
              a destination path to the path it was moved from. ``revid`` is
              always the revision id of ``current``.

              self._modecache is updated with the flags ('x', 'l' or '') of
              every destination path, keyed by (path, revid).
              """
              revid = current._revision_id
              changes = []
              renames = {}
              seen = set()
              # Process the entries by reverse lexicographic name order to
              # handle nested renames correctly, most specific first.
              curchanges = sorted(current.iter_changes(origin),
                                  key=lambda c: c[1][0] or c[1][1],
                                  reverse=True)
              for (fileid, paths, changed_content, versioned, parent, name,
                   kind, executable) in curchanges:

                  if paths[0] == u'' or paths[1] == u'':
                      # ignore changes to tree root
                      continue

                  # bazaar tracks directories, mercurial does not, so
                  # we have to rename the directory contents
                  if kind[1] == 'directory':
                      if kind[0] not in (None, 'directory'):
                          # Replacing 'something' with a directory, record it
                          # so it can be removed.
                          changes.append((self.recode(paths[0]), revid))

                      if kind[0] == 'directory' and None not in paths:
                          renaming = paths[0] != paths[1]
                          # neither an add nor a delete - a move
                          # rename all directory contents manually
                          subdir = origin.root_inventory.path2id(paths[0])
                          # get all child-entries of the directory
                          children = origin.root_inventory.iter_entries(subdir)
                          for childname, entry in children:
                              # hg does not track directory renames
                              if entry.kind == 'directory':
                                  continue
                              frompath = self.recode(paths[0] + '/' + childname)
                              if frompath in seen:
                                  # Already handled by a more specific change
                                  # entry. This is important when you have:
                                  # a => b
                                  # a/c => a/c
                                  # Here a/c must not be renamed into b/c
                                  continue
                              seen.add(frompath)
                              if not renaming:
                                  continue
                              topath = self.recode(paths[1] + '/' + childname)
                              # register the files as changed
                              changes.append((frompath, revid))
                              changes.append((topath, revid))
                              # add to mode cache; symlinks use the same 'l'
                              # flag as the regular file path further down
                              mode = ((entry.executable and 'x')
                                      or (entry.kind == 'symlink' and 'l')
                                      or '')
                              self._modecache[(topath, revid)] = mode
                              # register the change as move
                              renames[topath] = frompath

                      # no further changes, go to the next change
                      continue

                  # we got unicode paths, need to convert them
                  path, topath = paths
                  if path is not None:
                      path = self.recode(path)
                  if topath is not None:
                      topath = self.recode(topath)
                  seen.add(path or topath)

                  if topath is None:
                      # file deleted
                      changes.append((path, revid))
                      continue

                  # renamed
                  if path and path != topath:
                      renames[topath] = path
                      changes.append((path, revid))

                  # populate the mode cache with the state in ``current``
                  kind, executable = [e[1] for e in (kind, executable)]
                  mode = ((executable and 'x') or (kind == 'symlink' and 'l')
                          or '')
                  self._modecache[(topath, revid)] = mode
                  changes.append((topath, revid))

              return changes, renames

          def _filterghosts(self, ids):
              """Return the revision ids from ``ids`` that are not ghosts.

              Ghost revisions are not supported by hg, see
              <http://bazaar-vcs.org/GhostRevision>. The result is a tuple
              that keeps the order of ``ids`` and only contains the ids
              present in the repository's parent map.
              """
              known = self.sourcerepo.get_parent_map(ids)
              return tuple(revid for revid in ids if revid in known)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# bzr.py - bzr support for the convert extension
				#
				# Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				# This module is for handling 'bzr', that was formerly known as Bazaar-NG;
				# it cannot access 'bar' repositories, but they were never used very much
				from __future__ import absolute_import

				import os

				from mercurial.i18n import _
				from mercurial import (
				demandimport,
				error
				)
				from . import common

				# These modules do not work when loaded lazily through demandimport,
				# so their names are appended to demandimport's ignore list before the
				# bzrlib imports below are attempted.
				demandimport.ignore.extend([
				'bzrlib.transactions',
				'bzrlib.urlutils',
				'ElementPath',
				])

				# Optional dependency: when bzrlib cannot be imported the aliases below
				# stay unbound, which bzr_source.__init__ detects via NameError.
				try:
				    # bazaar imports
				    import bzrlib.bzrdir
				    import bzrlib.errors
				    import bzrlib.revision
				    # RevisionSpec is used as an attribute of this module
				    # (revisionspec.RevisionSpec.from_string), so only the module
				    # itself can appear in an import statement.
				    import bzrlib.revisionspec
				    bzrdir = bzrlib.bzrdir
				    errors = bzrlib.errors
				    revision = bzrlib.revision
				    revisionspec = bzrlib.revisionspec
				    # Touch the attribute so that a lazily imported module is really
				    # loaded while the ImportError guard is still active.
				    revisionspec.RevisionSpec
				except ImportError:
				    pass

				# tree entry kinds that can be converted; anything else is reported as
				# a missing file by getfile()
				supportedkinds = ('file', 'symlink')

				class bzr_source(common.converter_source):
				    """Reads Bazaar repositories by using the Bazaar Python libraries"""

				    def __init__(self, ui, path, revs=None):
				        """Open the Bazaar repository at ``path`` as a conversion source.

				        Raises common.NoRepo when ``path`` has no '.bzr' entry, when
				        the bzrlib modules could not be loaded, or when bzrlib reports
				        that no repository is present at the location.
				        """
				        super(bzr_source, self).__init__(ui, path, revs=revs)

				        if not os.path.exists(os.path.join(path, '.bzr')):
				            raise common.NoRepo(_('%s does not look like a Bazaar repository')
				                                % path)

				        try:
				            # the name is unbound when the module-level import failed
				            bzrdir
				        except NameError:
				            raise common.NoRepo(_('Bazaar modules could not be loaded'))

				        path = os.path.abspath(path)
				        self._checkrepotype(path)
				        try:
				            self.sourcerepo = bzrdir.BzrDir.open(path).open_repository()
				        except errors.NoRepositoryPresent:
				            raise common.NoRepo(_('%s does not look like a Bazaar repository')
				                                % path)
				        # revision id -> parent ids, filled by getcommit()
				        self._parentids = {}

				    def _checkrepotype(self, path):
				        """Warn when ``path`` looks like a lightweight checkout.

				        Any exception raised while probing is reported through
				        ui.note() and never terminates the conversion.
				        """
				        # Lightweight checkouts detection is informational but probably
				        # fragile at API level. It should not terminate the conversion.
				        try:
				            bdir = bzrdir.BzrDir.open_containing(path)[0]
				            try:
				                tree = bdir.open_workingtree(recommend_upgrade=False)
				                branch = tree.branch
				            except (errors.NoWorkingTree, errors.NotLocalUrl):
				                tree = None
				                branch = bdir.open_branch()
				            if (tree is not None and tree.bzrdir.root_transport.base !=
				                branch.bzrdir.root_transport.base):
				                self.ui.warn(_('warning: lightweight checkouts may cause '
				                               'conversion failures, try with a regular '
				                               'branch instead.\n'))
				        except Exception:
				            self.ui.note(_('bzr source type could not be determined\n'))

				    def before(self):
				        """Before the conversion begins, acquire a read lock
				        for all the operations that might need it. Fortunately
				        read locks don't block other reads or writes to the
				        repository, so this shouldn't have any impact on the usage of
				        the source repository.

				        The alternative would be locking on every operation that
				        needs locks (there are currently two: getting the file and
				        getting the parent map) and releasing immediately after,
				        but this approach can take even 40% longer."""
				        self.sourcerepo.lock_read()

				    def after(self):
				        """Release the repository lock taken by before()."""
				        self.sourcerepo.unlock()

				    def _bzrbranches(self):
				        """Return the branches found in the source repository."""
				        # using=True avoids nested repositories (see issue3254)
				        return self.sourcerepo.find_branches(using=True)

				    def getheads(self):
				        """Return the list of head revision ids to convert.

				        Without requested revisions, the last revision of every branch
				        is used. Otherwise self.revs[0] is resolved against each branch
				        and the id found in the last branch that knows it becomes the
				        only head. Raises error.Abort when no branch can resolve it.
				        """
				        if not self.revs:
				            heads = sorted(b.last_revision() for b in self._bzrbranches())
				        else:
				            spec = self.revs[0]
				            revid = None
				            for branch in self._bzrbranches():
				                try:
				                    r = revisionspec.RevisionSpec.from_string(spec)
				                    info = r.in_history(branch)
				                except errors.BzrError:
				                    # Not resolvable in this branch: try the next one.
				                    # Falling through would read an unbound or stale
				                    # ``info``.
				                    continue
				                revid = info.rev_id
				            if revid is None:
				                raise error.Abort(_('%s is not a valid revision')
				                                  % spec)
				            heads = [revid]
				        # Empty repositories return 'null:', which cannot be retrieved
				        heads = [h for h in heads if h != 'null:']
				        return heads

				    def getfile(self, name, rev):
				        """Return (data, mode) for ``name`` as of revision ``rev``.

				        (None, None) is returned when the path has no file id in the
				        revision tree or its kind is not listed in supportedkinds. For
				        a symlink the data is the link target. Raises error.Abort for
				        a symlink without a target.
				        """
				        revtree = self.sourcerepo.revision_tree(rev)
				        fileid = revtree.path2id(name.decode(self.encoding or 'utf-8'))
				        kind = None
				        if fileid is not None:
				            kind = revtree.kind(fileid)
				        if kind not in supportedkinds:
				            # the file is not available anymore - was deleted
				            return None, None
				        mode = self._modecache[(name, rev)]
				        if kind == 'symlink':
				            target = revtree.get_symlink_target(fileid)
				            if target is None:
				                raise error.Abort(_('%s.%s symlink has no target')
				                                  % (name, rev))
				            return target, mode
				        else:
				            sio = revtree.get_file(fileid)
				            return sio.read(), mode

				    def getchanges(self, version, full):
				        """Return the changes introduced by revision ``version``.

				        The result is a 3-tuple: the list of changed (path, revid)
				        pairs, the rename mapping (destination -> source) and an empty
				        set. The parent ids cached by getcommit() for ``version`` are
				        consumed by this call. Raises error.Abort when ``full`` is set.
				        """
				        if full:
				            raise error.Abort(_("convert from bzr does not support --full"))
				        self._modecache = {}
				        self._revtree = self.sourcerepo.revision_tree(version)
				        # get the parentids from the cache
				        parentids = self._parentids.pop(version)
				        # only diff against first parent id; when every parent was
				        # filtered out as a ghost, diff against the empty revision
				        if parentids:
				            baseid = parentids[0]
				        else:
				            baseid = revision.NULL_REVISION
				        prevtree = self.sourcerepo.revision_tree(baseid)
				        files, copies = self._gettreechanges(self._revtree, prevtree)
				        return files, copies, set()

				    def getcommit(self, version):
				        """Build the common.commit object describing ``version``.

				        As a side effect the parent ids of the revision are stored in
				        self._parentids: the ghost-filtered parents, or a 1-tuple
				        holding revision.NULL_REVISION when the revision has none.
				        """
				        rev = self.sourcerepo.get_revision(version)
				        # populate parent id cache
				        if not rev.parent_ids:
				            parents = []
				            self._parentids[version] = (revision.NULL_REVISION,)
				        else:
				            parents = self._filterghosts(rev.parent_ids)
				            self._parentids[version] = parents

				        branch = self.recode(rev.properties.get('branch-nick', u'default'))
				        if branch == 'trunk':
				            branch = 'default'
				        return common.commit(parents=parents,
				                             date='%d %d' % (rev.timestamp, -rev.timezone),
				                             author=self.recode(rev.committer),
				                             desc=self.recode(rev.message),
				                             branch=branch,
				                             rev=version)

				    def gettags(self):
				        """Return a dict mapping recoded tag names to revision ids.

				        A branch without tag support is skipped, so it does not throw
				        away the tags already collected from other branches.
				        """
				        bytetags = {}
				        for branch in self._bzrbranches():
				            if not branch.supports_tags():
				                continue
				            tagdict = branch.tags.get_tag_dict()
				            for name, rev in tagdict.iteritems():
				                bytetags[self.recode(name)] = rev
				        return bytetags

				    def getchangedfiles(self, rev, i):
				        """Return the paths changed between ``rev`` and one parent.

				        ``i`` indexes the parent ids cached for ``rev``; None selects
				        the empty revision. self._modecache is reset by this call.
				        """
				        self._modecache = {}
				        curtree = self.sourcerepo.revision_tree(rev)
				        if i is not None:
				            parentid = self._parentids[rev][i]
				        else:
				            # no parent id, get the empty revision
				            parentid = revision.NULL_REVISION

				        prevtree = self.sourcerepo.revision_tree(parentid)
				        changes = [e[0] for e in self._gettreechanges(curtree, prevtree)[0]]
				        return changes

				    def _gettreechanges(self, current, origin):
				        """Compute the file changes between two revision trees.

				        Returns (changes, renames): ``changes`` is a list of
				        (path, revid) pairs, one per touched path, and ``renames`` maps
				        a destination path to the path it was moved from.
				        self._modecache is updated with the flags ('x', 'l' or '') of
				        every destination path, keyed by (path, revid).
				        """
				        revid = current._revision_id
				        changes = []
				        renames = {}
				        seen = set()
				        # Process the entries by reverse lexicographic name order to
				        # handle nested renames correctly, most specific first.
				        curchanges = sorted(current.iter_changes(origin),
				                            key=lambda c: c[1][0] or c[1][1],
				                            reverse=True)
				        for (fileid, paths, changed_content, versioned, parent, name,
				             kind, executable) in curchanges:

				            if paths[0] == u'' or paths[1] == u'':
				                # ignore changes to tree root
				                continue

				            # bazaar tracks directories, mercurial does not, so
				            # we have to rename the directory contents
				            if kind[1] == 'directory':
				                if kind[0] not in (None, 'directory'):
				                    # Replacing 'something' with a directory, record it
				                    # so it can be removed.
				                    changes.append((self.recode(paths[0]), revid))

				                if kind[0] == 'directory' and None not in paths:
				                    renaming = paths[0] != paths[1]
				                    # neither an add nor a delete - a move
				                    # rename all directory contents manually
				                    subdir = origin.root_inventory.path2id(paths[0])
				                    # get all child-entries of the directory
				                    children = origin.root_inventory.iter_entries(subdir)
				                    for childname, entry in children:
				                        # hg does not track directory renames
				                        if entry.kind == 'directory':
				                            continue
				                        frompath = self.recode(paths[0] + '/' + childname)
				                        if frompath in seen:
				                            # Already handled by a more specific change
				                            # entry. This is important when you have:
				                            # a => b
				                            # a/c => a/c
				                            # Here a/c must not be renamed into b/c
				                            continue
				                        seen.add(frompath)
				                        if not renaming:
				                            continue
				                        topath = self.recode(paths[1] + '/' + childname)
				                        # register the files as changed
				                        changes.append((frompath, revid))
				                        changes.append((topath, revid))
				                        # add to mode cache; symlinks use the same 'l'
				                        # flag as the regular file path further down
				                        mode = ((entry.executable and 'x')
				                                or (entry.kind == 'symlink' and 'l')
				                                or '')
				                        self._modecache[(topath, revid)] = mode
				                        # register the change as move
				                        renames[topath] = frompath

				                # no further changes, go to the next change
				                continue

				            # we got unicode paths, need to convert them
				            path, topath = paths
				            if path is not None:
				                path = self.recode(path)
				            if topath is not None:
				                topath = self.recode(topath)
				            seen.add(path or topath)

				            if topath is None:
				                # file deleted
				                changes.append((path, revid))
				                continue

				            # renamed
				            if path and path != topath:
				                renames[topath] = path
				                changes.append((path, revid))

				            # populate the mode cache with the state in ``current``
				            kind, executable = [e[1] for e in (kind, executable)]
				            mode = ((executable and 'x') or (kind == 'symlink' and 'l')
				                    or '')
				            self._modecache[(topath, revid)] = mode
				            changes.append((topath, revid))

				        return changes, renames

				    def _filterghosts(self, ids):
				        """Filters out ghost revisions which hg does not support, see
				        <http://bazaar-vcs.org/GhostRevision>
				        """
				        parentmap = self.sourcerepo.get_parent_map(ids)
				        parents = tuple([parent for parent in ids if parent in parentmap])
				        return parents