upstream/mercurial-mirror Files · hgext/convert/bzr.py

pathcopies: give up any optimization based on `introrev`...

pathcopies: give up any optimization based on `introrev` Between and d98fb3f42f33, we sped up the search for the introduction revision during path copies. However, further checking shows that finding the introduction revision is still expensive and that we are better off without it. So we simply drop it and only rely on the linkrev optimisation. I ran `perfpathcopies` on 6989 pairs of revisions in the pypy repository (`hg perfhelper-pathcopies`). The results are massively in favor of dropping this condition. The results of the copy tracing are unchanged. Attempts to use a smaller change preserving linkrev usage were unsuccessful; it can return wrong results. The following change broke test-mv-cp-st-diff.t - if not f.isintroducedafter(limit): + if limit >= 0 and f.linkrev() < limit: return None Here are various numbers (before this changeset/after this changeset) source destination before after saved-time ratio worst cases 695dfb0f493b 1.062843 1.246369 -0.183526 1.172675 8d60fe293e79 1.036985 1.196414 -0.159429 1.153743 fbb1c9fd86c0 0.879926 1.038682 -0.158756 1.180420 a4878080a536 0.909952 1.063801 -0.153849 1.169074 920958a93997 0.993622 1.147452 -0.153830 1.154817 worse 1% aea8f2fd3593 1.016595 1.082999 -0.066404 1.065320 worse 5% 7d29d5e39734 0.453694 0.471156 -0.017462 1.038488 worse 10% 2aef0e942480 0.035140 0.037535 -0.002395 1.068156 worse 25% 801748ba582a 0.009267 0.009325 -0.000058 1.006259 median e6991321d78b 0.000665 0.000651 0.000014 0.978947 best 25% 385b31354be6 0.040743 0.040363 0.000380 0.990673 best 10% 19c10384d3e7 0.431658 0.411490 0.020168 0.953278 best 5% 813c99f810ac 1.141404 1.075346 0.066058 0.942126 best 1% 99ae11866969 1.833297 0.063823 1.769474 0.034813 best cases 743a0fcaa4eb 1101.811740 2.735970 1099.075770 0.002483 9ba6ab77fd29 1116.753953 2.800729 1113.953224 0.002508 57e249b7a3ea 1246.128485 3.042762 1243.085723 0.002442 0354a250d371 1253.111894 3.085796 1250.026098 0.002463 3ec1002a818c 1261.786294 3.138607 1258.647687 0.002487
As one can see, the average case is not really impacted. However, the worst cases we get after this changeset are much better than the ones we had before it. We have 30 pairs where improvements are above 10 minutes. This is reflected in the combined time for all pairs before: 26256s after: 1300s (-95%) If we remove these pathological 30 cases, we still see a significant improvement: before: 1631s after: 1245s (-24%)

Gregory Szorc - - Load All Authors

File last commit:

r43375:649d3ac3 default


                r43469:c16fe77e

default

Download file

             bzr.py
        
                    331 lines
            
             | 11.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / hgext / convert / bzr.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # bzr.py - bzr support for the convert extension

      #

      #  Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      # This module is for handling 'bzr', that was formerly known as Bazaar-NG;

      # it cannot access 'bar' repositories, but they were never used very much

      from __future__ import absolute_import

      import os

      from mercurial.i18n import _

      from mercurial import (

          demandimport,

          error,

          pycompat,

      )

      from . import common

      # these do not work with demandimport, blacklist
      # (the listed module names are added to demandimport's IGNORES set)

      demandimport.IGNORES.update(

          [b'bzrlib.transactions', b'bzrlib.urlutils', b'ElementPath',]

      )

      # Best-effort import of the Bazaar libraries. When bzrlib is missing the
      # ImportError is swallowed and the aliases below are simply never bound;
      # bzr_source.__init__ probes ``bzrdir`` and reports NoRepo in that case.

      try:

          # bazaar imports

          import bzrlib.bzrdir

          import bzrlib.errors

          import bzrlib.revision

          import bzrlib.revisionspec

          # short module aliases used throughout the rest of this file

          bzrdir = bzrlib.bzrdir

          errors = bzrlib.errors

          revision = bzrlib.revision

          revisionspec = bzrlib.revisionspec

          # NOTE(review): bare attribute access with no visible effect;
          # presumably it forces the demand-loaded module to be imported for
          # real so a broken install raises ImportError here - TODO confirm.

          revisionspec.RevisionSpec

      except ImportError:

          pass

      # Tree entry kinds that getfile() will hand back to the converter; any
      # other kind is reported as a missing file.

      supportedkinds = (b'file', b'symlink')

      class bzr_source(common.converter_source):

          """Reads Bazaar repositories by using the Bazaar Python libraries"""

          def __init__(self, ui, repotype, path, revs=None):
              """Open the Bazaar repository located at ``path``.

              Raises ``common.NoRepo`` when ``path`` has no ``.bzr`` entry,
              when the bzrlib modules could not be imported, or when bzrlib
              reports that no repository is present at ``path``.
              """
              super(bzr_source, self).__init__(ui, repotype, path, revs=revs)

              controldir = os.path.join(path, b'.bzr')
              if not os.path.exists(controldir):
                  raise common.NoRepo(
                      _(b'%s does not look like a Bazaar repository') % path
                  )

              # The module-level bzrlib import is best effort: if it failed,
              # the ``bzrdir`` alias was never bound and naming it raises
              # NameError.
              try:
                  bzrdir
              except NameError:
                  raise common.NoRepo(_(b'Bazaar modules could not be loaded'))

              path = os.path.abspath(path)
              self._checkrepotype(path)
              try:
                  opened = bzrdir.BzrDir.open(path)
                  self.sourcerepo = opened.open_repository()
              except errors.NoRepositoryPresent:
                  raise common.NoRepo(
                      _(b'%s does not look like a Bazaar repository') % path
                  )

              # revision id -> parent ids, filled by getcommit()
              self._parentids = {}
              self._saverev = ui.configbool(b'convert', b'bzr.saverev')

          def _checkrepotype(self, path):
              """Warn if ``path`` appears to be a lightweight checkout.

              The check is purely informational: any exception raised while
              probing is reduced to a ui note so the conversion can go on.
              """
              # Lightweight checkouts detection is informational but probably
              # fragile at API level. It should not terminate the conversion.
              try:
                  control = bzrdir.BzrDir.open_containing(path)[0]
                  try:
                      wtree = control.open_workingtree(recommend_upgrade=False)
                      branch = wtree.branch
                  except (errors.NoWorkingTree, errors.NotLocalUrl):
                      wtree = None
                      branch = control.open_branch()

                  if wtree is None:
                      return

                  # A working tree whose control directory lives somewhere
                  # else than its branch's is treated as a lightweight
                  # checkout.
                  treebase = wtree.bzrdir.root_transport.base
                  branchbase = branch.bzrdir.root_transport.base
                  if treebase != branchbase:
                      self.ui.warn(
                          _(
                              b'warning: lightweight checkouts may cause '
                              b'conversion failures, try with a regular '
                              b'branch instead.\n'
                          )
                      )
              except Exception:
                  self.ui.note(_(b'bzr source type could not be determined\n'))

          def before(self):
              """Take a read lock on the source repository before converting.

              One lock is held for the whole conversion. Read locks do not
              block other readers or writers, so regular use of the source
              repository should be unaffected.

              Locking and unlocking around each operation that needs it
              (currently two: fetching a file and fetching the parent map)
              would also work, but can take up to 40% longer.
              """
              self.sourcerepo.lock_read()

          def after(self):
              """Release the source repository lock taken by ``before()``."""
              self.sourcerepo.unlock()

          def _bzrbranches(self):
              """Return the branches that use the source repository.

              ``using=True`` is passed to avoid nested repositories (see
              issue3254).
              """
              return self.sourcerepo.find_branches(using=True)

          def getheads(self):
              """Return the list of revision ids the conversion starts from.

              With no explicit revision, this is the sorted last revision of
              every branch returned by ``_bzrbranches()``. Otherwise
              ``self.revs[0]`` is parsed as a bzr revision spec and resolved
              against each branch in turn; the id found in the last branch
              able to resolve it is used.

              The ``b'null:'`` id is filtered out of the result.

              Raises ``error.Abort`` if a revision was requested but no branch
              could resolve it.
              """
              if not self.revs:
                  heads = sorted([b.last_revision() for b in self._bzrbranches()])
              else:
                  revid = None
                  for branch in self._bzrbranches():
                      try:
                          r = revisionspec.RevisionSpec.from_string(self.revs[0])
                          info = r.in_history(branch)
                      except errors.BzrError:
                          # This branch cannot resolve the spec. Skip it:
                          # falling through would read ``info`` while it is
                          # still unbound (first branch) or left over from a
                          # previous branch.
                          continue
                      revid = info.rev_id
                  if revid is None:
                      raise error.Abort(
                          _(b'%s is not a valid revision') % self.revs[0]
                      )
                  heads = [revid]
              # Empty repositories return 'null:', which cannot be retrieved
              heads = [h for h in heads if h != b'null:']
              return heads

          def getfile(self, name, rev):
              """Return ``(data, mode)`` for file ``name`` in revision ``rev``.

              ``(None, None)`` is returned when the path is absent from the
              revision tree or is not one of ``supportedkinds``. For symlinks
              ``data`` is the link target. ``mode`` is read from the mode
              cache filled while computing the changes of ``rev``.

              Raises ``error.Abort`` for a symlink that has no target.
              """
              tree = self.sourcerepo.revision_tree(rev)
              fileid = tree.path2id(name.decode(self.encoding or b'utf-8'))
              kind = tree.kind(fileid) if fileid is not None else None
              if kind not in supportedkinds:
                  # the file is not available anymore - was deleted
                  return None, None

              mode = self._modecache[(name, rev)]
              if kind != b'symlink':
                  return tree.get_file(fileid).read(), mode

              target = tree.get_symlink_target(fileid)
              if target is None:
                  raise error.Abort(
                      _(b'%s.%s symlink has no target') % (name, rev)
                  )
              return target, mode

          def getchanges(self, version, full):
              """Return the changes made by revision ``version``.

              The result is a 3-tuple: the list of ``(path, revid)`` pairs
              touched by the revision, a dict mapping renamed destinations to
              their sources, and an (always empty) set.

              Only the first parent is diffed against. The parent ids are
              popped from the cache populated by ``getcommit()``, so that
              method must have been called for ``version`` beforehand.

              Raises ``error.Abort`` when ``full`` is requested, which this
              source does not support.
              """
              if full:
                  # The message used to name cvs, a leftover from another
                  # converter source; this is the bzr source.
                  raise error.Abort(_(b"convert from bzr does not support --full"))
              # reset the per-revision mode cache that getfile() reads from
              self._modecache = {}
              self._revtree = self.sourcerepo.revision_tree(version)
              # get the parentids from the cache
              parentids = self._parentids.pop(version)
              # only diff against first parent id
              prevtree = self.sourcerepo.revision_tree(parentids[0])
              files, copies = self._gettreechanges(self._revtree, prevtree)
              return files, copies, set()

          def getcommit(self, version):
              """Return the ``common.commit`` describing revision ``version``.

              As a side effect the parent ids of ``version`` are stored in
              ``self._parentids`` for later use by ``getchanges()`` and
              ``getchangedfiles()``. Ghost parents are dropped. A branch nick
              of ``trunk`` is reported as ``default``.
              """
              rev = self.sourcerepo.get_revision(version)
              if rev.parent_ids:
                  parents = self._filterghosts(rev.parent_ids)
              else:
                  parents = []
              # populate parent id cache
              # getchanges() diffs against the first cached id, so the cached
              # value must never be empty: use the null revision both for
              # parentless revisions and for revisions whose parents were all
              # filtered out as ghosts.
              self._parentids[version] = parents or (revision.NULL_REVISION,)

              branch = self.recode(rev.properties.get(b'branch-nick', u'default'))
              if branch == b'trunk':
                  branch = b'default'
              return common.commit(
                  parents=parents,
                  # the timezone offset is passed with its sign flipped
                  date=b'%d %d' % (rev.timestamp, -rev.timezone),
                  author=self.recode(rev.committer),
                  desc=self.recode(rev.message),
                  branch=branch,
                  rev=version,
                  saverev=self._saverev,
              )

          def gettags(self):
              """Return a dict mapping recoded tag names to revision ids.

              Tags of all branches are merged into one dict; when several
              branches define the same tag, the branch visited last wins.
              """
              collected = {}
              for branch in self._bzrbranches():
                  if not branch.supports_tags():
                      # NOTE(review): this also discards tags already gathered
                      # from earlier branches - confirm that is intended.
                      return {}
                  tagdict = branch.tags.get_tag_dict()
                  collected.update(
                      (self.recode(tagname), tagrev)
                      for tagname, tagrev in pycompat.iteritems(tagdict)
                  )
              return collected

          def getchangedfiles(self, rev, i):
              """Return the paths that differ between ``rev`` and a parent.

              ``i`` indexes the parent ids cached for ``rev``; ``None`` means
              comparing against the null revision. The mode cache is reset
              as a side effect.
              """
              self._modecache = {}
              curtree = self.sourcerepo.revision_tree(rev)
              # no parent index given: compare against the empty revision
              parentid = (
                  revision.NULL_REVISION if i is None else self._parentids[rev][i]
              )
              prevtree = self.sourcerepo.revision_tree(parentid)
              touched = self._gettreechanges(curtree, prevtree)[0]
              return [entry[0] for entry in touched]

          def _gettreechanges(self, current, origin):
              """Compute the file-level changes between two bzr trees.

              ``current`` is the tree being converted and ``origin`` the tree
              it is compared against. Returns ``(changes, renames)`` where
              ``changes`` is a list of ``(path, revid)`` pairs (``revid`` is
              the revision id of ``current``) and ``renames`` maps each
              renamed destination path to its source path. All paths are
              passed through ``self.recode``.

              As a side effect ``self._modecache[(path, revid)]`` is set to
              ``b'x'``, ``b'l'`` or ``b''`` for every added, modified or
              renamed file.
              """
              revid = current._revision_id
              changes = []
              renames = {}
              seen = set()

              def flagsfor(executable, kind):
                  # Single definition of the mode flag so that files moved by
                  # a directory rename get the same symlink flag (b'l') as
                  # files reported individually; the directory path used to
                  # record b's' instead.
                  return (
                      (executable and b'x')
                      or (kind == b'symlink' and b'l')
                      or b''
                  )

              # Fall back to the deprecated attribute for legacy installations.
              try:
                  inventory = origin.root_inventory
              except AttributeError:
                  inventory = origin.inventory

              # Process the entries by reverse lexicographic name order to
              # handle nested renames correctly, most specific first.
              curchanges = sorted(
                  current.iter_changes(origin),
                  key=lambda c: c[1][0] or c[1][1],
                  reverse=True,
              )
              # paths, kind and executable are (origin, current) pairs
              for (
                  fileid,
                  paths,
                  changed_content,
                  versioned,
                  parent,
                  name,
                  kind,
                  executable,
              ) in curchanges:

                  if paths[0] == u'' or paths[1] == u'':
                      # ignore changes to tree root
                      continue

                  # bazaar tracks directories, mercurial does not, so
                  # we have to rename the directory contents
                  if kind[1] == b'directory':
                      if kind[0] not in (None, b'directory'):
                          # Replacing 'something' with a directory, record it
                          # so it can be removed.
                          changes.append((self.recode(paths[0]), revid))

                      if kind[0] == b'directory' and None not in paths:
                          renaming = paths[0] != paths[1]
                          # neither an add nor a delete - a move
                          # rename all directory contents manually
                          subdir = inventory.path2id(paths[0])
                          # get all child-entries of the directory
                          for childname, entry in inventory.iter_entries(subdir):
                              # hg does not track directory renames
                              if entry.kind == b'directory':
                                  continue
                              frompath = self.recode(paths[0] + b'/' + childname)
                              if frompath in seen:
                                  # Already handled by a more specific change
                                  # entry. This is important when you have:
                                  # a => b
                                  # a/c => a/c
                                  # Here a/c must not be renamed into b/c
                                  continue
                              seen.add(frompath)
                              if not renaming:
                                  continue
                              topath = self.recode(paths[1] + b'/' + childname)
                              # register the files as changed
                              changes.append((frompath, revid))
                              changes.append((topath, revid))
                              # add to mode cache
                              self._modecache[(topath, revid)] = flagsfor(
                                  entry.executable, entry.kind
                              )
                              # register the change as move
                              renames[topath] = frompath

                      # no further changes, go to the next change
                      continue

                  # we got unicode paths, need to convert them
                  path, topath = paths
                  if path is not None:
                      path = self.recode(path)
                  if topath is not None:
                      topath = self.recode(topath)
                  seen.add(path or topath)

                  if topath is None:
                      # file deleted
                      changes.append((path, revid))
                      continue

                  # renamed
                  if path and path != topath:
                      renames[topath] = path
                      changes.append((path, revid))

                  # populate the mode cache from the state in ``current``
                  self._modecache[(topath, revid)] = flagsfor(
                      executable[1], kind[1]
                  )
                  changes.append((topath, revid))

              return changes, renames

          def _filterghosts(self, ids):
              """Return ``ids`` as a tuple with ghost revisions removed.

              An id is kept only if the source repository's parent map knows
              it; the original order is preserved. hg does not support ghost
              revisions, see <http://bazaar-vcs.org/GhostRevision>.
              """
              known = self.sourcerepo.get_parent_map(ids)
              return tuple(revid for revid in ids if revid in known)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# bzr.py - bzr support for the convert extension
				#
				# Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				# This module is for handling 'bzr', that was formerly known as Bazaar-NG;
				# it cannot access 'bar' repositories, but they were never used very much
				from __future__ import absolute_import

				import os

				from mercurial.i18n import _
				from mercurial import (
				demandimport,
				error,
				pycompat,
				)
				from . import common

				# these do not work with demandimport, blacklist
				demandimport.IGNORES.update(
				[b'bzrlib.transactions', b'bzrlib.urlutils', b'ElementPath',]
				)

				try:
				# bazaar imports
				import bzrlib.bzrdir
				import bzrlib.errors
				import bzrlib.revision
				import bzrlib.revisionspec

				bzrdir = bzrlib.bzrdir
				errors = bzrlib.errors
				revision = bzrlib.revision
				revisionspec = bzrlib.revisionspec
				revisionspec.RevisionSpec
				except ImportError:
				pass

				supportedkinds = (b'file', b'symlink')


				class bzr_source(common.converter_source):
				"""Reads Bazaar repositories by using the Bazaar Python libraries"""

				def __init__(self, ui, repotype, path, revs=None):
				super(bzr_source, self).__init__(ui, repotype, path, revs=revs)

				if not os.path.exists(os.path.join(path, b'.bzr')):
				raise common.NoRepo(
				_(b'%s does not look like a Bazaar repository') % path
				)

				try:
				# access bzrlib stuff
				bzrdir
				except NameError:
				raise common.NoRepo(_(b'Bazaar modules could not be loaded'))

				path = os.path.abspath(path)
				self._checkrepotype(path)
				try:
				self.sourcerepo = bzrdir.BzrDir.open(path).open_repository()
				except errors.NoRepositoryPresent:
				raise common.NoRepo(
				_(b'%s does not look like a Bazaar repository') % path
				)
				self._parentids = {}
				self._saverev = ui.configbool(b'convert', b'bzr.saverev')

				def _checkrepotype(self, path):
				# Lightweight checkouts detection is informational but probably
				# fragile at API level. It should not terminate the conversion.
				try:
				dir = bzrdir.BzrDir.open_containing(path)[0]
				try:
				tree = dir.open_workingtree(recommend_upgrade=False)
				branch = tree.branch
				except (errors.NoWorkingTree, errors.NotLocalUrl):
				tree = None
				branch = dir.open_branch()
				if (
				tree is not None
				and tree.bzrdir.root_transport.base
				!= branch.bzrdir.root_transport.base
				):
				self.ui.warn(
				_(
				b'warning: lightweight checkouts may cause '
				b'conversion failures, try with a regular '
				b'branch instead.\n'
				)
				)
				except Exception:
				self.ui.note(_(b'bzr source type could not be determined\n'))

				def before(self):
				"""Before the conversion begins, acquire a read lock
				for all the operations that might need it. Fortunately
				read locks don't block other reads or writes to the
				repository, so this shouldn't have any impact on the usage of
				the source repository.

				The alternative would be locking on every operation that
				needs locks (there are currently two: getting the file and
				getting the parent map) and releasing immediately after,
				but this approach can take even 40% longer."""
				self.sourcerepo.lock_read()

				def after(self):
				self.sourcerepo.unlock()

				def _bzrbranches(self):
				return self.sourcerepo.find_branches(using=True)

				def getheads(self):
				if not self.revs:
				# Set using=True to avoid nested repositories (see issue3254)
				heads = sorted([b.last_revision() for b in self._bzrbranches()])
				else:
				revid = None
				for branch in self._bzrbranches():
				try:
				r = revisionspec.RevisionSpec.from_string(self.revs[0])
				info = r.in_history(branch)
				except errors.BzrError:
				pass
				revid = info.rev_id
				if revid is None:
				raise error.Abort(
				_(b'%s is not a valid revision') % self.revs[0]
				)
				heads = [revid]
				# Empty repositories return 'null:', which cannot be retrieved
				heads = [h for h in heads if h != b'null:']
				return heads

				def getfile(self, name, rev):
				revtree = self.sourcerepo.revision_tree(rev)
				fileid = revtree.path2id(name.decode(self.encoding or b'utf-8'))
				kind = None
				if fileid is not None:
				kind = revtree.kind(fileid)
				if kind not in supportedkinds:
				# the file is not available anymore - was deleted
				return None, None
				mode = self._modecache[(name, rev)]
				if kind == b'symlink':
				target = revtree.get_symlink_target(fileid)
				if target is None:
				raise error.Abort(
				_(b'%s.%s symlink has no target') % (name, rev)
				)
				return target, mode
				else:
				sio = revtree.get_file(fileid)
				return sio.read(), mode

				def getchanges(self, version, full):
				if full:
				raise error.Abort(_(b"convert from cvs does not support --full"))
				self._modecache = {}
				self._revtree = self.sourcerepo.revision_tree(version)
				# get the parentids from the cache
				parentids = self._parentids.pop(version)
				# only diff against first parent id
				prevtree = self.sourcerepo.revision_tree(parentids[0])
				files, changes = self._gettreechanges(self._revtree, prevtree)
				return files, changes, set()

				def getcommit(self, version):
				rev = self.sourcerepo.get_revision(version)
				# populate parent id cache
				if not rev.parent_ids:
				parents = []
				self._parentids[version] = (revision.NULL_REVISION,)
				else:
				parents = self._filterghosts(rev.parent_ids)
				self._parentids[version] = parents

				branch = self.recode(rev.properties.get(b'branch-nick', u'default'))
				if branch == b'trunk':
				branch = b'default'
				return common.commit(
				parents=parents,
				date=b'%d %d' % (rev.timestamp, -rev.timezone),
				author=self.recode(rev.committer),
				desc=self.recode(rev.message),
				branch=branch,
				rev=version,
				saverev=self._saverev,
				)

				def gettags(self):
				bytetags = {}
				for branch in self._bzrbranches():
				if not branch.supports_tags():
				return {}
				tagdict = branch.tags.get_tag_dict()
				for name, rev in pycompat.iteritems(tagdict):
				bytetags[self.recode(name)] = rev
				return bytetags

				def getchangedfiles(self, rev, i):
				self._modecache = {}
				curtree = self.sourcerepo.revision_tree(rev)
				if i is not None:
				parentid = self._parentids[rev][i]
				else:
				# no parent id, get the empty revision
				parentid = revision.NULL_REVISION

				prevtree = self.sourcerepo.revision_tree(parentid)
				changes = [e[0] for e in self._gettreechanges(curtree, prevtree)[0]]
				return changes

				def _gettreechanges(self, current, origin):
				revid = current._revision_id
				changes = []
				renames = {}
				seen = set()

				# Fall back to the deprecated attribute for legacy installations.
				try:
				inventory = origin.root_inventory
				except AttributeError:
				inventory = origin.inventory

				# Process the entries by reverse lexicographic name order to
				# handle nested renames correctly, most specific first.
				curchanges = sorted(
				current.iter_changes(origin),
				key=lambda c: c[1][0] or c[1][1],
				reverse=True,
				)
				for (
				fileid,
				paths,
				changed_content,
				versioned,
				parent,
				name,
				kind,
				executable,
				) in curchanges:

				if paths[0] == u'' or paths[1] == u'':
				# ignore changes to tree root
				continue

				# bazaar tracks directories, mercurial does not, so
				# we have to rename the directory contents
				if kind[1] == b'directory':
				if kind[0] not in (None, b'directory'):
				# Replacing 'something' with a directory, record it
				# so it can be removed.
				changes.append((self.recode(paths[0]), revid))

				if kind[0] == b'directory' and None not in paths:
				renaming = paths[0] != paths[1]
				# neither an add nor an delete - a move
				# rename all directory contents manually
				subdir = inventory.path2id(paths[0])
				# get all child-entries of the directory
				for name, entry in inventory.iter_entries(subdir):
				# hg does not track directory renames
				if entry.kind == b'directory':
				continue
				frompath = self.recode(paths[0] + b'/' + name)
				if frompath in seen:
				# Already handled by a more specific change entry
				# This is important when you have:
				# a => b
				# a/c => a/c
				# Here a/c must not be renamed into b/c
				continue
				seen.add(frompath)
				if not renaming:
				continue
				topath = self.recode(paths[1] + b'/' + name)
				# register the files as changed
				changes.append((frompath, revid))
				changes.append((topath, revid))
				# add to mode cache
				mode = (
				(entry.executable and b'x')
				or (entry.kind == b'symlink' and b's')
				or b''
				)
				self._modecache[(topath, revid)] = mode
				# register the change as move
				renames[topath] = frompath

				# no further changes, go to the next change
				continue

				# we got unicode paths, need to convert them
				path, topath = paths
				if path is not None:
				path = self.recode(path)
				if topath is not None:
				topath = self.recode(topath)
				seen.add(path or topath)

				if topath is None:
				# file deleted
				changes.append((path, revid))
				continue

				# renamed
				if path and path != topath:
				renames[topath] = path
				changes.append((path, revid))

				# populate the mode cache
				kind, executable = [e[1] for e in (kind, executable)]
				mode = (executable and b'x') or (kind == b'symlink' and b'l') or b''
				self._modecache[(topath, revid)] = mode
				changes.append((topath, revid))

				return changes, renames

				def _filterghosts(self, ids):
				"""Filters out ghost revisions which hg does not support, see
				<http://bazaar-vcs.org/GhostRevision>
				"""
				parentmap = self.sourcerepo.get_parent_map(ids)
				parents = tuple([parent for parent in ids if parent in parentmap])
				return parents