findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.

We speed up 'findrenames' for the use case when a user specifies they want a
similarity of 100% by matching files by their exact SHA1 hash value. This
reduces the number of comparisons required to find exact matches from O(n^2)
to O(n).

While it would be nice if we could just use Mercurial's pre-calculated SHA1
hash for existing files, this hash includes the file's ancestor information,
making it unsuitable for our purposes. Instead, we calculate the hash of the
old content from scratch.

The following benchmarks were taken on the current head of crew:

addremove 100% similarity:
  rm -rf *; hg up -C; mv tests tests.new
  hg --time addremove -s100 --dry-run
  before: real 176.350 secs (user 128.890+0.000 sys 47.430+0.000)
  after:  real 2.130 secs (user 1.890+0.000 sys 0.240+0.000)

addremove 75% similarity:
  rm -rf *; hg up -C; mv tests tests.new; \
    for i in tests.new/*; do echo x >> $i; done
  hg --time addremove -s75 --dry-run
  before: real 264.560 secs (user 215.130+0.000 sys 49.410+0.000)
  after:  real 218.710 secs (user 172.790+0.000 sys 45.870+0.000)
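As a rough sketch of the bucketing idea (not the actual patch; the function
name and the `read` callback here are hypothetical), exact matching by
content hash needs only one dictionary lookup per added file:

  import hashlib

  def exact_matches(removed, added, read):
      # Index the removed files by the SHA1 of their content, then probe
      # the index once per added file: O(n) hashing plus O(1) lookups,
      # instead of comparing every removed/added pair.
      byhash = {}
      for old in removed:
          byhash[hashlib.sha1(read(old)).digest()] = old
      for new in added:
          old = byhash.get(hashlib.sha1(read(new)).digest())
          if old is not None:
              yield old, new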

# repair.py - functions for repository repair for mercurial
#
# Copyright 2005, 2006 Chris Mason <mason@suse.com>
# Copyright 2007 Matt Mackall
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import changegroup
from node import nullrev, short
from i18n import _
import os

def _bundle(repo, bases, heads, node, suffix, extranodes=None):
    """create a bundle with the specified revisions as a backup"""
    cg = repo.changegroupsubset(bases, heads, 'strip', extranodes)
    backupdir = repo.join("strip-backup")
    if not os.path.isdir(backupdir):
        os.mkdir(backupdir)
    name = os.path.join(backupdir, "%s-%s" % (short(node), suffix))
    repo.ui.warn(_("saving bundle to %s\n") % name)
    return changegroup.writebundle(cg, name, "HG10BZ")

def _collectfiles(repo, striprev):
    """find out the filelogs affected by the strip"""
    files = set()

    for x in xrange(striprev, len(repo)):
        files.update(repo[x].files())

    return sorted(files)

def _collectextranodes(repo, files, link):
    """return the nodes that have to be saved before the strip"""
    def collectone(revlog):
        extra = []
        startrev = count = len(revlog)
        # find the truncation point of the revlog
        for i in xrange(count):
            lrev = revlog.linkrev(i)
            if lrev >= link:
                startrev = i + 1
                break

        # see if any revision after that point has a linkrev less than link
        # (we have to manually save these guys)
        for i in xrange(startrev, count):
            node = revlog.node(i)
            lrev = revlog.linkrev(i)
            if lrev < link:
                extra.append((node, cl.node(lrev)))

        return extra

    extranodes = {}
    cl = repo.changelog
    extra = collectone(repo.manifest)
    if extra:
        # manifest nodes are stored under the key 1 rather than a file name
        extranodes[1] = extra
    for fname in files:
        f = repo.file(fname)
        extra = collectone(f)
        if extra:
            extranodes[fname] = extra

    return extranodes

def strip(ui, repo, node, backup="all"):
    """strip the node and all of its descendants from the repository"""
    cl = repo.changelog
    # TODO delete the undo files, and handle undo of merge sets
    striprev = cl.rev(node)

    # Some revisions with rev > striprev may not be descendants of striprev.
    # We have to find these revisions and put them in a bundle, so that
    # we can restore them after the truncations.
    # To create the bundle we use repo.changegroupsubset which requires
    # the list of heads and bases of the set of interesting revisions.
    # (head = revision in the set that has no descendant in the set;
    #  base = revision in the set that has no ancestor in the set)
    tostrip = set((striprev,))
    saveheads = set()
    savebases = []
    for r in xrange(striprev + 1, len(cl)):
        parents = cl.parentrevs(r)
        if parents[0] in tostrip or parents[1] in tostrip:
            # r is a descendant of striprev
            tostrip.add(r)
            # if this is a merge and one of the parents does not descend
            # from striprev, mark that parent as a savehead.
            if parents[1] != nullrev:
                for p in parents:
                    if p not in tostrip and p > striprev:
                        saveheads.add(p)
        else:
            # if no parents of this revision will be stripped, mark it as
            # a savebase
            if parents[0] < striprev and parents[1] < striprev:
                savebases.append(cl.node(r))

            saveheads.difference_update(parents)
            saveheads.add(r)

    saveheads = [cl.node(r) for r in saveheads]
    files = _collectfiles(repo, striprev)
    extranodes = _collectextranodes(repo, files, striprev)

    # create a changegroup for all the branches we need to keep
    if backup == "all":
        _bundle(repo, [node], cl.heads(), node, 'backup')
    if saveheads or extranodes:
        chgrpfile = _bundle(repo, savebases, saveheads, node, 'temp',
                            extranodes)

    mfst = repo.manifest

    tr = repo.transaction("strip")
    offset = len(tr.entries)

    tr.startgroup()
    cl.strip(striprev, tr)
    mfst.strip(striprev, tr)
    for fn in files:
        repo.file(fn).strip(striprev, tr)
    tr.endgroup()

    try:
        for i in xrange(offset, len(tr.entries)):
            file, troffset, ignore = tr.entries[i]
            repo.sopener(file, 'a').truncate(troffset)
        tr.close()
    except:
        # roll the truncations back on any failure, then re-raise
        tr.abort()
        raise

    if saveheads or extranodes:
        ui.status(_("adding branch\n"))
        f = open(chgrpfile, "rb")
        gen = changegroup.readbundle(f, chgrpfile)
        repo.addchangegroup(gen, 'strip', 'bundle:' + chgrpfile, True)
        f.close()
        if backup != "strip":
            os.unlink(chgrpfile)

    repo.destroyed()
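For context, a caller-side sketch (assuming an existing `ui` and `repo` and a
changelog node `node`; illustrative only, not copied from Mercurial's actual
callers such as mq) looks like:

  from mercurial import repair

  # back up everything being stripped, then remove `node` and its
  # descendants from the changelog, manifest and affected filelogs
  repair.strip(ui, repo, node, backup="all")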