upstream/mercurial-mirror Files · mercurial/graphmod.py

findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes....

findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.

We speed up 'findrenames' for the use case where a user specifies a similarity of 100% by matching files by their exact SHA1 hash value. This reduces the number of comparisons required to find exact matches from O(n^2) to O(n).

While it would be nice if we could just use mercurial's pre-calculated SHA1 hash for existing files, this hash includes the file's ancestor information, making it unsuitable for our purposes. Instead, we calculate the hash of old content from scratch.

The following benchmarks were taken on the current head of crew:

addremove 100% similarity:
    rm -rf *; hg up -C; mv tests tests.new
    hg --time addremove -s100 --dry-run

    before: real 176.350 secs (user 128.890+0.000 sys 47.430+0.000)
    after:  real 2.130 secs (user 1.890+0.000 sys 0.240+0.000)

addremove 75% similarity:
    rm -rf *; hg up -C; mv tests tests.new; \
    for i in tests.new/*; do echo x >> $i; done
    hg --time addremove -s75 --dry-run

    before: real 264.560 secs (user 215.130+0.000 sys 49.410+0.000)
    after:  real 218.710 secs (user 172.790+0.000 sys 45.870+0.000)

Benoit Boissinot - - Load All Authors

File last commit:

r10602:94145b53 default


                r11060:e6df0177

default

Download file

             graphmod.py
        
                    122 lines
            
             | 3.8 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / graphmod.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # Revision graph generator for Mercurial

      #

      # Copyright 2008 Dirkjan Ochtman <dirkjan@ochtman.nl>

      # Copyright 2007 Joel Rosdahl <joel@rosdahl.net>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      """supports walking the history as DAGs suitable for graphical output

      The most basic format we use is that of::

        (id, type, data, [parentids])

      The node and parent ids are arbitrary integers which identify a node in the

      context of the graph returned. Type is a constant specifying the node type.

      Data depends on type.

      """

      from mercurial.node import nullrev

      # Node type tag stored in the second slot of every tuple yielded by the
      # DAG generators in this module.
      CHANGESET = 'C'

      def revisions(repo, start, stop):
          """changeset DAG walker emitting (id, CHANGESET, ctx, [parentids])

          Counts down from revision start, one revision at a time, and stops
          once revision stop has been emitted; nothing is produced when start
          is already below stop. Every tuple holds the revision number, the
          CHANGESET marker, the context looked up as repo[rev], and the
          ascending list of that context's parent revisions. Parents whose
          revision equals nullrev are left out of the list.
          """
          rev = start
          while stop <= rev:
              ctx = repo[rev]
              parentrevs = []
              for parent in ctx.parents():
                  # parents reporting nullrev never become graph edges
                  if parent.rev() == nullrev:
                      continue
                  parentrevs.append(parent.rev())
              parentrevs.sort()
              yield (rev, CHANGESET, ctx, parentrevs)
              rev -= 1

      def filerevs(repo, path, start, stop, limit=None):
          """single-file DAG walker emitting (id, CHANGESET, ctx, [parentids])

          Steps through the file revisions of path from the newest one
          backwards. A file revision is emitted only when its changeset
          revision is not above start. The stop bound is tested before each
          step against the changeset revision seen on the previous step, so
          the first revision found at or below stop is still emitted before
          the walk ends.

          Parent ids are the linkrevs of those file parents that live at the
          same path, in ascending order. The walk also ends as soon as the
          number of emitted tuples equals limit; the default of None never
          matches, so no cap applies.
          """
          filerev = len(repo.file(path)) - 1
          rev = stop + 1
          emitted = 0
          while True:
              if filerev < 0 or rev <= stop:
                  return
              fctx = repo.filectx(path, fileid=filerev)
              # only parents that are revisions of this very path count
              parents = [f.linkrev() for f in fctx.parents()
                         if f.path() == path]
              rev = fctx.rev()
              filerev -= 1
              if rev > start:
                  # still above the requested window, keep descending
                  continue
              yield (rev, CHANGESET, fctx.changectx(), sorted(parents))
              emitted += 1
              if emitted == limit:
                  return

      def nodes(repo, nodes):
          """changeset DAG walker emitting (id, CHANGESET, ctx, [parentids])

          Looks up each entry of nodes in repo, in the order given, and emits
          one tuple per entry. Only parents whose node is itself a member of
          nodes contribute to the parent id list, which is sorted ascending.
          """
          wanted = set(nodes)
          for node in nodes:
              ctx = repo[node]
              parentrevs = []
              for parent in ctx.parents():
                  # parents outside the requested set produce no edge
                  if parent.node() in wanted:
                      parentrevs.append(parent.rev())
              parentrevs.sort()
              yield (ctx.rev(), CHANGESET, ctx, parentrevs)

      def colored(dag):
          """decorates the nodes of a DAG with column and color information

          Consumes (id, type, data, [parentids]) tuples and emits, for each
          one::

            (id, type, data, (col, color), [(col, nextcol, color)])

          where:

            - (col, color) is the column the node sits in and its color index
            - the list holds one (col, nextcol, color) entry for every line
              running from a column of this row to a column of the next row
          """
          columns = []   # ids occupying each column of the current row
          palette = {}   # id -> color index, for ids awaiting their turn
          fresh = 1      # next color index that has not been handed out
          for node, kind, payload, parentids in dag:
              # an id nobody listed as a parent opens a new column
              if node not in columns:
                  columns.append(node)
                  palette[node] = fresh
                  fresh += 1

              column = columns.index(node)
              shade = palette.pop(node)

              # parents not yet holding a column take over this node's slot
              incoming = [p for p in parentids if p not in columns]
              following = columns[:column] + incoming + columns[column + 1:]

              # the first new parent inherits our color, the rest get new ones
              if incoming:
                  palette[incoming[0]] = shade
                  for extra in incoming[1:]:
                      palette[extra] = fresh
                      fresh += 1

              links = []
              for source, occupant in enumerate(columns):
                  if occupant in following:
                      # a line that merely passes through this row
                      target = following.index(occupant)
                      links.append((source, target, palette[occupant]))
                  elif occupant == node:
                      # lines fanning out from this node to its parents
                      for p in parentids:
                          links.append((source, following.index(p), shade))

              yield (node, kind, payload, (column, shade), links)
              columns = following

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# Revision graph generator for Mercurial
				#
				# Copyright 2008 Dirkjan Ochtman <dirkjan@ochtman.nl>
				# Copyright 2007 Joel Rosdahl <joel@rosdahl.net>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				"""supports walking the history as DAGs suitable for graphical output

				The most basic format we use is that of::

				(id, type, data, [parentids])

				The node and parent ids are arbitrary integers which identify a node in the
				context of the graph returned. Type is a constant specifying the node type.
				Data depends on type.
				"""

				from mercurial.node import nullrev

				# Node type tag stored in the second slot of every tuple yielded by the
				# DAG generators in this module.
				CHANGESET = 'C'

				def revisions(repo, start, stop):
					"""cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples

					This generator function walks through the revision history from
					revision start to revision stop (which must be less than or equal to
					start). It yields a tuple for each node. The node and parent ids are
					arbitrary integers which identify a node in the context of the graph
					returned.

					repo is indexed by revision number to obtain each changeset context;
					start is the first (highest) revision visited and stop the last
					(lowest) one. Parents whose revision equals nullrev are omitted and
					the remaining parent revisions are listed in ascending order.
					"""
					cur = start
					while cur >= stop:
						ctx = repo[cur]
						# parents reporting nullrev never become graph edges
						parents = [p.rev() for p in ctx.parents() if p.rev() != nullrev]
						yield (cur, CHANGESET, ctx, sorted(parents))
						cur -= 1

				def filerevs(repo, path, start, stop, limit=None):
					"""file cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples

					This generator function walks through the revision history of a
					single file from revision start down to revision stop.

					File revisions of path are visited newest first; one is yielded only
					when its changeset revision is not above start. The stop bound is
					checked before each step against the previously visited changeset
					revision, so the first revision found at or below stop is still
					yielded. Parent ids are the linkrevs of the file parents stored at
					the same path, sorted ascending. When limit is given, the walk ends
					once exactly that many tuples have been yielded.
					"""
					filerev = len(repo.file(path)) - 1
					# start above stop so that the loop is entered at least once
					rev = stop + 1
					count = 0
					while filerev >= 0 and rev > stop:
						fctx = repo.filectx(path, fileid=filerev)
						# only parents that are revisions of this very path count
						parents = [f.linkrev() for f in fctx.parents() if f.path() == path]
						rev = fctx.rev()
						if rev <= start:
							yield (rev, CHANGESET, fctx.changectx(), sorted(parents))
							count += 1
							if count == limit:
								break
						filerev -= 1

				def nodes(repo, nodes):
					"""cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples

					This generator function walks the given nodes. It only returns
					parents that are in nodes, too.

					Each entry of nodes is looked up in repo, in the order given. The
					parent ids of a yielded tuple are the revisions of those parents
					whose node is a member of nodes, sorted ascending.
					"""
					# set membership keeps the per-parent check cheap
					include = set(nodes)
					for node in nodes:
						ctx = repo[node]
						parents = [p.rev() for p in ctx.parents() if p.node() in include]
						yield (ctx.rev(), CHANGESET, ctx, sorted(parents))

				def colored(dag):
					"""annotates a DAG with colored edge information

					For each DAG node this function emits tuples::

					  (id, type, data, (col, color), [(col, nextcol, color)])

					with the following new elements:

					  - Tuple (col, color) with column and color index for the current
					    node
					  - A list of tuples indicating the edges between the current node
					    and its parents.

					dag is an iterable of (id, type, data, [parentids]) tuples. The
					function is a generator and consumes dag lazily, one node per
					yielded tuple.
					"""
					seen = []      # ids occupying each column of the current row
					colors = {}    # id -> color index, for ids awaiting their turn
					newcolor = 1   # next color index that has not been handed out
					for (cur, nodetype, data, parents) in dag:

						# Compute seen and nextseen
						if cur not in seen:
							seen.append(cur) # new head
							colors[cur] = newcolor
							newcolor += 1

						col = seen.index(cur)
						color = colors.pop(cur)
						nextseen = seen[:]

						# Add parents to nextseen; they take over the current column
						addparents = [p for p in parents if p not in nextseen]
						nextseen[col:col + 1] = addparents

						# Set colors for the parents: the first one inherits the
						# current color, every further one gets a fresh color
						for i, p in enumerate(addparents):
							if not i:
								colors[p] = color
							else:
								colors[p] = newcolor
								newcolor += 1

						# Add edges to the graph
						edges = []
						for ecol, eid in enumerate(seen):
							if eid in nextseen:
								# a line that merely passes through this row
								edges.append((ecol, nextseen.index(eid), colors[eid]))
							elif eid == cur:
								# lines from the current node to each parent
								for p in parents:
									edges.append((ecol, nextseen.index(p), color))

						# Yield and move on
						yield (cur, nodetype, data, (col, color), edges)
						seen = nextseen