upstream/mercurial-mirror Files · mercurial/similar.py

Move 'findrenames' code into its own file....

Move 'findrenames' code into its own file. The next few patches will increase the size of the "findrenames" functionality. This patch simply moves the function into its own file to avoid clutter building up in 'cmdutil.py'.

David Greenaway - - Load All Authors

File last commit:

r11059:ef4aa90b default


                r11059:ef4aa90b

default

Download file

             similar.py
        
                    59 lines
            
             | 1.7 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / similar.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        David Greenaway
    
Move 'findrenames' code into its own file....

              r11059
            
      # similar.py - mechanisms for finding similar files

      #

      # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from i18n import _

      import util

      import mdiff

      import bdiff

      def findrenames(repo, added, removed, threshold):
          '''find renamed files -- yields (before, after, score) tuples

          Every name in 'removed' that is present in repo['.'] is compared
          against the text repo.wread() returns for every name in 'added'.
          For each added name the best match is remembered, as long as its
          score is at least 'threshold'; when two removed files score the
          same, the one seen later replaces the earlier one.
          '''
          best = {}
          basectx = repo['.']
          for pos, old in enumerate(removed):
              repo.ui.progress(_('searching'), pos, total=len(removed))
              if old not in basectx:
                  continue
              oldfctx = basectx.filectx(old)

              # the old text and its line split are only produced on first
              # use; util.cachefunc is relied on to remember the result
              def loadold():
                  content = oldfctx.data()
                  return content, mdiff.splitnewlines(content)
              loadold = util.cachefunc(loadold)

              def similarity(candidate):
                  # empty candidates never match anything
                  if len(candidate) == 0:
                      return 0.0
                  # presumably cmp() is false when the contents are the same
                  if not oldfctx.cmp(candidate):
                      return 1.0
                  # only exact matches can reach a threshold of 1.0, so the
                  # line-by-line comparison is skipped
                  if threshold == 1.0:
                      return 0.0
                  content, contentlines = loadold()
                  # bdiff.blocks() returns blocks of matching lines; add up
                  # the bytes of the lines of the old text in those blocks
                  matched = sum(len(line)
                                for a1, a2, b1, b2
                                in bdiff.blocks(candidate, content)
                                for line in contentlines[b1:b2])
                  return matched * 2.0 / (len(candidate) + len(content))

              for new in added:
                  # a candidate has to reach the best score recorded so far,
                  # or the threshold when nothing has been recorded yet
                  if new in best:
                      floor = best[new][1]
                  else:
                      floor = threshold
                  current = similarity(repo.wread(new))
                  if current >= floor:
                      best[new] = (old, current)

          repo.ui.progress(_('searching'), None)

          for new, (old, current) in best.iteritems():
              yield old, new, current

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

David Greenaway Move 'findrenames' code into its own file....	r11059	# similar.py - mechanisms for finding similar files
		#
		# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
		#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.

		from i18n import _
		import util
		import mdiff
		import bdiff

		def findrenames(repo, added, removed, threshold):
		    '''find renamed files -- yields (before, after, score) tuples

		    Every name in 'removed' that is present in repo['.'] is scored
		    against the text repo.wread() returns for every name in 'added'.
		    For each added name the best-scoring removed name is kept, as long
		    as the score is at least 'threshold'; on equal scores the removed
		    name seen later wins.
		    '''
		    copies = {}
		    ctx = repo['.']
		    for i, r in enumerate(removed):
		        repo.ui.progress(_('searching'), i, total=len(removed))
		        if r not in ctx:
		            continue
		        fctx = ctx.filectx(r)

		        # lazily load text
		        @util.cachefunc
		        def data():
		            orig = fctx.data()
		            return orig, mdiff.splitnewlines(orig)

		        def score(text):
		            # an empty candidate never counts as a match
		            if not len(text):
		                return 0.0
		            # presumably cmp() is false when the contents are the same
		            if not fctx.cmp(text):
		                return 1.0
		            # only exact matches can reach a threshold of 1.0, so the
		            # line-by-line comparison is skipped
		            if threshold == 1.0:
		                return 0.0
		            orig, lines = data()
		            # bdiff.blocks() returns blocks of matching lines
		            # count the number of bytes in each
		            equal = 0
		            matches = bdiff.blocks(text, orig)
		            for x1, x2, y1, y2 in matches:
		                for line in lines[y1:y2]:
		                    equal += len(line)

		            lengths = len(text) + len(orig)
		            return equal * 2.0 / lengths

		        for a in added:
		            # compare against the best score recorded for this file, or
		            # against the threshold when nothing has been recorded yet
		            bestscore = copies.get(a, (None, threshold))[1]
		            myscore = score(repo.wread(a))
		            if myscore >= bestscore:
		                copies[a] = (r, myscore)
		    repo.ui.progress(_('searching'), None)

		    for dest, v in copies.iteritems():
		        source, score = v
		        yield source, dest, score