upstream/mercurial-mirror Files · mercurial/ignore.py

findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes....

findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes. We speed up 'findrenames' for the use case when a user specifies they want a similarity of 100% by matching files by their exact SHA1 hash value. This reduces the number of comparisons required to find exact matches from O(n^2) to O(n). While it would be nice if we could just use Mercurial's pre-calculated SHA1 hash for existing files, this hash includes the file's ancestor information making it unsuitable for our purposes. Instead, we calculate the hash of old content from scratch. The following benchmarks were taken on the current head of crew: addremove 100% similarity: rm -rf *; hg up -C; mv tests tests.new hg --time addremove -s100 --dry-run before: real 176.350 secs (user 128.890+0.000 sys 47.430+0.000) after: real 2.130 secs (user 1.890+0.000 sys 0.240+0.000) addremove 75% similarity: rm -rf *; hg up -C; mv tests tests.new; \ for i in tests.new/*; do echo x >> $i; done hg --time addremove -s75 --dry-run before: real 264.560 secs (user 215.130+0.000 sys 49.410+0.000) after: real 218.710 secs (user 172.790+0.000 sys 45.870+0.000)

Matt Mackall - - Load All Authors

File last commit:

r10263:25e57239 stable


                r11060:e6df0177

default

Download file

             ignore.py
        
                    103 lines
            
             | 3.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / ignore.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # ignore.py - ignored file handling for mercurial

      #

      # Copyright 2007 Matt Mackall <mpm@selenic.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from i18n import _

      import util, match

      import re

      # Cache for the comment-stripping regex; ignorepats() compiles it the
      # first time it meets a line containing '#'.
      _commentre = None

      def ignorepats(lines):
          '''turn the lines of an .hgignore file into kind-prefixed patterns.

          lines is any iterable of strings. The result is a
          (patterns, warnings) pair of lists. Nothing is validated here: each
          pattern is simply the line text carrying a 'relre:' or 'relglob:'
          prefix.

          '#' starts a comment unless it is escaped with a backslash (an
          escaped '#' is kept as a literal '#'), trailing whitespace is
          dropped and empty lines are skipped.

          A 'syntax: NAME' line, NAME being one of re, regexp or glob, picks
          the prefix for the unprefixed lines that follow; an unknown NAME
          adds a warning and leaves the current prefix alone. A line that
          starts with 're:', 'regexp:' or 'glob:' overrides the prefix for
          that line only, and a line that already starts with 'relre:' or
          'relglob:' is passed through untouched.'''
          global _commentre

          prefixes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
          current = 'relre:'
          found = []
          problems = []

          for text in lines:
              if "#" in text:
                  if not _commentre:
                      _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                  # drop comments introduced by an unescaped '#', then
                  # unescape the '#' characters that were protected
                  text = _commentre.sub(r'\1', text).replace("\\#", "#")
              text = text.rstrip()
              if not text:
                  continue

              if text.startswith('syntax:'):
                  name = text[7:].strip()
                  if name in prefixes:
                      current = prefixes[name]
                  else:
                      problems.append(_("ignoring invalid syntax '%s'") % name)
                  continue

              for name in prefixes:
                  kind = prefixes[name]
                  if text.startswith(kind):
                      # already fully qualified, keep as is
                      break
                  if text.startswith(name + ':'):
                      # short per-line prefix, swap in the long form
                      text = kind + text[len(name) + 1:]
                      break
              else:
                  # no explicit prefix on the line: use the current default
                  text = current + text
              found.append(text)

          return found, problems

      def ignore(root, files, warn):

          '''return matcher covering patterns in 'files'.

          the files parsed for patterns include:

          .hgignore in the repository root

          any additional files specified in the [ui] section of ~/.hgrc

          trailing white space is dropped.

          the escape character is backslash.

          comments start with #.

          empty lines are skipped.

          lines can be of the following formats:

          syntax: regexp # defaults following lines to non-rooted regexps

          syntax: glob   # defaults following lines to non-rooted globs

          re:pattern     # non-rooted regular expression

          glob:pattern   # non-rooted glob

          pattern        # pattern of the current default type'''

          pats = {}

          for f in files:

              try:

                  pats[f] = []

                  fp = open(f)

                  pats[f], warnings = ignorepats(fp)

                  for warning in warnings:

                      warn("%s: %s\n" % (f, warning))

              except IOError, inst:

                  if f != files[0]:

                      warn(_("skipping unreadable ignore file '%s': %s\n") %

                           (f, inst.strerror))

          allpats = []

          [allpats.extend(patlist) for patlist in pats.values()]

          if not allpats:

              return util.never

          try:

              ignorefunc = match.match(root, '', [], allpats)

          except util.Abort:

              # Re-raise an exception where the src is the right file

              for f, patlist in pats.iteritems():

                  try:

                      match.match(root, '', [], patlist)

                  except util.Abort, inst:

                      raise util.Abort('%s: %s' % (f, inst[0]))

          return ignorefunc

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# ignore.py - ignored file handling for mercurial
				#
				# Copyright 2007 Matt Mackall <mpm@selenic.com>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				from i18n import _
				import util, match
				import re

				# Cache for the comment-stripping regex; ignorepats() compiles it the
				# first time it meets a line containing '#'.
				_commentre = None

				def ignorepats(lines):
					'''parse lines (iterable) of .hgignore text, returning a tuple of
					(patterns, parse errors).

					Nothing is validated here: each returned pattern is the line text
					carrying a 'relre:' or 'relglob:' prefix.

					'#' starts a comment unless it is escaped with a backslash (an
					escaped '#' is kept as a literal '#'), trailing whitespace is
					dropped and empty lines are skipped. A 'syntax: NAME' line (NAME
					one of re, regexp, glob) selects the prefix for the unprefixed
					lines that follow; an unknown NAME adds a warning. A line starting
					with 're:', 'regexp:' or 'glob:' overrides the prefix for that line
					only, and a line already starting with 'relre:' or 'relglob:' is
					kept unchanged.'''
					global _commentre

					syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
					syntax = 'relre:'
					patterns = []
					warnings = []

					for line in lines:
						if "#" in line:
							if not _commentre:
								_commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
							# remove comments prefixed by an even number of escapes
							line = _commentre.sub(r'\1', line)
							# fixup properly escaped comments that survived the above
							line = line.replace("\\#", "#")
						line = line.rstrip()
						if not line:
							continue

						if line.startswith('syntax:'):
							s = line[7:].strip()
							try:
								syntax = syntaxes[s]
							except KeyError:
								warnings.append(_("ignoring invalid syntax '%s'") % s)
							continue

						# default: prepend the prefix currently in effect
						pat = syntax + line
						for s in syntaxes:
							rels = syntaxes[s]
							if line.startswith(rels):
								# already fully qualified
								pat = line
								break
							elif line.startswith(s + ':'):
								# short per-line prefix, swap in the long form
								pat = rels + line[len(s) + 1:]
								break
						patterns.append(pat)

					return patterns, warnings

				def ignore(root, files, warn):
				'''return matcher covering patterns in 'files'.

				the files parsed for patterns include:
				.hgignore in the repository root
				any additional files specified in the [ui] section of ~/.hgrc

				trailing white space is dropped.
				the escape character is backslash.
				comments start with #.
				empty lines are skipped.

				lines can be of the following formats:

				syntax: regexp # defaults following lines to non-rooted regexps
				syntax: glob # defaults following lines to non-rooted globs
				re:pattern # non-rooted regular expression
				glob:pattern # non-rooted glob
				pattern # pattern of the current default type'''

				pats = {}
				for f in files:
				try:
				pats[f] = []
				fp = open(f)
				pats[f], warnings = ignorepats(fp)
				for warning in warnings:
				warn("%s: %s\n" % (f, warning))
				except IOError, inst:
				if f != files[0]:
				warn(_("skipping unreadable ignore file '%s': %s\n") %
				(f, inst.strerror))

				allpats = []
				[allpats.extend(patlist) for patlist in pats.values()]
				if not allpats:
				return util.never

				try:
				ignorefunc = match.match(root, '', [], allpats)
				except util.Abort:
				# Re-raise an exception where the src is the right file
				for f, patlist in pats.iteritems():
				try:
				match.match(root, '', [], patlist)
				except util.Abort, inst:
				raise util.Abort('%s: %s' % (f, inst[0]))

				return ignorefunc