upstream/mercurial-mirror Files · mercurial/match.py

url: catch UNC paths as yet another Windows special case (issue2808)

Matt Mackall - - Load All Authors

File last commit:

r14675:cfc89398 default


                r14699:388af80c

stable

Download file

             match.py
        
                    338 lines
            
             | 10.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / match.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # match.py - filename matching

      #

      #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      import re

      import scmutil, util, fileset

      from i18n import _

      def _expandsets(pats, ctx):

          '''convert set: patterns into a list of files in the given context'''

          fset = set()

          other = []

          for kind, expr in pats:

              if kind == 'set':

                  if not ctx:

                      raise util.Abort("fileset expression with no context")

                  s = fileset.getfileset(ctx, expr)

                  fset.update(s)

                  continue

              other.append((kind, expr))

          return fset, other

      class match(object):
          """Callable file-name matcher built from patterns plus optional
          include/exclude filters."""

          def __init__(self, root, cwd, patterns, include=None, exclude=None,
                       default='glob', exact=False, auditor=None, ctx=None):
              """build an object to match a set of file patterns

              arguments:
              root - the canonical root of the tree you're matching against
              cwd - the current working directory, if relevant
              patterns - patterns to find
              include - patterns to include (None or empty: no include filter)
              exclude - patterns to exclude (None or empty: no exclude filter)
              default - if a pattern in patterns has no explicit type, assume
                        this one (include/exclude always default to 'glob')
              exact - patterns are actually literals
              auditor - handed to _normalize together with root and cwd
              ctx - handed to _buildmatch; 'set:' patterns need it

              a pattern is one of:
              'glob:<glob>' - a glob relative to cwd
              're:<regexp>' - a regular expression
              'path:<path>' - a path relative to root
              'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
              'relpath:<path>' - a path relative to cwd
              'relre:<regexp>' - a regexp that needn't match the start of a name
              'set:<fileset>' - a fileset expression
              'listfile:<file>' - a file holding one pattern per line
              'listfile0:<file>' - a file holding NUL-separated patterns
              '<something>' - a pattern of the specified default type
              """
              self._root = root
              self._cwd = cwd
              self._files = []
              self._anypats = bool(include or exclude)
              self._ctx = ctx

              # include/exclude patterns also match everything below a
              # directory, hence the '(?:/|$)' tail instead of '$'
              if include:
                  pats = _normalize(include, 'glob', root, cwd, auditor)
                  self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
              if exclude:
                  pats = _normalize(exclude, 'glob', root, cwd, auditor)
                  self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
              if exact:
                  self._files = patterns
                  pm = self.exact
              elif patterns:
                  pats = _normalize(patterns, default, root, cwd, auditor)
                  self._files = _roots(pats)
                  self._anypats = self._anypats or _anypats(pats)
                  self.patternspat, pm = _buildmatch(ctx, pats, '$')

              # pick the cheapest callable that combines the predicates that
              # actually exist; im/em/pm are only bound when their input was
              # non-empty
              if patterns or exact:
                  if include and exclude:
                      m = lambda f: im(f) and not em(f) and pm(f)
                  elif include:
                      m = lambda f: im(f) and pm(f)
                  elif exclude:
                      m = lambda f: not em(f) and pm(f)
                  else:
                      m = pm
              elif include and exclude:
                  m = lambda f: im(f) and not em(f)
              elif include:
                  m = im
              elif exclude:
                  m = lambda f: not em(f)
              else:
                  m = lambda f: True

              self.matchfn = m
              self._fmap = set(self._files)

          def __call__(self, fn):
              '''return the result of matchfn for file name fn'''
              return self.matchfn(fn)

          def __iter__(self):
              '''iterate over the entries of files()'''
              return iter(self._files)

          def bad(self, f, msg):
              '''callback for each explicit file that can't be
              found/accessed, with an error message

              does nothing here; callers may replace it
              '''
              pass

          def dir(self, f):
              '''hook that does nothing by default'''
              pass

          def missing(self, f):
              '''hook that does nothing by default'''
              pass

          def exact(self, f):
              '''true if f is one of the entries of files()'''
              return f in self._fmap

          def rel(self, f):
              '''return util.pathto(root, cwd, f)'''
              return util.pathto(self._root, self._cwd, f)

          def files(self):
              '''the literal names (exact mode) or pattern roots collected
              by the constructor'''
              return self._files

          def anypats(self):
              '''true if include/exclude were given or _anypats() reported a
              non-literal pattern'''
              return self._anypats

      class exact(match):
          """Matcher that treats files as literal names, not patterns."""

          def __init__(self, root, cwd, files):
              match.__init__(self, root, cwd, files, exact=True)

      class always(match):
          """Matcher built from an empty pattern list, so every name matches."""

          def __init__(self, root, cwd):
              nopatterns = []
              match.__init__(self, root, cwd, nopatterns)

      class narrowmatcher(match):
          """Adapt a matcher to work on a subdirectory only.

          Names given to this matcher are relative to the subdirectory; the
          subdirectory prefix is added before asking the wrapped matcher and
          stripped from the wrapped matcher's file list:

          >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
          >>> m2 = narrowmatcher('sub', m1)
          >>> bool(m2('a.txt'))
          False
          >>> bool(m2('b.txt'))
          True
          >>> bool(m2.matchfn('a.txt'))
          False
          >>> bool(m2.matchfn('b.txt'))
          True
          >>> m2.files()
          ['b.txt']
          >>> m2.exact('b.txt')
          True
          >>> m2.rel('b.txt')
          'b.txt'
          >>> def bad(f, msg):
          ...     print "%s: %s" % (f, msg)
          >>> m1.bad = bad
          >>> m2.bad('x.txt', 'No such file')
          sub/x.txt: No such file
          """

          def __init__(self, path, matcher):
              self._root = matcher._root
              self._cwd = matcher._cwd
              self._path = path
              self._matcher = matcher

              # keep only the wrapped matcher's files below path, with the
              # "path/" prefix cut off
              prefix = path + "/"
              skip = len(path) + 1
              narrowed = []
              for f in matcher._files:
                  if f.startswith(prefix):
                      narrowed.append(f[skip:])
              self._files = narrowed

              self._anypats = matcher._anypats
              self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
              self._fmap = set(self._files)

          def bad(self, f, msg):
              '''forward to the wrapped matcher with the prefix restored'''
              self._matcher.bad(self._path + "/" + f, msg)

      def patkind(pat):
          """Return the kind prefix of pat, or None when it has none."""
          kind, rest = _patsplit(pat, None)
          return kind

      def _patsplit(pat, default):

          """Split a string into an optional pattern kind prefix and the

          actual pattern."""

          if ':' in pat:

              kind, val = pat.split(':', 1)

              if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',

                          'listfile', 'listfile0', 'set'):

                  return kind, val

          return default, pat

      def _globre(pat):

          "convert a glob pattern into a regexp"

          i, n = 0, len(pat)

          res = ''

          group = 0

          escape = re.escape

          def peek():

              return i < n and pat[i]

          while i < n:

              c = pat[i]

              i += 1

              if c not in '*?[{},\\':

                  res += escape(c)

              elif c == '*':

                  if peek() == '*':

                      i += 1

                      res += '.*'

                  else:

                      res += '[^/]*'

              elif c == '?':

                  res += '.'

              elif c == '[':

                  j = i

                  if j < n and pat[j] in '!]':

                      j += 1

                  while j < n and pat[j] != ']':

                      j += 1

                  if j >= n:

                      res += '\\['

                  else:

                      stuff = pat[i:j].replace('\\','\\\\')

                      i = j + 1

                      if stuff[0] == '!':

                          stuff = '^' + stuff[1:]

                      elif stuff[0] == '^':

                          stuff = '\\' + stuff

                      res = '%s[%s]' % (res, stuff)

              elif c == '{':

                  group += 1

                  res += '(?:'

              elif c == '}' and group:

                  res += ')'

                  group -= 1

              elif c == ',' and group:

                  res += '|'

              elif c == '\\':

                  p = peek()

                  if p:

                      i += 1

                      res += escape(p)

                  else:

                      res += escape(c)

              else:

                  res += escape(c)

          return res

      def _regex(kind, name, tail):

          '''convert a pattern into a regular expression'''

          if not name:

              return ''

          if kind == 're':

              return name

          elif kind == 'path':

              return '^' + re.escape(name) + '(?:/|$)'

          elif kind == 'relglob':

              return '(?:|.*/)' + _globre(name) + tail

          elif kind == 'relpath':

              return re.escape(name) + '(?:/|$)'

          elif kind == 'relre':

              if name.startswith('^'):

                  return name

              return '.*' + name

          return _globre(name) + tail

      def _buildmatch(ctx, pats, tail):
          """Build (regex text, match function) for pats.

          'set' patterns are expanded through _expandsets into a set of
          names; the other patterns are compiled by _buildregexmatch.  The
          returned function accepts a name found by either of them.  With
          nothing but 'set' patterns the regex text is ""."""
          explicit, rest = _expandsets(pats, ctx)
          if rest:
              regex, regexmatch = _buildregexmatch(rest, tail)
              if not explicit:
                  return regex, regexmatch
              return regex, lambda f: f in explicit or regexmatch(f)
          return "", explicit.__contains__

      def _buildregexmatch(pats, tail):
          """build a matching function from a set of patterns

          pats is a list of (kind, pattern) pairs and tail is passed to
          _regex for each of them.  Returns (pat, matchfn) where pat is the
          combined regular expression text.

          Raises util.Abort for a pattern that does not compile, and lets
          OverflowError through when a single pattern is still too large.
          """
          try:
              pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
              if len(pat) > 20000:
                  raise OverflowError()
              return pat, re.compile(pat).match
          except OverflowError:
              # We're using a Python with a tiny regex engine and we
              # made it explode, so we'll divide the pattern list in two
              # until it works
              l = len(pats)
              if l < 2:
                  raise
              # recurse into this function: _buildmatch takes (ctx, pats,
              # tail), so calling it with two arguments raised TypeError
              a = _buildregexmatch(pats[:l//2], tail)[1]
              b = _buildregexmatch(pats[l//2:], tail)[1]
              return pat, lambda s: a(s) or b(s)
          except re.error:
              # compile the patterns one by one to name the offending one
              for k, p in pats:
                  try:
                      re.compile('(?:%s)' % _regex(k, p, tail))
                  except re.error:
                      raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
              raise util.Abort(_("invalid pattern"))

      def _normalize(names, default, root, cwd, auditor):
          """Turn pattern strings into a list of (kind, name) pairs.

          Each entry of names is split with _patsplit.  'glob' and 'relpath'
          names go through scmutil.canonpath, 'relglob' and 'path' names
          through util.normpath.  'listfile'/'listfile0' entries are replaced
          by the normalized patterns read from the named file (one per line,
          or NUL-separated); util.Abort is raised if it cannot be read."""
          result = []
          for kind, name in [_patsplit(p, default) for p in names]:
              if kind in ('listfile', 'listfile0'):
                  try:
                      data = util.readfile(name)
                  except EnvironmentError:
                      raise util.Abort(_("unable to read file list (%s)") % name)
                  if kind == 'listfile0':
                      entries = data.split('\0')
                  else:
                      entries = data.splitlines()
                  # empty entries are dropped before recursing
                  entries = [e for e in entries if e]
                  result.extend(_normalize(entries, default, root, cwd, auditor))
                  continue
              if kind in ('glob', 'relpath'):
                  name = scmutil.canonpath(root, cwd, name, auditor)
              elif kind in ('relglob', 'path'):
                  name = util.normpath(name)
              result.append((kind, name))
          return result

      def _roots(patterns):

          r = []

          for kind, name in patterns:

              if kind == 'glob': # find the non-glob prefix

                  root = []

                  for p in name.split('/'):

                      if '[' in p or '{' in p or '*' in p or '?' in p:

                          break

                      root.append(p)

                  r.append('/'.join(root) or '.')

              elif kind in ('relpath', 'path'):

                  r.append(name or '.')

              elif kind == 'relglob':

                  r.append('.')

          return r

      def _anypats(patterns):

          for kind, name in patterns:

              if kind in ('glob', 're', 'relglob', 'relre'):

                  return True

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# match.py - filename matching
				#
				# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				import re
				import scmutil, util, fileset
				from i18n import _

				def _expandsets(pats, ctx):
				'''convert set: patterns into a list of files in the given context'''
				fset = set()
				other = []

				for kind, expr in pats:
				if kind == 'set':
				if not ctx:
				raise util.Abort("fileset expression with no context")
				s = fileset.getfileset(ctx, expr)
				fset.update(s)
				continue
				other.append((kind, expr))
				return fset, other

				class match(object):
				    """Callable file-name matcher built from patterns plus optional
				    include/exclude filters."""

				    def __init__(self, root, cwd, patterns, include=None, exclude=None,
				                 default='glob', exact=False, auditor=None, ctx=None):
				        """build an object to match a set of file patterns

				        arguments:
				        root - the canonical root of the tree you're matching against
				        cwd - the current working directory, if relevant
				        patterns - patterns to find
				        include - patterns to include (None or empty: no include filter)
				        exclude - patterns to exclude (None or empty: no exclude filter)
				        default - if a pattern in patterns has no explicit type, assume
				                  this one (include/exclude always default to 'glob')
				        exact - patterns are actually literals
				        auditor - handed to _normalize together with root and cwd
				        ctx - handed to _buildmatch; 'set:' patterns need it

				        a pattern is one of:
				        'glob:<glob>' - a glob relative to cwd
				        're:<regexp>' - a regular expression
				        'path:<path>' - a path relative to root
				        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
				        'relpath:<path>' - a path relative to cwd
				        'relre:<regexp>' - a regexp that needn't match the start of a name
				        'set:<fileset>' - a fileset expression
				        'listfile:<file>' - a file holding one pattern per line
				        'listfile0:<file>' - a file holding NUL-separated patterns
				        '<something>' - a pattern of the specified default type
				        """
				        self._root = root
				        self._cwd = cwd
				        self._files = []
				        self._anypats = bool(include or exclude)
				        self._ctx = ctx

				        # include/exclude patterns also match everything below a
				        # directory, hence the '(?:/|$)' tail instead of '$'
				        if include:
				            pats = _normalize(include, 'glob', root, cwd, auditor)
				            self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
				        if exclude:
				            pats = _normalize(exclude, 'glob', root, cwd, auditor)
				            self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
				        if exact:
				            self._files = patterns
				            pm = self.exact
				        elif patterns:
				            pats = _normalize(patterns, default, root, cwd, auditor)
				            self._files = _roots(pats)
				            self._anypats = self._anypats or _anypats(pats)
				            self.patternspat, pm = _buildmatch(ctx, pats, '$')

				        # pick the cheapest callable that combines the predicates that
				        # actually exist; im/em/pm are only bound when their input was
				        # non-empty
				        if patterns or exact:
				            if include and exclude:
				                m = lambda f: im(f) and not em(f) and pm(f)
				            elif include:
				                m = lambda f: im(f) and pm(f)
				            elif exclude:
				                m = lambda f: not em(f) and pm(f)
				            else:
				                m = pm
				        elif include and exclude:
				            m = lambda f: im(f) and not em(f)
				        elif include:
				            m = im
				        elif exclude:
				            m = lambda f: not em(f)
				        else:
				            m = lambda f: True

				        self.matchfn = m
				        self._fmap = set(self._files)

				    def __call__(self, fn):
				        '''return the result of matchfn for file name fn'''
				        return self.matchfn(fn)

				    def __iter__(self):
				        '''iterate over the entries of files()'''
				        return iter(self._files)

				    def bad(self, f, msg):
				        '''callback for each explicit file that can't be
				        found/accessed, with an error message

				        does nothing here; callers may replace it
				        '''
				        pass

				    def dir(self, f):
				        '''hook that does nothing by default'''
				        pass

				    def missing(self, f):
				        '''hook that does nothing by default'''
				        pass

				    def exact(self, f):
				        '''true if f is one of the entries of files()'''
				        return f in self._fmap

				    def rel(self, f):
				        '''return util.pathto(root, cwd, f)'''
				        return util.pathto(self._root, self._cwd, f)

				    def files(self):
				        '''the literal names (exact mode) or pattern roots collected
				        by the constructor'''
				        return self._files

				    def anypats(self):
				        '''true if include/exclude were given or _anypats() reported a
				        non-literal pattern'''
				        return self._anypats

				class exact(match):
				    """Matcher that treats files as literal names, not patterns."""

				    def __init__(self, root, cwd, files):
				        match.__init__(self, root, cwd, files, exact=True)

				class always(match):
				    """Matcher built from an empty pattern list, so every name matches."""

				    def __init__(self, root, cwd):
				        match.__init__(self, root, cwd, [])

				class narrowmatcher(match):
				    """Adapt a matcher to work on a subdirectory only.

				    The paths are remapped to remove/insert the path as needed:

				    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
				    >>> m2 = narrowmatcher('sub', m1)
				    >>> bool(m2('a.txt'))
				    False
				    >>> bool(m2('b.txt'))
				    True
				    >>> bool(m2.matchfn('a.txt'))
				    False
				    >>> bool(m2.matchfn('b.txt'))
				    True
				    >>> m2.files()
				    ['b.txt']
				    >>> m2.exact('b.txt')
				    True
				    >>> m2.rel('b.txt')
				    'b.txt'
				    >>> def bad(f, msg):
				    ...     print "%s: %s" % (f, msg)
				    >>> m1.bad = bad
				    >>> m2.bad('x.txt', 'No such file')
				    sub/x.txt: No such file
				    """

				    def __init__(self, path, matcher):
				        self._root = matcher._root
				        self._cwd = matcher._cwd
				        self._path = path
				        self._matcher = matcher

				        # keep only the wrapped matcher's files below path, with the
				        # "path/" prefix cut off
				        self._files = [f[len(path) + 1:] for f in matcher._files
				                       if f.startswith(path + "/")]
				        self._anypats = matcher._anypats
				        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
				        self._fmap = set(self._files)

				    def bad(self, f, msg):
				        '''forward to the wrapped matcher with the prefix restored'''
				        self._matcher.bad(self._path + "/" + f, msg)

				def patkind(pat):
				    """Return the kind prefix of pat, or None when it has none."""
				    return _patsplit(pat, None)[0]

				def _patsplit(pat, default):
				"""Split a string into an optional pattern kind prefix and the
				actual pattern."""
				if ':' in pat:
				kind, val = pat.split(':', 1)
				if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
				'listfile', 'listfile0', 'set'):
				return kind, val
				return default, pat

				def _globre(pat):
				"convert a glob pattern into a regexp"
				i, n = 0, len(pat)
				res = ''
				group = 0
				escape = re.escape
				def peek():
				return i < n and pat[i]
				while i < n:
				c = pat[i]
				i += 1
				if c not in '*?[{},\\':
				res += escape(c)
				elif c == '*':
				if peek() == '*':
				i += 1
				res += '.*'
				else:
				res += '[^/]*'
				elif c == '?':
				res += '.'
				elif c == '[':
				j = i
				if j < n and pat[j] in '!]':
				j += 1
				while j < n and pat[j] != ']':
				j += 1
				if j >= n:
				res += '\\['
				else:
				stuff = pat[i:j].replace('\\','\\\\')
				i = j + 1
				if stuff[0] == '!':
				stuff = '^' + stuff[1:]
				elif stuff[0] == '^':
				stuff = '\\' + stuff
				res = '%s[%s]' % (res, stuff)
				elif c == '{':
				group += 1
				res += '(?:'
				elif c == '}' and group:
				res += ')'
				group -= 1
				elif c == ',' and group:
				res += '\|'
				elif c == '\\':
				p = peek()
				if p:
				i += 1
				res += escape(p)
				else:
				res += escape(c)
				else:
				res += escape(c)
				return res

				def _regex(kind, name, tail):
				'''convert a pattern into a regular expression'''
				if not name:
				return ''
				if kind == 're':
				return name
				elif kind == 'path':
				return '^' + re.escape(name) + '(?:/\|$)'
				elif kind == 'relglob':
				return '(?:\|.*/)' + _globre(name) + tail
				elif kind == 'relpath':
				return re.escape(name) + '(?:/\|$)'
				elif kind == 'relre':
				if name.startswith('^'):
				return name
				return '.*' + name
				return _globre(name) + tail

				def _buildmatch(ctx, pats, tail):
				    """Build (regex text, match function) for pats.

				    'set' patterns are expanded through _expandsets into a set of
				    names; the other patterns are compiled by _buildregexmatch.  The
				    returned function accepts a name found by either of them.  With
				    nothing but 'set' patterns the regex text is ""."""
				    fset, pats = _expandsets(pats, ctx)
				    if not pats:
				        return "", fset.__contains__

				    pat, mf = _buildregexmatch(pats, tail)
				    if fset:
				        return pat, lambda f: f in fset or mf(f)
				    return pat, mf

				def _buildregexmatch(pats, tail):
				    """build a matching function from a set of patterns

				    pats is a list of (kind, pattern) pairs and tail is passed to
				    _regex for each of them.  Returns (pat, matchfn) where pat is the
				    combined regular expression text.

				    Raises util.Abort for a pattern that does not compile, and lets
				    OverflowError through when a single pattern is still too large.
				    """
				    try:
				        # alternatives are joined with a plain '|'; '\|' would match a
				        # literal pipe instead of separating them
				        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
				        if len(pat) > 20000:
				            raise OverflowError()
				        return pat, re.compile(pat).match
				    except OverflowError:
				        # We're using a Python with a tiny regex engine and we
				        # made it explode, so we'll divide the pattern list in two
				        # until it works
				        l = len(pats)
				        if l < 2:
				            raise
				        # recurse into this function: _buildmatch takes (ctx, pats,
				        # tail), so calling it with two arguments raised TypeError
				        a = _buildregexmatch(pats[:l//2], tail)[1]
				        b = _buildregexmatch(pats[l//2:], tail)[1]
				        return pat, lambda s: a(s) or b(s)
				    except re.error:
				        # compile the patterns one by one to name the offending one
				        for k, p in pats:
				            try:
				                re.compile('(?:%s)' % _regex(k, p, tail))
				            except re.error:
				                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
				        raise util.Abort(_("invalid pattern"))

				def _normalize(names, default, root, cwd, auditor):
				    """Turn pattern strings into a list of (kind, name) pairs.

				    Each entry of names is split with _patsplit.  'glob' and 'relpath'
				    names go through scmutil.canonpath, 'relglob' and 'path' names
				    through util.normpath.  'listfile'/'listfile0' entries are replaced
				    by the normalized patterns read from the named file (one per line,
				    or NUL-separated); util.Abort is raised if it cannot be read."""
				    pats = []
				    for kind, name in [_patsplit(p, default) for p in names]:
				        if kind in ('glob', 'relpath'):
				            name = scmutil.canonpath(root, cwd, name, auditor)
				        elif kind in ('relglob', 'path'):
				            name = util.normpath(name)
				        elif kind in ('listfile', 'listfile0'):
				            try:
				                files = util.readfile(name)
				                if kind == 'listfile0':
				                    files = files.split('\0')
				                else:
				                    files = files.splitlines()
				                # empty entries are dropped before recursing
				                files = [f for f in files if f]
				            except EnvironmentError:
				                raise util.Abort(_("unable to read file list (%s)") % name)
				            pats += _normalize(files, default, root, cwd, auditor)
				            continue

				        pats.append((kind, name))
				    return pats

				def _roots(patterns):
				r = []
				for kind, name in patterns:
				if kind == 'glob': # find the non-glob prefix
				root = []
				for p in name.split('/'):
				if '[' in p or '{' in p or '*' in p or '?' in p:
				break
				root.append(p)
				r.append('/'.join(root) or '.')
				elif kind in ('relpath', 'path'):
				r.append(name or '.')
				elif kind == 'relglob':
				r.append('.')
				return r

				def _anypats(patterns):
				for kind, name in patterns:
				if kind in ('glob', 're', 'relglob', 'relre'):
				return True