upstream/mercurial-mirror Commit - r8586:347fe1ac

1

# match.py - file name matching

1

# match.py - file name matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2, incorporated herein by reference.

6

# GNU General Public License version 2, incorporated herein by reference.

7

8

import util, re

8

import util, re

9

10

class _match(object):

10

class _match(object):

11

def __init__(self, root, cwd, files, mf, ap):

11

def __init__(self, root, cwd, files, mf, ap):

12

self._root = root

12

self._root = root

13

self._cwd = cwd

13

self._cwd = cwd

14

self._files = files

14

self._files = files

15

self._fmap = set(files)

15

self._fmap = set(files)

16

self.matchfn = mf

16

self.matchfn = mf

17

self._anypats = ap

17

self._anypats = ap

18

def __call__(self, fn):

18

def __call__(self, fn):

19

return self.matchfn(fn)

19

return self.matchfn(fn)

20

def __iter__(self):

20

def __iter__(self):

21

for f in self._files:

21

for f in self._files:

22

yield f

22

yield f

23

def bad(self, f, msg):

23

def bad(self, f, msg):

24

return True

24

return True

25

def dir(self, f):

25

def dir(self, f):

26

pass

26

pass

27

def missing(self, f):

27

def missing(self, f):

28

pass

28

pass

29

def exact(self, f):

29

def exact(self, f):

30

return f in self._fmap

30

return f in self._fmap

31

def rel(self, f):

31

def rel(self, f):

32

return util.pathto(self._root, self._cwd, f)

32

return util.pathto(self._root, self._cwd, f)

33

def files(self):

33

def files(self):

34

return self._files

34

return self._files

35

def anypats(self):

35

def anypats(self):

36

return self._anypats

36

return self._anypats

37

38

class match(_match):

38

class match(_match):

39

def __init__(self, root, cwd, patterns, include=[], exclude=[],

39

def __init__(self, root, cwd, patterns, include=[], exclude=[],

40

default='glob'):

40

default='glob', exact=False):

41

"""build an object to match a set of file patterns

41

"""build an object to match a set of file patterns

42

43

arguments:

43

arguments:

44

root - the canonical root of the tree you're matching against

44

root - the canonical root of the tree you're matching against

45

cwd - the current working directory, if relevant

45

cwd - the current working directory, if relevant

46

patterns - patterns to find

46

patterns - patterns to find

47

include - patterns to include

47

include - patterns to include

48

exclude - patterns to exclude

48

exclude - patterns to exclude

49

default - if a pattern in names has no explicit type, assume this one

49

default - if a pattern in names has no explicit type, assume this one

50

exact - patterns are actually literals

50

51

a pattern is one of:

52

a pattern is one of:

52

'glob:<glob>' - a glob relative to cwd

53

'glob:<glob>' - a glob relative to cwd

53

're:<regexp>' - a regular expression

54

're:<regexp>' - a regular expression

54

'path:<path>' - a path relative to canonroot

55

'path:<path>' - a path relative to canonroot

55

'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)

56

'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)

56

'relpath:<path>' - a path relative to cwd

57

'relpath:<path>' - a path relative to cwd

57

'relre:<regexp>' - a regexp that doesn't have to match the start of a name

58

'relre:<regexp>' - a regexp that doesn't have to match the start of a name

58

'<something>' - one of the cases above, selected by the dflt_pat argument

59

'<something>' - one of the cases above, selected by the dflt_pat argument

59

"""

60

"""

60

61

roots = []

62

roots = []

62

anypats = bool(include or exclude)

63

anypats = bool(include or exclude)

63

64

if ~~patterns~~:

65

if include:

66

im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')

67

if exclude:

68

em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')

69

if exact:

70

roots = patterns

71

pm = self.exact

72

elif patterns:

65

pats = _normalize(patterns, default, root, cwd)

73

pats = _normalize(patterns, default, root, cwd)

66

roots = _roots(pats)

74

roots = _roots(pats)

67

anypats = anypats or _anypats(pats)

75

anypats = anypats or _anypats(pats)

68

pm = _buildmatch(pats, '$')

76

pm = _buildmatch(pats, '$')

69

if include:

70

im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')

71

if exclude:

72

em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')

73

77

74

if patterns:

78

if patterns or exact:

75

if include:

79

if include:

76

if exclude:

80

if exclude:

77

m = lambda f: im(f) and not em(f) and pm(f)

81

m = lambda f: im(f) and not em(f) and pm(f)

78

else:

82

else:

79

m = lambda f: im(f) and pm(f)

83

m = lambda f: im(f) and pm(f)

80

else:

84

else:

81

if exclude:

85

if exclude:

82

m = lambda f: not em(f) and pm(f)

86

m = lambda f: not em(f) and pm(f)

83

else:

87

else:

84

m = pm

88

m = pm

85

else:

89

else:

86

if include:

90

if include:

87

if exclude:

91

if exclude:

88

m = lambda f: im(f) and not em(f)

92

m = lambda f: im(f) and not em(f)

89

else:

93

else:

90

m = im

94

m = im

91

else:

95

else:

92

if exclude:

96

if exclude:

93

m = lambda f: not em(f)

97

m = lambda f: not em(f)

94

else:

98

else:

95

m = lambda f: True

99

m = lambda f: True

96

100

97

_match.__init__(self, root, cwd, roots, m, anypats)

101

_match.__init__(self, root, cwd, roots, m, anypats)

98

102

99

class exact(_match):

103

class exact(match):

100

def __init__(self, root, cwd, files):

104

def __init__(self, root, cwd, files):

101

_match.__init__(self, root, cwd, files, ~~self~~.exact, ~~Fals~~e)

105

match.__init__(self, root, cwd, files, exact = True)

102

106

103

class always(match):

107

class always(match):

104

def __init__(self, root, cwd):

108

def __init__(self, root, cwd):

105

match.__init__(self, root, cwd, [])

109

match.__init__(self, root, cwd, [])

106

110

107

class never(~~exact~~):

111

class never(match):

108

def __init__(self, root, cwd):

112

def __init__(self, root, cwd):

109

~~exact~~.__init__(self, root, cwd, [])

113

match.__init__(self, root, cwd, [], exact = True)

110

114

111

def patkind(pat):

115

def patkind(pat):

112

return _patsplit(pat, None)[0]

116

return _patsplit(pat, None)[0]

113

117

114

def _patsplit(pat, default):

118

def _patsplit(pat, default):

115

"""Split a string into an optional pattern kind prefix and the

119

"""Split a string into an optional pattern kind prefix and the

116

actual pattern."""

120

actual pattern."""

117

if ':' in pat:

121

if ':' in pat:

118

pat, val = pat.split(':', 1)

122

pat, val = pat.split(':', 1)

119

if pat in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):

123

if pat in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):

120

return pat, val

124

return pat, val

121

return default, pat

125

return default, pat

122

126

123

def _globre(pat):

127

def _globre(pat):

124

"convert a glob pattern into a regexp"

128

"convert a glob pattern into a regexp"

125

i, n = 0, len(pat)

129

i, n = 0, len(pat)

126

res = ''

130

res = ''

127

group = 0

131

group = 0

128

escape = re.escape

132

escape = re.escape

129

def peek(): return i < n and pat[i]

133

def peek(): return i < n and pat[i]

130

while i < n:

134

while i < n:

131

c = pat[i]

135

c = pat[i]

132

i = i+1

136

i = i+1

133

if c not in '*?[{},\\':

137

if c not in '*?[{},\\':

134

res += escape(c)

138

res += escape(c)

135

elif c == '*':

139

elif c == '*':

136

if peek() == '*':

140

if peek() == '*':

137

i += 1

141

i += 1

138

res += '.*'

142

res += '.*'

139

else:

143

else:

140

res += '[^/]*'

144

res += '[^/]*'

141

elif c == '?':

145

elif c == '?':

142

res += '.'

146

res += '.'

143

elif c == '[':

147

elif c == '[':

144

j = i

148

j = i

145

if j < n and pat[j] in '!]':

149

if j < n and pat[j] in '!]':

146

j += 1

150

j += 1

147

while j < n and pat[j] != ']':

151

while j < n and pat[j] != ']':

148

j += 1

152

j += 1

149

if j >= n:

153

if j >= n:

150

res += '\\['

154

res += '\\['

151

else:

155

else:

152

stuff = pat[i:j].replace('\\','\\\\')

156

stuff = pat[i:j].replace('\\','\\\\')

153

i = j + 1

157

i = j + 1

154

if stuff[0] == '!':

158

if stuff[0] == '!':

155

stuff = '^' + stuff[1:]

159

stuff = '^' + stuff[1:]

156

elif stuff[0] == '^':

160

elif stuff[0] == '^':

157

stuff = '\\' + stuff

161

stuff = '\\' + stuff

158

res = '%s[%s]' % (res, stuff)

162

res = '%s[%s]' % (res, stuff)

159

elif c == '{':

163

elif c == '{':

160

group += 1

164

group += 1

161

res += '(?:'

165

res += '(?:'

162

elif c == '}' and group:

166

elif c == '}' and group:

163

res += ')'

167

res += ')'

164

group -= 1

168

group -= 1

165

elif c == ',' and group:

169

elif c == ',' and group:

166

res += '|'

170

res += '|'

167

elif c == '\\':

171

elif c == '\\':

168

p = peek()

172

p = peek()

169

if p:

173

if p:

170

i += 1

174

i += 1

171

res += escape(p)

175

res += escape(p)

172

else:

176

else:

173

res += escape(c)

177

res += escape(c)

174

else:

178

else:

175

res += escape(c)

179

res += escape(c)

176

return res

180

return res

177

181

178

def _regex(kind, name, tail):

182

def _regex(kind, name, tail):

179

'''convert a pattern into a regular expression'''

183

'''convert a pattern into a regular expression'''

180

if not name:

184

if not name:

181

return ''

185

return ''

182

if kind == 're':

186

if kind == 're':

183

return name

187

return name

184

elif kind == 'path':

188

elif kind == 'path':

185

return '^' + re.escape(name) + '(?:/|$)'

189

return '^' + re.escape(name) + '(?:/|$)'

186

elif kind == 'relglob':

190

elif kind == 'relglob':

187

return '(?:|.*/)' + _globre(name) + tail

191

return '(?:|.*/)' + _globre(name) + tail

188

elif kind == 'relpath':

192

elif kind == 'relpath':

189

return re.escape(name) + '(?:/|$)'

193

return re.escape(name) + '(?:/|$)'

190

elif kind == 'relre':

194

elif kind == 'relre':

191

if name.startswith('^'):

195

if name.startswith('^'):

192

return name

196

return name

193

return '.*' + name

197

return '.*' + name

194

return _globre(name) + tail

198

return _globre(name) + tail

195

199

196

def _buildmatch(pats, tail):

200

def _buildmatch(pats, tail):

197

"""build a matching function from a set of patterns"""

201

"""build a matching function from a set of patterns"""

198

try:

202

try:

199

pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])

203

pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])

200

if len(pat) > 20000:

204

if len(pat) > 20000:

201

raise OverflowError()

205

raise OverflowError()

202

return re.compile(pat).match

206

return re.compile(pat).match

203

except OverflowError:

207

except OverflowError:

204

# We're using a Python with a tiny regex engine and we

208

# We're using a Python with a tiny regex engine and we

205

# made it explode, so we'll divide the pattern list in two

209

# made it explode, so we'll divide the pattern list in two

206

# until it works

210

# until it works

207

l = len(pats)

211

l = len(pats)

208

if l < 2:

212

if l < 2:

209

raise

213

raise

210

a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)

214

a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)

211

return lambda s: a(s) or b(s)

215

return lambda s: a(s) or b(s)

212

except re.error:

216

except re.error:

213

for k, p in pats:

217

for k, p in pats:

214

try:

218

try:

215

re.compile('(?:%s)' % _regex(k, p, tail))

219

re.compile('(?:%s)' % _regex(k, p, tail))

216

except re.error:

220

except re.error:

217

raise util.Abort("invalid pattern (%s): %s" % (k, p))

221

raise util.Abort("invalid pattern (%s): %s" % (k, p))

218

raise util.Abort("invalid pattern")

222

raise util.Abort("invalid pattern")

219

223

220

def _normalize(names, default, root, cwd):

224

def _normalize(names, default, root, cwd):

221

pats = []

225

pats = []

222

for kind, name in [_patsplit(p, default) for p in names]:

226

for kind, name in [_patsplit(p, default) for p in names]:

223

if kind in ('glob', 'relpath'):

227

if kind in ('glob', 'relpath'):

224

name = util.canonpath(root, cwd, name)

228

name = util.canonpath(root, cwd, name)

225

elif kind in ('relglob', 'path'):

229

elif kind in ('relglob', 'path'):

226

name = util.normpath(name)

230

name = util.normpath(name)

227

231

228

pats.append((kind, name))

232

pats.append((kind, name))

229

return pats

233

return pats

230

234

231

def _roots(patterns):

235

def _roots(patterns):

232

r = []

236

r = []

233

for kind, name in patterns:

237

for kind, name in patterns:

234

if kind == 'glob': # find the non-glob prefix

238

if kind == 'glob': # find the non-glob prefix

235

root = []

239

root = []

236

for p in name.split('/'):

240

for p in name.split('/'):

237

if '[' in p or '{' in p or '*' in p or '?' in p:

241

if '[' in p or '{' in p or '*' in p or '?' in p:

238

break

242

break

239

root.append(p)

243

root.append(p)

240

r.append('/'.join(root) or '.')

244

r.append('/'.join(root) or '.')

241

elif kind in ('relpath', 'path'):

245

elif kind in ('relpath', 'path'):

242

r.append(name or '.')

246

r.append(name or '.')

243

elif kind == 'relglob':

247

elif kind == 'relglob':

244

r.append('.')

248

r.append('.')

245

return r

249

return r

246

250

247

def _anypats(patterns):

251

def _anypats(patterns):

248

for kind, name in patterns:

252

for kind, name in patterns:

249

if kind in ('glob', 're', 'relglob', 'relre'):

253

if kind in ('glob', 're', 'relglob', 'relre'):

250

return True

254

return True

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - file name matching
             #
             #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2, incorporated herein by reference.
             import util, re
             class _match(object):
                 """Common base for matcher objects.

                 A matcher is callable: calling it with a file name delegates to the
                 match function given at construction time.  It also remembers the
                 list of file names it was created with.
                 """

                 def __init__(self, root, cwd, files, mf, ap):
                     """Store the matcher state.

                     root  - stored and later handed to util.pathto() by rel()
                     cwd   - stored and later handed to util.pathto() by rel()
                     files - sequence of file names; returned by files(), yielded by
                             iteration, and used for exact() membership tests
                     mf    - callable taking a file name; its result is the result
                             of calling the matcher
                     ap    - value reported by anypats()
                     """
                     self.matchfn = mf
                     self._anypats = ap
                     self._root = root
                     self._cwd = cwd
                     self._files = files
                     # set copy of files so exact() is a hash lookup
                     self._fmap = set(files)

                 def __call__(self, fn):
                     """Return whatever the match function returns for fn."""
                     matcher = self.matchfn
                     return matcher(fn)

                 def __iter__(self):
                     """Yield the stored file names in their original order."""
                     for name in self._files:
                         yield name

                 def bad(self, f, msg):
                     """Hook taking a file name and a message; always returns True."""
                     return True

                 def dir(self, f):
                     """Hook taking a name; does nothing and returns None."""
                     return None

                 def missing(self, f):
                     """Hook taking a file name; does nothing and returns None."""
                     return None

                 def exact(self, f):
                     """Return True if f is one of the stored file names."""
                     return f in self._fmap

                 def rel(self, f):
                     """Return util.pathto(root, cwd, f) for the stored root and cwd."""
                     return util.pathto(self._root, self._cwd, f)

                 def files(self):
                     """Return the stored sequence of file names."""
                     return self._files

                 def anypats(self):
                     """Return the 'ap' value given to the constructor."""
                     return self._anypats
             class match(_match):
                 def __init__(self, root, cwd, patterns, include=[], exclude=[],
-                             default='glob'):
+                             default='glob', exact=False):
                     """build an object to match a set of file patterns
                     arguments:
                     root - the canonical root of the tree you're matching against
                     cwd - the current working directory, if relevant
                     patterns - patterns to find
                     include - patterns to include
                     exclude - patterns to exclude
                     default - if a pattern in patterns has no explicit type, assume this one
+                    exact - patterns are actually literals
                     a pattern is one of:
                     'glob:<glob>' - a glob relative to cwd
                     're:<regexp>' - a regular expression
                     'path:<path>' - a path relative to canonroot
                     'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                     'relpath:<path>' - a path relative to cwd
                     'relre:<regexp>' - a regexp that doesn't have to match the start of a name
                     '<something>' - one of the cases above, selected by the default argument
                     """
                     roots = []
                     anypats = bool(include or exclude)
-                    if patterns:
+                    if include:
+                        im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')
+                    if exclude:
+                        em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')
+                    if exact:
+                        roots = patterns
+                        pm = self.exact
+                    elif patterns:
                         pats = _normalize(patterns, default, root, cwd)
                         roots = _roots(pats)
                         anypats = anypats or _anypats(pats)
                         pm = _buildmatch(pats, '$')
-                    if include:
-                        im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')
-                    if exclude:
-                        em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')
-                    if patterns:
+                    if patterns or exact:
                         if include:
                             if exclude:
                                 m = lambda f: im(f) and not em(f) and pm(f)
                             else:
                                 m = lambda f: im(f) and pm(f)
                         else:
                             if exclude:
                                 m = lambda f: not em(f) and pm(f)
                             else:
                                 m = pm
                     else:
                         if include:
                             if exclude:
                                 m = lambda f: im(f) and not em(f)
                             else:
                                 m = im
                         else:
                             if exclude:
                                 m = lambda f: not em(f)
                             else:
                                 m = lambda f: True
                     _match.__init__(self, root, cwd, roots, m, anypats)
-            class exact(_match):
+            class exact(match):
                 def __init__(self, root, cwd, files):
-                    _match.__init__(self, root, cwd, files, self.exact, False)
+                    match.__init__(self, root, cwd, files, exact = True)
             class always(match):
                 """Matcher built from an empty pattern list and no include/exclude.

                 With nothing to restrict it, match.__init__ installs a match
                 function that returns True for every file name.
                 """
                 def __init__(self, root, cwd):
                     """Initialise via match.__init__ with an empty pattern list."""
                     match.__init__(self, root, cwd, [])
-            class never(exact):
+            class never(match):
                 def __init__(self, root, cwd):
-                    exact.__init__(self, root, cwd, [])
+                    match.__init__(self, root, cwd, [], exact = True)
             def patkind(pat):
                 """Return the explicit kind prefix of pat, or None if it has none."""
                 kind, _rest = _patsplit(pat, None)
                 return kind
             def _patsplit(pat, default):
                 """Split a string into an optional pattern kind prefix and the
                 actual pattern.

                 Returns a (kind, pattern) pair.  When the text before the first ':'
                 is one of the known kinds, that kind and the remainder are returned.
                 Otherwise the kind is 'default' and the pattern is the whole,
                 unmodified input string (so names that merely contain a colon, such
                 as 'foo:bar', are preserved intact).
                 """
                 if ':' in pat:
                     # Use separate names here: rebinding 'pat' would truncate the
                     # pattern returned below when the prefix is not a known kind.
                     kind, val = pat.split(':', 1)
                     if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
                         return kind, val
                 return default, pat
             def _globre(pat):
                 """convert a glob pattern into a regexp

                 Translation rules, as implemented below:
                   '**'      -> '.*'  (may cross '/')
                   '*'       -> '[^/]*'
                   '?'       -> '.'
                   '[...]'   -> a character class; a leading '!' becomes '^' and a
                                leading '^' is escaped; an unterminated '[' is
                                treated as a literal
                   '{a,b}'   -> '(?:a|b)'; ',' and '}' are literal outside braces
                   '\\x'     -> the escaped character x
                 Everything else is passed through re.escape().
                 """
                 quote = re.escape
                 end = len(pat)
                 out = []        # regexp fragments, joined once at the end
                 depth = 0       # number of '{' groups currently open
                 pos = 0
                 while pos < end:
                     ch = pat[pos]
                     pos += 1
                     if ch == '*':
                         if pos < end and pat[pos] == '*':
                             pos += 1
                             out.append('.*')
                         else:
                             out.append('[^/]*')
                     elif ch == '?':
                         out.append('.')
                     elif ch == '[':
                         # look for the closing ']'; a ']' or '!' directly after
                         # the '[' belongs to the class itself
                         close = pos
                         if close < end and pat[close] in '!]':
                             close += 1
                         while close < end and pat[close] != ']':
                             close += 1
                         if close >= end:
                             # never closed: the '[' is just a literal character
                             out.append('\\[')
                         else:
                             body = pat[pos:close].replace('\\', '\\\\')
                             pos = close + 1
                             if body[0] == '!':
                                 body = '^' + body[1:]
                             elif body[0] == '^':
                                 body = '\\' + body
                             out.append('[%s]' % body)
                     elif ch == '{':
                         depth += 1
                         out.append('(?:')
                     elif ch == '}' and depth:
                         out.append(')')
                         depth -= 1
                     elif ch == ',' and depth:
                         out.append('|')
                     elif ch == '\\' and pos < end:
                         # backslash quotes the following character
                         out.append(quote(pat[pos]))
                         pos += 1
                     else:
                         # ordinary character, a trailing backslash, or a ',' / '}'
                         # outside any brace group
                         out.append(quote(ch))
                 return ''.join(out)
             def _regex(kind, name, tail):
                 '''convert a pattern into a regular expression

                 kind - pattern kind ('re', 'path', 'relglob', 'relpath', 'relre');
                        any other value is handled as a glob
                 name - the pattern text; an empty name yields an empty regexp
                 tail - regexp fragment appended after glob translations only
                 '''
                 if not name:
                     return ''
                 if kind == 're':
                     return name
                 if kind == 'relre':
                     # an explicitly anchored regexp is used as is
                     if name.startswith('^'):
                         return name
                     return '.*' + name
                 if kind in ('path', 'relpath'):
                     # literal name, matching the name itself or anything below it
                     literal = re.escape(name) + '(?:/|$)'
                     if kind == 'path':
                         return '^' + literal
                     return literal
                 globbed = _globre(name) + tail
                 if kind == 'relglob':
                     # allow the glob to start at any directory level
                     return '(?:|.*/)' + globbed
                 return globbed
             def _buildmatch(pats, tail):
                 """build a matching function from a set of patterns

                 pats - list of (kind, pattern) pairs
                 tail - regexp fragment handed to _regex() for every pattern

                 All patterns are joined into one alternation and compiled; the
                 compiled regexp's match method is returned.  Raises util.Abort when
                 a pattern does not compile.
                 """
                 try:
                     alternatives = [_regex(kind, pattern, tail)
                                     for (kind, pattern) in pats]
                     combined = '(?:%s)' % '|'.join(alternatives)
                     if len(combined) > 20000:
                         raise OverflowError()
                     return re.compile(combined).match
                 except OverflowError:
                     # Either the combined regexp went over the length limit above
                     # or the regex engine itself overflowed.  Halve the pattern
                     # list and match against each half in turn, recursing until
                     # the pieces are small enough.
                     count = len(pats)
                     if count < 2:
                         # nothing left to split: let the error propagate
                         raise
                     half = count // 2
                     first = _buildmatch(pats[:half], tail)
                     second = _buildmatch(pats[half:], tail)
                     return lambda s: first(s) or second(s)
                 except re.error:
                     # Compile each pattern separately to name the offending one.
                     for kind, pattern in pats:
                         try:
                             re.compile('(?:%s)' % _regex(kind, pattern, tail))
                         except re.error:
                             raise util.Abort("invalid pattern (%s): %s"
                                              % (kind, pattern))
                     raise util.Abort("invalid pattern")
             def _normalize(names, default, root, cwd):
                 """Turn pattern strings into a list of (kind, name) pairs.

                 Each entry of names is split with _patsplit(), using 'default' as
                 the kind for entries without a recognised prefix.  Names of kind
                 'glob' or 'relpath' are passed through util.canonpath(root, cwd,
                 name); names of kind 'relglob' or 'path' are passed through
                 util.normpath(name); all other names are kept unchanged.
                 """
                 # split everything first, then normalise pair by pair
                 split = [_patsplit(entry, default) for entry in names]
                 normalized = []
                 for kind, name in split:
                     if kind in ('relglob', 'path'):
                         name = util.normpath(name)
                     elif kind in ('glob', 'relpath'):
                         name = util.canonpath(root, cwd, name)
                     normalized.append((kind, name))
                 return normalized
             def _roots(patterns):
                 """Return one root path per glob, path, relpath or relglob pattern.

                 patterns is a list of (kind, name) pairs.  For a 'glob' the root is
                 the leading run of '/'-separated components that contain none of
                 the characters '[', '{', '*' or '?'.  For 'relpath' and 'path' it
                 is the name itself.  For 'relglob' it is always '.'.  An empty root
                 is reported as '.'.  Patterns of any other kind contribute nothing.
                 """
                 globchars = '[{*?'
                 found = []
                 for kind, name in patterns:
                     if kind == 'relglob':
                         found.append('.')
                     elif kind in ('relpath', 'path'):
                         found.append(name or '.')
                     elif kind == 'glob':
                         # count the leading components free of glob characters
                         parts = name.split('/')
                         keep = 0
                         while keep < len(parts):
                             if [c for c in globchars if c in parts[keep]]:
                                 break
                             keep += 1
                         found.append('/'.join(parts[:keep]) or '.')
                 return found
             def _anypats(patterns):
                 """Report whether any pattern is of a glob or regexp kind.

                 patterns is a list of (kind, name) pairs.  Returns True as soon as
                 a pair of kind 'glob', 're', 'relglob' or 'relre' is seen, and
                 False when there is none (including for an empty list).
                 """
                 for kind, name in patterns:
                     if kind in ('glob', 're', 'relglob', 'relre'):
                         return True
                 # explicit result so callers always get a real boolean, not None
                 return False