##// END OF EJS Templates
match: support reading pattern lists from files
Steve Borho -
r13218:1f4721de default
parent child Browse files
Show More
@@ -1,41 +1,51
1 1 Mercurial accepts several notations for identifying one or more files
2 2 at a time.
3 3
4 4 By default, Mercurial treats filenames as shell-style extended glob
5 5 patterns.
6 6
7 7 Alternate pattern notations must be specified explicitly.
8 8
9 9 To use a plain path name without any pattern matching, start it with
10 10 ``path:``. These path names must completely match starting at the
11 11 current repository root.
12 12
13 13 To use an extended glob, start a name with ``glob:``. Globs are rooted
14 14 at the current directory; a glob such as ``*.c`` will only match files
15 15 in the current directory ending with ``.c``.
16 16
17 17 The supported glob syntax extensions are ``**`` to match any string
18 18 across path separators and ``{a,b}`` to mean "a or b".
19 19
20 20 To use a Perl/Python regular expression, start a name with ``re:``.
21 21 Regexp pattern matching is anchored at the root of the repository.
22 22
23 To read name patterns from a file, use ``listfile:`` or ``listfile0:``.
24 The former expects patterns separated by line feeds, while the latter
25 expects patterns delimited by null bytes. Each string read from the file
26 is itself treated as a file pattern.
27
23 28 Plain examples::
24 29
25 30 path:foo/bar a name bar in a directory named foo in the root
26 31 of the repository
27 32 path:path:name a file or directory named "path:name"
28 33
29 34 Glob examples::
30 35
31 36 glob:*.c any name ending in ".c" in the current directory
32 37 *.c any name ending in ".c" in the current directory
33 38 **.c any name ending in ".c" in any subdirectory of the
34 39 current directory including itself.
35 40 foo/*.c any name ending in ".c" in the directory foo
36 41 foo/**.c any name ending in ".c" in any subdirectory of foo
37 42 including itself.
38 43
39 44 Regexp examples::
40 45
41 46 re:.*\.c$ any name ending in ".c", anywhere in the repository
47
48 File examples::
49
50 listfile:list.txt read list from list.txt with one file pattern per line
51 listfile0:list.txt read list from list.txt with null byte delimiters
@@ -1,296 +1,306
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util
10 10 from i18n import _
11 11
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - if a pattern in names has no explicit type, assume this one
        exact - patterns are actually literals
        auditor - handed unchanged to the pattern normalizer

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to canonroot
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'listfile:<file>' - patterns read from a file, one per line
        'listfile0:<file>' - patterns read from a file, null separated
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)

        # include/exclude patterns always default to 'glob' and match
        # whole path components (the regexp tail stops at '/' or the end)
        if include:
            incpats = _normalize(include, 'glob', root, cwd, auditor)
            im = _buildmatch(incpats, '(?:/|$)')
        if exclude:
            excpats = _normalize(exclude, 'glob', root, cwd, auditor)
            em = _buildmatch(excpats, '(?:/|$)')

        if exact:
            # literal names: membership test against the name set
            self._files = patterns
            pm = self.exact
        elif patterns:
            pats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            pm = _buildmatch(pats, '$')

        # pm only exists when there are patterns (or exact names)
        haspm = bool(patterns or exact)

        # pick the cheapest callable combining whichever matchers exist
        if include and exclude:
            if haspm:
                m = lambda f: im(f) and not em(f) and pm(f)
            else:
                m = lambda f: im(f) and not em(f)
        elif include:
            if haspm:
                m = lambda f: im(f) and pm(f)
            else:
                m = im
        elif exclude:
            if haspm:
                m = lambda f: not em(f) and pm(f)
            else:
                m = lambda f: not em(f)
        elif haspm:
            m = pm
        else:
            m = lambda f: True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''apply the combined match function to fn'''
        return self.matchfn(fn)

    def __iter__(self):
        '''iterate over the entries of files()'''
        for name in self._files:
            yield name

    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass

    def dir(self, f):
        '''hook taking a name; does nothing by default'''
        pass

    def missing(self, f):
        '''hook taking a name; does nothing by default'''
        pass

    def exact(self, f):
        '''tell whether f is one of the names in files()'''
        return f in self._fmap

    def rel(self, f):
        '''return util.pathto(root, cwd, f)'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''return the explicit names (or pattern roots) of this matcher'''
        return self._files

    def anypats(self):
        '''return the flag recording whether real patterns were involved'''
        return self._anypats
104 104
class exact(match):
    '''matcher whose names are taken as literals, not patterns'''
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, patterns=files, exact=True)
108 108
class always(match):
    '''matcher built with no patterns at all, so every name matches'''
    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, patterns=[])
112 112
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        '''wrap matcher so that names are relative to the directory path'''
        self._matcher = matcher
        self._path = path
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._anypats = matcher._anypats

        # keep only the wrapped matcher's files below path, prefix removed
        prefix = path + "/"
        skip = len(prefix)
        self._files = [name[skip:] for name in matcher._files
                       if name.startswith(prefix)]
        self._fmap = set(self._files)

        def narrowed(fn):
            # put the directory back before asking the wrapped matcher
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = narrowed

    def bad(self, f, msg):
        '''report f to the wrapped matcher, with the directory put back'''
        self._matcher.bad(self._path + "/" + f, msg)
155 155
def patkind(pat):
    '''return the explicit kind prefix of pat, or None if it has none'''
    kind, rest = _patsplit(pat, None)
    return kind
158 158
def _patsplit(pat, default):
    """Split pat into a (kind, pattern) pair.

    Only a recognized kind in front of the first ':' counts as a
    prefix; otherwise the whole string is the pattern and the kind
    is default."""
    pieces = pat.split(':', 1)
    if len(pieces) == 2:
        prefix, rest = pieces
        if prefix in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                      'listfile', 'listfile0'):
            return prefix, rest
    return default, pat
167 168
def _globre(pat):
    """convert a glob pattern into a regexp

    '**' becomes '.*', '*' becomes '[^/]*', '?' becomes '.',
    '[...]' becomes a character class, '{a,b}' becomes '(?:a|b)' and
    a backslash quotes the character after it. Everything else is
    escaped literally."""
    quote = re.escape
    out = []
    depth = 0                   # number of currently open '{' groups
    pos, end = 0, len(pat)
    while pos < end:
        ch = pat[pos]
        pos += 1
        if ch not in '*?[{},\\':
            out.append(quote(ch))
        elif ch == '*':
            if pos < end and pat[pos] == '*':
                # '**' also crosses path separators
                pos += 1
                out.append('.*')
            else:
                out.append('[^/]*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # look for the closing ']'; a leading '!' or ']' belongs
            # to the class and cannot close it
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: a literal '['
                out.append('\\[')
            else:
                members = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if members[0] == '!':
                    # glob negation becomes regexp negation
                    members = '^' + members[1:]
                elif members[0] == '^':
                    # a literal '^' must not negate the class
                    members = '\\' + members
                out.append('[%s]' % members)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\':
            if pos < end:
                # the backslash quotes the next character
                out.append(quote(pat[pos]))
                pos += 1
            else:
                # trailing backslash stands for itself
                out.append(quote(ch))
        else:
            # '}' or ',' outside of any group
            out.append(quote(ch))
    return ''.join(out)
223 224
def _regex(kind, name, tail):
    '''convert a pattern into a regular expression

    kind is the pattern type, name the pattern text and tail a regexp
    fragment appended to glob based patterns. An empty name gives an
    empty regexp; any kind not listed below is handled as a glob.'''
    if not name:
        return ''
    if kind == 're':
        return name
    if kind == 'relre':
        if name.startswith('^'):
            return name
        # let the expression match anywhere in the name
        return '.*' + name
    if kind == 'path':
        return '^' + re.escape(name) + '(?:/|$)'
    if kind == 'relpath':
        return re.escape(name) + '(?:/|$)'
    globbed = _globre(name) + tail
    if kind == 'relglob':
        # may start at the root or right after any '/'
        return '(?:|.*/)' + globbed
    return globbed
241 242
def _buildmatch(pats, tail):
    """build a matching function from a set of patterns

    pats is a list of (kind, pattern) pairs and tail is handed to
    _regex for each of them. The result is a callable taking a string.
    Raises util.Abort when a pattern is not a valid regexp."""
    try:
        alternatives = [_regex(kind, pat, tail) for (kind, pat) in pats]
        combined = '(?:%s)' % '|'.join(alternatives)
        if len(combined) > 20000:
            raise OverflowError()
        return re.compile(combined).match
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(pats)
        if count < 2:
            # a single pattern can't be split any further
            raise
        half = count // 2
        first = _buildmatch(pats[:half], tail)
        second = _buildmatch(pats[half:], tail)
        return lambda s: first(s) or second(s)
    except re.error:
        # compile each pattern alone so the culprit can be named
        for kind, pat in pats:
            try:
                re.compile('(?:%s)' % _regex(kind, pat, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, pat))
        raise util.Abort(_("invalid pattern"))
265 266
def _normalize(names, default, root, cwd, auditor):
    """Turn pattern strings into a list of (kind, name) pairs.

    names is a sequence of pattern strings; a pattern without a known
    kind prefix gets the kind default. 'glob' and 'relpath' names go
    through util.canonpath (with root, cwd and auditor), 'relglob' and
    'path' names through util.normpath. A 'listfile' or 'listfile0'
    pattern names a file holding further patterns (separated by line
    feeds or null bytes respectively); those are normalized
    recursively and spliced into the result in place of the entry.

    Raises util.Abort if a pattern list file cannot be read."""
    pats = []
    for kind, name in [_patsplit(p, default) for p in names]:
        if kind in ('glob', 'relpath'):
            name = util.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        elif kind in ('listfile', 'listfile0'):
            if kind == 'listfile0':
                delimiter = '\0'
            else:
                delimiter = '\n'
            try:
                fp = open(name, 'r')
                try:
                    # close the handle right away instead of leaving
                    # it to the garbage collector
                    data = fp.read()
                finally:
                    fp.close()
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            # empty entries (blank lines, trailing delimiter) are dropped
            files = [f for f in data.split(delimiter) if f]
            pats += _normalize(files, default, root, cwd, auditor)
            continue

        pats.append((kind, name))
    return pats
276 286
def _roots(patterns):
    '''return one root per path or glob based pattern, in order

    patterns is a list of (kind, name) pairs. A 'glob' gives its
    leading path components that contain no glob characters, 'path'
    and 'relpath' give the name itself and 'relglob' gives '.'. An
    empty root is reported as '.'. Other kinds give nothing.'''
    roots = []
    for kind, name in patterns:
        if kind == 'relglob':
            roots.append('.')
        elif kind in ('relpath', 'path'):
            roots.append(name or '.')
        elif kind == 'glob':
            # find the non-glob prefix
            literal = []
            for part in name.split('/'):
                if [ch for ch in '[{*?' if ch in part]:
                    break
                literal.append(part)
            roots.append('/'.join(literal) or '.')
    return roots
292 302
def _anypats(patterns):
    '''return True if any (kind, name) pair has a glob or regexp kind

    Falls off the end (returning None) when there is no such pair.'''
    wild = ('glob', 're', 'relglob', 'relre')
    for kind, name in patterns:
        if kind not in wild:
            continue
        return True
General Comments 0
You need to be logged in to leave comments. Login now