match: handle exact matching using new exactmatcher
Author: Martin von Zweigbergk
Changeset: r32499:a3583852 (default)
@@ -1,949 +1,979
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import copy
10 import copy
11 import os
11 import os
12 import re
12 import re
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 pathutil,
17 pathutil,
18 util,
18 util,
19 )
19 )
20
20
21 propertycache = util.propertycache
21 propertycache = util.propertycache
22
22
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The pattern is compiled through ``util.re``. If the compiled object
    exposes a ``test_match`` attribute that is returned, otherwise the
    object's ``match`` method is returned.
    '''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match
32
32
33 def _expandsets(kindpats, ctx, listsubrepos):
33 def _expandsets(kindpats, ctx, listsubrepos):
34 '''Returns the kindpats list with the 'set' patterns expanded.'''
34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 fset = set()
35 fset = set()
36 other = []
36 other = []
37
37
38 for kind, pat, source in kindpats:
38 for kind, pat, source in kindpats:
39 if kind == 'set':
39 if kind == 'set':
40 if not ctx:
40 if not ctx:
41 raise error.ProgrammingError("fileset expression with no "
41 raise error.ProgrammingError("fileset expression with no "
42 "context")
42 "context")
43 s = ctx.getfileset(pat)
43 s = ctx.getfileset(pat)
44 fset.update(s)
44 fset.update(s)
45
45
46 if listsubrepos:
46 if listsubrepos:
47 for subpath in ctx.substate:
47 for subpath in ctx.substate:
48 s = ctx.sub(subpath).getfileset(pat)
48 s = ctx.sub(subpath).getfileset(pat)
49 fset.update(subpath + '/' + f for f in s)
49 fset.update(subpath + '/' + f for f in s)
50
50
51 continue
51 continue
52 other.append((kind, pat, source))
52 other.append((kind, pat, source))
53 return fset, other
53 return fset, other
54
54
55 def _expandsubinclude(kindpats, root):
55 def _expandsubinclude(kindpats, root):
56 '''Returns the list of subinclude matcher args and the kindpats without the
56 '''Returns the list of subinclude matcher args and the kindpats without the
57 subincludes in it.'''
57 subincludes in it.'''
58 relmatchers = []
58 relmatchers = []
59 other = []
59 other = []
60
60
61 for kind, pat, source in kindpats:
61 for kind, pat, source in kindpats:
62 if kind == 'subinclude':
62 if kind == 'subinclude':
63 sourceroot = pathutil.dirname(util.normpath(source))
63 sourceroot = pathutil.dirname(util.normpath(source))
64 pat = util.pconvert(pat)
64 pat = util.pconvert(pat)
65 path = pathutil.join(sourceroot, pat)
65 path = pathutil.join(sourceroot, pat)
66
66
67 newroot = pathutil.dirname(path)
67 newroot = pathutil.dirname(path)
68 matcherargs = (newroot, '', [], ['include:%s' % path])
68 matcherargs = (newroot, '', [], ['include:%s' % path])
69
69
70 prefix = pathutil.canonpath(root, root, newroot)
70 prefix = pathutil.canonpath(root, root, newroot)
71 if prefix:
71 if prefix:
72 prefix += '/'
72 prefix += '/'
73 relmatchers.append((prefix, matcherargs))
73 relmatchers.append((prefix, matcherargs))
74 else:
74 else:
75 other.append((kind, pat, source))
75 other.append((kind, pat, source))
76
76
77 return relmatchers, other
77 return relmatchers, other
78
78
79 def _kindpatsalwaysmatch(kindpats):
79 def _kindpatsalwaysmatch(kindpats):
80 """"Checks whether the kindspats match everything, as e.g.
80 """"Checks whether the kindspats match everything, as e.g.
81 'relpath:.' does.
81 'relpath:.' does.
82 """
82 """
83 for kind, pat, source in kindpats:
83 for kind, pat, source in kindpats:
84 if pat != '' or kind not in ['relpath', 'glob']:
84 if pat != '' or kind not in ['relpath', 'glob']:
85 return False
85 return False
86 return True
86 return True
87
87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - forwarded unchanged to the pattern normalization function
    ctx - forwarded to the matchers; its repo's dirstate is used to
          normalize patterns when icasefs is true
    listsubrepos - forwarded unchanged to the matchers
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError if both exact and icasefs are true.
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    # The primary matcher only handles 'patterns'; include and exclude are
    # layered on top of it as separate matchers below.
    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    else:
        m = matcher(root, cwd, normalize, patterns, include=None,
                    default=default, exact=exact, auditor=auditor, ctx=ctx,
                    listsubrepos=listsubrepos, warn=warn, badfn=badfn)
    if include:
        im = matcher(root, cwd, normalize, [], include=include, default=default,
                     exact=False, auditor=auditor, ctx=ctx,
                     listsubrepos=listsubrepos, warn=warn, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        em = matcher(root, cwd, normalize, [], include=exclude, default=default,
                     exact=False, auditor=auditor, ctx=ctx,
                     listsubrepos=listsubrepos, warn=warn, badfn=None)
        m = differencematcher(m, em)
    return m
159
162
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of file names.'''
    return exactmatcher(root, cwd, files, badfn=badfn)
162
165
def always(root, cwd):
    '''Return the matcher built by match() from an empty pattern list.'''
    return match(root, cwd, [])
165
168
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is left unchanged.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
173
176
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. How a pattern is handled
    depends on its kind:

    glob, relpath              - made canonical with pathutil.canonpath
    relglob, path, rootfilesin - passed through util.normpath
    listfile, listfile0        - the named file is read, split on lines (or
                                 on NUL for listfile0), empty entries are
                                 dropped and the entries are normalized
                                 recursively; their source is the list file
    include                    - the pattern file under root is read with
                                 readpatternfile() and its patterns are
                                 normalized recursively
    anything else              - kept as is

    Raises error.Abort if a list file cannot be read, or if processing an
    included pattern file aborts (the message is prefixed with the include
    path). An included file that raises IOError is skipped, with a warning if
    warn is set.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # inst.args[0] is what inst[0] returned on Python 2; indexing
                # an exception directly is not supported on Python 3.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
214
217
class basematcher(object):
    '''Common base for the matcher classes.

    As defined here the matcher has no files, matches nothing and visits no
    directory; subclasses override matchfn(), visitdir(), _files and the
    anypats()/always()/isexact() predicates.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        return self.matchfn(fn)
    def __iter__(self):
        for f in self._files:
            yield f
    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        # Note: a false (e.g. empty) result from rel() also falls back to
        # abs().
        return (self._relativeuipath and self.rel(f)) or self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        return not self.always() and not self.isexact() and not self.anypats()
310
313
class matcher(basematcher):
    '''Matcher built from a pattern list and/or an include list.

    A file matches when every configured check accepts it: the include
    patterns (if any) and the primary patterns or exact file list (if any).
    With neither, the matcher matches everything and always() is true.
    '''

    def __init__(self, root, cwd, normalize, patterns, include=None,
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        '''normalize is called as
        normalize(patterns, default, root, cwd, auditor, warn) and must return
        a list of (kind, pat, source) tuples.'''
        super(matcher, self).__init__(root, cwd, badfn,
                                      relativeuipath=bool(include or patterns))
        if include is None:
            include = []

        self._anypats = bool(include)
        self._anyincludepats = False
        self._always = False
        self.patternspat = None
        self.includepat = None

        # roots are directories which are recursively included.
        self._includeroots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set()

        matchfns = []
        if include:
            kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            self._anyincludepats = _anypats(kindpats)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(im)
        if exact:
            # patterns are plain file names here; no normalization is done
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self.prefix() and dir in self._fileset:
            return 'all'
        if self._includeroots or self._includedirs:
            if (not self._anyincludepats and
                dir in self._includeroots):
                # The condition above is essentially self.prefix() for includes
                return 'all'
            if ('.' not in self._includeroots and
                dir not in self._includeroots and
                dir not in self._includedirs and
                not any(parent in self._includeroots
                        for parent in util.finddirs(dir))):
                return False
        return (not self._fileset or
                '.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def isexact(self):
        # True only when the exact-file check is the sole match function
        return self.matchfn == self.exact

    def __repr__(self):
        return ('<matcher files=%r, patterns=%r, includes=%r>' %
                (self._files, self.patternspat, self.includepat))
408
411
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given (not copied); any other iterable is
        # turned into a list.
        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)
        self.matchfn = self.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
438
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        # Files listed explicitly in m1 match even when m2 matches them too.
        return self._m1(f) and (not self._m2(f) or self._m1.exact(f))

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def prefix(self):
        return not self.always() and not self.isexact() and not self.anypats()

    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
464
494
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None, the result is "m1 or m2". If one of the
    matchers reports always(), a shallow copy of the other one is returned
    instead of building an intersectionmatcher; the inputs are not modified.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        m.bad = m1.bad
        m.explicitdir = m1.explicitdir
        m.traversedir = m1.traversedir
        m.abs = m1.abs
        m.rel = m1.rel
        m._relativeuipath |= m1._relativeuipath
        return m
    if m2.always():
        m = copy.copy(m1)
        m._relativeuipath |= m2._relativeuipath
        return m
    return intersectionmatcher(m1, m2)
489
519
490 class intersectionmatcher(basematcher):
520 class intersectionmatcher(basematcher):
491 def __init__(self, m1, m2):
521 def __init__(self, m1, m2):
492 super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
522 super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
493 self._m1 = m1
523 self._m1 = m1
494 self._m2 = m2
524 self._m2 = m2
495 self.bad = m1.bad
525 self.bad = m1.bad
496 self.explicitdir = m1.explicitdir
526 self.explicitdir = m1.explicitdir
497 self.traversedir = m1.traversedir
527 self.traversedir = m1.traversedir
498
528
499 @propertycache
529 @propertycache
500 def _files(self):
530 def _files(self):
501 if self.isexact():
531 if self.isexact():
502 m1, m2 = self._m1, self._m2
532 m1, m2 = self._m1, self._m2
503 if not m1.isexact():
533 if not m1.isexact():
504 m1, m2 = m2, m1
534 m1, m2 = m2, m1
505 return [f for f in m1.files() if m2(f)]
535 return [f for f in m1.files() if m2(f)]
506 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
536 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
507 # the set of files, because their files() are not always files. For
537 # the set of files, because their files() are not always files. For
508 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
538 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
509 # "path:dir2", we don't want to remove "dir2" from the set.
539 # "path:dir2", we don't want to remove "dir2" from the set.
510 return self._m1.files() + self._m2.files()
540 return self._m1.files() + self._m2.files()
511
541
512 def matchfn(self, f):
542 def matchfn(self, f):
513 return self._m1(f) and self._m2(f)
543 return self._m1(f) and self._m2(f)
514
544
515 def visitdir(self, dir):
545 def visitdir(self, dir):
516 visit1 = self._m1.visitdir(dir)
546 visit1 = self._m1.visitdir(dir)
517 if visit1 == 'all':
547 if visit1 == 'all':
518 return self._m2.visitdir(dir)
548 return self._m2.visitdir(dir)
519 # bool() because visit1=True + visit2='all' should not be 'all'
549 # bool() because visit1=True + visit2='all' should not be 'all'
520 return bool(visit1 and self._m2.visitdir(dir))
550 return bool(visit1 and self._m2.visitdir(dir))
521
551
522 def always(self):
552 def always(self):
523 return self._m1.always() and self._m2.always()
553 return self._m1.always() and self._m2.always()
524
554
525 def isexact(self):
555 def isexact(self):
526 return self._m1.isexact() or self._m2.isexact()
556 return self._m1.isexact() or self._m2.isexact()
527
557
528 def anypats(self):
558 def anypats(self):
529 return self._m1.anypats() or self._m2.anypats()
559 return self._m1.anypats() or self._m2.anypats()
530
560
531 def __repr__(self):
561 def __repr__(self):
532 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
562 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
533
563
534 class subdirmatcher(basematcher):
564 class subdirmatcher(basematcher):
535 """Adapt a matcher to work on a subdirectory only.
565 """Adapt a matcher to work on a subdirectory only.
536
566
537 The paths are remapped to remove/insert the path as needed:
567 The paths are remapped to remove/insert the path as needed:
538
568
539 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
569 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
540 >>> m2 = subdirmatcher('sub', m1)
570 >>> m2 = subdirmatcher('sub', m1)
541 >>> bool(m2('a.txt'))
571 >>> bool(m2('a.txt'))
542 False
572 False
543 >>> bool(m2('b.txt'))
573 >>> bool(m2('b.txt'))
544 True
574 True
545 >>> bool(m2.matchfn('a.txt'))
575 >>> bool(m2.matchfn('a.txt'))
546 False
576 False
547 >>> bool(m2.matchfn('b.txt'))
577 >>> bool(m2.matchfn('b.txt'))
548 True
578 True
549 >>> m2.files()
579 >>> m2.files()
550 ['b.txt']
580 ['b.txt']
551 >>> m2.exact('b.txt')
581 >>> m2.exact('b.txt')
552 True
582 True
553 >>> util.pconvert(m2.rel('b.txt'))
583 >>> util.pconvert(m2.rel('b.txt'))
554 'sub/b.txt'
584 'sub/b.txt'
555 >>> def bad(f, msg):
585 >>> def bad(f, msg):
556 ... print "%s: %s" % (f, msg)
586 ... print "%s: %s" % (f, msg)
557 >>> m1.bad = bad
587 >>> m1.bad = bad
558 >>> m2.bad('x.txt', 'No such file')
588 >>> m2.bad('x.txt', 'No such file')
559 sub/x.txt: No such file
589 sub/x.txt: No such file
560 >>> m2.abs('c.txt')
590 >>> m2.abs('c.txt')
561 'sub/c.txt'
591 'sub/c.txt'
562 """
592 """
563
593
564 def __init__(self, path, matcher):
594 def __init__(self, path, matcher):
565 super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
595 super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
566 self._path = path
596 self._path = path
567 self._matcher = matcher
597 self._matcher = matcher
568 self._always = matcher.always()
598 self._always = matcher.always()
569
599
570 self._files = [f[len(path) + 1:] for f in matcher._files
600 self._files = [f[len(path) + 1:] for f in matcher._files
571 if f.startswith(path + "/")]
601 if f.startswith(path + "/")]
572
602
573 # If the parent repo had a path to this subrepo and the matcher is
603 # If the parent repo had a path to this subrepo and the matcher is
574 # a prefix matcher, this submatcher always matches.
604 # a prefix matcher, this submatcher always matches.
575 if matcher.prefix():
605 if matcher.prefix():
576 self._always = any(f == path for f in matcher._files)
606 self._always = any(f == path for f in matcher._files)
577
607
578 def bad(self, f, msg):
608 def bad(self, f, msg):
579 self._matcher.bad(self._path + "/" + f, msg)
609 self._matcher.bad(self._path + "/" + f, msg)
580
610
581 def abs(self, f):
611 def abs(self, f):
582 return self._matcher.abs(self._path + "/" + f)
612 return self._matcher.abs(self._path + "/" + f)
583
613
584 def rel(self, f):
614 def rel(self, f):
585 return self._matcher.rel(self._path + "/" + f)
615 return self._matcher.rel(self._path + "/" + f)
586
616
587 def uipath(self, f):
617 def uipath(self, f):
588 return self._matcher.uipath(self._path + "/" + f)
618 return self._matcher.uipath(self._path + "/" + f)
589
619
590 def matchfn(self, f):
620 def matchfn(self, f):
591 # Some information is lost in the superclass's constructor, so we
621 # Some information is lost in the superclass's constructor, so we
592 # can not accurately create the matching function for the subdirectory
622 # can not accurately create the matching function for the subdirectory
593 # from the inputs. Instead, we override matchfn() and visitdir() to
623 # from the inputs. Instead, we override matchfn() and visitdir() to
594 # call the original matcher with the subdirectory path prepended.
624 # call the original matcher with the subdirectory path prepended.
595 return self._matcher.matchfn(self._path + "/" + f)
625 return self._matcher.matchfn(self._path + "/" + f)
596
626
597 def visitdir(self, dir):
627 def visitdir(self, dir):
598 if dir == '.':
628 if dir == '.':
599 dir = self._path
629 dir = self._path
600 else:
630 else:
601 dir = self._path + "/" + dir
631 dir = self._path + "/" + dir
602 return self._matcher.visitdir(dir)
632 return self._matcher.visitdir(dir)
603
633
604 def always(self):
634 def always(self):
605 return self._always
635 return self._always
606
636
607 def anypats(self):
637 def anypats(self):
608 return self._matcher.anypats()
638 return self._matcher.anypats()
609
639
610 def patkind(pattern, default=None):
640 def patkind(pattern, default=None):
611 '''If pattern is 'kind:pat' with a known kind, return kind.'''
641 '''If pattern is 'kind:pat' with a known kind, return kind.'''
612 return _patsplit(pattern, default)[0]
642 return _patsplit(pattern, default)[0]
613
643
614 def _patsplit(pattern, default):
644 def _patsplit(pattern, default):
615 """Split a string into the optional pattern kind prefix and the actual
645 """Split a string into the optional pattern kind prefix and the actual
616 pattern."""
646 pattern."""
617 if ':' in pattern:
647 if ':' in pattern:
618 kind, pat = pattern.split(':', 1)
648 kind, pat = pattern.split(':', 1)
619 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
649 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
620 'listfile', 'listfile0', 'set', 'include', 'subinclude',
650 'listfile', 'listfile0', 'set', 'include', 'subinclude',
621 'rootfilesin'):
651 'rootfilesin'):
622 return kind, pat
652 return kind, pat
623 return default, pattern
653 return default, pattern
624
654
625 def _globre(pat):
655 def _globre(pat):
626 r'''Convert an extended glob string to a regexp string.
656 r'''Convert an extended glob string to a regexp string.
627
657
628 >>> print _globre(r'?')
658 >>> print _globre(r'?')
629 .
659 .
630 >>> print _globre(r'*')
660 >>> print _globre(r'*')
631 [^/]*
661 [^/]*
632 >>> print _globre(r'**')
662 >>> print _globre(r'**')
633 .*
663 .*
634 >>> print _globre(r'**/a')
664 >>> print _globre(r'**/a')
635 (?:.*/)?a
665 (?:.*/)?a
636 >>> print _globre(r'a/**/b')
666 >>> print _globre(r'a/**/b')
637 a\/(?:.*/)?b
667 a\/(?:.*/)?b
638 >>> print _globre(r'[a*?!^][^b][!c]')
668 >>> print _globre(r'[a*?!^][^b][!c]')
639 [a*?!^][\^b][^c]
669 [a*?!^][\^b][^c]
640 >>> print _globre(r'{a,b}')
670 >>> print _globre(r'{a,b}')
641 (?:a|b)
671 (?:a|b)
642 >>> print _globre(r'.\*\?')
672 >>> print _globre(r'.\*\?')
643 \.\*\?
673 \.\*\?
644 '''
674 '''
645 i, n = 0, len(pat)
675 i, n = 0, len(pat)
646 res = ''
676 res = ''
647 group = 0
677 group = 0
648 escape = util.re.escape
678 escape = util.re.escape
649 def peek():
679 def peek():
650 return i < n and pat[i:i + 1]
680 return i < n and pat[i:i + 1]
651 while i < n:
681 while i < n:
652 c = pat[i:i + 1]
682 c = pat[i:i + 1]
653 i += 1
683 i += 1
654 if c not in '*?[{},\\':
684 if c not in '*?[{},\\':
655 res += escape(c)
685 res += escape(c)
656 elif c == '*':
686 elif c == '*':
657 if peek() == '*':
687 if peek() == '*':
658 i += 1
688 i += 1
659 if peek() == '/':
689 if peek() == '/':
660 i += 1
690 i += 1
661 res += '(?:.*/)?'
691 res += '(?:.*/)?'
662 else:
692 else:
663 res += '.*'
693 res += '.*'
664 else:
694 else:
665 res += '[^/]*'
695 res += '[^/]*'
666 elif c == '?':
696 elif c == '?':
667 res += '.'
697 res += '.'
668 elif c == '[':
698 elif c == '[':
669 j = i
699 j = i
670 if j < n and pat[j:j + 1] in '!]':
700 if j < n and pat[j:j + 1] in '!]':
671 j += 1
701 j += 1
672 while j < n and pat[j:j + 1] != ']':
702 while j < n and pat[j:j + 1] != ']':
673 j += 1
703 j += 1
674 if j >= n:
704 if j >= n:
675 res += '\\['
705 res += '\\['
676 else:
706 else:
677 stuff = pat[i:j].replace('\\','\\\\')
707 stuff = pat[i:j].replace('\\','\\\\')
678 i = j + 1
708 i = j + 1
679 if stuff[0:1] == '!':
709 if stuff[0:1] == '!':
680 stuff = '^' + stuff[1:]
710 stuff = '^' + stuff[1:]
681 elif stuff[0:1] == '^':
711 elif stuff[0:1] == '^':
682 stuff = '\\' + stuff
712 stuff = '\\' + stuff
683 res = '%s[%s]' % (res, stuff)
713 res = '%s[%s]' % (res, stuff)
684 elif c == '{':
714 elif c == '{':
685 group += 1
715 group += 1
686 res += '(?:'
716 res += '(?:'
687 elif c == '}' and group:
717 elif c == '}' and group:
688 res += ')'
718 res += ')'
689 group -= 1
719 group -= 1
690 elif c == ',' and group:
720 elif c == ',' and group:
691 res += '|'
721 res += '|'
692 elif c == '\\':
722 elif c == '\\':
693 p = peek()
723 p = peek()
694 if p:
724 if p:
695 i += 1
725 i += 1
696 res += escape(p)
726 res += escape(p)
697 else:
727 else:
698 res += escape(c)
728 res += escape(c)
699 else:
729 else:
700 res += escape(c)
730 res += escape(c)
701 return res
731 return res
702
732
703 def _regex(kind, pat, globsuffix):
733 def _regex(kind, pat, globsuffix):
704 '''Convert a (normalized) pattern of any kind into a regular expression.
734 '''Convert a (normalized) pattern of any kind into a regular expression.
705 globsuffix is appended to the regexp of globs.'''
735 globsuffix is appended to the regexp of globs.'''
706 if not pat:
736 if not pat:
707 return ''
737 return ''
708 if kind == 're':
738 if kind == 're':
709 return pat
739 return pat
710 if kind == 'path':
740 if kind == 'path':
711 if pat == '.':
741 if pat == '.':
712 return ''
742 return ''
713 return '^' + util.re.escape(pat) + '(?:/|$)'
743 return '^' + util.re.escape(pat) + '(?:/|$)'
714 if kind == 'rootfilesin':
744 if kind == 'rootfilesin':
715 if pat == '.':
745 if pat == '.':
716 escaped = ''
746 escaped = ''
717 else:
747 else:
718 # Pattern is a directory name.
748 # Pattern is a directory name.
719 escaped = util.re.escape(pat) + '/'
749 escaped = util.re.escape(pat) + '/'
720 # Anything after the pattern must be a non-directory.
750 # Anything after the pattern must be a non-directory.
721 return '^' + escaped + '[^/]+$'
751 return '^' + escaped + '[^/]+$'
722 if kind == 'relglob':
752 if kind == 'relglob':
723 return '(?:|.*/)' + _globre(pat) + globsuffix
753 return '(?:|.*/)' + _globre(pat) + globsuffix
724 if kind == 'relpath':
754 if kind == 'relpath':
725 return util.re.escape(pat) + '(?:/|$)'
755 return util.re.escape(pat) + '(?:/|$)'
726 if kind == 'relre':
756 if kind == 'relre':
727 if pat.startswith('^'):
757 if pat.startswith('^'):
728 return pat
758 return pat
729 return '.*' + pat
759 return '.*' + pat
730 return _globre(pat) + globsuffix
760 return _globre(pat) + globsuffix
731
761
732 def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
762 def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
733 '''Return regexp string and a matcher function for kindpats.
763 '''Return regexp string and a matcher function for kindpats.
734 globsuffix is appended to the regexp of globs.'''
764 globsuffix is appended to the regexp of globs.'''
735 matchfuncs = []
765 matchfuncs = []
736
766
737 subincludes, kindpats = _expandsubinclude(kindpats, root)
767 subincludes, kindpats = _expandsubinclude(kindpats, root)
738 if subincludes:
768 if subincludes:
739 submatchers = {}
769 submatchers = {}
740 def matchsubinclude(f):
770 def matchsubinclude(f):
741 for prefix, matcherargs in subincludes:
771 for prefix, matcherargs in subincludes:
742 if f.startswith(prefix):
772 if f.startswith(prefix):
743 mf = submatchers.get(prefix)
773 mf = submatchers.get(prefix)
744 if mf is None:
774 if mf is None:
745 mf = match(*matcherargs)
775 mf = match(*matcherargs)
746 submatchers[prefix] = mf
776 submatchers[prefix] = mf
747
777
748 if mf(f[len(prefix):]):
778 if mf(f[len(prefix):]):
749 return True
779 return True
750 return False
780 return False
751 matchfuncs.append(matchsubinclude)
781 matchfuncs.append(matchsubinclude)
752
782
753 fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
783 fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
754 if fset:
784 if fset:
755 matchfuncs.append(fset.__contains__)
785 matchfuncs.append(fset.__contains__)
756
786
757 regex = ''
787 regex = ''
758 if kindpats:
788 if kindpats:
759 regex, mf = _buildregexmatch(kindpats, globsuffix)
789 regex, mf = _buildregexmatch(kindpats, globsuffix)
760 matchfuncs.append(mf)
790 matchfuncs.append(mf)
761
791
762 if len(matchfuncs) == 1:
792 if len(matchfuncs) == 1:
763 return regex, matchfuncs[0]
793 return regex, matchfuncs[0]
764 else:
794 else:
765 return regex, lambda f: any(mf(f) for mf in matchfuncs)
795 return regex, lambda f: any(mf(f) for mf in matchfuncs)
766
796
767 def _buildregexmatch(kindpats, globsuffix):
797 def _buildregexmatch(kindpats, globsuffix):
768 """Build a match function from a list of kinds and kindpats,
798 """Build a match function from a list of kinds and kindpats,
769 return regexp string and a matcher function."""
799 return regexp string and a matcher function."""
770 try:
800 try:
771 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
801 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
772 for (k, p, s) in kindpats])
802 for (k, p, s) in kindpats])
773 if len(regex) > 20000:
803 if len(regex) > 20000:
774 raise OverflowError
804 raise OverflowError
775 return regex, _rematcher(regex)
805 return regex, _rematcher(regex)
776 except OverflowError:
806 except OverflowError:
777 # We're using a Python with a tiny regex engine and we
807 # We're using a Python with a tiny regex engine and we
778 # made it explode, so we'll divide the pattern list in two
808 # made it explode, so we'll divide the pattern list in two
779 # until it works
809 # until it works
780 l = len(kindpats)
810 l = len(kindpats)
781 if l < 2:
811 if l < 2:
782 raise
812 raise
783 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
813 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
784 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
814 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
785 return regex, lambda s: a(s) or b(s)
815 return regex, lambda s: a(s) or b(s)
786 except re.error:
816 except re.error:
787 for k, p, s in kindpats:
817 for k, p, s in kindpats:
788 try:
818 try:
789 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
819 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
790 except re.error:
820 except re.error:
791 if s:
821 if s:
792 raise error.Abort(_("%s: invalid pattern (%s): %s") %
822 raise error.Abort(_("%s: invalid pattern (%s): %s") %
793 (s, k, p))
823 (s, k, p))
794 else:
824 else:
795 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
825 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
796 raise error.Abort(_("invalid pattern"))
826 raise error.Abort(_("invalid pattern"))
797
827
798 def _patternrootsanddirs(kindpats):
828 def _patternrootsanddirs(kindpats):
799 '''Returns roots and directories corresponding to each pattern.
829 '''Returns roots and directories corresponding to each pattern.
800
830
801 This calculates the roots and directories exactly matching the patterns and
831 This calculates the roots and directories exactly matching the patterns and
802 returns a tuple of (roots, dirs) for each. It does not return other
832 returns a tuple of (roots, dirs) for each. It does not return other
803 directories which may also need to be considered, like the parent
833 directories which may also need to be considered, like the parent
804 directories.
834 directories.
805 '''
835 '''
806 r = []
836 r = []
807 d = []
837 d = []
808 for kind, pat, source in kindpats:
838 for kind, pat, source in kindpats:
809 if kind == 'glob': # find the non-glob prefix
839 if kind == 'glob': # find the non-glob prefix
810 root = []
840 root = []
811 for p in pat.split('/'):
841 for p in pat.split('/'):
812 if '[' in p or '{' in p or '*' in p or '?' in p:
842 if '[' in p or '{' in p or '*' in p or '?' in p:
813 break
843 break
814 root.append(p)
844 root.append(p)
815 r.append('/'.join(root) or '.')
845 r.append('/'.join(root) or '.')
816 elif kind in ('relpath', 'path'):
846 elif kind in ('relpath', 'path'):
817 r.append(pat or '.')
847 r.append(pat or '.')
818 elif kind in ('rootfilesin',):
848 elif kind in ('rootfilesin',):
819 d.append(pat or '.')
849 d.append(pat or '.')
820 else: # relglob, re, relre
850 else: # relglob, re, relre
821 r.append('.')
851 r.append('.')
822 return r, d
852 return r, d
823
853
824 def _roots(kindpats):
854 def _roots(kindpats):
825 '''Returns root directories to match recursively from the given patterns.'''
855 '''Returns root directories to match recursively from the given patterns.'''
826 roots, dirs = _patternrootsanddirs(kindpats)
856 roots, dirs = _patternrootsanddirs(kindpats)
827 return roots
857 return roots
828
858
829 def _rootsanddirs(kindpats):
859 def _rootsanddirs(kindpats):
830 '''Returns roots and exact directories from patterns.
860 '''Returns roots and exact directories from patterns.
831
861
832 roots are directories to match recursively, whereas exact directories should
862 roots are directories to match recursively, whereas exact directories should
833 be matched non-recursively. The returned (roots, dirs) tuple will also
863 be matched non-recursively. The returned (roots, dirs) tuple will also
834 include directories that need to be implicitly considered as either, such as
864 include directories that need to be implicitly considered as either, such as
835 parent directories.
865 parent directories.
836
866
837 >>> _rootsanddirs(\
867 >>> _rootsanddirs(\
838 [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
868 [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
839 (['g/h', 'g/h', '.'], ['g', '.'])
869 (['g/h', 'g/h', '.'], ['g', '.'])
840 >>> _rootsanddirs(\
870 >>> _rootsanddirs(\
841 [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
871 [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
842 ([], ['g/h', '.', 'g', '.'])
872 ([], ['g/h', '.', 'g', '.'])
843 >>> _rootsanddirs(\
873 >>> _rootsanddirs(\
844 [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
874 [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
845 (['r', 'p/p', '.'], ['p', '.'])
875 (['r', 'p/p', '.'], ['p', '.'])
846 >>> _rootsanddirs(\
876 >>> _rootsanddirs(\
847 [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
877 [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
848 (['.', '.', '.'], ['.'])
878 (['.', '.', '.'], ['.'])
849 '''
879 '''
850 r, d = _patternrootsanddirs(kindpats)
880 r, d = _patternrootsanddirs(kindpats)
851
881
852 # Append the parents as non-recursive/exact directories, since they must be
882 # Append the parents as non-recursive/exact directories, since they must be
853 # scanned to get to either the roots or the other exact directories.
883 # scanned to get to either the roots or the other exact directories.
854 d.extend(util.dirs(d))
884 d.extend(util.dirs(d))
855 d.extend(util.dirs(r))
885 d.extend(util.dirs(r))
856 # util.dirs() does not include the root directory, so add it manually
886 # util.dirs() does not include the root directory, so add it manually
857 d.append('.')
887 d.append('.')
858
888
859 return r, d
889 return r, d
860
890
861 def _explicitfiles(kindpats):
891 def _explicitfiles(kindpats):
862 '''Returns the potential explicit filenames from the patterns.
892 '''Returns the potential explicit filenames from the patterns.
863
893
864 >>> _explicitfiles([('path', 'foo/bar', '')])
894 >>> _explicitfiles([('path', 'foo/bar', '')])
865 ['foo/bar']
895 ['foo/bar']
866 >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
896 >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
867 []
897 []
868 '''
898 '''
869 # Keep only the pattern kinds where one can specify filenames (vs only
899 # Keep only the pattern kinds where one can specify filenames (vs only
870 # directory names).
900 # directory names).
871 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
901 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
872 return _roots(filable)
902 return _roots(filable)
873
903
874 def _anypats(kindpats):
904 def _anypats(kindpats):
875 for kind, pat, source in kindpats:
905 for kind, pat, source in kindpats:
876 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
906 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
877 return True
907 return True
878
908
879 _commentre = None
909 _commentre = None
880
910
881 def readpatternfile(filepath, warn, sourceinfo=False):
911 def readpatternfile(filepath, warn, sourceinfo=False):
882 '''parse a pattern file, returning a list of
912 '''parse a pattern file, returning a list of
883 patterns. These patterns should be given to compile()
913 patterns. These patterns should be given to compile()
884 to be validated and converted into a match function.
914 to be validated and converted into a match function.
885
915
886 trailing white space is dropped.
916 trailing white space is dropped.
887 the escape character is backslash.
917 the escape character is backslash.
888 comments start with #.
918 comments start with #.
889 empty lines are skipped.
919 empty lines are skipped.
890
920
891 lines can be of the following formats:
921 lines can be of the following formats:
892
922
893 syntax: regexp # defaults following lines to non-rooted regexps
923 syntax: regexp # defaults following lines to non-rooted regexps
894 syntax: glob # defaults following lines to non-rooted globs
924 syntax: glob # defaults following lines to non-rooted globs
895 re:pattern # non-rooted regular expression
925 re:pattern # non-rooted regular expression
896 glob:pattern # non-rooted glob
926 glob:pattern # non-rooted glob
897 pattern # pattern of the current default type
927 pattern # pattern of the current default type
898
928
899 if sourceinfo is set, returns a list of tuples:
929 if sourceinfo is set, returns a list of tuples:
900 (pattern, lineno, originalline). This is useful to debug ignore patterns.
930 (pattern, lineno, originalline). This is useful to debug ignore patterns.
901 '''
931 '''
902
932
903 syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
933 syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
904 'include': 'include', 'subinclude': 'subinclude'}
934 'include': 'include', 'subinclude': 'subinclude'}
905 syntax = 'relre:'
935 syntax = 'relre:'
906 patterns = []
936 patterns = []
907
937
908 fp = open(filepath, 'rb')
938 fp = open(filepath, 'rb')
909 for lineno, line in enumerate(util.iterfile(fp), start=1):
939 for lineno, line in enumerate(util.iterfile(fp), start=1):
910 if "#" in line:
940 if "#" in line:
911 global _commentre
941 global _commentre
912 if not _commentre:
942 if not _commentre:
913 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
943 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
914 # remove comments prefixed by an even number of escapes
944 # remove comments prefixed by an even number of escapes
915 m = _commentre.search(line)
945 m = _commentre.search(line)
916 if m:
946 if m:
917 line = line[:m.end(1)]
947 line = line[:m.end(1)]
918 # fixup properly escaped comments that survived the above
948 # fixup properly escaped comments that survived the above
919 line = line.replace("\\#", "#")
949 line = line.replace("\\#", "#")
920 line = line.rstrip()
950 line = line.rstrip()
921 if not line:
951 if not line:
922 continue
952 continue
923
953
924 if line.startswith('syntax:'):
954 if line.startswith('syntax:'):
925 s = line[7:].strip()
955 s = line[7:].strip()
926 try:
956 try:
927 syntax = syntaxes[s]
957 syntax = syntaxes[s]
928 except KeyError:
958 except KeyError:
929 if warn:
959 if warn:
930 warn(_("%s: ignoring invalid syntax '%s'\n") %
960 warn(_("%s: ignoring invalid syntax '%s'\n") %
931 (filepath, s))
961 (filepath, s))
932 continue
962 continue
933
963
934 linesyntax = syntax
964 linesyntax = syntax
935 for s, rels in syntaxes.iteritems():
965 for s, rels in syntaxes.iteritems():
936 if line.startswith(rels):
966 if line.startswith(rels):
937 linesyntax = rels
967 linesyntax = rels
938 line = line[len(rels):]
968 line = line[len(rels):]
939 break
969 break
940 elif line.startswith(s+':'):
970 elif line.startswith(s+':'):
941 linesyntax = rels
971 linesyntax = rels
942 line = line[len(s) + 1:]
972 line = line[len(s) + 1:]
943 break
973 break
944 if sourceinfo:
974 if sourceinfo:
945 patterns.append((linesyntax + line, lineno, line))
975 patterns.append((linesyntax + line, lineno, line))
946 else:
976 else:
947 patterns.append(linesyntax + line)
977 patterns.append(linesyntax + line)
948 fp.close()
978 fp.close()
949 return patterns
979 return patterns
General Comments 0
You need to be logged in to leave comments. Login now