##// END OF EJS Templates
match: let regex match function return a boolean...
Denis Laxalde -
r42256:2e2699af default
parent child Browse files
Show More
@@ -1,1478 +1,1479 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pathutil,
19 pathutil,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
# Names of the pattern kinds, i.e. the 'kind' part of a 'kind:pattern'
# string.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose pattern _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')

# Short local alias for the caching property decorator from util.
propertycache = util.propertycache
34
34
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The returned callable is the compiled pattern's test_match method when
    the compiled object has one, and its match method otherwise.
    '''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match
44
44
def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
    '''Returns the kindpats list with the 'set' patterns expanded to matchers

    kindpats is an iterable of (kind, pat, source) tuples. The result is a
    pair (matchers, other): 'matchers' holds one matcher per 'set' pattern
    (plus one prefixdirmatcher per subrepo of ctx when listsubrepos is
    true), and 'other' holds the remaining tuples in their original order.

    Raises error.ProgrammingError if a 'set' pattern is present but no ctx
    was given.
    '''
    matchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == 'set':
            if ctx is None:
                raise error.ProgrammingError("fileset expression with no "
                                             "context")
            matchers.append(ctx.matchfileset(pat, badfn=badfn))

            if listsubrepos:
                # evaluate the same fileset in every subrepo and re-root
                # the resulting matcher under the subrepo path
                for subpath in ctx.substate:
                    sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
                    pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                    matchers.append(pm)

            continue
        other.append((kind, pat, source))
    return matchers, other
66
66
def _expandsubinclude(kindpats, root):
    '''Returns the list of subinclude matcher args and the kindpats without the
    subincludes in it.

    For each 'subinclude' entry the pattern path is resolved against the
    directory of the file it came from ('source'). The entry is turned into
    a (prefix, matcherargs) pair, where matcherargs is a
    (root, cwd, patterns, include) tuple rooted at the directory of the
    included file, and prefix is that directory relative to 'root' with a
    trailing '/' (or '' when it is the root itself).
    '''
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == 'subinclude':
            sourceroot = pathutil.dirname(util.normpath(source))
            pat = util.pconvert(pat)
            path = pathutil.join(sourceroot, pat)

            newroot = pathutil.dirname(path)
            matcherargs = (newroot, '', [], ['include:%s' % path])

            prefix = pathutil.canonpath(root, root, newroot)
            if prefix:
                prefix += '/'
            relmatchers.append((prefix, matcherargs))
        else:
            other.append((kind, pat, source))

    return relmatchers, other
90
90
def _kindpatsalwaysmatch(kindpats):
    """Checks whether the kindpats match everything, as e.g.
    'relpath:.' does.

    This is the case when every entry has an empty pattern and a kind of
    'relpath' or 'glob'. An empty kindpats therefore also yields True.
    """
    for kind, pat, source in kindpats:
        if pat != '' or kind not in ['relpath', 'glob']:
            return False
    return True
99
99
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for kindpats.

    'set' patterns are expanded to fileset matchers via _expandsets(); the
    remaining patterns, if any, are handed to matchercls(root, kindpats,
    badfn=badfn). Returns a nevermatcher when nothing is left to match, the
    sole matcher when there is exactly one, and a unionmatcher of all of
    them otherwise.
    '''
    matchers = []
    fms, kindpats = _expandsets(kindpats, ctx=ctx,
                                listsubrepos=listsubrepos, badfn=badfn)
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
115
115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match('foo', '.', ['re:.*\.c$', 'path:foo/a', '*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match('foo', '.', ['re:.*\.c$'], include=['path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match('foo', '.', ['re:.*\.c$'], exclude=['path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match('foo', '.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match('foo', '.', ['.*\.c$'], default='re')
    <patternmatcher patterns='.*\\.c$'>
    >>> match('foo', '.', ['main.py'], default='relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match('foo', '.', ['main.py'], default='re')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match('foo', '', ['re:.*\.c$', 'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m('a')
    True
    >>> m('main.c')
    True
    >>> m('test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact('a')
    True
    >>> m.exact('main.c')
    False
    """
    normalize = _donormalize
    if icasefs:
        # NOTE: this path dereferences ctx, so callers passing icasefs=True
        # must also pass a ctx.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                      listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    # include/exclude patterns always default to the 'glob' kind, and their
    # matchers are built without the caller's badfn.
    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
241
241
def exact(files, badfn=None):
    '''Return an exactmatcher built from files, with an optional bad()
    callback.'''
    return exactmatcher(files, badfn=badfn)
244
244
def always(badfn=None):
    '''Return an alwaysmatcher, with an optional bad() callback.'''
    return alwaysmatcher(badfn)
247
247
def never(badfn=None):
    '''Return a nevermatcher, with an optional bad() callback.'''
    return nevermatcher(badfn)
250
250
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is not modified.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
258
258
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. 'source' is '' for
    patterns given directly, and the listfile/include path for patterns
    read from such a file.

    Raises error.Abort if a listfile cannot be read, or if reading an
    include file aborts. An include file that fails with IOError is skipped,
    with a message sent to 'warn' when it is set.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # entries of a list file are recorded with the list file itself
            # as their source
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # exceptions are not subscriptable on Python 3, so read the
                # message through .args instead of inst[0]
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
299
299
class basematcher(object):
    '''Base class for matchers.

    The defaults defined here match nothing (matchfn() returns False),
    list no files, and report that every directory should be visited.
    Subclasses override the methods they need.
    '''

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
421
421
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory and all of its subdirectories are to be visited
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
442
442
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
468
468
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        # the predicate itself is used as the match function
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # fall back to the repr of the predicate when buildrepr() yields
        # nothing for _predrepr
        s = (stringutil.buildrepr(self._predrepr)
             or pycompat.byterepr(self.matchfn))
        # NOTE: the 'predicatenmatcher' spelling is runtime output and is
        # kept unchanged on purpose.
        return '<predicatenmatcher pred=%s>' % s
482
482
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     ('re', '.*\.c$', ''),
    ...     ('path', 'foo/a', ''),
    ...     ('relpath', 'b', ''),
    ...     ('glob', '*.h', ''),
    ... ]
    >>> m = patternmatcher('foo', kindpats)
    >>> m('main.c')  # matches re:.*\.c$
    True
    >>> m('b.txt')
    False
    >>> m('foo/a')  # matches path:foo/a
    True
    >>> m('a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m('b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m('lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['.', 'foo/a', 'b', '.']
    >>> m.exact('foo/a')
    True
    >>> m.exact('b')
    True
    >>> m.exact('lib.h')  # exact matches are for (rel)path kinds
    False
    """
    # The docstring above is a raw string because its doctests contain
    # backslashes ('\.'); the text is unchanged.

    def __init__(self, root, kindpats, badfn=None):
        """Build the matcher from ``kindpats`` relative to ``root``.

        ``badfn`` is forwarded to the base class.
        """
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # '$' is the regex suffix handed to _buildmatch for this matcher.
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Every directory of the explicit files, plus the root '.'.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        """Tell whether ``dir`` should be visited.

        Returns 'all' when this is a prefix matcher and ``dir`` itself is one
        of the explicit files, otherwise a boolean.
        """
        if self._prefix and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer for ``dir`` into a children set.

        True becomes 'this', a false value becomes an empty set, and 'all'
        is passed through.
        """
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        """Return the prefix flag computed from the patterns by _prefix()."""
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
551
551
# This is basically a reimplementation of util.dirs that stores the children
# instead of just a count of them, plus a small optional optimization to avoid
# some directories we don't need.
class _dirchildren(object):
    """Map each directory to the set of its immediate child names.

    ``paths`` is an iterable of slash-separated paths. ``onlyinclude``, when
    not None, is a collection of directory names; children are recorded only
    for directories contained in it. When it is None (the default) no
    restriction applies and every directory is recorded.
    """

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None distinct from an empty collection: None means "no
        # restriction", while an empty collection restricts to nothing.
        # Collapsing None into [] would make the default filter out every
        # directory, so nothing would ever be recorded.
        self._onlyinclude = onlyinclude
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        """Record ``path`` as a child of each of its ancestor directories."""
        if path == '.':
            return
        dirs = self._dirs
        onlyinclude = self._onlyinclude
        findsplitdirs = _dirchildren._findsplitdirs
        for d, b in findsplitdirs(path):
            if onlyinclude is not None and d not in onlyinclude:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to util.finddirs, except:
        # - produces a (dirname, basename) tuple, not just 'dirname'
        # - includes root dir
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash, and produces '.' for the root instead of ''.
        oldpos = len(path)
        pos = path.rfind('/')
        while pos != -1:
            yield path[:pos], path[pos + 1:oldpos]
            oldpos = pos
            pos = path.rfind('/', 0, pos)
        yield '.', path[:oldpos]

    def get(self, path):
        """Return the recorded children of ``path`` (an empty set if none)."""
        return self._dirs.get(path, set())
591
591
class includematcher(basematcher):
    """Matcher built from a set of (kind, pat, source) include patterns.

    The match function is built like patternmatcher's, except that the regex
    suffix given to _buildmatch is '(?:/|$)' instead of '$'.
    NOTE(review): presumably this lets a pattern naming a directory also
    match everything below it -- confirm against _buildmatch.
    """

    def __init__(self, root, kindpats, badfn=None):
        """Build the matcher from ``kindpats`` relative to ``root``.

        ``badfn`` is forwarded to the base class.
        """
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        """Tell whether ``dir`` should be visited.

        Returns 'all' when this is a prefix matcher and ``dir`` is one of the
        roots, otherwise a boolean.
        """
        if self._prefix and dir in self._roots:
            return 'all'
        return ('.' in self._roots or
                dir in self._roots or
                dir in self._dirs or
                dir in self._parents or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return 'all', 'this', or the set of children of ``dir`` to visit.

        An empty set means nothing under ``dir`` needs to be visited.
        """
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('.' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            any(parentdir in self._roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir in self._parents:
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
649
649
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher(['a.txt', 're:.*\.c$'])
    >>> m('a.txt')
    True
    >>> m('b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m('main.c')
    False
    >>> m('re:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        """Store ``files`` as a list; ``badfn`` goes to the base class.

        A list argument is kept as-is (not copied); any other iterable is
        materialized into a new list.
        """
        super(exactmatcher, self).__init__(badfn)

        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)

    # Matching is exactly the base class's exact() check.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Every directory of the listed files, plus the root '.'.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        """Return True when ``dir`` is an ancestor directory of a file."""
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the names directly under ``dir`` that lead to a file.

        Returns an empty set when there are no files at all or when ``dir``
        is not an ancestor directory of any file.
        """
        if not self._fileset or dir not in self._dirs:
            return set()

        # Parenthesized for readability only: '-' already binds tighter
        # than '|'.
        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            d = dir + '/'
            candidates = set(c[len(d):] for c in candidates if
                             c.startswith(d))
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating that it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        ret = {c for c in candidates if '/' not in c}
        # We really do not expect ret to be empty, since that would imply that
        # there's something in _dirs that didn't have a file in _fileset.
        assert ret
        return ret

    def isexact(self):
        """This matcher is always exact."""
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
714
714
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        """Store both matchers and take the callbacks from ``m1``."""
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        """Return a true value when ``m1`` matches ``f`` and ``m2`` does not."""
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Tell whether ``dir`` should be visited.

        Returns False when ``m2`` covers all of ``dir``, ``m1``'s own answer
        (possibly 'all') when ``m2`` does not touch ``dir``, and otherwise
        ``m1``'s answer reduced to a boolean.
        """
        # Ask m2 once and reuse the answer for both tests.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            return False
        elif not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return 'all', 'this', or a set of children of ``dir`` to visit."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        """The difference is exact whenever ``m1`` is exact."""
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
783
783
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other one is returned as-is. If one of
    the matchers reports always(), a shallow copy of the other one is
    returned instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        # m2 alone decides the match, but the callbacks must still be m1's,
        # so patch them onto a copy and leave the caller's m2 untouched.
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        m.bad = m1.bad
        m.explicitdir = m1.explicitdir
        m.traversedir = m1.traversedir
        return m
    if m2.always():
        # m1 alone decides the match and already carries its own callbacks.
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
804
804
class intersectionmatcher(basematcher):
    """Matches a file only if both ``m1`` and ``m2`` match it.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken
    from ``m1``.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            m1, m2 = self._m1, self._m2
            # Make m1 the exact one so that its files() can be filtered by
            # the other matcher.
            if not m1.isexact():
                m1, m2 = m2, m1
            return [f for f in m1.files() if m2(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        """Return a true value when both matchers match ``f``."""
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Tell whether ``dir`` should be visited ('all' or a boolean)."""
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return 'all', 'this', or a set of children of ``dir`` to visit."""
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side means the other side's answer is the result.
        if m1_set == 'all':
            return m2_set
        elif m2_set == 'all':
            return m1_set

        if m1_set == 'this' or m2_set == 'this':
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        """True only when both matchers always match."""
        return self._m1.always() and self._m2.always()

    def isexact(self):
        """True when at least one of the matchers is exact."""
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
865
865
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        """Wrap ``matcher`` so that paths are relative to ``path``."""
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path`` and
        # strip the "path/" prefix from them.
        prefix = path + "/"
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        """Report ``f`` to the wrapped matcher's bad() with ``path/`` added."""
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        """Ask the wrapped matcher about ``dir`` mapped below ``path``."""
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitdir(dir)

    def visitchildrenset(self, dir):
        """Ask the wrapped matcher about ``dir`` mapped below ``path``."""
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitchildrenset(dir)

    def always(self):
        """Return the always-flag computed in the constructor."""
        return self._always

    def prefix(self):
        """True when the wrapped matcher is a prefix matcher and this
        matcher does not always match."""
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
941
941
942 class prefixdirmatcher(basematcher):
942 class prefixdirmatcher(basematcher):
943 """Adapt a matcher to work on a parent directory.
943 """Adapt a matcher to work on a parent directory.
944
944
945 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
945 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
946 ignored.
946 ignored.
947
947
948 The prefix path should usually be the relative path from the root of
948 The prefix path should usually be the relative path from the root of
949 this matcher to the root of the wrapped matcher.
949 this matcher to the root of the wrapped matcher.
950
950
951 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
951 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
952 >>> m2 = prefixdirmatcher(b'd/e', m1)
952 >>> m2 = prefixdirmatcher(b'd/e', m1)
953 >>> bool(m2(b'a.txt'),)
953 >>> m2(b'a.txt')
954 False
954 False
955 >>> bool(m2(b'd/e/a.txt'))
955 >>> m2(b'd/e/a.txt')
956 True
956 True
957 >>> bool(m2(b'd/e/b.txt'))
957 >>> m2(b'd/e/b.txt')
958 False
958 False
959 >>> m2.files()
959 >>> m2.files()
960 ['d/e/a.txt', 'd/e/f/b.txt']
960 ['d/e/a.txt', 'd/e/f/b.txt']
961 >>> m2.exact(b'd/e/a.txt')
961 >>> m2.exact(b'd/e/a.txt')
962 True
962 True
963 >>> m2.visitdir(b'd')
963 >>> m2.visitdir(b'd')
964 True
964 True
965 >>> m2.visitdir(b'd/e')
965 >>> m2.visitdir(b'd/e')
966 True
966 True
967 >>> m2.visitdir(b'd/e/f')
967 >>> m2.visitdir(b'd/e/f')
968 True
968 True
969 >>> m2.visitdir(b'd/e/g')
969 >>> m2.visitdir(b'd/e/g')
970 False
970 False
971 >>> m2.visitdir(b'd/ef')
971 >>> m2.visitdir(b'd/ef')
972 False
972 False
973 """
973 """
974
974
975 def __init__(self, path, matcher, badfn=None):
975 def __init__(self, path, matcher, badfn=None):
976 super(prefixdirmatcher, self).__init__(badfn)
976 super(prefixdirmatcher, self).__init__(badfn)
977 if not path:
977 if not path:
978 raise error.ProgrammingError('prefix path must not be empty')
978 raise error.ProgrammingError('prefix path must not be empty')
979 self._path = path
979 self._path = path
980 self._pathprefix = path + '/'
980 self._pathprefix = path + '/'
981 self._matcher = matcher
981 self._matcher = matcher
982
982
983 @propertycache
983 @propertycache
984 def _files(self):
984 def _files(self):
985 return [self._pathprefix + f for f in self._matcher._files]
985 return [self._pathprefix + f for f in self._matcher._files]
986
986
987 def matchfn(self, f):
987 def matchfn(self, f):
988 if not f.startswith(self._pathprefix):
988 if not f.startswith(self._pathprefix):
989 return False
989 return False
990 return self._matcher.matchfn(f[len(self._pathprefix):])
990 return self._matcher.matchfn(f[len(self._pathprefix):])
991
991
992 @propertycache
992 @propertycache
993 def _pathdirs(self):
993 def _pathdirs(self):
994 return set(util.finddirs(self._path)) | {'.'}
994 return set(util.finddirs(self._path)) | {'.'}
995
995
def visitdir(self, dir):
    """Tell whether ``dir`` should be visited.

    The prefix directory itself and anything below it are delegated to the
    wrapped matcher (using '.' or the path relative to the prefix). Any
    other directory is visited only if it is listed in ``_pathdirs``.
    """
    if dir == self._path:
        relative = '.'
    elif dir.startswith(self._pathprefix):
        relative = dir[len(self._pathprefix):]
    else:
        return dir in self._pathdirs
    return self._matcher.visitdir(relative)
1002
1002
def visitchildrenset(self, dir):
    """Return the children of ``dir`` that should be visited.

    The prefix directory itself and anything below it are delegated to the
    wrapped matcher (using '.' or the path relative to the prefix). For any
    other directory the answer is 'this' when it is listed in
    ``_pathdirs`` and an empty set otherwise.
    """
    if dir == self._path:
        relative = '.'
    elif dir.startswith(self._pathprefix):
        relative = dir[len(self._pathprefix):]
    else:
        return 'this' if dir in self._pathdirs else set()
    return self._matcher.visitchildrenset(relative)
1011
1011
def isexact(self):
    """Return whatever the wrapped matcher's isexact() returns."""
    inner = self._matcher
    return inner.isexact()
1014
1014
def prefix(self):
    """Return whatever the wrapped matcher's prefix() returns."""
    inner = self._matcher
    return inner.prefix()
1017
1017
@encoding.strmethod
def __repr__(self):
    """Show the prefix path and the repr of the wrapped matcher."""
    template = '<prefixdirmatcher path=%r, matcher=%r>'
    shownpath = pycompat.bytestr(self._path)
    return template % (shownpath, self._matcher)
1022
1022
class unionmatcher(basematcher):
    """A matcher accepting whatever at least one of several matchers accepts.

    The explicitdir and traversedir attributes are copied from the first
    matcher of the list.
    """

    def __init__(self, matchers):
        """Build the union of ``matchers``, a non-empty sequence."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as one of the matchers accepts ``f``."""
        return any(sub(f) for sub in self._matchers)

    def visitdir(self, dir):
        """Combine the visitdir() answers of all the matchers.

        'all' from any matcher is returned right away; otherwise the
        answers are or-ed together.
        """
        visit = False
        for sub in self._matchers:
            answer = sub.visitdir(dir)
            if answer == 'all':
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all the matchers.

        'all' wins over everything else, then 'this'; otherwise the result
        is the union of the sets returned by the matchers.
        """
        children = set()
        sawthis = False
        for sub in self._matchers:
            answer = sub.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                sawthis = True
                # keep looping: a later matcher may still answer 'all'
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        """Show the repr of the list of combined matchers."""
        template = '<unionmatcher matchers=%r>'
        return template % self._matchers
1074
1074
def patkind(pattern, default=None):
    r'''If pattern is 'kind:pat' with a known kind, return kind.

    When the pattern carries no known kind prefix, ``default`` is returned
    instead (None unless the caller provides one).

    >>> patkind('re:.*\.c$')
    're'
    >>> patkind('glob:*.c')
    'glob'
    >>> patkind('relpath:test.py')
    'relpath'
    >>> patkind('main.py')
    >>> patkind('main.py', default='re')
    're'
    '''
    # NOTE: the docstring is a raw string because of the backslash in the
    # first example ('\.' is not a valid escape sequence in a plain literal).
    return _patsplit(pattern, default)[0]
1089
1089
1090 def _patsplit(pattern, default):
1090 def _patsplit(pattern, default):
1091 """Split a string into the optional pattern kind prefix and the actual
1091 """Split a string into the optional pattern kind prefix and the actual
1092 pattern."""
1092 pattern."""
1093 if ':' in pattern:
1093 if ':' in pattern:
1094 kind, pat = pattern.split(':', 1)
1094 kind, pat = pattern.split(':', 1)
1095 if kind in allpatternkinds:
1095 if kind in allpatternkinds:
1096 return kind, pat
1096 return kind, pat
1097 return default, pattern
1097 return default, pattern
1098
1098
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # escape(c, c) gives the regexp-escaped form of c when the escape map has
    # an entry for it, and c itself otherwise
    escape = util.stringutil.regexbytesescapemap.get
    out = ''
    depth = 0  # number of currently open '{' groups
    pos, end = 0, len(pat)
    while pos < end:
        # one-element slices are used instead of indexing so that the
        # "character" has the same type as the pattern
        ch = pat[pos:pos + 1]
        pos += 1
        if ch not in '*?[{},\\':
            out += escape(ch, ch)
        elif ch == '*':
            if pat[pos:pos + 1] != '*':
                # single star: anything but a directory separator
                out += '[^/]*'
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                # '**/': any number of leading directories, possibly none
                pos += 1
                out += '(?:.*/)?'
            else:
                out += '.*'
        elif ch == '?':
            out += '.'
        elif ch == '[':
            # look for the closing bracket; a '!' or ']' right after the
            # opening one belongs to the class
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close < end:
                body = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                lead = body[0:1]
                if lead == '!':
                    # glob negation becomes regexp negation
                    body = '^' + body[1:]
                elif lead == '^':
                    # a leading caret is literal in a glob class
                    body = '\\' + body
                out = '%s[%s]' % (out, body)
            else:
                # unterminated class: the bracket is a literal character
                out += '\\['
        elif ch == '{':
            depth += 1
            out += '(?:'
        elif ch == '}' and depth:
            out += ')'
            depth -= 1
        elif ch == ',' and depth:
            out += '|'
        elif ch == '\\':
            following = pat[pos:pos + 1]
            if following:
                # the backslash protects the next character
                pos += 1
                out += escape(following, following)
            else:
                # trailing backslash: keep it as a literal
                out += escape(ch, ch)
        else:
            # '}' or ',' outside of any group
            out += escape(ch, ch)
    return out
1179
1179
1180 def _regex(kind, pat, globsuffix):
1180 def _regex(kind, pat, globsuffix):
1181 '''Convert a (normalized) pattern of any kind into a regular expression.
1181 '''Convert a (normalized) pattern of any kind into a regular expression.
1182 globsuffix is appended to the regexp of globs.'''
1182 globsuffix is appended to the regexp of globs.'''
1183 if not pat:
1183 if not pat:
1184 return ''
1184 return ''
1185 if kind == 're':
1185 if kind == 're':
1186 return pat
1186 return pat
1187 if kind in ('path', 'relpath'):
1187 if kind in ('path', 'relpath'):
1188 if pat == '.':
1188 if pat == '.':
1189 return ''
1189 return ''
1190 return util.stringutil.reescape(pat) + '(?:/|$)'
1190 return util.stringutil.reescape(pat) + '(?:/|$)'
1191 if kind == 'rootfilesin':
1191 if kind == 'rootfilesin':
1192 if pat == '.':
1192 if pat == '.':
1193 escaped = ''
1193 escaped = ''
1194 else:
1194 else:
1195 # Pattern is a directory name.
1195 # Pattern is a directory name.
1196 escaped = util.stringutil.reescape(pat) + '/'
1196 escaped = util.stringutil.reescape(pat) + '/'
1197 # Anything after the pattern must be a non-directory.
1197 # Anything after the pattern must be a non-directory.
1198 return escaped + '[^/]+$'
1198 return escaped + '[^/]+$'
1199 if kind == 'relglob':
1199 if kind == 'relglob':
1200 return '(?:|.*/)' + _globre(pat) + globsuffix
1200 return '(?:|.*/)' + _globre(pat) + globsuffix
1201 if kind == 'relre':
1201 if kind == 'relre':
1202 if pat.startswith('^'):
1202 if pat.startswith('^'):
1203 return pat
1203 return pat
1204 return '.*' + pat
1204 return '.*' + pat
1205 if kind in ('glob', 'rootglob'):
1205 if kind in ('glob', 'rootglob'):
1206 return _globre(pat) + globsuffix
1206 return _globre(pat) + globsuffix
1207 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1207 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1208
1208
def _buildmatch(kindpats, globsuffix, root):
    '''Return a (regexp string, matcher function) pair for kindpats.

    globsuffix is appended to the regexp of globs. The subinclude patterns
    are split off by _expandsubinclude() and handled by matchers of their
    own, created the first time a file under their prefix is tested.'''
    checks = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        cache = {}  # prefix -> matcher, filled on demand

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    cache[prefix] = submatch
                if submatch(f[len(prefix):]):
                    return True
            return False

        checks.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # only directory listings: a set lookup on the directory part of
            # the file name replaces the regexp
            dirs = {p for k, p, s in kindpats}

            def indirs(f):
                slash = f.rfind('/')
                parent = f[:slash] if slash >= 0 else '.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            checks.append(indirs)
        else:
            regex, rematch = _buildregexmatch(kindpats, globsuffix)
            checks.append(rematch)

    if len(checks) != 1:
        return regex, lambda f: any(check(f) for check in checks)
    return regex, checks[0]
1251
1251
# Largest size, in bytes, accepted for a single regexp: _buildregexmatch()
# aborts on a pattern longer than this and splits the alternation of all the
# patterns into several regexps when it would exceed it.
MAX_RE_SIZE = 20000
1253
1253
1254 def _joinregexes(regexps):
1254 def _joinregexes(regexps):
1255 """gather multiple regular expressions into a single one"""
1255 """gather multiple regular expressions into a single one"""
1256 return '|'.join(regexps)
1256 return '|'.join(regexps)
1257
1257
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned function always gives a boolean, whether the patterns fit
    in one regexp or have to be split into several groups.

    A single pattern whose regexp is longer than MAX_RE_SIZE, or a pattern
    which does not compile, raises error.Abort.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Walk the individual regexps, closing the current group whenever
        # adding the next piece would make it larger than MAX_RE_SIZE.
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # + 1 accounts for the '|' joining this piece to the next one
            groupsize += piecesize + 1

        if startidx == 0:
            # Everything fits in one regexp. Turn the result of the regexp
            # matcher into a boolean so that callers get the same kind of
            # value as from the any() of the multi-group case below.
            matcher = _rematcher(fullregexp)
            func = lambda s: matcher(s) is not None
        else:
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to report the offending one,
        # together with its source when it is known.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
1308
1309
1309 def _patternrootsanddirs(kindpats):
1310 def _patternrootsanddirs(kindpats):
1310 '''Returns roots and directories corresponding to each pattern.
1311 '''Returns roots and directories corresponding to each pattern.
1311
1312
1312 This calculates the roots and directories exactly matching the patterns and
1313 This calculates the roots and directories exactly matching the patterns and
1313 returns a tuple of (roots, dirs) for each. It does not return other
1314 returns a tuple of (roots, dirs) for each. It does not return other
1314 directories which may also need to be considered, like the parent
1315 directories which may also need to be considered, like the parent
1315 directories.
1316 directories.
1316 '''
1317 '''
1317 r = []
1318 r = []
1318 d = []
1319 d = []
1319 for kind, pat, source in kindpats:
1320 for kind, pat, source in kindpats:
1320 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1321 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1321 root = []
1322 root = []
1322 for p in pat.split('/'):
1323 for p in pat.split('/'):
1323 if '[' in p or '{' in p or '*' in p or '?' in p:
1324 if '[' in p or '{' in p or '*' in p or '?' in p:
1324 break
1325 break
1325 root.append(p)
1326 root.append(p)
1326 r.append('/'.join(root) or '.')
1327 r.append('/'.join(root) or '.')
1327 elif kind in ('relpath', 'path'):
1328 elif kind in ('relpath', 'path'):
1328 r.append(pat or '.')
1329 r.append(pat or '.')
1329 elif kind in ('rootfilesin',):
1330 elif kind in ('rootfilesin',):
1330 d.append(pat or '.')
1331 d.append(pat or '.')
1331 else: # relglob, re, relre
1332 else: # relglob, re, relre
1332 r.append('.')
1333 r.append('.')
1333 return r, d
1334 return r, d
1334
1335
def _roots(kindpats):
    '''Return the roots computed by _patternrootsanddirs() for the given
    patterns, leaving its directories out.'''
    return _patternrootsanddirs(kindpats)[0]
1339
1340
def _rootsdirsandparents(kindpats):
    '''Compute the roots, exact directories and parents of the patterns.

    The result is a (roots, dirs, parents) tuple of lists: `roots` are to be
    matched recursively, `dirs` non-recursively, and `parents` are the
    directories which have to be walked to reach an item of either roots or
    dirs.

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # The parents are collected as non-recursive/exact directories, since
    # they must be scanned to get to either the roots or the other exact
    # directories.
    parents = list(util.dirs(exactdirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1380
1381
def _explicitfiles(kindpats):
    '''Return the names which may be explicit files among the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' patterns can only name directories, never files, so they
    # are left out before computing the roots.
    canbefiles = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(canbefiles)
1393
1394
1394 def _prefix(kindpats):
1395 def _prefix(kindpats):
1395 '''Whether all the patterns match a prefix (i.e. recursively)'''
1396 '''Whether all the patterns match a prefix (i.e. recursively)'''
1396 for kind, pat, source in kindpats:
1397 for kind, pat, source in kindpats:
1397 if kind not in ('path', 'relpath'):
1398 if kind not in ('path', 'relpath'):
1398 return False
1399 return False
1399 return True
1400 return True
1400
1401
1401 _commentre = None
1402 _commentre = None
1402
1403
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, when not None, is called with a message for each "syntax:" line
    naming an unknown syntax; such a line is otherwise ignored.

    the file is closed before returning, even when reading it fails.
    '''
    global _commentre

    # maps the names usable after "syntax:" (and as per-line prefixes) to
    # the prefix given to the patterns which are returned
    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # the context manager makes sure the file does not stay open when an
    # exception interrupts the loop
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a prefix on the line itself overrides the current default
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now