##// END OF EJS Templates
match: make subinclude construction lazy...
Durham Goode -
r32132:6dea1701 default
parent child Browse files
Show More
@@ -1,788 +1,795 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import copy
10 import copy
11 import os
11 import os
12 import re
12 import re
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 pathutil,
17 pathutil,
18 util,
18 util,
19 )
19 )
20
20
# local alias so methods below can be decorated with @propertycache
propertycache = util.propertycache
22
22
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The returned callable is the compiled pattern's test_match attribute when
    it has one, otherwise its regular match method.'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # compiled pattern has no test_match; fall back to match
        return m.match
32
32
def _expandsets(kindpats, ctx, listsubrepos):
    '''Expand the 'set' patterns found in kindpats.

    kindpats is an iterable of (kind, pat, source) tuples. Returns a pair
    (fset, other): fset is the set of file names produced by evaluating every
    'set' pattern through ctx.getfileset(), and other is the list of all
    remaining kindpats, in their original order.

    When listsubrepos is true, each 'set' pattern is also evaluated in every
    subrepo listed in ctx.substate, and the resulting names are prefixed with
    the subrepo path.

    Raises error.Abort if a 'set' pattern is given without a context.'''
    fset = set()
    other = []

    for kind, pat, source in kindpats:
        if kind == 'set':
            if not ctx:
                raise error.Abort(_("fileset expression with no context"))
            s = ctx.getfileset(pat)
            fset.update(s)

            if listsubrepos:
                for subpath in ctx.substate:
                    s = ctx.sub(subpath).getfileset(pat)
                    fset.update(subpath + '/' + f for f in s)

            continue
        other.append((kind, pat, source))
    return fset, other
53
53
def _expandsubinclude(kindpats, root):
    '''Returns the list of subinclude matcher args and the kindpats without the
    subincludes in it.

    For every 'subinclude' entry a (prefix, matcherargs) pair is produced.
    matcherargs is the positional argument tuple for match() and is returned
    instead of a constructed matcher, so no matcher is built here. prefix is
    the directory of the included file as returned by pathutil.canonpath for
    root, with a trailing '/' unless it is empty.'''
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == 'subinclude':
            # the pattern is resolved against the directory of the file that
            # contained the subinclude directive
            sourceroot = pathutil.dirname(util.normpath(source))
            pat = util.pconvert(pat)
            path = pathutil.join(sourceroot, pat)

            newroot = pathutil.dirname(path)
            matcherargs = (newroot, '', [], ['include:%s' % path])

            prefix = pathutil.canonpath(root, root, newroot)
            if prefix:
                prefix += '/'
            relmatchers.append((prefix, matcherargs))
        else:
            other.append((kind, pat, source))

    return relmatchers, other
77
77
def _kindpatsalwaysmatch(kindpats):
    """Checks whether the kindpats match everything, as e.g.
    'relpath:.' does.

    Returns True only if every (kind, pat, source) entry has an empty pattern
    and a kind of 'relpath' or 'glob'; an empty kindpats also returns True.
    """
    for kind, pat, source in kindpats:
        if pat != '' or kind not in ('relpath', 'glob'):
            return False
    return True
86
86
class match(object):
    '''Callable deciding whether a file name is selected by a set of
    patterns, optionally restricted by include and exclude patterns.

    Calling the object with a file name delegates to self.matchfn, which is
    assembled in __init__ from the include, exclude and primary patterns.'''

    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        warn - optional function used for printing warnings
        badfn - optional bad() callback for this matcher instead of the default

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root, which is matched
                        recursively
        'rootfilesin:<path>' - a path relative to repository root, which is
                        matched non-recursively (will not match subdirectories)
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        'include:<path>' - a file of patterns to read and include
        'subinclude:<path>' - a file of patterns to match against files under
                              the same directory
        '<something>' - a pattern of the specified default type
        """
        if include is None:
            include = []
        if exclude is None:
            exclude = []

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self._warn = warn

        # roots are directories which are recursively included/excluded.
        self._includeroots = set()
        self._excluderoots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set(['.'])

        if badfn is not None:
            self.bad = badfn

        # every function collected here must accept a file name for the
        # matcher to select it
        matchfns = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            if not _anypats(kindpats):
                # Only consider recursive excludes as such - if a non-recursive
                # exclude is used, we must still recurse into the excluded
                # directory, at least to find subdirectories. In such a case,
                # the regex still won't match the non-recursively-excluded
                # files.
                self._excluderoots.update(_roots(kindpats))
            matchfns.append(lambda f: not em(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m
        self._fileroots = set(self._files)

    def __call__(self, fn):
        '''Return the result of the match function for file name fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        '''Set of what util.dirs() reports for the file roots, plus '.'.'''
        return set(util.dirs(self._fileroots)) | set(['.'])

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if self.prefix() and dir in self._fileroots:
            return 'all'
        if dir in self._excluderoots:
            return False
        # includes were given, and dir is neither an included root or dir
        # nor located below an included root
        if ((self._includeroots or self._includedirs != set(['.'])) and
            '.' not in self._includeroots and
            dir not in self._includeroots and
            dir not in self._includedirs and
            not any(parent in self._includeroots
                    for parent in util.finddirs(dir))):
            return False
        return (not self._fileroots or
                '.' in self._fileroots or
                dir in self._fileroots or
                dir in self._dirs or
                any(parentdir in self._fileroots
                    for parentdir in util.finddirs(dir)))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the match function is this matcher's own exact().'''
        return self.matchfn == self.exact

    def prefix(self):
        '''True if the matcher is neither always(), isexact() nor anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.'''
        kindpats = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path', 'rootfilesin'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                try:
                    files = util.readfile(pat)
                    if kind == 'listfile0':
                        files = files.split('\0')
                    else:
                        files = files.splitlines()
                    files = [f for f in files if f]
                except EnvironmentError:
                    raise error.Abort(_("unable to read file list (%s)") % pat)
                # entries read from the list file record that file as source
                for k, p, source in self._normalize(files, default, root, cwd,
                                                    auditor):
                    kindpats.append((k, p, pat))
                continue
            elif kind == 'include':
                try:
                    fullpath = os.path.join(root, util.localpath(pat))
                    includepats = readpatternfile(fullpath, self._warn)
                    for k, p, source in self._normalize(includepats, default,
                                                        root, cwd, auditor):
                        kindpats.append((k, p, source or pat))
                except error.Abort as inst:
                    raise error.Abort('%s: %s' % (pat, inst[0]))
                except IOError as inst:
                    # an unreadable pattern file is skipped, with a warning
                    # if a warn function was supplied
                    if self._warn:
                        self._warn(_("skipping unreadable pattern file "
                                     "'%s': %s\n") % (pat, inst.strerror))
                continue
            # else: re or relre - which cannot be normalized
            kindpats.append((kind, pat, ''))
        return kindpats
338
338
def exact(root, cwd, files, badfn=None):
    '''Return a matcher built with exact=True, so that files are treated as
    file names rather than patterns.'''
    return match(root, cwd, files, exact=True, badfn=badfn)
341
341
def always(root, cwd):
    '''Return a matcher built from an empty pattern list and no
    include/exclude patterns.'''
    return match(root, cwd, [])
344
344
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in keeps its own bad
    callback.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
352
352
class subdirmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that names are taken relative to directory path.

        The superclass constructor is deliberately not called; the attributes
        are copied or derived from matcher instead.'''
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted

        # keep only the files below path, with the "path/" prefix removed
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        if not self._always and not matcher._anypats:
            self._always = any(f == path for f in matcher._files)

        self._anypats = matcher._anypats
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        def visitdir(dir):
            if dir == '.':
                return matcher.visitdir(self._path)
            return matcher.visitdir(self._path + "/" + dir)
        self.visitdir = visitdir
        self._fileroots = set(self._files)

    def abs(self, f):
        '''Delegate to the wrapped matcher with the subdirectory prepended.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Report f to the wrapped matcher's bad() with the subdirectory
        prepended.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher with the subdirectory prepended.'''
        return self._matcher.rel(self._path + "/" + f)
420
420
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False, badfn=None):
        '''Build the matcher; ctx must provide repo().dirstate, which is used
        to normalize the patterns.'''
        init = super(icasefsmatcher, self).__init__
        # these must be set before the superclass constructor runs, because
        # it calls self._normalize()
        self._dirstate = ctx.repo().dirstate
        self._dsnormalize = self._dirstate.normalize

        init(root, cwd, patterns, include, exclude, default, auditor=auditor,
             ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            roots, dirs = _rootsanddirs(self._kp)
            self._fileroots = set(roots)
            self._fileroots.update(dirs)

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize patterns as the superclass does, then pass every
        non-regex pattern through the dirstate's normalize().

        The superclass result is kept in self._kp. It is overwritten on every
        call, so it holds the kindpats of the most recent call.'''
        self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
                                                          root, cwd, auditor)
        kindpats = []
        for kind, pats, source in self._kp:
            if kind not in ('re', 'relre'): # regex can't be normalized
                p = pats
                pats = self._dsnormalize(pats)

                # Preserve the original to handle a case only rename.
                if p != pats and p in self._dirstate:
                    kindpats.append((kind, p, source))

            kindpats.append((kind, pats, source))
        return kindpats
457
457
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    return _patsplit(pattern, default)[0]
461
461
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is a known kind.
    Otherwise returns (default, pattern) with the pattern left untouched."""
    if ':' in pattern:
        kind, pat = pattern.split(':', 1)
        if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                    'listfile', 'listfile0', 'set', 'include', 'subinclude',
                    'rootfilesin'):
            return kind, pat
    return default, pattern
472
472
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = ''
    # Nesting depth of '{...}' alternation groups currently open.
    group = 0
    escape = util.re.escape
    def peek():
        # Next unread character, or False when the input is exhausted.
        return i < n and pat[i:i + 1]
    while i < n:
        # Slice rather than index so the result is always a one-character
        # string of the same type as pat.
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # Ordinary character: match it literally.
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories,
                    # including none.
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' crosses directory separators.
                    res += '.*'
            else:
                # A single '*' stays within one path component.
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # Look for the closing ']' of a character class. A leading '!'
            # or ']' is skipped so it cannot terminate the class.
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # No closing bracket: treat '[' as a literal character.
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # Glob negation '!' becomes regexp negation '^'.
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # A literal leading '^' must not negate the class.
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                # Backslash quotes the following character.
                i += 1
                res += escape(p)
            else:
                # Trailing backslash: match it literally.
                res += escape(c)
        else:
            # '}' or ',' outside of any group are plain characters.
            res += escape(c)
    return res
550
550
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern yields an empty regexp. Kinds handled explicitly are
    're', 'path', 'rootfilesin', 'relglob', 'relpath' and 'relre'; any other
    kind is converted as a rooted glob.
    '''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'path':
        if pat == '.':
            return ''
        # Anchored at the root; matches the path itself or anything below it.
        return '^' + util.re.escape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        if pat == '.':
            escaped = ''
        else:
            # Pattern is a directory name.
            escaped = util.re.escape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return '^' + escaped + '[^/]+$'
    if kind == 'relglob':
        # Unrooted glob: may start at any directory level.
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relpath':
        return util.re.escape(pat) + '(?:/|$)'
    if kind == 'relre':
        if pat.startswith('^'):
            return pat
        # Unrooted regexp: allow any prefix before the pattern.
        return '.*' + pat
    return _globre(pat) + globsuffix
579
579
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The patterns are split into up to three groups, each contributing one
    match function: subincludes, file sets, and everything that can be
    expressed as a regular expression. The returned matcher accepts a file
    when any of those functions accepts it.
    '''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Sub-matchers are built lazily, the first time a file under their
        # prefix is tested, and cached here keyed by prefix.
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    # The sub-matcher sees the path relative to its prefix.
                    if mf(f[len(prefix):]):
                        return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        # Avoid the extra indirection when there is only one function.
        return regex, matchfuncs[0]
    else:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
607
614
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern does not compile; the message names
    the offending pattern (and its source, when one is recorded).
    """
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p, s) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        npats = len(kindpats)
        if npats < 2:
            # A single pattern cannot be split any further.
            raise
        a = _buildregexmatch(kindpats[:npats//2], globsuffix)[1]
        b = _buildregexmatch(kindpats[npats//2:], globsuffix)[1]
        # The regexp string returned is still the full combined one; only
        # the matching is delegated to the two halves.
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # Compile the patterns one at a time to find out which one is bad.
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
638
645
639 def _patternrootsanddirs(kindpats):
646 def _patternrootsanddirs(kindpats):
640 '''Returns roots and directories corresponding to each pattern.
647 '''Returns roots and directories corresponding to each pattern.
641
648
642 This calculates the roots and directories exactly matching the patterns and
649 This calculates the roots and directories exactly matching the patterns and
643 returns a tuple of (roots, dirs) for each. It does not return other
650 returns a tuple of (roots, dirs) for each. It does not return other
644 directories which may also need to be considered, like the parent
651 directories which may also need to be considered, like the parent
645 directories.
652 directories.
646 '''
653 '''
647 r = []
654 r = []
648 d = []
655 d = []
649 for kind, pat, source in kindpats:
656 for kind, pat, source in kindpats:
650 if kind == 'glob': # find the non-glob prefix
657 if kind == 'glob': # find the non-glob prefix
651 root = []
658 root = []
652 for p in pat.split('/'):
659 for p in pat.split('/'):
653 if '[' in p or '{' in p or '*' in p or '?' in p:
660 if '[' in p or '{' in p or '*' in p or '?' in p:
654 break
661 break
655 root.append(p)
662 root.append(p)
656 r.append('/'.join(root) or '.')
663 r.append('/'.join(root) or '.')
657 elif kind in ('relpath', 'path'):
664 elif kind in ('relpath', 'path'):
658 r.append(pat or '.')
665 r.append(pat or '.')
659 elif kind in ('rootfilesin',):
666 elif kind in ('rootfilesin',):
660 d.append(pat or '.')
667 d.append(pat or '.')
661 else: # relglob, re, relre
668 else: # relglob, re, relre
662 r.append('.')
669 r.append('.')
663 return r, d
670 return r, d
664
671
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the roots are wanted here; the exact directories are discarded.
    return _patternrootsanddirs(kindpats)[0]
669
676
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], [])
    '''
    r, d = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    d.extend(util.dirs(d))
    d.extend(util.dirs(r))

    return r, d
699
706
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
    return _roots(filable)
712
719
713 def _anypats(kindpats):
720 def _anypats(kindpats):
714 for kind, pat, source in kindpats:
721 for kind, pat, source in kindpats:
715 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
722 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
716 return True
723 return True
717
724
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, when not false, is called with a message for each 'syntax:' line
    naming an unknown syntax; such lines are otherwise ignored.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block guarantees the file is closed even when reading or
    # parsing raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                # A 'syntax:' line only changes the default; it never
                # produces a pattern itself.
                continue

            # An explicit per-line prefix overrides the current default.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now