##// END OF EJS Templates
match: introduce boolean prefix() method...
Martin von Zweigbergk -
r25233:9789b4a7 default
parent child Browse files
Show More
@@ -1,610 +1,613 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import re
8 import re
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
12 propertycache = util.propertycache
12 propertycache = util.propertycache
13
13
14 def _rematcher(regex):
14 def _rematcher(regex):
15 '''compile the regexp with the best available regexp engine and return a
15 '''compile the regexp with the best available regexp engine and return a
16 matcher function'''
16 matcher function'''
17 m = util.re.compile(regex)
17 m = util.re.compile(regex)
18 try:
18 try:
19 # slightly faster, provided by facebook's re2 bindings
19 # slightly faster, provided by facebook's re2 bindings
20 return m.test_match
20 return m.test_match
21 except AttributeError:
21 except AttributeError:
22 return m.match
22 return m.match
23
23
24 def _expandsets(kindpats, ctx, listsubrepos):
24 def _expandsets(kindpats, ctx, listsubrepos):
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 fset = set()
26 fset = set()
27 other = []
27 other = []
28
28
29 for kind, pat, source in kindpats:
29 for kind, pat, source in kindpats:
30 if kind == 'set':
30 if kind == 'set':
31 if not ctx:
31 if not ctx:
32 raise util.Abort("fileset expression with no context")
32 raise util.Abort("fileset expression with no context")
33 s = ctx.getfileset(pat)
33 s = ctx.getfileset(pat)
34 fset.update(s)
34 fset.update(s)
35
35
36 if listsubrepos:
36 if listsubrepos:
37 for subpath in ctx.substate:
37 for subpath in ctx.substate:
38 s = ctx.sub(subpath).getfileset(pat)
38 s = ctx.sub(subpath).getfileset(pat)
39 fset.update(subpath + '/' + f for f in s)
39 fset.update(subpath + '/' + f for f in s)
40
40
41 continue
41 continue
42 other.append((kind, pat, source))
42 other.append((kind, pat, source))
43 return fset, other
43 return fset, other
44
44
45 def _kindpatsalwaysmatch(kindpats):
45 def _kindpatsalwaysmatch(kindpats):
46 """Checks whether the kindpats match everything, as e.g.
46 """Checks whether the kindpats match everything, as e.g.
47 'relpath:.' does.
47 'relpath:.' does.
48 """
48 """
49 for kind, pat, source in kindpats:
49 for kind, pat, source in kindpats:
50 if pat != '' or kind not in ['relpath', 'glob']:
50 if pat != '' or kind not in ['relpath', 'glob']:
51 return False
51 return False
52 return True
52 return True
53
53
54 class match(object):
54 class match(object):
55 def __init__(self, root, cwd, patterns, include=[], exclude=[],
55 def __init__(self, root, cwd, patterns, include=[], exclude=[],
56 default='glob', exact=False, auditor=None, ctx=None,
56 default='glob', exact=False, auditor=None, ctx=None,
57 listsubrepos=False, warn=None):
57 listsubrepos=False, warn=None):
58 """build an object to match a set of file patterns
58 """build an object to match a set of file patterns
59
59
60 arguments:
60 arguments:
61 root - the canonical root of the tree you're matching against
61 root - the canonical root of the tree you're matching against
62 cwd - the current working directory, if relevant
62 cwd - the current working directory, if relevant
63 patterns - patterns to find
63 patterns - patterns to find
64 include - patterns to include (unless they are excluded)
64 include - patterns to include (unless they are excluded)
65 exclude - patterns to exclude (even if they are included)
65 exclude - patterns to exclude (even if they are included)
66 default - if a pattern in patterns has no explicit type, assume this one
66 default - if a pattern in patterns has no explicit type, assume this one
67 exact - patterns are actually filenames (include/exclude still apply)
67 exact - patterns are actually filenames (include/exclude still apply)
68 warn - optional function used for printing warnings
68 warn - optional function used for printing warnings
69
69
70 a pattern is one of:
70 a pattern is one of:
71 'glob:<glob>' - a glob relative to cwd
71 'glob:<glob>' - a glob relative to cwd
72 're:<regexp>' - a regular expression
72 're:<regexp>' - a regular expression
73 'path:<path>' - a path relative to repository root
73 'path:<path>' - a path relative to repository root
74 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
74 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
75 'relpath:<path>' - a path relative to cwd
75 'relpath:<path>' - a path relative to cwd
76 'relre:<regexp>' - a regexp that needn't match the start of a name
76 'relre:<regexp>' - a regexp that needn't match the start of a name
77 'set:<fileset>' - a fileset expression
77 'set:<fileset>' - a fileset expression
78 'include:<path>' - a file of patterns to read and include
78 'include:<path>' - a file of patterns to read and include
79 '<something>' - a pattern of the specified default type
79 '<something>' - a pattern of the specified default type
80 """
80 """
81
81
82 self._root = root
82 self._root = root
83 self._cwd = cwd
83 self._cwd = cwd
84 self._files = [] # exact files and roots of patterns
84 self._files = [] # exact files and roots of patterns
85 self._anypats = bool(include or exclude)
85 self._anypats = bool(include or exclude)
86 self._always = False
86 self._always = False
87 self._pathrestricted = bool(include or exclude or patterns)
87 self._pathrestricted = bool(include or exclude or patterns)
88 self._warn = warn
88 self._warn = warn
89 self._includeroots = set()
89 self._includeroots = set()
90 self._includedirs = set(['.'])
90 self._includedirs = set(['.'])
91 self._excluderoots = set()
91 self._excluderoots = set()
92
92
93 matchfns = []
93 matchfns = []
94 if include:
94 if include:
95 kindpats = self._normalize(include, 'glob', root, cwd, auditor)
95 kindpats = self._normalize(include, 'glob', root, cwd, auditor)
96 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
96 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
97 listsubrepos)
97 listsubrepos)
98 self._includeroots.update(_roots(kindpats))
98 self._includeroots.update(_roots(kindpats))
99 self._includeroots.discard('.')
99 self._includeroots.discard('.')
100 self._includedirs.update(util.dirs(self._includeroots))
100 self._includedirs.update(util.dirs(self._includeroots))
101 matchfns.append(im)
101 matchfns.append(im)
102 if exclude:
102 if exclude:
103 kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
103 kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
104 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
104 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
105 listsubrepos)
105 listsubrepos)
106 self._excluderoots.update(_roots(kindpats))
106 self._excluderoots.update(_roots(kindpats))
107 self._excluderoots.discard('.')
107 self._excluderoots.discard('.')
108 matchfns.append(lambda f: not em(f))
108 matchfns.append(lambda f: not em(f))
109 if exact:
109 if exact:
110 if isinstance(patterns, list):
110 if isinstance(patterns, list):
111 self._files = patterns
111 self._files = patterns
112 else:
112 else:
113 self._files = list(patterns)
113 self._files = list(patterns)
114 matchfns.append(self.exact)
114 matchfns.append(self.exact)
115 elif patterns:
115 elif patterns:
116 kindpats = self._normalize(patterns, default, root, cwd, auditor)
116 kindpats = self._normalize(patterns, default, root, cwd, auditor)
117 if not _kindpatsalwaysmatch(kindpats):
117 if not _kindpatsalwaysmatch(kindpats):
118 self._files = _roots(kindpats)
118 self._files = _roots(kindpats)
119 self._anypats = self._anypats or _anypats(kindpats)
119 self._anypats = self._anypats or _anypats(kindpats)
120 self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
120 self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
121 listsubrepos)
121 listsubrepos)
122 matchfns.append(pm)
122 matchfns.append(pm)
123
123
124 if not matchfns:
124 if not matchfns:
125 m = util.always
125 m = util.always
126 self._always = True
126 self._always = True
127 elif len(matchfns) == 1:
127 elif len(matchfns) == 1:
128 m = matchfns[0]
128 m = matchfns[0]
129 else:
129 else:
130 def m(f):
130 def m(f):
131 for matchfn in matchfns:
131 for matchfn in matchfns:
132 if not matchfn(f):
132 if not matchfn(f):
133 return False
133 return False
134 return True
134 return True
135
135
136 self.matchfn = m
136 self.matchfn = m
137 self._fileroots = set(self._files)
137 self._fileroots = set(self._files)
138
138
139 def __call__(self, fn):
139 def __call__(self, fn):
140 return self.matchfn(fn)
140 return self.matchfn(fn)
141 def __iter__(self):
141 def __iter__(self):
142 for f in self._files:
142 for f in self._files:
143 yield f
143 yield f
144
144
145 # Callbacks related to how the matcher is used by dirstate.walk.
145 # Callbacks related to how the matcher is used by dirstate.walk.
146 # Subscribers to these events must monkeypatch the matcher object.
146 # Subscribers to these events must monkeypatch the matcher object.
147 def bad(self, f, msg):
147 def bad(self, f, msg):
148 '''Callback from dirstate.walk for each explicit file that can't be
148 '''Callback from dirstate.walk for each explicit file that can't be
149 found/accessed, with an error message.'''
149 found/accessed, with an error message.'''
150 pass
150 pass
151
151
152 # If an explicitdir is set, it will be called when an explicitly listed
152 # If an explicitdir is set, it will be called when an explicitly listed
153 # directory is visited.
153 # directory is visited.
154 explicitdir = None
154 explicitdir = None
155
155
156 # If a traversedir is set, it will be called when a directory discovered
156 # If a traversedir is set, it will be called when a directory discovered
157 # by recursive traversal is visited.
157 # by recursive traversal is visited.
158 traversedir = None
158 traversedir = None
159
159
160 def abs(self, f):
160 def abs(self, f):
161 '''Convert a repo path back to path that is relative to the root of the
161 '''Convert a repo path back to path that is relative to the root of the
162 matcher.'''
162 matcher.'''
163 return f
163 return f
164
164
165 def rel(self, f):
165 def rel(self, f):
166 '''Convert repo path back to path that is relative to cwd of matcher.'''
166 '''Convert repo path back to path that is relative to cwd of matcher.'''
167 return util.pathto(self._root, self._cwd, f)
167 return util.pathto(self._root, self._cwd, f)
168
168
169 def uipath(self, f):
169 def uipath(self, f):
170 '''Convert repo path to a display path. If patterns or -I/-X were used
170 '''Convert repo path to a display path. If patterns or -I/-X were used
171 to create this matcher, the display path will be relative to cwd.
171 to create this matcher, the display path will be relative to cwd.
172 Otherwise it is relative to the root of the repo.'''
172 Otherwise it is relative to the root of the repo.'''
173 return (self._pathrestricted and self.rel(f)) or self.abs(f)
173 return (self._pathrestricted and self.rel(f)) or self.abs(f)
174
174
175 def files(self):
175 def files(self):
176 '''Explicitly listed files or patterns or roots:
176 '''Explicitly listed files or patterns or roots:
177 if no patterns or .always(): empty list,
177 if no patterns or .always(): empty list,
178 if exact: list exact files,
178 if exact: list exact files,
179 if not .anypats(): list all files and dirs,
179 if not .anypats(): list all files and dirs,
180 else: optimal roots'''
180 else: optimal roots'''
181 return self._files
181 return self._files
182
182
183 @propertycache
183 @propertycache
184 def _dirs(self):
184 def _dirs(self):
185 return set(util.dirs(self._fileroots)) | set(['.'])
185 return set(util.dirs(self._fileroots)) | set(['.'])
186
186
187 def visitdir(self, dir):
187 def visitdir(self, dir):
188 '''Decides whether a directory should be visited based on whether it
188 '''Decides whether a directory should be visited based on whether it
189 has potential matches in it or one of its subdirectories. This is
189 has potential matches in it or one of its subdirectories. This is
190 based on the match's primary, included, and excluded patterns.
190 based on the match's primary, included, and excluded patterns.
191
191
192 This function's behavior is undefined if it has returned False for
192 This function's behavior is undefined if it has returned False for
193 one of the dir's parent directories.
193 one of the dir's parent directories.
194 '''
194 '''
195 if dir in self._excluderoots:
195 if dir in self._excluderoots:
196 return False
196 return False
197 parentdirs = None
197 parentdirs = None
198 if (self._includeroots and dir not in self._includeroots and
198 if (self._includeroots and dir not in self._includeroots and
199 dir not in self._includedirs):
199 dir not in self._includedirs):
200 parentdirs = util.finddirs(dir)
200 parentdirs = util.finddirs(dir)
201 if not any(parent in self._includeroots for parent in parentdirs):
201 if not any(parent in self._includeroots for parent in parentdirs):
202 return False
202 return False
203 return (not self._fileroots or '.' in self._fileroots or
203 return (not self._fileroots or '.' in self._fileroots or
204 dir in self._fileroots or dir in self._dirs or
204 dir in self._fileroots or dir in self._dirs or
205 any(parentdir in self._fileroots
205 any(parentdir in self._fileroots
206 for parentdir in parentdirs or util.finddirs(dir)))
206 for parentdir in parentdirs or util.finddirs(dir)))
207
207
208 def exact(self, f):
208 def exact(self, f):
209 '''Returns True if f is in .files().'''
209 '''Returns True if f is in .files().'''
210 return f in self._fileroots
210 return f in self._fileroots
211
211
212 def anypats(self):
212 def anypats(self):
213 '''Matcher uses patterns or include/exclude.'''
213 '''Matcher uses patterns or include/exclude.'''
214 return self._anypats
214 return self._anypats
215
215
216 def always(self):
216 def always(self):
217 '''Matcher will match everything and .files() will be empty
217 '''Matcher will match everything and .files() will be empty
218 - optimization might be possible and necessary.'''
218 - optimization might be possible and necessary.'''
219 return self._always
219 return self._always
220
220
221 def ispartial(self):
221 def ispartial(self):
222 '''True if the matcher won't always match.
222 '''True if the matcher won't always match.
223
223
224 Although it's just the inverse of _always in this implementation,
224 Although it's just the inverse of _always in this implementation,
225 an extension such as narrowhg might make it return something
225 an extension such as narrowhg might make it return something
226 slightly different.'''
226 slightly different.'''
227 return not self._always
227 return not self._always
228
228
229 def isexact(self):
229 def isexact(self):
230 return self.matchfn == self.exact
230 return self.matchfn == self.exact
231
231
232 def prefix(self):
233 return not self.always() and not self.isexact() and not self.anypats()
234
232 def _normalize(self, patterns, default, root, cwd, auditor):
235 def _normalize(self, patterns, default, root, cwd, auditor):
233 '''Convert 'kind:pat' from the patterns list to tuples with kind and
236 '''Convert 'kind:pat' from the patterns list to tuples with kind and
234 normalized and rooted patterns and with listfiles expanded.'''
237 normalized and rooted patterns and with listfiles expanded.'''
235 kindpats = []
238 kindpats = []
236 for kind, pat in [_patsplit(p, default) for p in patterns]:
239 for kind, pat in [_patsplit(p, default) for p in patterns]:
237 if kind in ('glob', 'relpath'):
240 if kind in ('glob', 'relpath'):
238 pat = pathutil.canonpath(root, cwd, pat, auditor)
241 pat = pathutil.canonpath(root, cwd, pat, auditor)
239 elif kind in ('relglob', 'path'):
242 elif kind in ('relglob', 'path'):
240 pat = util.normpath(pat)
243 pat = util.normpath(pat)
241 elif kind in ('listfile', 'listfile0'):
244 elif kind in ('listfile', 'listfile0'):
242 try:
245 try:
243 files = util.readfile(pat)
246 files = util.readfile(pat)
244 if kind == 'listfile0':
247 if kind == 'listfile0':
245 files = files.split('\0')
248 files = files.split('\0')
246 else:
249 else:
247 files = files.splitlines()
250 files = files.splitlines()
248 files = [f for f in files if f]
251 files = [f for f in files if f]
249 except EnvironmentError:
252 except EnvironmentError:
250 raise util.Abort(_("unable to read file list (%s)") % pat)
253 raise util.Abort(_("unable to read file list (%s)") % pat)
251 for k, p, source in self._normalize(files, default, root, cwd,
254 for k, p, source in self._normalize(files, default, root, cwd,
252 auditor):
255 auditor):
253 kindpats.append((k, p, pat))
256 kindpats.append((k, p, pat))
254 continue
257 continue
255 elif kind == 'include':
258 elif kind == 'include':
256 try:
259 try:
257 includepats = readpatternfile(pat, self._warn)
260 includepats = readpatternfile(pat, self._warn)
258 for k, p, source in self._normalize(includepats, default,
261 for k, p, source in self._normalize(includepats, default,
259 root, cwd, auditor):
262 root, cwd, auditor):
260 kindpats.append((k, p, source or pat))
263 kindpats.append((k, p, source or pat))
261 except util.Abort, inst:
264 except util.Abort, inst:
262 raise util.Abort('%s: %s' % (pat, inst[0]))
265 raise util.Abort('%s: %s' % (pat, inst[0]))
263 except IOError, inst:
266 except IOError, inst:
264 if self._warn:
267 if self._warn:
265 self._warn(_("skipping unreadable pattern file "
268 self._warn(_("skipping unreadable pattern file "
266 "'%s': %s\n") % (pat, inst.strerror))
269 "'%s': %s\n") % (pat, inst.strerror))
267 continue
270 continue
268 # else: re or relre - which cannot be normalized
271 # else: re or relre - which cannot be normalized
269 kindpats.append((kind, pat, ''))
272 kindpats.append((kind, pat, ''))
270 return kindpats
273 return kindpats
271
274
272 def exact(root, cwd, files):
275 def exact(root, cwd, files):
273 return match(root, cwd, files, exact=True)
276 return match(root, cwd, files, exact=True)
274
277
275 def always(root, cwd):
278 def always(root, cwd):
276 return match(root, cwd, [])
279 return match(root, cwd, [])
277
280
278 class narrowmatcher(match):
281 class narrowmatcher(match):
279 """Adapt a matcher to work on a subdirectory only.
282 """Adapt a matcher to work on a subdirectory only.
280
283
281 The paths are remapped to remove/insert the path as needed:
284 The paths are remapped to remove/insert the path as needed:
282
285
283 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
286 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
284 >>> m2 = narrowmatcher('sub', m1)
287 >>> m2 = narrowmatcher('sub', m1)
285 >>> bool(m2('a.txt'))
288 >>> bool(m2('a.txt'))
286 False
289 False
287 >>> bool(m2('b.txt'))
290 >>> bool(m2('b.txt'))
288 True
291 True
289 >>> bool(m2.matchfn('a.txt'))
292 >>> bool(m2.matchfn('a.txt'))
290 False
293 False
291 >>> bool(m2.matchfn('b.txt'))
294 >>> bool(m2.matchfn('b.txt'))
292 True
295 True
293 >>> m2.files()
296 >>> m2.files()
294 ['b.txt']
297 ['b.txt']
295 >>> m2.exact('b.txt')
298 >>> m2.exact('b.txt')
296 True
299 True
297 >>> util.pconvert(m2.rel('b.txt'))
300 >>> util.pconvert(m2.rel('b.txt'))
298 'sub/b.txt'
301 'sub/b.txt'
299 >>> def bad(f, msg):
302 >>> def bad(f, msg):
300 ... print "%s: %s" % (f, msg)
303 ... print "%s: %s" % (f, msg)
301 >>> m1.bad = bad
304 >>> m1.bad = bad
302 >>> m2.bad('x.txt', 'No such file')
305 >>> m2.bad('x.txt', 'No such file')
303 sub/x.txt: No such file
306 sub/x.txt: No such file
304 >>> m2.abs('c.txt')
307 >>> m2.abs('c.txt')
305 'sub/c.txt'
308 'sub/c.txt'
306 """
309 """
307
310
308 def __init__(self, path, matcher):
311 def __init__(self, path, matcher):
309 self._root = matcher._root
312 self._root = matcher._root
310 self._cwd = matcher._cwd
313 self._cwd = matcher._cwd
311 self._path = path
314 self._path = path
312 self._matcher = matcher
315 self._matcher = matcher
313 self._always = matcher._always
316 self._always = matcher._always
314 self._pathrestricted = matcher._pathrestricted
317 self._pathrestricted = matcher._pathrestricted
315
318
316 self._files = [f[len(path) + 1:] for f in matcher._files
319 self._files = [f[len(path) + 1:] for f in matcher._files
317 if f.startswith(path + "/")]
320 if f.startswith(path + "/")]
318
321
319 # If the parent repo had a path to this subrepo and no patterns are
322 # If the parent repo had a path to this subrepo and no patterns are
320 # specified, this submatcher always matches.
323 # specified, this submatcher always matches.
321 if not self._always and not matcher._anypats:
324 if not self._always and not matcher._anypats:
322 self._always = any(f == path for f in matcher._files)
325 self._always = any(f == path for f in matcher._files)
323
326
324 self._anypats = matcher._anypats
327 self._anypats = matcher._anypats
325 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
328 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
326 self._fileroots = set(self._files)
329 self._fileroots = set(self._files)
327
330
328 def abs(self, f):
331 def abs(self, f):
329 return self._matcher.abs(self._path + "/" + f)
332 return self._matcher.abs(self._path + "/" + f)
330
333
331 def bad(self, f, msg):
334 def bad(self, f, msg):
332 self._matcher.bad(self._path + "/" + f, msg)
335 self._matcher.bad(self._path + "/" + f, msg)
333
336
334 def rel(self, f):
337 def rel(self, f):
335 return self._matcher.rel(self._path + "/" + f)
338 return self._matcher.rel(self._path + "/" + f)
336
339
337 class icasefsmatcher(match):
340 class icasefsmatcher(match):
338 """A matcher for wdir on case insensitive filesystems, which normalizes the
341 """A matcher for wdir on case insensitive filesystems, which normalizes the
339 given patterns to the case in the filesystem.
342 given patterns to the case in the filesystem.
340 """
343 """
341
344
342 def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
345 def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
343 ctx, listsubrepos=False):
346 ctx, listsubrepos=False):
344 init = super(icasefsmatcher, self).__init__
347 init = super(icasefsmatcher, self).__init__
345 self._dsnormalize = ctx.repo().dirstate.normalize
348 self._dsnormalize = ctx.repo().dirstate.normalize
346
349
347 init(root, cwd, patterns, include, exclude, default, auditor=auditor,
350 init(root, cwd, patterns, include, exclude, default, auditor=auditor,
348 ctx=ctx, listsubrepos=listsubrepos)
351 ctx=ctx, listsubrepos=listsubrepos)
349
352
350 # m.exact(file) must be based off of the actual user input, otherwise
353 # m.exact(file) must be based off of the actual user input, otherwise
351 # inexact case matches are treated as exact, and not noted without -v.
354 # inexact case matches are treated as exact, and not noted without -v.
352 if self._files:
355 if self._files:
353 self._fileroots = set(_roots(self._kp))
356 self._fileroots = set(_roots(self._kp))
354
357
355 def _normalize(self, patterns, default, root, cwd, auditor):
358 def _normalize(self, patterns, default, root, cwd, auditor):
356 self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
359 self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
357 root, cwd, auditor)
360 root, cwd, auditor)
358 kindpats = []
361 kindpats = []
359 for kind, pats, source in self._kp:
362 for kind, pats, source in self._kp:
360 if kind not in ('re', 'relre'): # regex can't be normalized
363 if kind not in ('re', 'relre'): # regex can't be normalized
361 pats = self._dsnormalize(pats)
364 pats = self._dsnormalize(pats)
362 kindpats.append((kind, pats, source))
365 kindpats.append((kind, pats, source))
363 return kindpats
366 return kindpats
364
367
365 def patkind(pattern, default=None):
368 def patkind(pattern, default=None):
366 '''If pattern is 'kind:pat' with a known kind, return kind.'''
369 '''If pattern is 'kind:pat' with a known kind, return kind.'''
367 return _patsplit(pattern, default)[0]
370 return _patsplit(pattern, default)[0]
368
371
369 def _patsplit(pattern, default):
372 def _patsplit(pattern, default):
370 """Split a string into the optional pattern kind prefix and the actual
373 """Split a string into the optional pattern kind prefix and the actual
371 pattern."""
374 pattern."""
372 if ':' in pattern:
375 if ':' in pattern:
373 kind, pat = pattern.split(':', 1)
376 kind, pat = pattern.split(':', 1)
374 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
377 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
375 'listfile', 'listfile0', 'set', 'include'):
378 'listfile', 'listfile0', 'set', 'include'):
376 return kind, pat
379 return kind, pat
377 return default, pattern
380 return default, pattern
378
381
379 def _globre(pat):
382 def _globre(pat):
380 r'''Convert an extended glob string to a regexp string.
383 r'''Convert an extended glob string to a regexp string.
381
384
382 >>> print _globre(r'?')
385 >>> print _globre(r'?')
383 .
386 .
384 >>> print _globre(r'*')
387 >>> print _globre(r'*')
385 [^/]*
388 [^/]*
386 >>> print _globre(r'**')
389 >>> print _globre(r'**')
387 .*
390 .*
388 >>> print _globre(r'**/a')
391 >>> print _globre(r'**/a')
389 (?:.*/)?a
392 (?:.*/)?a
390 >>> print _globre(r'a/**/b')
393 >>> print _globre(r'a/**/b')
391 a\/(?:.*/)?b
394 a\/(?:.*/)?b
392 >>> print _globre(r'[a*?!^][^b][!c]')
395 >>> print _globre(r'[a*?!^][^b][!c]')
393 [a*?!^][\^b][^c]
396 [a*?!^][\^b][^c]
394 >>> print _globre(r'{a,b}')
397 >>> print _globre(r'{a,b}')
395 (?:a|b)
398 (?:a|b)
396 >>> print _globre(r'.\*\?')
399 >>> print _globre(r'.\*\?')
397 \.\*\?
400 \.\*\?
398 '''
401 '''
399 i, n = 0, len(pat)
402 i, n = 0, len(pat)
400 res = ''
403 res = ''
401 group = 0
404 group = 0
402 escape = util.re.escape
405 escape = util.re.escape
403 def peek():
406 def peek():
404 return i < n and pat[i]
407 return i < n and pat[i]
405 while i < n:
408 while i < n:
406 c = pat[i]
409 c = pat[i]
407 i += 1
410 i += 1
408 if c not in '*?[{},\\':
411 if c not in '*?[{},\\':
409 res += escape(c)
412 res += escape(c)
410 elif c == '*':
413 elif c == '*':
411 if peek() == '*':
414 if peek() == '*':
412 i += 1
415 i += 1
413 if peek() == '/':
416 if peek() == '/':
414 i += 1
417 i += 1
415 res += '(?:.*/)?'
418 res += '(?:.*/)?'
416 else:
419 else:
417 res += '.*'
420 res += '.*'
418 else:
421 else:
419 res += '[^/]*'
422 res += '[^/]*'
420 elif c == '?':
423 elif c == '?':
421 res += '.'
424 res += '.'
422 elif c == '[':
425 elif c == '[':
423 j = i
426 j = i
424 if j < n and pat[j] in '!]':
427 if j < n and pat[j] in '!]':
425 j += 1
428 j += 1
426 while j < n and pat[j] != ']':
429 while j < n and pat[j] != ']':
427 j += 1
430 j += 1
428 if j >= n:
431 if j >= n:
429 res += '\\['
432 res += '\\['
430 else:
433 else:
431 stuff = pat[i:j].replace('\\','\\\\')
434 stuff = pat[i:j].replace('\\','\\\\')
432 i = j + 1
435 i = j + 1
433 if stuff[0] == '!':
436 if stuff[0] == '!':
434 stuff = '^' + stuff[1:]
437 stuff = '^' + stuff[1:]
435 elif stuff[0] == '^':
438 elif stuff[0] == '^':
436 stuff = '\\' + stuff
439 stuff = '\\' + stuff
437 res = '%s[%s]' % (res, stuff)
440 res = '%s[%s]' % (res, stuff)
438 elif c == '{':
441 elif c == '{':
439 group += 1
442 group += 1
440 res += '(?:'
443 res += '(?:'
441 elif c == '}' and group:
444 elif c == '}' and group:
442 res += ')'
445 res += ')'
443 group -= 1
446 group -= 1
444 elif c == ',' and group:
447 elif c == ',' and group:
445 res += '|'
448 res += '|'
446 elif c == '\\':
449 elif c == '\\':
447 p = peek()
450 p = peek()
448 if p:
451 if p:
449 i += 1
452 i += 1
450 res += escape(p)
453 res += escape(p)
451 else:
454 else:
452 res += escape(c)
455 res += escape(c)
453 else:
456 else:
454 res += escape(c)
457 res += escape(c)
455 return res
458 return res
456
459
457 def _regex(kind, pat, globsuffix):
460 def _regex(kind, pat, globsuffix):
458 '''Convert a (normalized) pattern of any kind into a regular expression.
461 '''Convert a (normalized) pattern of any kind into a regular expression.
459 globsuffix is appended to the regexp of globs.'''
462 globsuffix is appended to the regexp of globs.'''
460 if not pat:
463 if not pat:
461 return ''
464 return ''
462 if kind == 're':
465 if kind == 're':
463 return pat
466 return pat
464 if kind == 'path':
467 if kind == 'path':
465 return '^' + util.re.escape(pat) + '(?:/|$)'
468 return '^' + util.re.escape(pat) + '(?:/|$)'
466 if kind == 'relglob':
469 if kind == 'relglob':
467 return '(?:|.*/)' + _globre(pat) + globsuffix
470 return '(?:|.*/)' + _globre(pat) + globsuffix
468 if kind == 'relpath':
471 if kind == 'relpath':
469 return util.re.escape(pat) + '(?:/|$)'
472 return util.re.escape(pat) + '(?:/|$)'
470 if kind == 'relre':
473 if kind == 'relre':
471 if pat.startswith('^'):
474 if pat.startswith('^'):
472 return pat
475 return pat
473 return '.*' + pat
476 return '.*' + pat
474 return _globre(pat) + globsuffix
477 return _globre(pat) + globsuffix
475
478
476 def _buildmatch(ctx, kindpats, globsuffix, listsubrepos):
479 def _buildmatch(ctx, kindpats, globsuffix, listsubrepos):
477 '''Return regexp string and a matcher function for kindpats.
480 '''Return regexp string and a matcher function for kindpats.
478 globsuffix is appended to the regexp of globs.'''
481 globsuffix is appended to the regexp of globs.'''
479 fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
482 fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
480 if not kindpats:
483 if not kindpats:
481 return "", fset.__contains__
484 return "", fset.__contains__
482
485
483 regex, mf = _buildregexmatch(kindpats, globsuffix)
486 regex, mf = _buildregexmatch(kindpats, globsuffix)
484 if fset:
487 if fset:
485 return regex, lambda f: f in fset or mf(f)
488 return regex, lambda f: f in fset or mf(f)
486 return regex, mf
489 return regex, mf
487
490
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    kindpats is a sequence of (kind, pattern, source) tuples; each entry is
    translated with _regex() and the results are joined into one
    alternation.  globsuffix is passed through to _regex() unchanged.

    If the combined expression is longer than 20000 characters, or
    compiling it raises OverflowError, the pattern list is split in half
    and each half is compiled recursively; the returned matcher then
    accepts a name if either half does.  The returned regexp string is
    still the full combined expression in that case.

    Raises util.Abort when a pattern does not compile, naming the
    offending pattern (and its source, when one is recorded).
    """
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p, s) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            # a single pattern cannot be split any further
            raise
        # only the matcher of each half is needed; its regexp string is
        # already contained in the combined 'regex' built above
        a = _buildregexmatch(kindpats[:l//2], globsuffix)[1]
        b = _buildregexmatch(kindpats[l//2:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # recompile the patterns one at a time to find the one to blame
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                     (s, k, p))
                else:
                    raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        # every pattern compiles alone, so only the combination is invalid
        raise util.Abort(_("invalid pattern"))
518
521
519 def _roots(kindpats):
522 def _roots(kindpats):
520 '''return roots and exact explicitly listed files from patterns
523 '''return roots and exact explicitly listed files from patterns
521
524
522 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
525 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
523 ['g', 'g', '.']
526 ['g', 'g', '.']
524 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
527 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
525 ['r', 'p/p', '.']
528 ['r', 'p/p', '.']
526 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
529 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
527 ['.', '.', '.']
530 ['.', '.', '.']
528 '''
531 '''
529 r = []
532 r = []
530 for kind, pat, source in kindpats:
533 for kind, pat, source in kindpats:
531 if kind == 'glob': # find the non-glob prefix
534 if kind == 'glob': # find the non-glob prefix
532 root = []
535 root = []
533 for p in pat.split('/'):
536 for p in pat.split('/'):
534 if '[' in p or '{' in p or '*' in p or '?' in p:
537 if '[' in p or '{' in p or '*' in p or '?' in p:
535 break
538 break
536 root.append(p)
539 root.append(p)
537 r.append('/'.join(root) or '.')
540 r.append('/'.join(root) or '.')
538 elif kind in ('relpath', 'path'):
541 elif kind in ('relpath', 'path'):
539 r.append(pat or '.')
542 r.append(pat or '.')
540 else: # relglob, re, relre
543 else: # relglob, re, relre
541 r.append('.')
544 r.append('.')
542 return r
545 return r
543
546
544 def _anypats(kindpats):
547 def _anypats(kindpats):
545 for kind, pat, source in kindpats:
548 for kind, pat, source in kindpats:
546 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
549 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
547 return True
550 return True
548
551
# compiled lazily by readpatternfile() the first time a line contains '#'
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    filepath is the name of the file to read.  warn is either a false
    value or a callable taking one message string; it is called when a
    "syntax:" line names an unknown syntax, and that line is then
    ignored.

    The file is closed before returning, even if reading it fails.'''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # an explicit per-line prefix, in either its short ("glob:")
            # or its expanded ("relglob:") spelling, overrides the
            # current default syntax for this line only
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # do not leak the file handle if reading or parsing raises
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now