|
@@
-1,1622
+1,1601
b''
|
|
1
|
1
|
# match.py - filename matching
|
|
2
|
2
|
#
|
|
3
|
3
|
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
|
|
4
|
4
|
#
|
|
5
|
5
|
# This software may be used and distributed according to the terms of the
|
|
6
|
6
|
# GNU General Public License version 2 or any later version.
|
|
7
|
7
|
|
|
8
|
8
|
from __future__ import absolute_import, print_function
|
|
9
|
9
|
|
|
10
|
10
|
import copy
|
|
11
|
11
|
import itertools
|
|
12
|
12
|
import os
|
|
13
|
13
|
import re
|
|
14
|
14
|
|
|
15
|
15
|
from .i18n import _
|
|
16
|
16
|
from .pycompat import open
|
|
17
|
17
|
from . import (
|
|
18
|
18
|
encoding,
|
|
19
|
19
|
error,
|
|
20
|
20
|
pathutil,
|
|
21
|
21
|
policy,
|
|
22
|
22
|
pycompat,
|
|
23
|
23
|
util,
|
|
24
|
24
|
)
|
|
25
|
25
|
from .utils import stringutil
|
|
26
|
26
|
|
|
27
|
|
rustmod = policy.importrust('filepatterns')
|
|
|
27
|
rustmod = policy.importrust('dirstate')
|
|
28
|
28
|
|
|
29
|
29
|
allpatternkinds = (
|
|
30
|
30
|
b're',
|
|
31
|
31
|
b'glob',
|
|
32
|
32
|
b'path',
|
|
33
|
33
|
b'relglob',
|
|
34
|
34
|
b'relpath',
|
|
35
|
35
|
b'relre',
|
|
36
|
36
|
b'rootglob',
|
|
37
|
37
|
b'listfile',
|
|
38
|
38
|
b'listfile0',
|
|
39
|
39
|
b'set',
|
|
40
|
40
|
b'include',
|
|
41
|
41
|
b'subinclude',
|
|
42
|
42
|
b'rootfilesin',
|
|
43
|
43
|
)
|
|
44
|
44
|
cwdrelativepatternkinds = (b'relpath', b'glob')
|
|
45
|
45
|
|
|
46
|
46
|
propertycache = util.propertycache
|
|
47
|
47
|
|
|
48
|
48
|
|
|
49
|
49
|
def _rematcher(regex):
    '''Compile ``regex`` with util.re and return a callable that matches
    against it.

    The compiled object's ``test_match`` attribute is preferred when it
    exists (slightly faster, provided by facebook's re2 bindings); otherwise
    the regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is not None:
        return fastmatch
    return compiled.match
|
|
58
|
58
|
|
|
59
|
59
|
|
|
60
|
60
|
def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
|
|
61
|
61
|
'''Returns the kindpats list with the 'set' patterns expanded to matchers'''
|
|
62
|
62
|
matchers = []
|
|
63
|
63
|
other = []
|
|
64
|
64
|
|
|
65
|
65
|
for kind, pat, source in kindpats:
|
|
66
|
66
|
if kind == b'set':
|
|
67
|
67
|
if ctx is None:
|
|
68
|
68
|
raise error.ProgrammingError(
|
|
69
|
69
|
b"fileset expression with no context"
|
|
70
|
70
|
)
|
|
71
|
71
|
matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
|
|
72
|
72
|
|
|
73
|
73
|
if listsubrepos:
|
|
74
|
74
|
for subpath in ctx.substate:
|
|
75
|
75
|
sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
|
|
76
|
76
|
pm = prefixdirmatcher(subpath, sm, badfn=badfn)
|
|
77
|
77
|
matchers.append(pm)
|
|
78
|
78
|
|
|
79
|
79
|
continue
|
|
80
|
80
|
other.append((kind, pat, source))
|
|
81
|
81
|
return matchers, other
|
|
82
|
82
|
|
|
83
|
83
|
|
|
84
|
84
|
def _expandsubinclude(kindpats, root):
|
|
85
|
85
|
'''Returns the list of subinclude matcher args and the kindpats without the
|
|
86
|
86
|
subincludes in it.'''
|
|
87
|
87
|
relmatchers = []
|
|
88
|
88
|
other = []
|
|
89
|
89
|
|
|
90
|
90
|
for kind, pat, source in kindpats:
|
|
91
|
91
|
if kind == b'subinclude':
|
|
92
|
92
|
sourceroot = pathutil.dirname(util.normpath(source))
|
|
93
|
93
|
pat = util.pconvert(pat)
|
|
94
|
94
|
path = pathutil.join(sourceroot, pat)
|
|
95
|
95
|
|
|
96
|
96
|
newroot = pathutil.dirname(path)
|
|
97
|
97
|
matcherargs = (newroot, b'', [], [b'include:%s' % path])
|
|
98
|
98
|
|
|
99
|
99
|
prefix = pathutil.canonpath(root, root, newroot)
|
|
100
|
100
|
if prefix:
|
|
101
|
101
|
prefix += b'/'
|
|
102
|
102
|
relmatchers.append((prefix, matcherargs))
|
|
103
|
103
|
else:
|
|
104
|
104
|
other.append((kind, pat, source))
|
|
105
|
105
|
|
|
106
|
106
|
return relmatchers, other
|
|
107
|
107
|
|
|
108
|
108
|
|
|
109
|
109
|
def _kindpatsalwaysmatch(kindpats):
|
|
110
|
110
|
""""Checks whether the kindspats match everything, as e.g.
|
|
111
|
111
|
'relpath:.' does.
|
|
112
|
112
|
"""
|
|
113
|
113
|
for kind, pat, source in kindpats:
|
|
114
|
114
|
if pat != b'' or kind not in [b'relpath', b'glob']:
|
|
115
|
115
|
return False
|
|
116
|
116
|
return True
|
|
117
|
117
|
|
|
118
|
118
|
|
|
119
|
119
|
def _buildkindpatsmatcher(
    matchercls, root, cwd, kindpats, ctx=None, listsubrepos=False, badfn=None,
):
    '''Build a single matcher for ``kindpats``.

    'set' patterns are expanded into their own matchers via _expandsets();
    the remaining patterns (if any) are handed to ``matchercls``. The result
    is a nevermatcher when nothing was produced, the sole matcher when there
    is exactly one, and a unionmatcher otherwise.
    '''
    filesetmatchers, plainpats = _expandsets(
        cwd, kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn,
    )

    combined = []
    if plainpats:
        combined.append(matchercls(root, plainpats, badfn=badfn))
    combined.extend(filesetmatchers)

    count = len(combined)
    if count == 0:
        return nevermatcher(badfn=badfn)
    if count == 1:
        return combined[0]
    return unionmatcher(combined)
|
|
136
|
136
|
|
|
137
|
137
|
|
|
138
|
138
|
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if not icasefs:
        normalize = _donormalize
    else:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind in (b're', b'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                original = pat
                pat = foldcase(original)

                # Preserve the original to handle a case only rename.
                if original != pat and original in dirstate:
                    folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    def buildfilter(pats):
        # include/exclude patterns always default to 'glob' and are built
        # without a bad() callback.
        return _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            normalize(pats, b'glob', root, cwd, auditor, warn),
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )

    m = None
    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if not _kindpatsalwaysmatch(kindpats):
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    if m is None:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        m = intersectmatchers(m, buildfilter(include))
    if exclude:
        m = differencematcher(m, buildfilter(exclude))
    return m
|
|
300
|
300
|
|
|
301
|
301
|
|
|
302
|
302
|
def exact(files, badfn=None):
    '''Return an exactmatcher for ``files``, using ``badfn`` as its bad()
    callback when given.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
|
|
304
|
304
|
|
|
305
|
305
|
|
|
306
|
306
|
def always(badfn=None):
    '''Return an alwaysmatcher, using ``badfn`` as its bad() callback when
    given.'''
    matcher = alwaysmatcher(badfn)
    return matcher
|
|
308
|
308
|
|
|
309
|
309
|
|
|
310
|
310
|
def never(badfn=None):
    '''Return a nevermatcher, using ``badfn`` as its bad() callback when
    given.'''
    matcher = nevermatcher(badfn)
    return matcher
|
|
312
|
312
|
|
|
313
|
313
|
|
|
314
|
314
|
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad attribute is ``badfn``.

    The given matcher itself is left untouched.
    """
    clone = copy.copy(match)
    setattr(clone, 'bad', badfn)
    return clone
|
|
321
|
321
|
|
|
322
|
322
|
|
|
323
|
323
|
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples. ``source`` is b'' for
    patterns given directly, the list file name for patterns read from a
    'listfile'/'listfile0', and the nested source (or the include file
    name) for patterns read via 'include'.

    Raises error.Abort when a list file cannot be read, or when processing
    an include file aborts (the message is prefixed with the include
    pattern). An include file that raises IOError is skipped, with a
    message passed to ``warn`` when one is given.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                # patterns from a list file are attributed to that file
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exception objects are not subscriptable on Python 3, so
                # read the message from .args instead of indexing inst.
                msg = inst.args[0] if inst.args else b''
                raise error.Abort(b'%s: %s' % (pat, msg))
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
|
|
370
|
370
|
|
|
371
|
371
|
|
|
372
|
372
|
class basematcher(object):
    '''Base class for matchers.

    Supplies conservative defaults: matches nothing, reports no explicit
    files, and asks for every directory to be visited. Subclasses override
    the pieces they can answer more precisely.
    '''

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Default match function: matches nothing.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
|
|
486
|
486
|
|
|
487
|
487
|
|
|
488
|
488
|
class alwaysmatcher(basematcher):
    '''Matches everything.

    Every file matches, and both visitdir() and visitchildrenset() answer
    'all' for any directory.
    '''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # every file name matches
        return True

    def visitdir(self, dir):
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return '<alwaysmatcher>'
|
|
508
|
508
|
|
|
509
|
509
|
|
|
510
|
510
|
class nevermatcher(basematcher):
    '''Matches nothing.

    No directory needs visiting: visitdir() answers False and
    visitchildrenset() answers an empty set.
    '''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return '<nevermatcher>'
|
|
535
|
535
|
|
|
536
|
536
|
|
|
537
|
537
|
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` becomes the match function. ``predrepr`` (optional) is what
    __repr__ renders through stringutil.buildrepr; when that yields nothing
    the byte repr of the predicate itself is shown instead.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the 'predicatenmatcher' spelling is kept as is; it
        # is emitted at runtime and existing output may depend on it.
        return b'<predicatenmatcher pred=%s>' % described
|
|
551
|
551
|
|
|
552
|
552
|
|
|
553
|
553
|
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # b'$' anchors every generated pattern at the end of the file name
        pats, matchfn = _buildmatch(kindpats, b'$', root)
        self._pats = pats
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        '''Return b'all', True or False for ``dir`` (see
        basematcher.visitdir).'''
        explicit = self._fileset
        if dir in explicit:
            # an explicitly listed directory is taken wholesale only when
            # the matcher is a pure prefix matcher
            return b'all' if self._prefix else True
        if dir in self._dirs:
            return True
        return any(parent in explicit for parent in pathutil.finddirs(dir))

    def visitchildrenset(self, dir):
        '''Translate the visitdir() answer into visitchildrenset terms.'''
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
|
|
624
|
624
|
|
|
625
|
625
|
|
|
626
|
626
|
# This is basically a reimplementation of pathutil.dirs that stores the
|
|
627
|
627
|
# children instead of just a count of them, plus a small optional optimization
|
|
628
|
628
|
# to avoid some directories we don't need.
|
|
629
|
629
|
class _dirchildren(object):
|
|
630
|
630
|
def __init__(self, paths, onlyinclude=None):
|
|
631
|
631
|
self._dirs = {}
|
|
632
|
632
|
self._onlyinclude = onlyinclude or []
|
|
633
|
633
|
addpath = self.addpath
|
|
634
|
634
|
for f in paths:
|
|
635
|
635
|
addpath(f)
|
|
636
|
636
|
|
|
637
|
637
|
def addpath(self, path):
|
|
638
|
638
|
if path == b'':
|
|
639
|
639
|
return
|
|
640
|
640
|
dirs = self._dirs
|
|
641
|
641
|
findsplitdirs = _dirchildren._findsplitdirs
|
|
642
|
642
|
for d, b in findsplitdirs(path):
|
|
643
|
643
|
if d not in self._onlyinclude:
|
|
644
|
644
|
continue
|
|
645
|
645
|
dirs.setdefault(d, set()).add(b)
|
|
646
|
646
|
|
|
647
|
647
|
@staticmethod
|
|
648
|
648
|
def _findsplitdirs(path):
|
|
649
|
649
|
# yields (dirname, basename) tuples, walking back to the root. This is
|
|
650
|
650
|
# very similar to pathutil.finddirs, except:
|
|
651
|
651
|
# - produces a (dirname, basename) tuple, not just 'dirname'
|
|
652
|
652
|
# Unlike manifest._splittopdir, this does not suffix `dirname` with a
|
|
653
|
653
|
# slash.
|
|
654
|
654
|
oldpos = len(path)
|
|
655
|
655
|
pos = path.rfind(b'/')
|
|
656
|
656
|
while pos != -1:
|
|
657
|
657
|
yield path[:pos], path[pos + 1 : oldpos]
|
|
658
|
658
|
oldpos = pos
|
|
659
|
659
|
pos = path.rfind(b'/', 0, pos)
|
|
660
|
660
|
yield b'', path[:oldpos]
|
|
661
|
661
|
|
|
662
|
662
|
def get(self, path):
|
|
663
|
663
|
return self._dirs.get(path, set())
|
|
664
|
664
|
|
|
665
|
665
|
|
|
666
|
666
|
class includematcher(basematcher):
    '''Matcher built from (kind, pat, source) include patterns.

    Generated patterns end in b'(?:/|$)', i.e. they match either a whole
    name or a leading directory of it.
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        pats, matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        '''Return b'all', True or False for ``dir`` (see
        basematcher.visitdir).'''
        roots = self._roots
        if dir in roots:
            return b'all' if self._prefix else True
        if dir in self._dirs or dir in self._parents:
            return True
        return any(parent in roots for parent in pathutil.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        '''Return b'all', b'this' or a set of children for ``dir`` (see
        basematcher.visitchildrenset).'''
        roots = self._roots
        isroot = dir in roots
        if isroot and self._prefix:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in roots
            or isroot
            or dir in self._dirs
            or any(parent in roots for parent in pathutil.finddirs(dir))
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
|
|
728
|
728
|
|
|
729
|
729
|
|
|
730
|
730
|
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given (no copy); any other iterable is
        # materialized into a list.
        self._files = files if isinstance(files, list) else list(files)

    # matching a file is the same as testing membership in .files()
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        '''Return the immediate children of ``dir`` that lead to a listed
        file, or an empty set when ``dir`` holds none.'''
        files = self._fileset
        if not (files and dir in self._dirs):
            return set()

        pool = files | (self._dirs - {b''})
        if dir != b'':
            prefix = dir + b'/'
            skip = len(prefix)
            pool = {c[skip:] for c in pool if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in pool if b'/' not in c}
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
|
|
793
|
793
|
|
|
794
|
794
|
|
|
795
|
795
|
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        """Match what ``m1`` matches minus what ``m2`` matches.

        ``bad`` and ``traversedir`` are taken from ``m1``.
        """
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        """Return a true value when ``m1`` matches ``f`` and ``m2`` does not."""
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        """Files of ``m1``, filtered through this matcher only when exact."""
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Decide whether ``dir`` has to be visited.

        Returns False when ``m2`` covers the directory entirely, ``m1``'s own
        answer (possibly b'all') when ``m2`` does not visit it at all, and a
        plain bool of ``m1``'s answer otherwise.
        """
        # Ask m2 once; the answer is needed for both checks below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == b'all':
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return the children of ``dir`` to visit: b'all', b'this' or a set."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        """The difference is exact whenever ``m1`` is."""
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
|
|
862
|
862
|
|
|
863
|
863
|
|
|
864
|
864
|
def intersectmatchers(m1, m2):
    '''Return a matcher that matches only what both ``m1`` and ``m2`` match.

    If either argument is None the other one is returned unchanged.  If one
    of them always matches, a shallow copy of the other is returned; when
    that "always" matcher is ``m1`` its ``bad`` and ``traversedir`` are
    carried over to the copy.  Otherwise an ``intersectionmatcher`` is built.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined
    if m2.always():
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
|
|
882
|
882
|
|
|
883
|
883
|
|
|
884
|
884
|
class intersectionmatcher(basematcher):
    """Matcher that matches a file only when both wrapped matchers do.

    ``bad`` and ``traversedir`` are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        """Explicit files of the intersection."""
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not always
            # files. For example, if intersecting a matcher "-I glob:foo.txt"
            # with matcher of "path:dir2", we don't want to remove "dir2" from
            # the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: list its files and filter them through
        # the other side.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        """Return a true value when both matchers match ``f``."""
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Combine the visitdir() answers of both matchers for ``dir``."""
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of both matchers."""
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # b'all' on one side defers entirely to the other side.
        if first == b'all':
            return second
        if second == b'all':
            return first

        if first == b'this' or second == b'this':
            return b'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
|
|
943
|
943
|
|
|
944
|
944
|
|
|
945
|
945
|
class subdirmatcher(basematcher):
    """Present an existing matcher as seen from inside a subdirectory.

    Paths handed to this matcher are relative to ``path``; they get
    ``path + b"/"`` prepended before being passed to the wrapped matcher,
    and the wrapped matcher's files below ``path`` are exposed with that
    prefix removed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path`` and
        # strip the "path/" prefix from them.
        below = path + b"/"
        self._files = [
            f[len(path) + 1 :] for f in matcher._files if f.startswith(below)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        """Forward to the wrapped matcher's bad() with ``path`` prepended."""
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        """Ask the wrapped matcher about ``dir`` below ``path``; b'' maps to
        ``path`` itself."""
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(full)

    def visitchildrenset(self, dir):
        """Same path mapping as visitdir(), for visitchildrenset()."""
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(full)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
|
|
1025
|
1025
|
|
|
1026
|
1026
|
|
|
1027
|
1027
|
class prefixdirmatcher(basematcher):
    """Present an existing matcher as seen from a parent directory.

    Only paths starting with ``path + b'/'`` can match; that prefix is
    removed before the wrapped matcher is consulted, and it is added to the
    wrapped matcher's files.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Wrap ``matcher`` below ``path``.

        Raises error.ProgrammingError when ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        """The wrapped matcher's files with the path prefix prepended."""
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only if it lies below the prefix and the wrapped
        matcher accepts the remainder."""
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        """Set built from ``pathutil.finddirs()`` over the prefix path."""
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        """Delegate for ``path`` and anything below it; for other
        directories answer whether they are in ``_pathdirs``."""
        if dir == self._path:
            return self._matcher.visitdir(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        """Delegate for ``path`` and anything below it; directories in
        ``_pathdirs`` yield b'this', everything else an empty set."""
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix) :])
        return b'this' if dir in self._pathdirs else set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
|
|
1108
|
1108
|
|
|
1109
|
1109
|
|
|
1110
|
1110
|
class unionmatcher(basematcher):
    """Matcher that matches a file when any of the given matchers does.

    ``traversedir`` is copied from the first matcher in the list.
    """

    def __init__(self, matchers):
        """``matchers`` must be a non-empty sequence; its first element
        supplies ``traversedir``."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as one matcher accepts ``f``, else False."""
        return any(match(f) for match in self._matchers)

    def visitdir(self, dir):
        """Return b'all' if any matcher says so, otherwise the ``|`` of all
        individual answers."""
        result = False
        for m in self._matchers:
            visit = m.visitdir(dir)
            if visit == b'all':
                return visit
            result |= visit
        return result

    def visitchildrenset(self, dir):
        """Merge the visitchildrenset() answers of all matchers.

        b'all' from any matcher wins, then b'this', otherwise the union of
        the returned sets.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            visit = m.visitchildrenset(dir)
            if not visit:
                continue
            if visit == b'all':
                return visit
            if sawthis or visit == b'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(visit, set)
            children.update(visit)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
|
|
1160
|
1160
|
|
|
1161
|
1161
|
|
|
1162
|
1162
|
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern, or ``default``.

    ``default`` is returned when the pattern has no prefix naming a known
    kind.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, _rest = _patsplit(pattern, default)
    return kind
|
|
1176
|
1176
|
|
|
1177
|
1177
|
|
|
1178
|
1178
|
def _patsplit(pattern, default):
    """Split ``pattern`` into ``(kind, pat)``.

    The text before the first b':' is treated as the kind only when it is
    listed in ``allpatternkinds``; otherwise ``(default, pattern)`` is
    returned with the pattern left untouched."""
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
|
|
1186
|
1186
|
|
|
1187
|
1187
|
|
|
1188
|
1188
|
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    ``pat`` is a bytes glob; the return value is the bytes source of an
    equivalent regular expression.  Handled syntax: ``*``, ``**``, ``**/``,
    ``?``, ``[...]`` character classes (with ``!`` negation), ``{a,b}``
    alternation and backslash escapes.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    i, n = 0, len(pat)
    # Collect the output fragments and join them once at the end; growing a
    # bytes object with ``+=`` copies the accumulated result on every step.
    out = []
    group = 0  # number of currently open '{' alternations
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread one-byte slice, or False at the end of the pattern.
        return i < n and pat[i : i + 1]

    while i < n:
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            out.append(escape(c, c))
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    # '**/' also matches zero directories
                    i += 1
                    out.append(b'(?:.*/)?')
                else:
                    out.append(b'.*')
            else:
                out.append(b'[^/]*')
        elif c == b'?':
            out.append(b'.')
        elif c == b'[':
            # Look for the closing ']'; a '!' or ']' right after the opening
            # bracket is part of the class, not its end.
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                out.append(b'\\[')
            else:
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    # glob negation becomes regexp negation
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    # a leading '^' is literal in a glob class
                    stuff = b'\\' + stuff
                out.append(b'[%s]' % stuff)
        elif c == b'{':
            group += 1
            out.append(b'(?:')
        elif c == b'}' and group:
            out.append(b')')
            group -= 1
        elif c == b',' and group:
            out.append(b'|')
        elif c == b'\\':
            p = peek()
            if p:
                i += 1
                out.append(escape(p, p))
            else:
                # trailing backslash: emit it escaped
                out.append(escape(c, c))
        else:
            # '}' or ',' outside of any group
            out.append(escape(c, c))
    return b''.join(out)
|
|
1270
|
1270
|
|
|
1271
|
1271
|
|
|
1272
|
1272
|
def _regex(kind, pat, globsuffix):
    '''Translate one (normalized) pattern of the given kind into the source
    of a regular expression.

    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for a kind that cannot be expressed as a
    regular expression.'''

    if rustmod is not None:
        # NOTE(review): this relies on the module bound to ``rustmod``
        # providing build_single_regex and PatternError -- confirm against
        # the module-level ``policy.importrust(...)`` call.
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        # Pattern is a directory name ('.' standing for the root).
        if pat == b'.':
            dirprefix = b''
        else:
            dirprefix = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'
    if kind == b'relglob':
        body = _globre(pat)
        anyname = b'[^/]*'
        if body.startswith(anyname):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + body[len(anyname) :] + globsuffix
        return b'(?:|.*/)' + body + globsuffix
    if kind == b'relre':
        # An explicitly anchored pattern is used as-is.
        if pat.startswith(b'^'):
            return pat
        return b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
|
|
1315
|
1306
|
|
|
1316
|
1307
|
|
|
1317
|
1308
|
def _buildmatch(kindpats, globsuffix, root):
    '''Build a match function for kindpats.

    Returns a ``(regex, matchfn)`` pair: a descriptive regexp string (or a
    ``rootfilesin: ...`` summary, or b'') and a callable taking a file name.
    globsuffix is appended to the regexp of globs.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Sub-matchers are built on first use and remembered per prefix.
        built = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = built.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    built[prefix] = submatch
                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only rootfilesin patterns: compare the file's directory against
            # the set of requested directories instead of using a regexp.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                parent, slash, _name = f.rpartition(b'/')
                if not slash:
                    parent = b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(fn(f) for fn in matchfuncs)
|
|
1363
|
1354
|
|
|
1364
|
1355
|
|
|
1365
|
1356
|
# Size limit, in bytes, applied by _buildregexmatch(): a single regexp longer
# than this aborts, and regexps are joined into groups kept under this size.
MAX_RE_SIZE = 20000
|
|
1366
|
1357
|
|
|
1367
|
1358
|
|
|
1368
|
1359
|
def _joinregexes(regexps):
|
|
1369
|
1360
|
"""gather multiple regular expressions into a single one"""
|
|
1370
|
1361
|
return b'|'.join(regexps)
|
|
1371
|
1362
|
|
|
1372
|
1363
|
|
|
1373
|
1364
|
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a single pattern is longer than MAX_RE_SIZE or
    when a pattern does not compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Partition the regexps into consecutive chunks whose joined size
        # stays within MAX_RE_SIZE.
        chunks = []
        chunkstart = 0
        chunksize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if chunksize + piecesize > MAX_RE_SIZE:
                chunks.append(_joinregexes(regexps[chunkstart:idx]))
                chunkstart = idx
                chunksize = 0
            # +1 accounts for the b'|' separator
            chunksize += piecesize + 1

        if chunkstart == 0:
            # Everything fits in a single regexp.
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            chunks.append(_joinregexes(regexps[chunkstart:]))
            allmatchers = [_rematcher(c) for c in chunks]

            def func(s):
                return any(m(s) for m in allmatchers)

        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))
|
|
1425
|
1416
|
|
|
1426
|
1417
|
|
|
1427
|
1418
|
def _patternrootsanddirs(kindpats):
|
|
1428
|
1419
|
'''Returns roots and directories corresponding to each pattern.
|
|
1429
|
1420
|
|
|
1430
|
1421
|
This calculates the roots and directories exactly matching the patterns and
|
|
1431
|
1422
|
returns a tuple of (roots, dirs) for each. It does not return other
|
|
1432
|
1423
|
directories which may also need to be considered, like the parent
|
|
1433
|
1424
|
directories.
|
|
1434
|
1425
|
'''
|
|
1435
|
1426
|
r = []
|
|
1436
|
1427
|
d = []
|
|
1437
|
1428
|
for kind, pat, source in kindpats:
|
|
1438
|
1429
|
if kind in (b'glob', b'rootglob'): # find the non-glob prefix
|
|
1439
|
1430
|
root = []
|
|
1440
|
1431
|
for p in pat.split(b'/'):
|
|
1441
|
1432
|
if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
|
|
1442
|
1433
|
break
|
|
1443
|
1434
|
root.append(p)
|
|
1444
|
1435
|
r.append(b'/'.join(root))
|
|
1445
|
1436
|
elif kind in (b'relpath', b'path'):
|
|
1446
|
1437
|
if pat == b'.':
|
|
1447
|
1438
|
pat = b''
|
|
1448
|
1439
|
r.append(pat)
|
|
1449
|
1440
|
elif kind in (b'rootfilesin',):
|
|
1450
|
1441
|
if pat == b'.':
|
|
1451
|
1442
|
pat = b''
|
|
1452
|
1443
|
d.append(pat)
|
|
1453
|
1444
|
else: # relglob, re, relre
|
|
1454
|
1445
|
r.append(b'')
|
|
1455
|
1446
|
return r, d
|
|
1456
|
1447
|
|
|
1457
|
1448
|
|
|
1458
|
1449
|
def _roots(kindpats):
    '''Return the root directories to match recursively for the given
    patterns (the first element of _patternrootsanddirs()).'''
    return _patternrootsanddirs(kindpats)[0]
|
|
1462
|
1453
|
|
|
1463
|
1454
|
|
|
1464
|
1455
|
def _rootsdirsandparents(kindpats):
    '''Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents); the first two are lists and
    the last one is a set.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(dirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
|
|
1506
|
1497
|
|
|
1507
|
1498
|
|
|
1508
|
1499
|
def _explicitfiles(kindpats):
    '''Return the names from the patterns that could be explicit files.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # rootfilesin can only name directories, never files, so those patterns
    # are left out before computing the roots.
    candidates = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(candidates)
|
|
1520
|
1511
|
|
|
1521
|
1512
|
|
|
1522
|
1513
|
def _prefix(kindpats):
|
|
1523
|
1514
|
'''Whether all the patterns match a prefix (i.e. recursively)'''
|
|
1524
|
1515
|
for kind, pat, source in kindpats:
|
|
1525
|
1516
|
if kind not in (b'path', b'relpath'):
|
|
1526
|
1517
|
return False
|
|
1527
|
1518
|
return True
|
|
1528
|
1519
|
|
|
1529
|
1520
|
|
|
1530
|
1521
|
# Cache for the comment-stripping regexp used by readpatternfile(). It is
# compiled the first time a line containing b'#' is parsed and then reused.
_commentre = None
|
|
1531
|
1522
|
|
|
1532
|
1523
|
|
|
1533
|
1524
|
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    ``warn`` is either a false value or a callable taking one message; it
    is invoked when a ``syntax:`` line names an unknown syntax, and that
    line is then ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    ``lineno`` is 1-based and ``originalline`` is the line once comments,
    trailing white space and any explicit syntax prefix have been removed.
    '''
    global _commentre

    # Maps a syntax name to the prefix put in front of each pattern.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # Use a context manager so the file is closed even when reading or
    # parsing raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*'
                    )
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A per-line prefix (either the long form such as b'relre:' or
            # the short form such as b're:') overrides the current default
            # syntax for this line only.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
|