##// END OF EJS Templates
rust-filepatterns: call new Rust implementations from Python...
Raphaël Gomès -
r42516:c7652f74 default
parent child Browse files
Show More
@@ -1,1476 +1,1511 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 try:
28 from . import rustext
29 rustext.__name__ # force actual import (see hgdemandimport)
30 except ImportError:
31 rustext = None
32
# Every pattern-kind prefix listed for this module.
allpatternkinds = (
    're', 'glob', 'path', 'relglob', 'relpath', 'relre', 'rootglob',
    'listfile', 'listfile0', 'set', 'include', 'subinclude', 'rootfilesin',
)
# Kinds whose patterns _donormalize() canonicalizes relative to the cwd.
cwdrelativepatternkinds = ('relpath', 'glob')

# Module-level alias so the classes below can write @propertycache.
propertycache = util.propertycache
34 40
def _rematcher(regex):
    '''Compile ``regex`` through util.re and return a matcher callable.

    The compiled object's ``test_match`` attribute is returned when it
    exists; otherwise its ``match`` attribute is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
44 50
45 51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
46 52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 53 matchers = []
48 54 other = []
49 55
50 56 for kind, pat, source in kindpats:
51 57 if kind == 'set':
52 58 if ctx is None:
53 59 raise error.ProgrammingError("fileset expression with no "
54 60 "context")
55 61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56 62
57 63 if listsubrepos:
58 64 for subpath in ctx.substate:
59 65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
61 67 matchers.append(pm)
62 68
63 69 continue
64 70 other.append((kind, pat, source))
65 71 return matchers, other
66 72
67 73 def _expandsubinclude(kindpats, root):
68 74 '''Returns the list of subinclude matcher args and the kindpats without the
69 75 subincludes in it.'''
70 76 relmatchers = []
71 77 other = []
72 78
73 79 for kind, pat, source in kindpats:
74 80 if kind == 'subinclude':
75 81 sourceroot = pathutil.dirname(util.normpath(source))
76 82 pat = util.pconvert(pat)
77 83 path = pathutil.join(sourceroot, pat)
78 84
79 85 newroot = pathutil.dirname(path)
80 86 matcherargs = (newroot, '', [], ['include:%s' % path])
81 87
82 88 prefix = pathutil.canonpath(root, root, newroot)
83 89 if prefix:
84 90 prefix += '/'
85 91 relmatchers.append((prefix, matcherargs))
86 92 else:
87 93 other.append((kind, pat, source))
88 94
89 95 return relmatchers, other
90 96
91 97 def _kindpatsalwaysmatch(kindpats):
92 98 """"Checks whether the kindspats match everything, as e.g.
93 99 'relpath:.' does.
94 100 """
95 101 for kind, pat, source in kindpats:
96 102 if pat != '' or kind not in ['relpath', 'glob']:
97 103 return False
98 104 return True
99 105
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build one matcher out of ``kindpats``.

    'set' patterns are expanded through _expandsets(); whatever is left is
    handed to ``matchercls(root, kindpats, badfn=badfn)``.  The result is a
    nevermatcher when nothing was produced, the single matcher when there
    is exactly one, and a unionmatcher of all of them otherwise.
    '''
    filesetmatchers, kindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

    matchers = []
    if kindpats:
        matchers.append(matchercls(root, kindpats, badfn=badfn))
    matchers.extend(filesetmatchers)

    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
115 121
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Run the regular normalization first, then fold each
            # non-regex pattern through the dirstate's normalize().
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    original = pat
                    pat = dsnormalize(pat)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded
    else:
        normalize = _donormalize

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    if include:
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, includepats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, excludepats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = differencematcher(m, em)
    return m
241 247
def exact(files, badfn=None):
    '''Return an exactmatcher built from ``files``.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
244 250
def always(badfn=None):
    '''Return an alwaysmatcher, optionally with a custom bad() callback.'''
    matcher = alwaysmatcher(badfn)
    return matcher
247 253
def never(badfn=None):
    '''Return a nevermatcher, optionally with a custom bad() callback.'''
    matcher = nevermatcher(badfn)
    return matcher
250 256
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is
    ``badfn``.

    The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
258 264
259 265 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
260 266 '''Convert 'kind:pat' from the patterns list to tuples with kind and
261 267 normalized and rooted patterns and with listfiles expanded.'''
262 268 kindpats = []
263 269 for kind, pat in [_patsplit(p, default) for p in patterns]:
264 270 if kind in cwdrelativepatternkinds:
265 271 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
266 272 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
267 273 pat = util.normpath(pat)
268 274 elif kind in ('listfile', 'listfile0'):
269 275 try:
270 276 files = util.readfile(pat)
271 277 if kind == 'listfile0':
272 278 files = files.split('\0')
273 279 else:
274 280 files = files.splitlines()
275 281 files = [f for f in files if f]
276 282 except EnvironmentError:
277 283 raise error.Abort(_("unable to read file list (%s)") % pat)
278 284 for k, p, source in _donormalize(files, default, root, cwd,
279 285 auditor, warn):
280 286 kindpats.append((k, p, pat))
281 287 continue
282 288 elif kind == 'include':
283 289 try:
284 290 fullpath = os.path.join(root, util.localpath(pat))
285 291 includepats = readpatternfile(fullpath, warn)
286 292 for k, p, source in _donormalize(includepats, default,
287 293 root, cwd, auditor, warn):
288 294 kindpats.append((k, p, source or pat))
289 295 except error.Abort as inst:
290 296 raise error.Abort('%s: %s' % (pat, inst[0]))
291 297 except IOError as inst:
292 298 if warn:
293 299 warn(_("skipping unreadable pattern file '%s': %s\n") %
294 300 (pat, stringutil.forcebytestr(inst.strerror)))
295 301 continue
296 302 # else: re or relre - which cannot be normalized
297 303 kindpats.append((kind, pat, ''))
298 304 return kindpats
299 305
class basematcher(object):
    '''Base class of the matchers in this module.

    Calling an instance forwards to matchfn().  The defaults implemented
    here match no file, report no explicit files, and ask callers to visit
    every directory.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
418 424
class alwaysmatcher(basematcher):
    '''Matches everything.

    matchfn() is True for any input and both directory-visiting hooks
    answer 'all'.
    '''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
439 445
class nevermatcher(basematcher):
    '''Matches nothing.

    matchfn() is inherited from basematcher (always False); no directory
    is ever visited.
    '''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
465 471
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as this instance's matchfn; ``predrepr``, when
    given, is what __repr__ shows in place of the function's own repr.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            # No usable description: fall back to the predicate's repr.
            described = pycompat.byterepr(self.matchfn)
        return '<predicatenmatcher pred=%s>' % described
479 485
class patternmatcher(basematcher):
    """Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['.', 'foo/a', 'b', '.']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Directories derived from the explicit files, plus the root '.'.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        explicit = self._fileset
        if self._prefix and dir in explicit:
            return 'all'
        if '.' in explicit or dir in explicit or dir in self._dirs:
            return True
        # Finally, visit when any ancestor of dir is an explicit entry.
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        # Translate visitdir()'s False / True / 'all' answer.
        visit = self.visitdir(dir)
        if not visit:
            return set()
        if visit is True:
            return 'this'
        assert visit == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
548 554
549 555 # This is basically a reimplementation of util.dirs that stores the children
550 556 # instead of just a count of them, plus a small optional optimization to avoid
551 557 # some directories we don't need.
552 558 class _dirchildren(object):
553 559 def __init__(self, paths, onlyinclude=None):
554 560 self._dirs = {}
555 561 self._onlyinclude = onlyinclude or []
556 562 addpath = self.addpath
557 563 for f in paths:
558 564 addpath(f)
559 565
560 566 def addpath(self, path):
561 567 if path == '.':
562 568 return
563 569 dirs = self._dirs
564 570 findsplitdirs = _dirchildren._findsplitdirs
565 571 for d, b in findsplitdirs(path):
566 572 if d not in self._onlyinclude:
567 573 continue
568 574 dirs.setdefault(d, set()).add(b)
569 575
570 576 @staticmethod
571 577 def _findsplitdirs(path):
572 578 # yields (dirname, basename) tuples, walking back to the root. This is
573 579 # very similar to util.finddirs, except:
574 580 # - produces a (dirname, basename) tuple, not just 'dirname'
575 581 # - includes root dir
576 582 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
577 583 # slash, and produces '.' for the root instead of ''.
578 584 oldpos = len(path)
579 585 pos = path.rfind('/')
580 586 while pos != -1:
581 587 yield path[:pos], path[pos + 1:oldpos]
582 588 oldpos = pos
583 589 pos = path.rfind('/', 0, pos)
584 590 yield '.', path[:oldpos]
585 591
586 592 def get(self, path):
587 593 return self._dirs.get(path, set())
588 594
class includematcher(basematcher):
    '''Matcher built from (kind, pat, source) include patterns.

    Besides the match function, three directory sets computed by
    _rootsdirsandparents() are kept to answer visitdir() and
    visitchildrenset().
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        # Otherwise visit only when some ancestor is a recursive root.
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        underroot = ('.' in roots or
                     dir in roots or
                     dir in self._dirs or
                     any(parentdir in roots
                         for parentdir in util.finddirs(dir)))
        if underroot:
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
646 652
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as-is; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is the same test as basematcher.exact().
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            prefix = dir + '/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates
                          if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = set(c for c in candidates if '/' not in c)
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
711 717
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once: with nested matchers a repeated call is repeated
        # again at every level below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # Everything below dir is excluded by m2.
            return False
        m1visit = self._m1.visitdir(dir)
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1visit
        return bool(m1visit)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
780 786
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other is returned as-is.  When one of
    the matchers reports always(), a shallow copy of the other is returned
    instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir'):
            setattr(combined, attr, getattr(m1, attr))
        return combined

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
801 807
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    The bad, explicitdir and traversedir attributes are taken from the
    first matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            # Filter the exact side's file list through the other matcher.
            exactside, other = self._m1, self._m2
            if not exactside.isexact():
                exactside, other = other, exactside
            return [f for f in exactside.files() if other(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if not visit1:
            # m1 rules the directory out; m2 is not consulted.
            return False
        visit2 = self._m2.visitdir(dir)
        if visit1 == 'all':
            return visit2
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit2)

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # An 'all' on one side defers entirely to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set & m2_set

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
862 868
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    Paths given to this matcher are relative to the subdirectory; the
    subdirectory path is added or removed as needed when talking to the
    wrapped matcher:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files living below `path`, with the leading
        # "path/" stripped off.
        dirprefix = path + "/"
        striplen = len(path) + 1
        self._files = [f[striplen:] for f in matcher._files
                       if f.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        fullpath = self._path + "/" + f
        self._matcher.bad(fullpath, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the
        # subdirectory from the inputs. Instead, we override matchfn() and
        # visitdir() to call the original matcher with the subdirectory
        # path prepended.
        fullpath = self._path + "/" + f
        return self._matcher.matchfn(fullpath)

    def visitdir(self, dir):
        # '.' designates the subdirectory itself
        target = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        # '.' designates the subdirectory itself
        target = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        if not self._matcher.prefix():
            return self._matcher.prefix()
        return not self._always

    @encoding.strmethod
    def __repr__(self):
        details = (self._path, self._matcher)
        return '<subdirmatcher path=%r, matcher=%r>' % details
938 944
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # Files of the wrapped matcher, re-rooted under the prefix.
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        return False

    @propertycache
    def _pathdirs(self):
        # Directories reported by util.finddirs() for the prefix path,
        # plus the root itself.
        dirs = set(util.finddirs(self._path))
        dirs.add('.')
        return dirs

    def visitdir(self, dir):
        if dir == self._path:
            return self._matcher.visitdir('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix):])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        if dir == self._path:
            return self._matcher.visitchildrenset('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix):])
        if dir not in self._pathdirs:
            return set()
        return 'this'

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % details
1019 1025
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (explicitdir, traversedir) are taken from
    the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # A file matches as soon as any of the sub-matchers accepts it.
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        result = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                # Nothing can be stronger than 'all'.
                return answer
            result |= answer
        return result

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return '<unionmatcher matchers=%r>' % self._matchers
1071 1077
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise `default` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
1086 1092
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    The text before the first ':' is only treated as a kind when it is one
    of `allpatternkinds`; otherwise (default, pattern) is returned."""
    kind, sep, pat = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1095 1101
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    escape = util.stringutil.regexbytesescapemap.get
    special = '*?[{},\\'
    out = []      # regexp fragments, joined at the end
    depth = 0     # number of currently open '{' groups
    pos, end = 0, len(pat)
    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch not in special:
            out.append(escape(ch, ch))
        elif ch == '*':
            if pat[pos:pos + 1] != '*':
                # single star: anything but a path separator
                out.append('[^/]*')
            elif pat[pos + 1:pos + 2] == '/':
                # '**/': any number of leading directories
                pos += 2
                out.append('(?:.*/)?')
            else:
                # '**': anything at all
                pos += 1
                out.append('.*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' belongs
            # to the character class itself
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' literally
                out.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                lead = stuff[0:1]
                if lead == '!':
                    stuff = '^' + stuff[1:]
                elif lead == '^':
                    stuff = '\\' + stuff
                out.append('[%s]' % stuff)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\':
            following = pat[pos:pos + 1]
            if following:
                # backslash escapes the next character
                pos += 1
                out.append(escape(following, following))
            else:
                # trailing backslash: keep it as a literal
                out.append(escape(ch, ch))
        else:
            # '}' or ',' outside of any group
            out.append(escape(ch, ch))
    return ''.join(out)
1176 1182
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs.

    When the Rust extension module is available the conversion is
    delegated to it. A kind that cannot be converted raises
    error.ProgrammingError.'''

    if rustext is not None:
        try:
            return rustext.filepatterns.build_single_regex(
                kind, pat, globsuffix)
        except rustext.filepatterns.PatternError:
            raise error.ProgrammingError(
                'not a regex pattern: %s:%s' % (kind, pat)
            )

    if not pat:
        return ''

    if kind == 're':
        return pat
    elif kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        return util.stringutil.reescape(pat) + '(?:/|$)'
    elif kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root).
        if pat == '.':
            dirpart = ''
        else:
            dirpart = util.stringutil.reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return dirpart + '[^/]+$'
    elif kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    elif kind == 'relre':
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    elif kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix

    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1205 1225
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Subinclude patterns are split off and matched by lazily created
    sub-matchers; the remaining patterns are matched either by directory
    membership (when they are all 'rootfilesin') or by regexp.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # prefix -> matcher, filled in on first use
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                # a file matches when its parent directory is listed
                slash = f.rfind('/')
                parent = f[:slash] if slash >= 0 else '.'
                return parent in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1248 1268
1249 1269 MAX_RE_SIZE = 20000
1250 1270
1251 1271 def _joinregexes(regexps):
1252 1272 """gather multiple regular expressions into a single one"""
1253 1273 return '|'.join(regexps)
1254 1274
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Patterns are joined into one regexp when that fits in MAX_RE_SIZE,
    otherwise they are split into several groups matched in turn.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        pieces = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(pieces)

        groups = []
        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(pieces):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if groupsize + piecesize > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                groups.append(_joinregexes(pieces[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # + 1 accounts for the '|' separator
            groupsize += piecesize + 1

        if groupstart == 0:
            # everything fits in a single regexp
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            groups.append(_joinregexes(pieces[groupstart:]))
            allmatchers = [_rematcher(g) for g in groups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
1306 1326
1307 1327 def _patternrootsanddirs(kindpats):
1308 1328 '''Returns roots and directories corresponding to each pattern.
1309 1329
1310 1330 This calculates the roots and directories exactly matching the patterns and
1311 1331 returns a tuple of (roots, dirs) for each. It does not return other
1312 1332 directories which may also need to be considered, like the parent
1313 1333 directories.
1314 1334 '''
1315 1335 r = []
1316 1336 d = []
1317 1337 for kind, pat, source in kindpats:
1318 1338 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1319 1339 root = []
1320 1340 for p in pat.split('/'):
1321 1341 if '[' in p or '{' in p or '*' in p or '?' in p:
1322 1342 break
1323 1343 root.append(p)
1324 1344 r.append('/'.join(root) or '.')
1325 1345 elif kind in ('relpath', 'path'):
1326 1346 r.append(pat or '.')
1327 1347 elif kind in ('rootfilesin',):
1328 1348 d.append(pat or '.')
1329 1349 else: # relglob, re, relre
1330 1350 r.append('.')
1331 1351 return r, d
1332 1352
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of _patternrootsanddirs(); the exact
    directories are discarded.'''
    rootsanddirs = _patternrootsanddirs(kindpats)
    recursive, unusedexact = rootsanddirs
    return recursive
1337 1357
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Collect the parents as non-recursive/exact directories, since they
    # must be scanned to get to either the roots or the other exact
    # directories.
    parents = list(util.dirs(exactdirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1378 1398
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, so it cannot contribute
    # explicit filenames; every other kind is kept.
    candidates = [kindpat for kindpat in kindpats
                  if kindpat[0] != 'rootfilesin']
    return _roots(candidates)
1391 1411
1392 1412 def _prefix(kindpats):
1393 1413 '''Whether all the patterns match a prefix (i.e. recursively)'''
1394 1414 for kind, pat, source in kindpats:
1395 1415 if kind not in ('path', 'relpath'):
1396 1416 return False
1397 1417 return True
1398 1418
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    warn, when set, is called with a message for every unknown
    "syntax:" declaration found in the file.
    '''
    global _commentre

    if rustext is not None:
        result, warnings = rustext.filepatterns.read_pattern_file(
            filepath,
            bool(warn),
            sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The 'with' block closes the file even when parsing raises, where the
    # previous explicit close() was only reached on success.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # a "syntax:" line changes the default for following lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # an explicit per-line prefix overrides the current default
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
@@ -1,200 +1,205 b''
1 1 # common patterns in tests that can safely be replaced
2 2 from __future__ import absolute_import
3 3
4 4 import os
5 5
# Each entry is a (pattern, replacement) pair; the replacement is either a
# bytes string or a function taking the match object.
substitutions = [
    # list of possible compressions
    (
        br'(zstd,)?zlib,none,bzip2',
        br'$USUAL_COMPRESSIONS$',
    ),
    (
        br'=(zstd,)?zlib',
        br'=$BUNDLE2_COMPRESSIONS$',
    ),
    # capabilities sent through http
    (
        br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
        br'bookmarks%250A'
        br'changegroup%253D01%252C02%250A'
        br'digests%253Dmd5%252Csha1%252Csha512%250A'
        br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
        br'hgtagsfnodes%250A'
        br'listkeys%250A'
        br'phases%253Dheads%250A'
        br'pushkey%250A'
        br'remote-changegroup%253Dhttp%252Chttps%250A'
        br'rev-branch-cache%250A'
        br'stream%253Dv2',
        # (the replacement patterns)
        br'$USUAL_BUNDLE_CAPS$',
    ),
    # same as above, without rev-branch-cache and stream
    (
        br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
        br'bookmarks%250A'
        br'changegroup%253D01%252C02%250A'
        br'digests%253Dmd5%252Csha1%252Csha512%250A'
        br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
        br'hgtagsfnodes%250A'
        br'listkeys%250A'
        br'phases%253Dheads%250A'
        br'pushkey%250A'
        br'remote-changegroup%253Dhttp%252Chttps',
        # (the replacement patterns)
        br'$USUAL_BUNDLE_CAPS_SERVER$',
    ),
    # bundle2 capabilities sent through ssh
    (
        br'bundle2=HG20%0A'
        br'bookmarks%0A'
        br'changegroup%3D01%2C02%0A'
        br'digests%3Dmd5%2Csha1%2Csha512%0A'
        br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
        br'hgtagsfnodes%0A'
        br'listkeys%0A'
        br'phases%3Dheads%0A'
        br'pushkey%0A'
        br'remote-changegroup%3Dhttp%2Chttps%0A'
        br'rev-branch-cache%0A'
        br'stream%3Dv2',
        # (replacement patterns)
        br'$USUAL_BUNDLE2_CAPS$',
    ),
    # bundle2 capabilities advertised by the server
    (
        br'bundle2=HG20%0A'
        br'bookmarks%0A'
        br'changegroup%3D01%2C02%0A'
        br'digests%3Dmd5%2Csha1%2Csha512%0A'
        br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
        br'hgtagsfnodes%0A'
        br'listkeys%0A'
        br'phases%3Dheads%0A'
        br'pushkey%0A'
        br'remote-changegroup%3Dhttp%2Chttps%0A'
        br'rev-branch-cache',
        # (replacement patterns)
        br'$USUAL_BUNDLE2_CAPS_SERVER$',
    ),
    # bundle2 capabilities without the phases entry
    (
        br'bundle2=HG20%0A'
        br'bookmarks%0A'
        br'changegroup%3D01%2C02%0A'
        br'digests%3Dmd5%2Csha1%2Csha512%0A'
        br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
        br'hgtagsfnodes%0A'
        br'listkeys%0A'
        br'pushkey%0A'
        br'remote-changegroup%3Dhttp%2Chttps%0A'
        br'rev-branch-cache%0A'
        br'stream%3Dv2',
        # (replacement patterns)
        br'$USUAL_BUNDLE2_CAPS_NO_PHASES$',
    ),
    # HTTP access log dates
    (
        br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] "(GET|PUT|POST)',
        lambda m: br' - - [$LOGDATE$] "' + m.group(1),
    ),
    # HTTP error log dates
    (
        br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] (HG error:|Exception)',
        lambda m: br' - - [$ERRDATE$] ' + m.group(1),
    ),
    # HTTP header dates- RFC 1123
    (
        br'([Dd]ate): [A-Za-z]{3}, \d\d [A-Za-z]{3} \d{4} \d\d:\d\d:\d\d GMT',
        lambda m: br'%s: $HTTP_DATE$' % m.group(1),
    ),
    # LFS expiration value
    (
        br'"expires_at": "\d{4}-\d\d-\d\dT\d\d:\d\d:\d\dZ"',
        br'"expires_at": "$ISO_8601_DATE_TIME$"',
    ),
    # Windows has an extra '/' in the following lines that get globbed away:
    #   pushing to file:/*/$TESTTMP/r2 (glob)
    #   comparing with file:/*/$TESTTMP/r2 (glob)
    #   sub/maybelarge.dat: largefile 34..9c not available from
    #       file:/*/$TESTTMP/largefiles-repo (glob)
    (
        br'(.*file:/)/?(/\$TESTTMP.*)',
        lambda m: m.group(1) + b'*' + m.group(2) + b' (glob)',
    ),
]
114 114
115 115 # Various platform error strings, keyed on a common replacement string
116 116 _errors = {
117 117 br'$ENOENT$': (
118 # IOError in Python does not have the same error message
119 # than in Rust, and automatic conversion is not possible
120 # because of module member privacy.
121 br'No such file or directory \(os error 2\)',
122
118 123 # strerror()
119 124 br'No such file or directory',
120 125
121 126 # FormatMessage(ERROR_FILE_NOT_FOUND)
122 127 br'The system cannot find the file specified',
123 128 ),
124 129 br'$ENOTDIR$': (
125 130 # strerror()
126 131 br'Not a directory',
127 132
128 133 # FormatMessage(ERROR_PATH_NOT_FOUND)
129 134 br'The system cannot find the path specified',
130 135 ),
131 136 br'$ECONNRESET$': (
132 137 # strerror()
133 138 br'Connection reset by peer',
134 139
135 140 # FormatMessage(WSAECONNRESET)
136 141 br'An existing connection was forcibly closed by the remote host',
137 142 ),
138 143 br'$EADDRINUSE$': (
139 144 # strerror()
140 145 br'Address already in use',
141 146
142 147 # FormatMessage(WSAEADDRINUSE)
143 148 br'Only one usage of each socket address'
144 149 br' \(protocol/network address/port\) is normally permitted',
145 150 ),
146 151 br'$EADDRNOTAVAIL$': (
147 152 # strerror()
148 153 br'Cannot assign requested address',
149 154
150 155 # FormatMessage(WSAEADDRNOTAVAIL)
151 156 )
152 157 }
153 158
# Register every known error message with the placeholder replacing it.
for replace, msgs in _errors.items():
    substitutions += ((msg, replace) for msg in msgs)
156 161
157 162 # Output lines on Windows that can be autocorrected for '\' vs '/' path
158 163 # differences.
159 164 _winpathfixes = [
160 165 # cloning subrepo s\ss from $TESTTMP/t/s/ss
161 166 # cloning subrepo foo\bar from http://localhost:$HGPORT/foo/bar
162 167 br'(?m)^cloning subrepo \S+\\.*',
163 168
164 169 # pulling from $TESTTMP\issue1852a
165 170 br'(?m)^pulling from \$TESTTMP\\.*',
166 171
167 172 # pushing to $TESTTMP\a
168 173 br'(?m)^pushing to \$TESTTMP\\.*',
169 174
170 175 # pushing subrepo s\ss to $TESTTMP/t/s/ss
171 176 br'(?m)^pushing subrepo \S+\\\S+ to.*',
172 177
173 178 # moving d1\d11\a1 to d3/d11/a1
174 179 br'(?m)^moving \S+\\.*',
175 180
176 181 # d1\a: not recording move - dummy does not exist
177 182 br'\S+\\\S+: not recording move .+',
178 183
179 184 # reverting s\a
180 185 br'(?m)^reverting (?!subrepo ).*\\.*',
181 186
182 187 # saved backup bundle to
183 188 # $TESTTMP\test\.hg\strip-backup/443431ffac4f-2fc5398a-backup.hg
184 189 br'(?m)^saved backup bundle to \$TESTTMP.*\.hg',
185 190
186 191 # no changes made to subrepo s\ss since last push to ../tcc/s/ss
187 192 br'(?m)^no changes made to subrepo \S+\\\S+ since.*',
188 193
189 194 # changeset 5:9cc5aa7204f0: stuff/maybelarge.dat references missing
190 195 # $TESTTMP\largefiles-repo-hg\.hg\largefiles\76..38
191 196 br'(?m)^changeset .* references (corrupted|missing) \$TESTTMP\\.*',
192 197
193 198 # stuff/maybelarge.dat: largefile 76..38 not available from
194 199 # file:/*/$TESTTMP\largefiles-repo (glob)
195 200 br'.*: largefile \S+ not available from file:/\*/.+',
196 201 ]
197 202
# On Windows, rewrite the lines matched by _winpathfixes so that they use
# '/' instead of '\'.
if os.name == 'nt':
    substitutions += (
        (pattern, lambda match: match.group().replace(b'\\', b'/'))
        for pattern in _winpathfixes
    )
General Comments 0
You need to be logged in to leave comments. Login now