##// END OF EJS Templates
rust-filepatterns: remove bridge code for filepatterns-related functions...
Raphaël Gomès -
r44589:3bd77c64 default
parent child Browse files
Show More
@@ -1,1622 +1,1601 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
27 rustmod = policy.importrust('filepatterns')
27 rustmod = policy.importrust('dirstate')
28 28
# Names of the pattern kinds, i.e. the 'kind' part of a 'kind:pattern' string.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern is canonicalized against the current working directory
# (rather than merely normalized) by _donormalize().
cwdrelativepatternkinds = (b'relpath', b'glob')
45 45
46 46 propertycache = util.propertycache
47 47
48 48
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # compiled pattern has no test_match attribute; use plain match
        return m.match
58 58
59 59
60 60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 62 matchers = []
63 63 other = []
64 64
65 65 for kind, pat, source in kindpats:
66 66 if kind == b'set':
67 67 if ctx is None:
68 68 raise error.ProgrammingError(
69 69 b"fileset expression with no context"
70 70 )
71 71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72 72
73 73 if listsubrepos:
74 74 for subpath in ctx.substate:
75 75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 77 matchers.append(pm)
78 78
79 79 continue
80 80 other.append((kind, pat, source))
81 81 return matchers, other
82 82
83 83
84 84 def _expandsubinclude(kindpats, root):
85 85 '''Returns the list of subinclude matcher args and the kindpats without the
86 86 subincludes in it.'''
87 87 relmatchers = []
88 88 other = []
89 89
90 90 for kind, pat, source in kindpats:
91 91 if kind == b'subinclude':
92 92 sourceroot = pathutil.dirname(util.normpath(source))
93 93 pat = util.pconvert(pat)
94 94 path = pathutil.join(sourceroot, pat)
95 95
96 96 newroot = pathutil.dirname(path)
97 97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 98
99 99 prefix = pathutil.canonpath(root, root, newroot)
100 100 if prefix:
101 101 prefix += b'/'
102 102 relmatchers.append((prefix, matcherargs))
103 103 else:
104 104 other.append((kind, pat, source))
105 105
106 106 return relmatchers, other
107 107
108 108
109 109 def _kindpatsalwaysmatch(kindpats):
110 110 """"Checks whether the kindspats match everything, as e.g.
111 111 'relpath:.' does.
112 112 """
113 113 for kind, pat, source in kindpats:
114 114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 115 return False
116 116 return True
117 117
118 118
def _buildkindpatsmatcher(
    matchercls, root, cwd, kindpats, ctx=None, listsubrepos=False, badfn=None,
):
    '''Build a single matcher out of a list of (kind, pat, source) tuples.

    'set' patterns are expanded to their own matchers via _expandsets(); the
    remaining patterns, if any, are handed to matchercls(root, kindpats,
    badfn=badfn). Returns a nevermatcher when nothing is left, the sole
    matcher when there is exactly one, and a unionmatcher otherwise.
    '''
    matchers = []
    fms, kindpats = _expandsets(
        cwd, kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn,
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
136 136
137 137
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        # NOTE(review): this branch dereferences ctx, so callers passing
        # icasefs=True presumably always supply a ctx -- confirm.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
300 300
301 301
def exact(files, badfn=None):
    '''Return an exactmatcher for the given files.'''
    return exactmatcher(files, badfn=badfn)
304 304
305 305
def always(badfn=None):
    '''Return an alwaysmatcher.'''
    return alwaysmatcher(badfn)
308 308
309 309
def never(badfn=None):
    '''Return a nevermatcher.'''
    return nevermatcher(badfn)
312 312
313 313
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is left untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
321 321
322 322
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. 'source' is the listfile or
    include file the pattern came from, or b'' for patterns given directly.

    Raises error.Abort if a listfile cannot be read or if reading an include
    file aborts; an unreadable include file (IOError) is only reported
    through 'warn', when provided, and then skipped.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message through .args rather than inst[0].
                raise error.Abort(b'%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
370 370
371 371
class basematcher(object):
    '''Base class for matchers.

    As defined here it matches nothing (matchfn() returns False), lists no
    files, and reports every directory as worth visiting; subclasses override
    the pieces they need.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

         visitdir | visitchildrenset
        ----------+-------------------
         False    | set()
         'all'    | 'all'
         True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
486 486
487 487
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory and all of its subdirectories are of interest
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
508 508
509 509
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
535 535
536 536
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    predfn   - callable taking a file name; installed as this instance's
               matchfn
    predrepr - optional value passed to stringutil.buildrepr() to describe
               the predicate in repr(); when that yields nothing the byte
               repr of predfn is used instead
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
            self.matchfn
        )
        # NOTE(review): 'predicatenmatcher' looks like a typo for
        # 'predicatematcher'; kept byte-for-byte because this is runtime
        # output that other code or tests may compare against -- confirm
        # before changing.
        return b'<predicatenmatcher pred=%s>' % s
551 551
552 552
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return b'all'
        # Visit dir if it is listed itself, is a parent of something listed,
        # or lies underneath something listed.
        return (
            dir in self._fileset
            or dir in self._dirs
            or any(
                parentdir in self._fileset
                for parentdir in pathutil.finddirs(dir)
            )
        )

    def visitchildrenset(self, dir):
        # Translate visitdir()'s True/False/'all' into the
        # visitchildrenset() vocabulary.
        ret = self.visitdir(dir)
        if ret is True:
            return b'this'
        elif not ret:
            return set()
        assert ret == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
624 624
625 625
626 626 # This is basically a reimplementation of pathutil.dirs that stores the
627 627 # children instead of just a count of them, plus a small optional optimization
628 628 # to avoid some directories we don't need.
629 629 class _dirchildren(object):
630 630 def __init__(self, paths, onlyinclude=None):
631 631 self._dirs = {}
632 632 self._onlyinclude = onlyinclude or []
633 633 addpath = self.addpath
634 634 for f in paths:
635 635 addpath(f)
636 636
637 637 def addpath(self, path):
638 638 if path == b'':
639 639 return
640 640 dirs = self._dirs
641 641 findsplitdirs = _dirchildren._findsplitdirs
642 642 for d, b in findsplitdirs(path):
643 643 if d not in self._onlyinclude:
644 644 continue
645 645 dirs.setdefault(d, set()).add(b)
646 646
647 647 @staticmethod
648 648 def _findsplitdirs(path):
649 649 # yields (dirname, basename) tuples, walking back to the root. This is
650 650 # very similar to pathutil.finddirs, except:
651 651 # - produces a (dirname, basename) tuple, not just 'dirname'
652 652 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
653 653 # slash.
654 654 oldpos = len(path)
655 655 pos = path.rfind(b'/')
656 656 while pos != -1:
657 657 yield path[:pos], path[pos + 1 : oldpos]
658 658 oldpos = pos
659 659 pos = path.rfind(b'/', 0, pos)
660 660 yield b'', path[:oldpos]
661 661
662 662 def get(self, path):
663 663 return self._dirs.get(path, set())
664 664
665 665
class includematcher(basematcher):
    '''Matcher built from a list of (kind, pat, source) tuples, as used by
    match() for its include and exclude patterns.'''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        return (
            dir in self._roots
            or dir in self._dirs
            or dir in self._parents
            or any(
                parentdir in self._roots for parentdir in pathutil.finddirs(dir)
            )
        )

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents,
        )

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in self._roots
            or dir in self._roots
            or dir in self._dirs
            or any(
                parentdir in self._roots for parentdir in pathutil.finddirs(dir)
            )
        ):
            return b'this'

        if dir in self._parents:
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
728 728
729 729
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as-is (not copied); anything else is turned into
        # a list.
        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)

    # matching a file is simply membership in the file set
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        # '-' binds tighter than '|': only the root entry b'' of _dirs is
        # dropped, the file set is kept whole.
        candidates = self._fileset | (self._dirs - {b''})
        if dir != b'':
            d = dir + b'/'
            candidates = set(c[len(d) :] for c in candidates if c.startswith(d))
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating that it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        ret = {c for c in candidates if b'/' not in c}
        # We really do not expect ret to be empty, since that would imply that
        # there's something in _dirs that didn't have a file in _fileset.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
793 793
794 794
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # ask m2 once and reuse the answer for both checks
        m2visit = self._m2.visitdir(dir)
        if m2visit == b'all':
            return False
        elif not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
862 862
863 863
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    If either argument is None the other one is returned as-is. If either
    matcher always matches, a shallow copy of the other is returned (carrying
    m1's bad and traversedir) instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        m.bad = m1.bad
        m.traversedir = m1.traversedir
        return m
    if m2.always():
        m = copy.copy(m1)
        return m
    return intersectionmatcher(m1, m2)
882 882
883 883
class intersectionmatcher(basematcher):
    '''Matches a file only if both m1 and m2 match it.

    bad and traversedir are taken from m1.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            m1, m2 = self._m1, self._m2
            # make m1 the exact one, so its files() can be filtered by m2
            if not m1.isexact():
                m1, m2 = m2, m1
            return [f for f in m1.files() if m2(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side
        if m1_set == b'all':
            return m2_set
        elif m2_set == b'all':
            return m1_set

        if m1_set == b'this' or m2_set == b'this':
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
943 943
944 944
class subdirmatcher(basematcher):
    """Restrict a matcher to a single subdirectory.

    Paths given to this matcher are relative to ``path``; ``path`` is
    prepended before the wrapped matcher is consulted, and the wrapped
    matcher's files are exposed with the ``path`` prefix removed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the parent's files that live below ``path``, made
        # relative to it.
        dirprefix = path + b"/"
        self._files = [
            f[len(dirprefix) :]
            for f in matcher._files
            if f.startswith(dirprefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # b'' denotes the subdirectory itself.
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(full)

    def visitchildrenset(self, dir):
        # b'' denotes the subdirectory itself.
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(full)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1025 1025
1026 1026
class prefixdirmatcher(basematcher):
    """Expose a matcher rooted in a subdirectory from a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            # Strip the prefix and let the wrapped matcher decide.
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        prefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitdir(b'')
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix) :])
        # Outside the prefix: decided by membership in _pathdirs.
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        prefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix) :])
        return b'this' if dir in self._pathdirs else set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
1108 1108
1109 1109
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    ``traversedir`` is taken from the first matcher.

    NOTE(review): ``bad`` is not copied from the first matcher by this
    constructor; confirm whether that is intended.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        for candidate in self._matchers:
            if candidate(f):
                return True
        return False

    def visitdir(self, dir):
        result = False
        for m in self._matchers:
            verdict = m.visitdir(dir)
            if verdict == b'all':
                # One matcher wanting everything settles the question.
                return verdict
            result |= verdict
        return result

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            verdict = m.visitchildrenset(dir)
            if not verdict:
                continue
            if verdict == b'all':
                return verdict
            if sawthis or verdict == b'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(verdict, set)
            children.update(verdict)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1160 1160
1161 1161
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern, or ``default`` when the
    pattern carries no known kind prefix.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
1176 1176
1177 1177
def _patsplit(pattern, default):
    """Split a pattern into ``(kind, pat)``.

    The text before the first ``:`` is used as the kind only when it is one
    of ``allpatternkinds``; otherwise ``(default, pattern)`` is returned with
    the pattern left untouched."""
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1186 1186
1187 1187
def _globre(pat):
    r'''Translate an extended glob (bytes) into a regexp string (bytes).

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    out = b''
    depth = 0  # number of currently open '{' groups
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread byte, or False once the pattern is exhausted.
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch not in b'*?[{},\\':
            # Not a glob metacharacter: emit it regex-escaped.
            out += escape(ch, ch)
        elif ch == b'*':
            if peek() != b'*':
                out += b'[^/]*'
            else:
                pos += 1
                if peek() == b'/':
                    pos += 1
                    out += b'(?:.*/)?'
                else:
                    out += b'.*'
        elif ch == b'?':
            out += b'.'
        elif ch == b'[':
            # Look for the closing bracket; a leading '!' or ']' is part of
            # the class body rather than its terminator.
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # Unterminated class: treat '[' as a literal.
                out += b'\\['
            else:
                body = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                lead = body[0:1]
                if lead == b'!':
                    body = b'^' + body[1:]
                elif lead == b'^':
                    body = b'\\' + body
                out += b'[' + body + b']'
        elif ch == b'{':
            depth += 1
            out += b'(?:'
        elif ch == b'}' and depth:
            out += b')'
            depth -= 1
        elif ch == b',' and depth:
            out += b'|'
        elif ch == b'\\':
            nxt = peek()
            if nxt:
                pos += 1
                out += escape(nxt, nxt)
            else:
                out += escape(ch, ch)
        else:
            # '}' or ',' outside of any group: plain characters.
            out += escape(ch, ch)
    return out
1270 1270
1271 1271
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a
    regular expression.

    ``kind`` is the pattern kind (bytes), ``pat`` the pattern text (bytes).
    ``globsuffix`` is appended to the regexp of globs ('glob', 'rootglob'
    and 'relglob' kinds).

    Returns the regular expression as bytes. Raises error.ProgrammingError
    when ``kind`` is not one of the kinds handled below.'''
    # An empty glob/relpath pattern translates to the empty regexp.
    if not pat and kind in (b'glob', b'relpath'):
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        # Match the path itself or anything below it.
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        if pat == b'.':
            escaped = b''
        else:
            # Pattern is a directory name.
            escaped = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return escaped + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        if globre.startswith(b'[^/]*'):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(b'[^/]*') :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        # An explicitly anchored regexp is used as is.
        if pat.startswith(b'^'):
            return pat
        return b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1315 1306
1316 1307
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.

    'subinclude' patterns are split off first and get their own match
    function. If every remaining pattern is 'rootfilesin', a set lookup on
    the file's directory is used instead of a regexp.
    globsuffix is appended to the regexp of globs.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Sub-matchers are built on first use and cached per prefix.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                # Directory part of f, or b'.' for a file at the root.
                slash = f.rfind(b'/')
                dir = f[:slash] if slash >= 0 else b'.'
                return dir in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
            matchfuncs.append(mf)
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1363 1354
1364 1355
1365 1356 MAX_RE_SIZE = 20000
1366 1357
1367 1358
1368 1359 def _joinregexes(regexps):
1369 1360 """gather multiple regular expressions into a single one"""
1370 1361 return b'|'.join(regexps)
1371 1362
1372 1363
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The per-pattern regexps are compiled as one alternation when they fit
    within MAX_RE_SIZE, and as several separately compiled groups otherwise.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        allgroups = []
        startidx = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # Close the current group and start a new one at this piece.
                allgroups.append(_joinregexes(regexps[startidx:idx]))
                startidx = idx
                groupsize = 0
            # +1 accounts for the '|' separator.
            groupsize += piecesize + 1

        if startidx == 0:
            # Everything fits in a single regexp.
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            allgroups.append(_joinregexes(regexps[startidx:]))
            allmatchers = [_rematcher(g) for g in allgroups]

            def func(s):
                return any(m(s) for m in allmatchers)

        return fullregexp, func
    except re.error:
        # Recompile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))
1425 1416
1426 1417
1427 1418 def _patternrootsanddirs(kindpats):
1428 1419 '''Returns roots and directories corresponding to each pattern.
1429 1420
1430 1421 This calculates the roots and directories exactly matching the patterns and
1431 1422 returns a tuple of (roots, dirs) for each. It does not return other
1432 1423 directories which may also need to be considered, like the parent
1433 1424 directories.
1434 1425 '''
1435 1426 r = []
1436 1427 d = []
1437 1428 for kind, pat, source in kindpats:
1438 1429 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1439 1430 root = []
1440 1431 for p in pat.split(b'/'):
1441 1432 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1442 1433 break
1443 1434 root.append(p)
1444 1435 r.append(b'/'.join(root))
1445 1436 elif kind in (b'relpath', b'path'):
1446 1437 if pat == b'.':
1447 1438 pat = b''
1448 1439 r.append(pat)
1449 1440 elif kind in (b'rootfilesin',):
1450 1441 if pat == b'.':
1451 1442 pat = b''
1452 1443 d.append(pat)
1453 1444 else: # relglob, re, relre
1454 1445 r.append(b'')
1455 1446 return r, d
1456 1447
1457 1448
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the first element of the (roots, dirs) pair is of interest here.
    return _patternrootsanddirs(kindpats)[0]
1462 1453
1463 1454
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(dirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1506 1497
1507 1498
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' patterns can only name directories, never files, so they
    # are left out before computing the roots.
    filable = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(filable)
1520 1511
1521 1512
1522 1513 def _prefix(kindpats):
1523 1514 '''Whether all the patterns match a prefix (i.e. recursively)'''
1524 1515 for kind, pat, source in kindpats:
1525 1516 if kind not in (b'path', b'relpath'):
1526 1517 return False
1527 1518 return True
1528 1519
1529 1520
1530 1521 _commentre = None
1531 1522
1532 1523
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    ``warn``, when truthy, is called with a message for every ``syntax:``
    line naming an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    '''
    global _commentre

    # Maps each accepted syntax name to the prefix stored on the pattern.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even when parsing raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    # Compiled lazily and cached at module level.
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A per-line 'kind:' prefix overrides the current default syntax.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
@@ -1,72 +1,44 b''
1 1 // exceptions.rs
2 2 //
3 3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for Rust errors
9 9 //!
10 10 //! [`GraphError`] exposes `hg::GraphError` as a subclass of `ValueError`
11 11 //! but some variants of `hg::GraphError` can be converted directly to other
12 12 //! existing Python exceptions if appropriate.
13 13 //!
14 14 //! [`GraphError`]: struct.GraphError.html
15 15 use cpython::{
16 exc::{IOError, RuntimeError, ValueError},
16 exc::{RuntimeError, ValueError},
17 17 py_exception, PyErr, Python,
18 18 };
19 19 use hg;
20 20
21 21 py_exception!(rustext, GraphError, ValueError);
22 22
23 23 impl GraphError {
24 24 pub fn pynew(py: Python, inner: hg::GraphError) -> PyErr {
25 25 match inner {
26 26 hg::GraphError::ParentOutOfRange(r) => {
27 27 GraphError::new(py, ("ParentOutOfRange", r))
28 28 }
29 29 hg::GraphError::WorkingDirectoryUnsupported => {
30 30 match py
31 31 .import("mercurial.error")
32 32 .and_then(|m| m.get(py, "WdirUnsupported"))
33 33 {
34 34 Err(e) => e,
35 35 Ok(cls) => PyErr::from_instance(py, cls),
36 36 }
37 37 }
38 38 }
39 39 }
40 40 }
41 41
42 py_exception!(rustext, PatternError, RuntimeError);
43 py_exception!(rustext, PatternFileError, RuntimeError);
44 42 py_exception!(rustext, HgPathPyError, RuntimeError);
45 43
46 impl PatternError {
47 pub fn pynew(py: Python, inner: hg::PatternError) -> PyErr {
48 match inner {
49 hg::PatternError::UnsupportedSyntax(m) => {
50 PatternError::new(py, ("PatternError", m))
51 }
52 }
53 }
54 }
55
56 impl PatternFileError {
57 pub fn pynew(py: Python, inner: hg::PatternFileError) -> PyErr {
58 match inner {
59 hg::PatternFileError::IO(e) => {
60 let value = (e.raw_os_error().unwrap_or(2), e.to_string());
61 PyErr::new::<IOError, _>(py, value)
62 }
63 hg::PatternFileError::Pattern(e, l) => match e {
64 hg::PatternError::UnsupportedSyntax(m) => {
65 PatternFileError::new(py, ("PatternFileError", m, l))
66 }
67 },
68 }
69 }
70 }
71
72 44 py_exception!(shared_ref, AlreadyBorrowed, RuntimeError);
@@ -1,83 +1,67 b''
1 1 // lib.rs
2 2 //
3 3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Python bindings of `hg-core` objects using the `cpython` crate.
9 9 //! Once compiled, the resulting single shared library object can be placed in
10 10 //! the `mercurial` package directly as `rustext.so` or `rustext.dll`.
11 11 //! It holds several modules, so that from the point of view of Python,
12 12 //! it behaves as the `cext` package.
13 13 //!
14 14 //! Example:
15 15 //!
16 16 //! ```text
17 17 //! >>> from mercurial.rustext import ancestor
18 18 //! >>> ancestor.__doc__
19 19 //! 'Generic DAG ancestor algorithms - Rust implementation'
20 20 //! ```
21 21
22 22 /// This crate uses nested private macros, `extern crate` is still needed in
23 23 /// 2018 edition.
24 24 #[macro_use]
25 25 extern crate cpython;
26 26
27 27 pub mod ancestors;
28 28 mod cindex;
29 29 mod conversion;
30 30 #[macro_use]
31 31 pub mod ref_sharing;
32 32 pub mod dagops;
33 33 pub mod dirstate;
34 34 pub mod discovery;
35 35 pub mod exceptions;
36 pub mod filepatterns;
37 36 pub mod parsers;
38 37 pub mod revlog;
39 38 pub mod utils;
40 39
41 40 py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| {
42 41 m.add(
43 42 py,
44 43 "__doc__",
45 44 "Mercurial core concepts - Rust implementation",
46 45 )?;
47 46
48 47 let dotted_name: String = m.get(py, "__name__")?.extract(py)?;
49 48 m.add(py, "ancestor", ancestors::init_module(py, &dotted_name)?)?;
50 49 m.add(py, "dagop", dagops::init_module(py, &dotted_name)?)?;
51 50 m.add(py, "discovery", discovery::init_module(py, &dotted_name)?)?;
52 51 m.add(py, "dirstate", dirstate::init_module(py, &dotted_name)?)?;
53 52 m.add(py, "revlog", revlog::init_module(py, &dotted_name)?)?;
54 53 m.add(
55 54 py,
56 "filepatterns",
57 filepatterns::init_module(py, &dotted_name)?,
58 )?;
59 m.add(
60 py,
61 55 "parsers",
62 56 parsers::init_parsers_module(py, &dotted_name)?,
63 57 )?;
64 58 m.add(py, "GraphError", py.get_type::<exceptions::GraphError>())?;
65 m.add(
66 py,
67 "PatternFileError",
68 py.get_type::<exceptions::PatternFileError>(),
69 )?;
70 m.add(
71 py,
72 "PatternError",
73 py.get_type::<exceptions::PatternError>(),
74 )?;
75 59 Ok(())
76 60 });
77 61
/// Ignored placeholder test, compiled only when neither the `python27-bin`
/// nor the `python3-bin` feature is enabled.
#[cfg(not(any(feature = "python27-bin", feature = "python3-bin")))]
#[test]
#[ignore]
fn libpython_must_be_linked_to_run_tests() {
    // stub function to tell that some tests wouldn't run
}
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now