##// END OF EJS Templates
rust: create wrapper struct to reduce `regex` contention issues...
Raphaël Gomès -
r50861:04f1dba5 6.3 stable
parent child Browse files
Show More
@@ -479,6 +479,7 b' dependencies = ['
479 479 "same-file",
480 480 "sha-1 0.10.0",
481 481 "tempfile",
482 "thread_local",
482 483 "twox-hash",
483 484 "zstd",
484 485 ]
@@ -1120,6 +1121,15 b' dependencies = ['
1120 1121 ]
1121 1122
1122 1123 [[package]]
1124 name = "thread_local"
1125 version = "1.1.4"
1126 source = "registry+https://github.com/rust-lang/crates.io-index"
1127 checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
1128 dependencies = [
1129 "once_cell",
1130 ]
1131
1132 [[package]]
1123 1133 name = "time"
1124 1134 version = "0.1.44"
1125 1135 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -29,6 +29,7 b' sha-1 = "0.10.0"'
29 29 twox-hash = "1.6.2"
30 30 same-file = "1.0.6"
31 31 tempfile = "3.1.0"
32 thread_local = "1.1.4"
32 33 crossbeam-channel = "0.5.0"
33 34 micro-timer = "0.4.0"
34 35 log = "0.4.8"
@@ -573,6 +573,39 b' impl DifferenceMatcher {'
573 573 }
574 574 }
575 575
576 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
577 /// contexts.
578 ///
579 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
580 /// from many threads at once is prone to contention, probably within the
581 /// scratch space needed as the regex DFA is built lazily.
582 ///
583 /// We are in the process of raising the issue upstream, but for now
584 /// the workaround used here is to store the `Regex` in a lazily populated
585 /// thread-local variable, sharing the initial read-only compilation, but
586 /// not the lazy DFA scratch space mentioned above.
587 ///
588 /// This reduces the contention observed with 16+ threads, but does not
589 /// completely remove it. Hopefully this can be addressed upstream.
590 struct RegexMatcher {
591 /// Compiled at the start of the status algorithm. Each thread clones it
592 /// into its own slot of `self.local`, thus sharing the expensive
593 /// first compilation.
594 base: regex::bytes::Regex,
595 /// Per-thread clone of `base`; this is the `Regex` that `is_match`
596 /// actually queries from each thread.
597 local: thread_local::ThreadLocal<regex::bytes::Regex>,
598 }
599
600 impl RegexMatcher {
601 /// Returns whether `path` matches the calling thread's clone of the stored `Regex`.
602 pub fn is_match(&self, path: &HgPath) -> bool {
603 self.local
604 .get_or(|| self.base.clone()) // clone `base` only if this thread has no `Regex` yet
605 .is_match(path.as_bytes())
606 }
607 }
608
576 609 /// Returns a function that matches an `HgPath` against the given regex
577 610 /// pattern.
578 611 ///
@@ -580,9 +613,7 b' impl DifferenceMatcher {'
580 613 /// underlying engine (the `regex` crate), for instance anything with
581 614 /// back-references.
582 615 #[timed]
583 fn re_matcher(
584 pattern: &[u8],
585 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
616 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
586 617 use std::io::Write;
587 618
588 619 // The `regex` crate adds `.*` to the start and end of expressions if there
@@ -611,7 +642,10 b' fn re_matcher('
611 642 .build()
612 643 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
613 644
614 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
645 Ok(RegexMatcher {
646 base: re,
647 local: Default::default(),
648 })
615 649 }
616 650
617 651 /// Returns the regex pattern and a function that matches an `HgPath` against
@@ -638,7 +672,7 b" fn build_regex_match<'a, 'b>("
638 672 let func = if !(regexps.is_empty()) {
639 673 let matcher = re_matcher(&full_regex)?;
640 674 let func = move |filename: &HgPath| {
641 exact_set.contains(filename) || matcher(filename)
675 exact_set.contains(filename) || matcher.is_match(filename)
642 676 };
643 677 Box::new(func) as IgnoreFnType
644 678 } else {
General Comments 0
You need to be logged in to leave comments. Login now