##// END OF EJS Templates
rust: create wrapper struct to reduce `regex` contention issues...
Raphaël Gomès -
r50476:04f1dba5 6.3 stable
parent child Browse files
Show More
@@ -479,6 +479,7 b' dependencies = ['
479 "same-file",
479 "same-file",
480 "sha-1 0.10.0",
480 "sha-1 0.10.0",
481 "tempfile",
481 "tempfile",
482 "thread_local",
482 "twox-hash",
483 "twox-hash",
483 "zstd",
484 "zstd",
484 ]
485 ]
@@ -1120,6 +1121,15 b' dependencies = ['
1120 ]
1121 ]
1121
1122
1122 [[package]]
1123 [[package]]
1124 name = "thread_local"
1125 version = "1.1.4"
1126 source = "registry+https://github.com/rust-lang/crates.io-index"
1127 checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
1128 dependencies = [
1129 "once_cell",
1130 ]
1131
1132 [[package]]
1123 name = "time"
1133 name = "time"
1124 version = "0.1.44"
1134 version = "0.1.44"
1125 source = "registry+https://github.com/rust-lang/crates.io-index"
1135 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -29,13 +29,14 b' sha-1 = "0.10.0"'
29 twox-hash = "1.6.2"
29 twox-hash = "1.6.2"
30 same-file = "1.0.6"
30 same-file = "1.0.6"
31 tempfile = "3.1.0"
31 tempfile = "3.1.0"
32 thread_local = "1.1.4"
32 crossbeam-channel = "0.5.0"
33 crossbeam-channel = "0.5.0"
33 micro-timer = "0.4.0"
34 micro-timer = "0.4.0"
34 log = "0.4.8"
35 log = "0.4.8"
35 memmap2 = { version = "0.5.3", features = ["stable_deref_trait"] }
36 memmap2 = { version = "0.5.3", features = ["stable_deref_trait"] }
36 zstd = "0.5.3"
37 zstd = "0.5.3"
37 format-bytes = "0.3.0"
38 format-bytes = "0.3.0"
38 # once_cell 1.15 uses edition 2021, while the heptapod CI
39 # once_cell 1.15 uses edition 2021, while the heptapod CI
39 # uses an old version of Cargo that doesn't support it.
40 # uses an old version of Cargo that doesn't support it.
40 once_cell = "=1.14.0"
41 once_cell = "=1.14.0"
41
42
@@ -573,6 +573,39 b' impl DifferenceMatcher {'
573 }
573 }
574 }
574 }
575
575
576 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
577 /// contexts.
578 ///
579 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
580 /// from many threads at once is prone to contention, probably within the
581 /// scratch space needed as the regex DFA is built lazily.
582 ///
583 /// We are in the process of raising the issue upstream, but for now
584 /// the workaround used here is to store the `Regex` in a lazily populated
585 /// thread-local variable, sharing the initial read-only compilation, but
586 /// not the lazy DFA scratch space mentioned above.
587 ///
588 /// This reduces the contention observed with 16+ threads, but does not
589 /// completely remove it. Hopefully this can be addressed upstream.
590 struct RegexMatcher {
591 /// Compiled at the start of the status algorithm, used as a base for
592 /// cloning in each thread-local `self.local`, thus sharing the expensive
593 /// first compilation.
594 base: regex::bytes::Regex,
595 /// Thread-local variable that holds the `Regex` that is actually queried
596 /// from each thread.
597 local: thread_local::ThreadLocal<regex::bytes::Regex>,
598 }
599
600 impl RegexMatcher {
601 /// Returns whether the path matches the stored `Regex`.
602 pub fn is_match(&self, path: &HgPath) -> bool {
603 self.local
604 .get_or(|| self.base.clone())
605 .is_match(path.as_bytes())
606 }
607 }
608
576 /// Returns a function that matches an `HgPath` against the given regex
609 /// Returns a function that matches an `HgPath` against the given regex
577 /// pattern.
610 /// pattern.
578 ///
611 ///
@@ -580,9 +613,7 b' impl DifferenceMatcher {'
580 /// underlying engine (the `regex` crate), for instance anything with
613 /// underlying engine (the `regex` crate), for instance anything with
581 /// back-references.
614 /// back-references.
582 #[timed]
615 #[timed]
583 fn re_matcher(
616 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
584 pattern: &[u8],
585 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
586 use std::io::Write;
617 use std::io::Write;
587
618
588 // The `regex` crate adds `.*` to the start and end of expressions if there
619 // The `regex` crate adds `.*` to the start and end of expressions if there
@@ -611,7 +642,10 b' fn re_matcher('
611 .build()
642 .build()
612 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
643 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
613
644
614 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
645 Ok(RegexMatcher {
646 base: re,
647 local: Default::default(),
648 })
615 }
649 }
616
650
617 /// Returns the regex pattern and a function that matches an `HgPath` against
651 /// Returns the regex pattern and a function that matches an `HgPath` against
@@ -638,7 +672,7 b" fn build_regex_match<'a, 'b>("
638 let func = if !(regexps.is_empty()) {
672 let func = if !(regexps.is_empty()) {
639 let matcher = re_matcher(&full_regex)?;
673 let matcher = re_matcher(&full_regex)?;
640 let func = move |filename: &HgPath| {
674 let func = move |filename: &HgPath| {
641 exact_set.contains(filename) || matcher(filename)
675 exact_set.contains(filename) || matcher.is_match(filename)
642 };
676 };
643 Box::new(func) as IgnoreFnType
677 Box::new(func) as IgnoreFnType
644 } else {
678 } else {
General Comments 0
You need to be logged in to leave comments. Login now