Show More
@@ -479,6 +479,7 b' dependencies = [' | |||||
479 | "same-file", |
|
479 | "same-file", | |
480 | "sha-1 0.10.0", |
|
480 | "sha-1 0.10.0", | |
481 | "tempfile", |
|
481 | "tempfile", | |
|
482 | "thread_local", | |||
482 | "twox-hash", |
|
483 | "twox-hash", | |
483 | "zstd", |
|
484 | "zstd", | |
484 | ] |
|
485 | ] | |
@@ -1120,6 +1121,15 b' dependencies = [' | |||||
1120 | ] |
|
1121 | ] | |
1121 |
|
1122 | |||
1122 | [[package]] |
|
1123 | [[package]] | |
|
1124 | name = "thread_local" | |||
|
1125 | version = "1.1.4" | |||
|
1126 | source = "registry+https://github.com/rust-lang/crates.io-index" | |||
|
1127 | checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" | |||
|
1128 | dependencies = [ | |||
|
1129 | "once_cell", | |||
|
1130 | ] | |||
|
1131 | ||||
|
1132 | [[package]] | |||
1123 | name = "time" |
|
1133 | name = "time" | |
1124 | version = "0.1.44" |
|
1134 | version = "0.1.44" | |
1125 | source = "registry+https://github.com/rust-lang/crates.io-index" |
|
1135 | source = "registry+https://github.com/rust-lang/crates.io-index" |
@@ -29,6 +29,7 b' sha-1 = "0.10.0"' | |||||
29 | twox-hash = "1.6.2" |
|
29 | twox-hash = "1.6.2" | |
30 | same-file = "1.0.6" |
|
30 | same-file = "1.0.6" | |
31 | tempfile = "3.1.0" |
|
31 | tempfile = "3.1.0" | |
|
32 | thread_local = "1.1.4" | |||
32 | crossbeam-channel = "0.5.0" |
|
33 | crossbeam-channel = "0.5.0" | |
33 | micro-timer = "0.4.0" |
|
34 | micro-timer = "0.4.0" | |
34 | log = "0.4.8" |
|
35 | log = "0.4.8" |
@@ -573,6 +573,39 b' impl DifferenceMatcher {' | |||||
573 | } |
|
573 | } | |
574 | } |
|
574 | } | |
575 |
|
575 | |||
|
576 | /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded | |||
|
577 | /// contexts. | |||
|
578 | /// | |||
|
579 | /// The `status` algorithm makes heavy use of threads, and calling `is_match` | |||
|
580 | /// from many threads at once is prone to contention, probably within the | |||
|
581 | /// scratch space needed as the regex DFA is built lazily. | |||
|
582 | /// | |||
|
583 | /// We are in the process of raising the issue upstream, but for now | |||
|
584 | /// the workaround used here is to store the `Regex` in a lazily populated | |||
|
585 | /// thread-local variable, sharing the initial read-only compilation, but | |||
|
586 | /// not the lazy DFA scratch space mentioned above. | |||
|
587 | /// | |||
|
588 | /// This reduces the contention observed with 16+ threads, but does not | |||
|
589 | /// completely remove it. Hopefully this can be addressed upstream. | |||
|
590 | struct RegexMatcher { | |||
|
591 | /// Compiled at the start of the status algorithm, used as a base for | |||
|
592 | /// cloning in each thread-local `self.local`, thus sharing the expensive | |||
|
593 | /// first compilation. | |||
|
594 | base: regex::bytes::Regex, | |||
|
595 | /// Thread-local variable that holds the `Regex` that is actually queried | |||
|
596 | /// from each thread. | |||
|
597 | local: thread_local::ThreadLocal<regex::bytes::Regex>, | |||
|
598 | } | |||
|
599 | ||||
|
600 | impl RegexMatcher { | |||
|
601 | /// Returns whether the path matches the stored `Regex`. | |||
|
602 | pub fn is_match(&self, path: &HgPath) -> bool { | |||
|
603 | self.local | |||
|
604 | .get_or(|| self.base.clone()) | |||
|
605 | .is_match(path.as_bytes()) | |||
|
606 | } | |||
|
607 | } | |||
|
608 | ||||
576 | /// Returns a function that matches an `HgPath` against the given regex |
|
609 | /// Returns a function that matches an `HgPath` against the given regex | |
577 | /// pattern. |
|
610 | /// pattern. | |
578 | /// |
|
611 | /// | |
@@ -580,9 +613,7 b' impl DifferenceMatcher {' | |||||
580 | /// underlying engine (the `regex` crate), for instance anything with |
|
613 | /// underlying engine (the `regex` crate), for instance anything with | |
581 | /// back-references. |
|
614 | /// back-references. | |
582 | #[timed] |
|
615 | #[timed] | |
583 | fn re_matcher( |
|
616 | fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> { | |
584 | pattern: &[u8], |
|
|||
585 | ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> { |
|
|||
586 | use std::io::Write; |
|
617 | use std::io::Write; | |
587 |
|
618 | |||
588 | // The `regex` crate adds `.*` to the start and end of expressions if there |
|
619 | // The `regex` crate adds `.*` to the start and end of expressions if there | |
@@ -611,7 +642,10 b' fn re_matcher(' | |||||
611 | .build() |
|
642 | .build() | |
612 | .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; |
|
643 | .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; | |
613 |
|
644 | |||
614 | Ok(move |path: &HgPath| re.is_match(path.as_bytes())) |
|
645 | Ok(RegexMatcher { | |
|
646 | base: re, | |||
|
647 | local: Default::default(), | |||
|
648 | }) | |||
615 | } |
|
649 | } | |
616 |
|
650 | |||
617 | /// Returns the regex pattern and a function that matches an `HgPath` against |
|
651 | /// Returns the regex pattern and a function that matches an `HgPath` against | |
@@ -638,7 +672,7 b" fn build_regex_match<'a, 'b>(" | |||||
638 | let func = if !(regexps.is_empty()) { |
|
672 | let func = if !(regexps.is_empty()) { | |
639 | let matcher = re_matcher(&full_regex)?; |
|
673 | let matcher = re_matcher(&full_regex)?; | |
640 | let func = move |filename: &HgPath| { |
|
674 | let func = move |filename: &HgPath| { | |
641 | exact_set.contains(filename) || matcher(filename) |
|
675 | exact_set.contains(filename) || matcher.is_match(filename) | |
642 | }; |
|
676 | }; | |
643 | Box::new(func) as IgnoreFnType |
|
677 | Box::new(func) as IgnoreFnType | |
644 | } else { |
|
678 | } else { |
General Comments 0
You need to be logged in to leave comments.
Login now