Show More
@@ -479,6 +479,7 b' dependencies = [' | |||
|
479 | 479 | "same-file", |
|
480 | 480 | "sha-1 0.10.0", |
|
481 | 481 | "tempfile", |
|
482 | "thread_local", | |
|
482 | 483 | "twox-hash", |
|
483 | 484 | "zstd", |
|
484 | 485 | ] |
@@ -1120,6 +1121,15 b' dependencies = [' | |||
|
1120 | 1121 | ] |
|
1121 | 1122 | |
|
1122 | 1123 | [[package]] |
|
1124 | name = "thread_local" | |
|
1125 | version = "1.1.4" | |
|
1126 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
1127 | checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" | |
|
1128 | dependencies = [ | |
|
1129 | "once_cell", | |
|
1130 | ] | |
|
1131 | ||
|
1132 | [[package]] | |
|
1123 | 1133 | name = "time" |
|
1124 | 1134 | version = "0.1.44" |
|
1125 | 1135 | source = "registry+https://github.com/rust-lang/crates.io-index" |
@@ -29,13 +29,14 b' sha-1 = "0.10.0"' | |||
|
29 | 29 | twox-hash = "1.6.2" |
|
30 | 30 | same-file = "1.0.6" |
|
31 | 31 | tempfile = "3.1.0" |
|
32 | thread_local = "1.1.4" | |
|
32 | 33 | crossbeam-channel = "0.5.0" |
|
33 | 34 | micro-timer = "0.4.0" |
|
34 | 35 | log = "0.4.8" |
|
35 | 36 | memmap2 = { version = "0.5.3", features = ["stable_deref_trait"] } |
|
36 | 37 | zstd = "0.5.3" |
|
37 | 38 | format-bytes = "0.3.0" |
|
38 | # once_cell 1.15 uses edition 2021, while the heptapod CI | |
|
39 | # once_cell 1.15 uses edition 2021, while the heptapod CI | |
|
39 | 40 | # uses an old version of Cargo that doesn't support it. |
|
40 | 41 | once_cell = "=1.14.0" |
|
41 | 42 |
@@ -573,6 +573,39 b' impl DifferenceMatcher {' | |||
|
573 | 573 | } |
|
574 | 574 | } |
|
575 | 575 | |
|
576 | /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded | |
|
577 | /// contexts. | |
|
578 | /// | |
|
579 | /// The `status` algorithm makes heavy use of threads, and calling `is_match` | |
|
580 | /// from many threads at once is prone to contention, probably within the | |
|
581 | /// scratch space needed as the regex DFA is built lazily. | |
|
582 | /// | |
|
583 | /// We are in the process of raising the issue upstream, but for now | |
|
584 | /// the workaround used here is to store the `Regex` in a lazily populated | |
|
585 | /// thread-local variable, sharing the initial read-only compilation, but | |
|
586 | /// not the lazy DFA scratch space mentioned above. | |
|
587 | /// | |
|
588 | /// This reduces the contention observed with 16+ threads, but does not | |
|
589 | /// completely remove it. Hopefully this can be addressed upstream. | |
|
590 | struct RegexMatcher { | |
|
591 | /// Compiled at the start of the status algorithm, used as a base for | |
|
592 | /// cloning in each thread-local `self.local`, thus sharing the expensive | |
|
593 | /// first compilation. | |
|
594 | base: regex::bytes::Regex, | |
|
595 | /// Thread-local variable that holds the `Regex` that is actually queried | |
|
596 | /// from each thread. | |
|
597 | local: thread_local::ThreadLocal<regex::bytes::Regex>, | |
|
598 | } | |
|
599 | ||
|
600 | impl RegexMatcher { | |
|
601 | /// Returns whether the path matches the stored `Regex`. | |
|
602 | pub fn is_match(&self, path: &HgPath) -> bool { | |
|
603 | self.local | |
|
604 | .get_or(|| self.base.clone()) | |
|
605 | .is_match(path.as_bytes()) | |
|
606 | } | |
|
607 | } | |
|
608 | ||
|
576 | 609 | /// Returns a function that matches an `HgPath` against the given regex |
|
577 | 610 | /// pattern. |
|
578 | 611 | /// |
@@ -580,9 +613,7 b' impl DifferenceMatcher {' | |||
|
580 | 613 | /// underlying engine (the `regex` crate), for instance anything with |
|
581 | 614 | /// back-references. |
|
582 | 615 | #[timed] |
|
583 | fn re_matcher( | |
|
584 | pattern: &[u8], | |
|
585 | ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> { | |
|
616 | fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> { | |
|
586 | 617 | use std::io::Write; |
|
587 | 618 | |
|
588 | 619 | // The `regex` crate adds `.*` to the start and end of expressions if there |
@@ -611,7 +642,10 b' fn re_matcher(' | |||
|
611 | 642 | .build() |
|
612 | 643 | .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; |
|
613 | 644 | |
|
614 | Ok(move |path: &HgPath| re.is_match(path.as_bytes())) | |
|
645 | Ok(RegexMatcher { | |
|
646 | base: re, | |
|
647 | local: Default::default(), | |
|
648 | }) | |
|
615 | 649 | } |
|
616 | 650 | |
|
617 | 651 | /// Returns the regex pattern and a function that matches an `HgPath` against |
@@ -638,7 +672,7 b" fn build_regex_match<'a, 'b>(" | |||
|
638 | 672 | let func = if !(regexps.is_empty()) { |
|
639 | 673 | let matcher = re_matcher(&full_regex)?; |
|
640 | 674 | let func = move |filename: &HgPath| { |
|
641 | exact_set.contains(filename) || matcher(filename) | |
|
675 | exact_set.contains(filename) || matcher.is_match(filename) | |
|
642 | 676 | }; |
|
643 | 677 | Box::new(func) as IgnoreFnType |
|
644 | 678 | } else { |
General Comments 0
You need to be logged in to leave comments.
Login now