##// END OF EJS Templates
rust: add a utility function to merge ordered fallible iterators...
Arseniy Alekseyev -
r52047:aba622c7 default
parent child Browse files
Show More
@@ -1,532 +1,572 b''
1 // utils module
1 // utils module
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Contains useful functions, traits, structs, etc. for use in core.
8 //! Contains useful functions, traits, structs, etc. for use in core.
9
9
10 use crate::errors::{HgError, IoErrorContext};
10 use crate::errors::{HgError, IoErrorContext};
11 use crate::utils::hg_path::HgPath;
11 use crate::utils::hg_path::HgPath;
12 use im_rc::ordmap::DiffItem;
12 use im_rc::ordmap::DiffItem;
13 use im_rc::ordmap::OrdMap;
13 use im_rc::ordmap::OrdMap;
14 use itertools::EitherOrBoth;
15 use itertools::Itertools;
14 use std::cell::Cell;
16 use std::cell::Cell;
17 use std::cmp::Ordering;
15 use std::fmt;
18 use std::fmt;
16 use std::{io::Write, ops::Deref};
19 use std::{io::Write, ops::Deref};
17
20
18 pub mod debug;
21 pub mod debug;
19 pub mod files;
22 pub mod files;
20 pub mod hg_path;
23 pub mod hg_path;
21 pub mod path_auditor;
24 pub mod path_auditor;
22
25
23 /// Useful until rust/issues/56345 is stable
26 /// Useful until rust/issues/56345 is stable
24 ///
27 ///
25 /// # Examples
28 /// # Examples
26 ///
29 ///
27 /// ```
30 /// ```
28 /// use crate::hg::utils::find_slice_in_slice;
31 /// use crate::hg::utils::find_slice_in_slice;
29 ///
32 ///
30 /// let haystack = b"This is the haystack".to_vec();
33 /// let haystack = b"This is the haystack".to_vec();
31 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
34 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
32 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
35 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
33 /// ```
36 /// ```
34 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
37 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
35 where
38 where
36 for<'a> &'a [T]: PartialEq,
39 for<'a> &'a [T]: PartialEq,
37 {
40 {
38 slice
41 slice
39 .windows(needle.len())
42 .windows(needle.len())
40 .position(|window| window == needle)
43 .position(|window| window == needle)
41 }
44 }
42
45
43 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
46 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
44 ///
47 ///
45 /// # Examples
48 /// # Examples
46 ///
49 ///
47 /// ```
50 /// ```
48 /// use crate::hg::utils::replace_slice;
51 /// use crate::hg::utils::replace_slice;
49 /// let mut line = b"I hate writing tests!".to_vec();
52 /// let mut line = b"I hate writing tests!".to_vec();
50 /// replace_slice(&mut line, b"hate", b"love");
53 /// replace_slice(&mut line, b"hate", b"love");
51 /// assert_eq!(
54 /// assert_eq!(
52 /// line,
55 /// line,
53 /// b"I love writing tests!".to_vec()
56 /// b"I love writing tests!".to_vec()
54 /// );
57 /// );
55 /// ```
58 /// ```
56 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
59 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
57 where
60 where
58 T: Clone + PartialEq,
61 T: Clone + PartialEq,
59 {
62 {
60 if buf.len() < from.len() || from.len() != to.len() {
63 if buf.len() < from.len() || from.len() != to.len() {
61 return;
64 return;
62 }
65 }
63 for i in 0..=buf.len() - from.len() {
66 for i in 0..=buf.len() - from.len() {
64 if buf[i..].starts_with(from) {
67 if buf[i..].starts_with(from) {
65 buf[i..(i + from.len())].clone_from_slice(to);
68 buf[i..(i + from.len())].clone_from_slice(to);
66 }
69 }
67 }
70 }
68 }
71 }
69
72
70 pub trait SliceExt {
73 pub trait SliceExt {
71 fn trim_end(&self) -> &Self;
74 fn trim_end(&self) -> &Self;
72 fn trim_start(&self) -> &Self;
75 fn trim_start(&self) -> &Self;
73 fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
76 fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
74 fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
77 fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
75 fn trim(&self) -> &Self;
78 fn trim(&self) -> &Self;
76 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
79 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
77 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
80 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
78 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
81 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
79 }
82 }
80
83
81 impl SliceExt for [u8] {
84 impl SliceExt for [u8] {
82 fn trim_end(&self) -> &[u8] {
85 fn trim_end(&self) -> &[u8] {
83 self.trim_end_matches(|byte| byte.is_ascii_whitespace())
86 self.trim_end_matches(|byte| byte.is_ascii_whitespace())
84 }
87 }
85
88
86 fn trim_start(&self) -> &[u8] {
89 fn trim_start(&self) -> &[u8] {
87 self.trim_start_matches(|byte| byte.is_ascii_whitespace())
90 self.trim_start_matches(|byte| byte.is_ascii_whitespace())
88 }
91 }
89
92
90 fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
93 fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
91 if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
94 if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
92 &self[..=last]
95 &self[..=last]
93 } else {
96 } else {
94 &[]
97 &[]
95 }
98 }
96 }
99 }
97
100
98 fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
101 fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
99 if let Some(first) = self.iter().position(|&byte| !f(byte)) {
102 if let Some(first) = self.iter().position(|&byte| !f(byte)) {
100 &self[first..]
103 &self[first..]
101 } else {
104 } else {
102 &[]
105 &[]
103 }
106 }
104 }
107 }
105
108
106 /// ```
109 /// ```
107 /// use hg::utils::SliceExt;
110 /// use hg::utils::SliceExt;
108 /// assert_eq!(
111 /// assert_eq!(
109 /// b" to trim ".trim(),
112 /// b" to trim ".trim(),
110 /// b"to trim"
113 /// b"to trim"
111 /// );
114 /// );
112 /// assert_eq!(
115 /// assert_eq!(
113 /// b"to trim ".trim(),
116 /// b"to trim ".trim(),
114 /// b"to trim"
117 /// b"to trim"
115 /// );
118 /// );
116 /// assert_eq!(
119 /// assert_eq!(
117 /// b" to trim".trim(),
120 /// b" to trim".trim(),
118 /// b"to trim"
121 /// b"to trim"
119 /// );
122 /// );
120 /// ```
123 /// ```
121 fn trim(&self) -> &[u8] {
124 fn trim(&self) -> &[u8] {
122 self.trim_start().trim_end()
125 self.trim_start().trim_end()
123 }
126 }
124
127
125 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
128 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
126 if self.starts_with(needle) {
129 if self.starts_with(needle) {
127 Some(&self[needle.len()..])
130 Some(&self[needle.len()..])
128 } else {
131 } else {
129 None
132 None
130 }
133 }
131 }
134 }
132
135
133 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
136 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
134 let mut iter = self.splitn(2, |&byte| byte == separator);
137 let mut iter = self.splitn(2, |&byte| byte == separator);
135 let a = iter.next()?;
138 let a = iter.next()?;
136 let b = iter.next()?;
139 let b = iter.next()?;
137 Some((a, b))
140 Some((a, b))
138 }
141 }
139
142
140 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
143 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
141 find_slice_in_slice(self, separator)
144 find_slice_in_slice(self, separator)
142 .map(|pos| (&self[..pos], &self[pos + separator.len()..]))
145 .map(|pos| (&self[..pos], &self[pos + separator.len()..]))
143 }
146 }
144 }
147 }
145
148
146 pub trait Escaped {
149 pub trait Escaped {
147 /// Return bytes escaped for display to the user
150 /// Return bytes escaped for display to the user
148 fn escaped_bytes(&self) -> Vec<u8>;
151 fn escaped_bytes(&self) -> Vec<u8>;
149 }
152 }
150
153
151 impl Escaped for u8 {
154 impl Escaped for u8 {
152 fn escaped_bytes(&self) -> Vec<u8> {
155 fn escaped_bytes(&self) -> Vec<u8> {
153 let mut acc = vec![];
156 let mut acc = vec![];
154 match self {
157 match self {
155 c @ b'\'' | c @ b'\\' => {
158 c @ b'\'' | c @ b'\\' => {
156 acc.push(b'\\');
159 acc.push(b'\\');
157 acc.push(*c);
160 acc.push(*c);
158 }
161 }
159 b'\t' => {
162 b'\t' => {
160 acc.extend(br"\\t");
163 acc.extend(br"\\t");
161 }
164 }
162 b'\n' => {
165 b'\n' => {
163 acc.extend(br"\\n");
166 acc.extend(br"\\n");
164 }
167 }
165 b'\r' => {
168 b'\r' => {
166 acc.extend(br"\\r");
169 acc.extend(br"\\r");
167 }
170 }
168 c if (*c < b' ' || *c >= 127) => {
171 c if (*c < b' ' || *c >= 127) => {
169 write!(acc, "\\x{:x}", self).unwrap();
172 write!(acc, "\\x{:x}", self).unwrap();
170 }
173 }
171 c => {
174 c => {
172 acc.push(*c);
175 acc.push(*c);
173 }
176 }
174 }
177 }
175 acc
178 acc
176 }
179 }
177 }
180 }
178
181
179 impl<'a, T: Escaped> Escaped for &'a [T] {
182 impl<'a, T: Escaped> Escaped for &'a [T] {
180 fn escaped_bytes(&self) -> Vec<u8> {
183 fn escaped_bytes(&self) -> Vec<u8> {
181 self.iter().flat_map(Escaped::escaped_bytes).collect()
184 self.iter().flat_map(Escaped::escaped_bytes).collect()
182 }
185 }
183 }
186 }
184
187
185 impl<T: Escaped> Escaped for Vec<T> {
188 impl<T: Escaped> Escaped for Vec<T> {
186 fn escaped_bytes(&self) -> Vec<u8> {
189 fn escaped_bytes(&self) -> Vec<u8> {
187 self.deref().escaped_bytes()
190 self.deref().escaped_bytes()
188 }
191 }
189 }
192 }
190
193
191 impl<'a> Escaped for &'a HgPath {
194 impl<'a> Escaped for &'a HgPath {
192 fn escaped_bytes(&self) -> Vec<u8> {
195 fn escaped_bytes(&self) -> Vec<u8> {
193 self.as_bytes().escaped_bytes()
196 self.as_bytes().escaped_bytes()
194 }
197 }
195 }
198 }
196
199
197 #[cfg(unix)]
200 #[cfg(unix)]
198 pub fn shell_quote(value: &[u8]) -> Vec<u8> {
201 pub fn shell_quote(value: &[u8]) -> Vec<u8> {
199 if value.iter().all(|&byte| {
202 if value.iter().all(|&byte| {
200 matches!(
203 matches!(
201 byte,
204 byte,
202 b'a'..=b'z'
205 b'a'..=b'z'
203 | b'A'..=b'Z'
206 | b'A'..=b'Z'
204 | b'0'..=b'9'
207 | b'0'..=b'9'
205 | b'.'
208 | b'.'
206 | b'_'
209 | b'_'
207 | b'/'
210 | b'/'
208 | b'+'
211 | b'+'
209 | b'-'
212 | b'-'
210 )
213 )
211 }) {
214 }) {
212 value.to_owned()
215 value.to_owned()
213 } else {
216 } else {
214 let mut quoted = Vec::with_capacity(value.len() + 2);
217 let mut quoted = Vec::with_capacity(value.len() + 2);
215 quoted.push(b'\'');
218 quoted.push(b'\'');
216 for &byte in value {
219 for &byte in value {
217 if byte == b'\'' {
220 if byte == b'\'' {
218 quoted.push(b'\\');
221 quoted.push(b'\\');
219 }
222 }
220 quoted.push(byte);
223 quoted.push(byte);
221 }
224 }
222 quoted.push(b'\'');
225 quoted.push(b'\'');
223 quoted
226 quoted
224 }
227 }
225 }
228 }
226
229
227 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
230 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
228 std::env::current_dir().map_err(|error| HgError::IoError {
231 std::env::current_dir().map_err(|error| HgError::IoError {
229 error,
232 error,
230 context: IoErrorContext::CurrentDir,
233 context: IoErrorContext::CurrentDir,
231 })
234 })
232 }
235 }
233
236
234 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
237 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
235 std::env::current_exe().map_err(|error| HgError::IoError {
238 std::env::current_exe().map_err(|error| HgError::IoError {
236 error,
239 error,
237 context: IoErrorContext::CurrentExe,
240 context: IoErrorContext::CurrentExe,
238 })
241 })
239 }
242 }
240
243
241 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
244 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
242 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
245 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
243 lazy_static::lazy_static! {
246 lazy_static::lazy_static! {
244 /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
247 /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
245 /// The `x` makes whitespace ignored.
248 /// The `x` makes whitespace ignored.
246 /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
249 /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
247 static ref VAR_RE: regex::bytes::Regex =
250 static ref VAR_RE: regex::bytes::Regex =
248 regex::bytes::Regex::new(r"(?x-u)
251 regex::bytes::Regex::new(r"(?x-u)
249 \$
252 \$
250 (?:
253 (?:
251 (\w+)
254 (\w+)
252 |
255 |
253 \{
256 \{
254 ([^}]*)
257 ([^}]*)
255 \}
258 \}
256 )
259 )
257 ").unwrap();
260 ").unwrap();
258 }
261 }
259 VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
262 VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
260 let var_name = files::get_os_str_from_bytes(
263 let var_name = files::get_os_str_from_bytes(
261 captures
264 captures
262 .get(1)
265 .get(1)
263 .or_else(|| captures.get(2))
266 .or_else(|| captures.get(2))
264 .expect("either side of `|` must participate in match")
267 .expect("either side of `|` must participate in match")
265 .as_bytes(),
268 .as_bytes(),
266 );
269 );
267 std::env::var_os(var_name)
270 std::env::var_os(var_name)
268 .map(files::get_bytes_from_os_str)
271 .map(files::get_bytes_from_os_str)
269 .unwrap_or_else(|| {
272 .unwrap_or_else(|| {
270 // Referencing an environment variable that does not exist.
273 // Referencing an environment variable that does not exist.
271 // Leave the $FOO reference as-is.
274 // Leave the $FOO reference as-is.
272 captures[0].to_owned()
275 captures[0].to_owned()
273 })
276 })
274 })
277 })
275 }
278 }
276
279
277 #[test]
280 #[test]
278 fn test_expand_vars() {
281 fn test_expand_vars() {
279 // Modifying process-global state in a test isn’t great,
282 // Modifying process-global state in a test isn’t great,
280 // but hopefully this won’t collide with anything.
283 // but hopefully this won’t collide with anything.
281 std::env::set_var("TEST_EXPAND_VAR", "1");
284 std::env::set_var("TEST_EXPAND_VAR", "1");
282 assert_eq!(
285 assert_eq!(
283 expand_vars(b"before/$TEST_EXPAND_VAR/after"),
286 expand_vars(b"before/$TEST_EXPAND_VAR/after"),
284 &b"before/1/after"[..]
287 &b"before/1/after"[..]
285 );
288 );
286 assert_eq!(
289 assert_eq!(
287 expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
290 expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
288 &b"before111after"[..]
291 &b"before111after"[..]
289 );
292 );
290 let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
293 let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
291 assert_eq!(expand_vars(s), &s[..]);
294 assert_eq!(expand_vars(s), &s[..]);
292 }
295 }
293
296
294 pub(crate) enum MergeResult<V> {
297 pub(crate) enum MergeResult<V> {
295 Left,
298 Left,
296 Right,
299 Right,
297 New(V),
300 New(V),
298 }
301 }
299
302
300 /// Return the union of the two given maps,
303 /// Return the union of the two given maps,
301 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
304 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
302 /// both.
305 /// both.
303 ///
306 ///
304 /// CC <https://github.com/bodil/im-rs/issues/166>
307 /// CC <https://github.com/bodil/im-rs/issues/166>
305 pub(crate) fn ordmap_union_with_merge<K, V>(
308 pub(crate) fn ordmap_union_with_merge<K, V>(
306 left: OrdMap<K, V>,
309 left: OrdMap<K, V>,
307 right: OrdMap<K, V>,
310 right: OrdMap<K, V>,
308 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
311 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
309 ) -> OrdMap<K, V>
312 ) -> OrdMap<K, V>
310 where
313 where
311 K: Clone + Ord,
314 K: Clone + Ord,
312 V: Clone + PartialEq,
315 V: Clone + PartialEq,
313 {
316 {
314 if left.ptr_eq(&right) {
317 if left.ptr_eq(&right) {
315 // One of the two maps is an unmodified clone of the other
318 // One of the two maps is an unmodified clone of the other
316 left
319 left
317 } else if left.len() / 2 > right.len() {
320 } else if left.len() / 2 > right.len() {
318 // When two maps have different sizes,
321 // When two maps have different sizes,
319 // their size difference is a lower bound on
322 // their size difference is a lower bound on
320 // how many keys of the larger map are not also in the smaller map.
323 // how many keys of the larger map are not also in the smaller map.
321 // This in turn is a lower bound on the number of differences in
324 // This in turn is a lower bound on the number of differences in
322 // `OrdMap::diff` and the "amount of work" that would be done
325 // `OrdMap::diff` and the "amount of work" that would be done
323 // by `ordmap_union_with_merge_by_diff`.
326 // by `ordmap_union_with_merge_by_diff`.
324 //
327 //
325 // Here `left` is more than twice the size of `right`,
328 // Here `left` is more than twice the size of `right`,
326 // so the number of differences is more than the total size of
329 // so the number of differences is more than the total size of
327 // `right`. Therefore an algorithm based on iterating `right`
330 // `right`. Therefore an algorithm based on iterating `right`
328 // is more efficient.
331 // is more efficient.
329 //
332 //
330 // This helps a lot when a tiny (or empty) map is merged
333 // This helps a lot when a tiny (or empty) map is merged
331 // with a large one.
334 // with a large one.
332 ordmap_union_with_merge_by_iter(left, right, merge)
335 ordmap_union_with_merge_by_iter(left, right, merge)
333 } else if left.len() < right.len() / 2 {
336 } else if left.len() < right.len() / 2 {
334 // Same as above but with `left` and `right` swapped
337 // Same as above but with `left` and `right` swapped
335 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
338 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
336 // Also swapped in `merge` arguments:
339 // Also swapped in `merge` arguments:
337 match merge(key, b, a) {
340 match merge(key, b, a) {
338 MergeResult::New(v) => MergeResult::New(v),
341 MergeResult::New(v) => MergeResult::New(v),
339 // … and swap back in `merge` result:
342 // … and swap back in `merge` result:
340 MergeResult::Left => MergeResult::Right,
343 MergeResult::Left => MergeResult::Right,
341 MergeResult::Right => MergeResult::Left,
344 MergeResult::Right => MergeResult::Left,
342 }
345 }
343 })
346 })
344 } else {
347 } else {
345 // For maps of similar size, use the algorithm based on `OrdMap::diff`
348 // For maps of similar size, use the algorithm based on `OrdMap::diff`
346 ordmap_union_with_merge_by_diff(left, right, merge)
349 ordmap_union_with_merge_by_diff(left, right, merge)
347 }
350 }
348 }
351 }
349
352
350 /// Efficient if `right` is much smaller than `left`
353 /// Efficient if `right` is much smaller than `left`
351 fn ordmap_union_with_merge_by_iter<K, V>(
354 fn ordmap_union_with_merge_by_iter<K, V>(
352 mut left: OrdMap<K, V>,
355 mut left: OrdMap<K, V>,
353 right: OrdMap<K, V>,
356 right: OrdMap<K, V>,
354 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
357 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
355 ) -> OrdMap<K, V>
358 ) -> OrdMap<K, V>
356 where
359 where
357 K: Clone + Ord,
360 K: Clone + Ord,
358 V: Clone,
361 V: Clone,
359 {
362 {
360 for (key, right_value) in right {
363 for (key, right_value) in right {
361 match left.get(&key) {
364 match left.get(&key) {
362 None => {
365 None => {
363 left.insert(key, right_value);
366 left.insert(key, right_value);
364 }
367 }
365 Some(left_value) => match merge(&key, left_value, &right_value) {
368 Some(left_value) => match merge(&key, left_value, &right_value) {
366 MergeResult::Left => {}
369 MergeResult::Left => {}
367 MergeResult::Right => {
370 MergeResult::Right => {
368 left.insert(key, right_value);
371 left.insert(key, right_value);
369 }
372 }
370 MergeResult::New(new_value) => {
373 MergeResult::New(new_value) => {
371 left.insert(key, new_value);
374 left.insert(key, new_value);
372 }
375 }
373 },
376 },
374 }
377 }
375 }
378 }
376 left
379 left
377 }
380 }
378
381
379 /// Fallback when both maps are of similar size
382 /// Fallback when both maps are of similar size
380 fn ordmap_union_with_merge_by_diff<K, V>(
383 fn ordmap_union_with_merge_by_diff<K, V>(
381 mut left: OrdMap<K, V>,
384 mut left: OrdMap<K, V>,
382 mut right: OrdMap<K, V>,
385 mut right: OrdMap<K, V>,
383 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
386 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
384 ) -> OrdMap<K, V>
387 ) -> OrdMap<K, V>
385 where
388 where
386 K: Clone + Ord,
389 K: Clone + Ord,
387 V: Clone + PartialEq,
390 V: Clone + PartialEq,
388 {
391 {
389 // (key, value) pairs that would need to be inserted in either map
392 // (key, value) pairs that would need to be inserted in either map
390 // in order to turn it into the union.
393 // in order to turn it into the union.
391 //
394 //
392 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
395 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
393 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
396 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
394 // with `left_updates` only borrowing from `right` and `right_updates` from
397 // with `left_updates` only borrowing from `right` and `right_updates` from
395 // `left`, and with `Cow::Owned` used for `MergeResult::New`.
398 // `left`, and with `Cow::Owned` used for `MergeResult::New`.
396 //
399 //
397 // This would allow moving all `.clone()` calls to after we’ve decided
400 // This would allow moving all `.clone()` calls to after we’ve decided
398 // which of `right_updates` or `left_updates` to use
401 // which of `right_updates` or `left_updates` to use
399 // (value ones becoming `Cow::into_owned`),
402 // (value ones becoming `Cow::into_owned`),
400 // and avoid making clones we don’t end up using.
403 // and avoid making clones we don’t end up using.
401 let mut left_updates = Vec::new();
404 let mut left_updates = Vec::new();
402 let mut right_updates = Vec::new();
405 let mut right_updates = Vec::new();
403
406
404 for difference in left.diff(&right) {
407 for difference in left.diff(&right) {
405 match difference {
408 match difference {
406 DiffItem::Add(key, value) => {
409 DiffItem::Add(key, value) => {
407 left_updates.push((key.clone(), value.clone()))
410 left_updates.push((key.clone(), value.clone()))
408 }
411 }
409 DiffItem::Remove(key, value) => {
412 DiffItem::Remove(key, value) => {
410 right_updates.push((key.clone(), value.clone()))
413 right_updates.push((key.clone(), value.clone()))
411 }
414 }
412 DiffItem::Update {
415 DiffItem::Update {
413 old: (key, left_value),
416 old: (key, left_value),
414 new: (_, right_value),
417 new: (_, right_value),
415 } => match merge(key, left_value, right_value) {
418 } => match merge(key, left_value, right_value) {
416 MergeResult::Left => {
419 MergeResult::Left => {
417 right_updates.push((key.clone(), left_value.clone()))
420 right_updates.push((key.clone(), left_value.clone()))
418 }
421 }
419 MergeResult::Right => {
422 MergeResult::Right => {
420 left_updates.push((key.clone(), right_value.clone()))
423 left_updates.push((key.clone(), right_value.clone()))
421 }
424 }
422 MergeResult::New(new_value) => {
425 MergeResult::New(new_value) => {
423 left_updates.push((key.clone(), new_value.clone()));
426 left_updates.push((key.clone(), new_value.clone()));
424 right_updates.push((key.clone(), new_value))
427 right_updates.push((key.clone(), new_value))
425 }
428 }
426 },
429 },
427 }
430 }
428 }
431 }
429 if left_updates.len() < right_updates.len() {
432 if left_updates.len() < right_updates.len() {
430 for (key, value) in left_updates {
433 for (key, value) in left_updates {
431 left.insert(key, value);
434 left.insert(key, value);
432 }
435 }
433 left
436 left
434 } else {
437 } else {
435 for (key, value) in right_updates {
438 for (key, value) in right_updates {
436 right.insert(key, value);
439 right.insert(key, value);
437 }
440 }
438 right
441 right
439 }
442 }
440 }
443 }
441
444
442 /// Join items of the iterable with the given separator, similar to Python’s
445 /// Join items of the iterable with the given separator, similar to Python’s
443 /// `separator.join(iter)`.
446 /// `separator.join(iter)`.
444 ///
447 ///
445 /// Formatting the return value consumes the iterator.
448 /// Formatting the return value consumes the iterator.
446 /// Formatting it again will produce an empty string.
449 /// Formatting it again will produce an empty string.
447 pub fn join_display(
450 pub fn join_display(
448 iter: impl IntoIterator<Item = impl fmt::Display>,
451 iter: impl IntoIterator<Item = impl fmt::Display>,
449 separator: impl fmt::Display,
452 separator: impl fmt::Display,
450 ) -> impl fmt::Display {
453 ) -> impl fmt::Display {
451 JoinDisplay {
454 JoinDisplay {
452 iter: Cell::new(Some(iter.into_iter())),
455 iter: Cell::new(Some(iter.into_iter())),
453 separator,
456 separator,
454 }
457 }
455 }
458 }
456
459
457 struct JoinDisplay<I, S> {
460 struct JoinDisplay<I, S> {
458 iter: Cell<Option<I>>,
461 iter: Cell<Option<I>>,
459 separator: S,
462 separator: S,
460 }
463 }
461
464
462 impl<I, T, S> fmt::Display for JoinDisplay<I, S>
465 impl<I, T, S> fmt::Display for JoinDisplay<I, S>
463 where
466 where
464 I: Iterator<Item = T>,
467 I: Iterator<Item = T>,
465 T: fmt::Display,
468 T: fmt::Display,
466 S: fmt::Display,
469 S: fmt::Display,
467 {
470 {
468 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
471 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
469 if let Some(mut iter) = self.iter.take() {
472 if let Some(mut iter) = self.iter.take() {
470 if let Some(first) = iter.next() {
473 if let Some(first) = iter.next() {
471 first.fmt(f)?;
474 first.fmt(f)?;
472 }
475 }
473 for value in iter {
476 for value in iter {
474 self.separator.fmt(f)?;
477 self.separator.fmt(f)?;
475 value.fmt(f)?;
478 value.fmt(f)?;
476 }
479 }
477 }
480 }
478 Ok(())
481 Ok(())
479 }
482 }
480 }
483 }
481
484
482 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
485 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
483 ///
486 ///
484 /// The callback is only called for incoming `Ok` values. Errors are passed
487 /// The callback is only called for incoming `Ok` values. Errors are passed
485 /// through as-is. In order to let it use the `?` operator the callback is
488 /// through as-is. In order to let it use the `?` operator the callback is
486 /// expected to return a `Result` of `Option`, instead of an `Option` of
489 /// expected to return a `Result` of `Option`, instead of an `Option` of
487 /// `Result`.
490 /// `Result`.
488 pub fn filter_map_results<'a, I, F, A, B, E>(
491 pub fn filter_map_results<'a, I, F, A, B, E>(
489 iter: I,
492 iter: I,
490 f: F,
493 f: F,
491 ) -> impl Iterator<Item = Result<B, E>> + 'a
494 ) -> impl Iterator<Item = Result<B, E>> + 'a
492 where
495 where
493 I: Iterator<Item = Result<A, E>> + 'a,
496 I: Iterator<Item = Result<A, E>> + 'a,
494 F: Fn(A) -> Result<Option<B>, E> + 'a,
497 F: Fn(A) -> Result<Option<B>, E> + 'a,
495 {
498 {
496 iter.filter_map(move |result| match result {
499 iter.filter_map(move |result| match result {
497 Ok(node) => f(node).transpose(),
500 Ok(node) => f(node).transpose(),
498 Err(e) => Some(Err(e)),
501 Err(e) => Some(Err(e)),
499 })
502 })
500 }
503 }
501
504
505 /// Like `itertools::merge_join_by`, but merges fallible iterators.
506 ///
507 /// The callback is only used for Ok values. Errors are passed through as-is.
508 /// Errors compare less than Ok values, which makes the error handling
509 /// conservative.
510 pub fn merge_join_results_by<'a, I1, I2, F, A, B, E>(
511 iter1: I1,
512 iter2: I2,
513 f: F,
514 ) -> impl Iterator<Item = Result<EitherOrBoth<A, B>, E>> + 'a
515 where
516 I1: Iterator<Item = Result<A, E>> + 'a,
517 I2: Iterator<Item = Result<B, E>> + 'a,
518 F: FnMut(&A, &B) -> Ordering + 'a,
519 {
520 let mut g = f;
521 iter1
522 .merge_join_by(iter2, move |i1, i2| match i1 {
523 Err(_) => Ordering::Less,
524 Ok(i1) => match i2 {
525 Err(_) => Ordering::Greater,
526 Ok(i2) => g(i1, i2),
527 },
528 })
529 .map(|result| match result {
530 EitherOrBoth::Left(Err(e)) => Err(e),
531 EitherOrBoth::Right(Err(e)) => Err(e),
532 EitherOrBoth::Both(Err(e), _) => Err(e),
533 EitherOrBoth::Both(_, Err(e)) => Err(e),
534 EitherOrBoth::Left(Ok(v)) => Ok(EitherOrBoth::Left(v)),
535 EitherOrBoth::Right(Ok(v)) => Ok(EitherOrBoth::Right(v)),
536 EitherOrBoth::Both(Ok(v1), Ok(v2)) => {
537 Ok(EitherOrBoth::Both(v1, v2))
538 }
539 })
540 }
541
502 /// Force the global rayon threadpool to not exceed 16 concurrent threads
542 /// Force the global rayon threadpool to not exceed 16 concurrent threads
503 /// unless the user has specified a value.
543 /// unless the user has specified a value.
504 /// This is a stop-gap measure until we figure out why using more than 16
544 /// This is a stop-gap measure until we figure out why using more than 16
505 /// threads makes `status` slower for each additional thread.
545 /// threads makes `status` slower for each additional thread.
506 ///
546 ///
507 /// TODO find the underlying cause and fix it, then remove this.
547 /// TODO find the underlying cause and fix it, then remove this.
508 ///
548 ///
509 /// # Errors
549 /// # Errors
510 ///
550 ///
511 /// Returns an error if the global threadpool has already been initialized if
551 /// Returns an error if the global threadpool has already been initialized if
512 /// we try to initialize it.
552 /// we try to initialize it.
513 pub fn cap_default_rayon_threads() -> Result<(), rayon::ThreadPoolBuildError> {
553 pub fn cap_default_rayon_threads() -> Result<(), rayon::ThreadPoolBuildError> {
514 const THREAD_CAP: usize = 16;
554 const THREAD_CAP: usize = 16;
515
555
516 if std::env::var("RAYON_NUM_THREADS").is_err() {
556 if std::env::var("RAYON_NUM_THREADS").is_err() {
517 let available_parallelism = std::thread::available_parallelism()
557 let available_parallelism = std::thread::available_parallelism()
518 .map(usize::from)
558 .map(usize::from)
519 .unwrap_or(1);
559 .unwrap_or(1);
520 let new_thread_count = THREAD_CAP.min(available_parallelism);
560 let new_thread_count = THREAD_CAP.min(available_parallelism);
521 let res = rayon::ThreadPoolBuilder::new()
561 let res = rayon::ThreadPoolBuilder::new()
522 .num_threads(new_thread_count)
562 .num_threads(new_thread_count)
523 .build_global();
563 .build_global();
524 if res.is_ok() {
564 if res.is_ok() {
525 log::trace!(
565 log::trace!(
526 "Capped the rayon threadpool to {new_thread_count} threads",
566 "Capped the rayon threadpool to {new_thread_count} threads",
527 );
567 );
528 }
568 }
529 return res;
569 return res;
530 }
570 }
531 Ok(())
571 Ok(())
532 }
572 }
General Comments 0
You need to be logged in to leave comments. Login now