##// END OF EJS Templates
rust: add a utility function to merge ordered fallible iterators...
Arseniy Alekseyev -
r52047:aba622c7 default
parent child Browse files
Show More
@@ -1,532 +1,572 b''
1 1 // utils module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Contains useful functions, traits, structs, etc. for use in core.
9 9
10 10 use crate::errors::{HgError, IoErrorContext};
11 11 use crate::utils::hg_path::HgPath;
12 12 use im_rc::ordmap::DiffItem;
13 13 use im_rc::ordmap::OrdMap;
14 use itertools::EitherOrBoth;
15 use itertools::Itertools;
14 16 use std::cell::Cell;
17 use std::cmp::Ordering;
15 18 use std::fmt;
16 19 use std::{io::Write, ops::Deref};
17 20
18 21 pub mod debug;
19 22 pub mod files;
20 23 pub mod hg_path;
21 24 pub mod path_auditor;
22 25
23 26 /// Useful until rust/issues/56345 is stable
24 27 ///
25 28 /// # Examples
26 29 ///
27 30 /// ```
28 31 /// use crate::hg::utils::find_slice_in_slice;
29 32 ///
30 33 /// let haystack = b"This is the haystack".to_vec();
31 34 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
32 35 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
33 36 /// ```
34 37 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
35 38 where
36 39 for<'a> &'a [T]: PartialEq,
37 40 {
38 41 slice
39 42 .windows(needle.len())
40 43 .position(|window| window == needle)
41 44 }
42 45
43 46 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
44 47 ///
45 48 /// # Examples
46 49 ///
47 50 /// ```
48 51 /// use crate::hg::utils::replace_slice;
49 52 /// let mut line = b"I hate writing tests!".to_vec();
50 53 /// replace_slice(&mut line, b"hate", b"love");
51 54 /// assert_eq!(
52 55 /// line,
53 56 /// b"I love writing tests!".to_vec()
54 57 /// );
55 58 /// ```
56 59 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
57 60 where
58 61 T: Clone + PartialEq,
59 62 {
60 63 if buf.len() < from.len() || from.len() != to.len() {
61 64 return;
62 65 }
63 66 for i in 0..=buf.len() - from.len() {
64 67 if buf[i..].starts_with(from) {
65 68 buf[i..(i + from.len())].clone_from_slice(to);
66 69 }
67 70 }
68 71 }
69 72
70 73 pub trait SliceExt {
71 74 fn trim_end(&self) -> &Self;
72 75 fn trim_start(&self) -> &Self;
73 76 fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
74 77 fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
75 78 fn trim(&self) -> &Self;
76 79 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
77 80 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
78 81 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
79 82 }
80 83
81 84 impl SliceExt for [u8] {
82 85 fn trim_end(&self) -> &[u8] {
83 86 self.trim_end_matches(|byte| byte.is_ascii_whitespace())
84 87 }
85 88
86 89 fn trim_start(&self) -> &[u8] {
87 90 self.trim_start_matches(|byte| byte.is_ascii_whitespace())
88 91 }
89 92
90 93 fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
91 94 if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
92 95 &self[..=last]
93 96 } else {
94 97 &[]
95 98 }
96 99 }
97 100
98 101 fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
99 102 if let Some(first) = self.iter().position(|&byte| !f(byte)) {
100 103 &self[first..]
101 104 } else {
102 105 &[]
103 106 }
104 107 }
105 108
106 109 /// ```
107 110 /// use hg::utils::SliceExt;
108 111 /// assert_eq!(
109 112 /// b" to trim ".trim(),
110 113 /// b"to trim"
111 114 /// );
112 115 /// assert_eq!(
113 116 /// b"to trim ".trim(),
114 117 /// b"to trim"
115 118 /// );
116 119 /// assert_eq!(
117 120 /// b" to trim".trim(),
118 121 /// b"to trim"
119 122 /// );
120 123 /// ```
121 124 fn trim(&self) -> &[u8] {
122 125 self.trim_start().trim_end()
123 126 }
124 127
125 128 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
126 129 if self.starts_with(needle) {
127 130 Some(&self[needle.len()..])
128 131 } else {
129 132 None
130 133 }
131 134 }
132 135
133 136 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
134 137 let mut iter = self.splitn(2, |&byte| byte == separator);
135 138 let a = iter.next()?;
136 139 let b = iter.next()?;
137 140 Some((a, b))
138 141 }
139 142
140 143 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
141 144 find_slice_in_slice(self, separator)
142 145 .map(|pos| (&self[..pos], &self[pos + separator.len()..]))
143 146 }
144 147 }
145 148
146 149 pub trait Escaped {
147 150 /// Return bytes escaped for display to the user
148 151 fn escaped_bytes(&self) -> Vec<u8>;
149 152 }
150 153
151 154 impl Escaped for u8 {
152 155 fn escaped_bytes(&self) -> Vec<u8> {
153 156 let mut acc = vec![];
154 157 match self {
155 158 c @ b'\'' | c @ b'\\' => {
156 159 acc.push(b'\\');
157 160 acc.push(*c);
158 161 }
159 162 b'\t' => {
160 163 acc.extend(br"\\t");
161 164 }
162 165 b'\n' => {
163 166 acc.extend(br"\\n");
164 167 }
165 168 b'\r' => {
166 169 acc.extend(br"\\r");
167 170 }
168 171 c if (*c < b' ' || *c >= 127) => {
169 172 write!(acc, "\\x{:x}", self).unwrap();
170 173 }
171 174 c => {
172 175 acc.push(*c);
173 176 }
174 177 }
175 178 acc
176 179 }
177 180 }
178 181
179 182 impl<'a, T: Escaped> Escaped for &'a [T] {
180 183 fn escaped_bytes(&self) -> Vec<u8> {
181 184 self.iter().flat_map(Escaped::escaped_bytes).collect()
182 185 }
183 186 }
184 187
185 188 impl<T: Escaped> Escaped for Vec<T> {
186 189 fn escaped_bytes(&self) -> Vec<u8> {
187 190 self.deref().escaped_bytes()
188 191 }
189 192 }
190 193
191 194 impl<'a> Escaped for &'a HgPath {
192 195 fn escaped_bytes(&self) -> Vec<u8> {
193 196 self.as_bytes().escaped_bytes()
194 197 }
195 198 }
196 199
197 200 #[cfg(unix)]
198 201 pub fn shell_quote(value: &[u8]) -> Vec<u8> {
199 202 if value.iter().all(|&byte| {
200 203 matches!(
201 204 byte,
202 205 b'a'..=b'z'
203 206 | b'A'..=b'Z'
204 207 | b'0'..=b'9'
205 208 | b'.'
206 209 | b'_'
207 210 | b'/'
208 211 | b'+'
209 212 | b'-'
210 213 )
211 214 }) {
212 215 value.to_owned()
213 216 } else {
214 217 let mut quoted = Vec::with_capacity(value.len() + 2);
215 218 quoted.push(b'\'');
216 219 for &byte in value {
217 220 if byte == b'\'' {
218 221 quoted.push(b'\\');
219 222 }
220 223 quoted.push(byte);
221 224 }
222 225 quoted.push(b'\'');
223 226 quoted
224 227 }
225 228 }
226 229
227 230 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
228 231 std::env::current_dir().map_err(|error| HgError::IoError {
229 232 error,
230 233 context: IoErrorContext::CurrentDir,
231 234 })
232 235 }
233 236
234 237 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
235 238 std::env::current_exe().map_err(|error| HgError::IoError {
236 239 error,
237 240 context: IoErrorContext::CurrentExe,
238 241 })
239 242 }
240 243
241 244 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
242 245 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
243 246 lazy_static::lazy_static! {
244 247 /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
245 248 /// The `x` makes whitespace ignored.
246 249 /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
247 250 static ref VAR_RE: regex::bytes::Regex =
248 251 regex::bytes::Regex::new(r"(?x-u)
249 252 \$
250 253 (?:
251 254 (\w+)
252 255 |
253 256 \{
254 257 ([^}]*)
255 258 \}
256 259 )
257 260 ").unwrap();
258 261 }
259 262 VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
260 263 let var_name = files::get_os_str_from_bytes(
261 264 captures
262 265 .get(1)
263 266 .or_else(|| captures.get(2))
264 267 .expect("either side of `|` must participate in match")
265 268 .as_bytes(),
266 269 );
267 270 std::env::var_os(var_name)
268 271 .map(files::get_bytes_from_os_str)
269 272 .unwrap_or_else(|| {
270 273 // Referencing an environment variable that does not exist.
271 274 // Leave the $FOO reference as-is.
272 275 captures[0].to_owned()
273 276 })
274 277 })
275 278 }
276 279
277 280 #[test]
278 281 fn test_expand_vars() {
279 282 // Modifying process-global state in a test isn’t great,
280 283 // but hopefully this won’t collide with anything.
281 284 std::env::set_var("TEST_EXPAND_VAR", "1");
282 285 assert_eq!(
283 286 expand_vars(b"before/$TEST_EXPAND_VAR/after"),
284 287 &b"before/1/after"[..]
285 288 );
286 289 assert_eq!(
287 290 expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
288 291 &b"before111after"[..]
289 292 );
290 293 let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
291 294 assert_eq!(expand_vars(s), &s[..]);
292 295 }
293 296
294 297 pub(crate) enum MergeResult<V> {
295 298 Left,
296 299 Right,
297 300 New(V),
298 301 }
299 302
300 303 /// Return the union of the two given maps,
301 304 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
302 305 /// both.
303 306 ///
304 307 /// CC <https://github.com/bodil/im-rs/issues/166>
305 308 pub(crate) fn ordmap_union_with_merge<K, V>(
306 309 left: OrdMap<K, V>,
307 310 right: OrdMap<K, V>,
308 311 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
309 312 ) -> OrdMap<K, V>
310 313 where
311 314 K: Clone + Ord,
312 315 V: Clone + PartialEq,
313 316 {
314 317 if left.ptr_eq(&right) {
315 318 // One of the two maps is an unmodified clone of the other
316 319 left
317 320 } else if left.len() / 2 > right.len() {
318 321 // When two maps have different sizes,
319 322 // their size difference is a lower bound on
320 323 // how many keys of the larger map are not also in the smaller map.
321 324 // This in turn is a lower bound on the number of differences in
322 325 // `OrdMap::diff` and the "amount of work" that would be done
323 326 // by `ordmap_union_with_merge_by_diff`.
324 327 //
325 328 // Here `left` is more than twice the size of `right`,
326 329 // so the number of differences is more than the total size of
327 330 // `right`. Therefore an algorithm based on iterating `right`
328 331 // is more efficient.
329 332 //
330 333 // This helps a lot when a tiny (or empty) map is merged
331 334 // with a large one.
332 335 ordmap_union_with_merge_by_iter(left, right, merge)
333 336 } else if left.len() < right.len() / 2 {
334 337 // Same as above but with `left` and `right` swapped
335 338 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
336 339 // Also swapped in `merge` arguments:
337 340 match merge(key, b, a) {
338 341 MergeResult::New(v) => MergeResult::New(v),
339 342 // … and swap back in `merge` result:
340 343 MergeResult::Left => MergeResult::Right,
341 344 MergeResult::Right => MergeResult::Left,
342 345 }
343 346 })
344 347 } else {
345 348 // For maps of similar size, use the algorithm based on `OrdMap::diff`
346 349 ordmap_union_with_merge_by_diff(left, right, merge)
347 350 }
348 351 }
349 352
350 353 /// Efficient if `right` is much smaller than `left`
351 354 fn ordmap_union_with_merge_by_iter<K, V>(
352 355 mut left: OrdMap<K, V>,
353 356 right: OrdMap<K, V>,
354 357 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
355 358 ) -> OrdMap<K, V>
356 359 where
357 360 K: Clone + Ord,
358 361 V: Clone,
359 362 {
360 363 for (key, right_value) in right {
361 364 match left.get(&key) {
362 365 None => {
363 366 left.insert(key, right_value);
364 367 }
365 368 Some(left_value) => match merge(&key, left_value, &right_value) {
366 369 MergeResult::Left => {}
367 370 MergeResult::Right => {
368 371 left.insert(key, right_value);
369 372 }
370 373 MergeResult::New(new_value) => {
371 374 left.insert(key, new_value);
372 375 }
373 376 },
374 377 }
375 378 }
376 379 left
377 380 }
378 381
379 382 /// Fallback when both maps are of similar size
380 383 fn ordmap_union_with_merge_by_diff<K, V>(
381 384 mut left: OrdMap<K, V>,
382 385 mut right: OrdMap<K, V>,
383 386 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
384 387 ) -> OrdMap<K, V>
385 388 where
386 389 K: Clone + Ord,
387 390 V: Clone + PartialEq,
388 391 {
389 392 // (key, value) pairs that would need to be inserted in either map
390 393 // in order to turn it into the union.
391 394 //
392 395 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
393 396 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
394 397 // with `left_updates` only borrowing from `right` and `right_updates` from
395 398 // `left`, and with `Cow::Owned` used for `MergeResult::New`.
396 399 //
397 400 // This would allow moving all `.clone()` calls to after we’ve decided
398 401 // which of `right_updates` or `left_updates` to use
399 402 // (value ones becoming `Cow::into_owned`),
400 403 // and avoid making clones we don’t end up using.
401 404 let mut left_updates = Vec::new();
402 405 let mut right_updates = Vec::new();
403 406
404 407 for difference in left.diff(&right) {
405 408 match difference {
406 409 DiffItem::Add(key, value) => {
407 410 left_updates.push((key.clone(), value.clone()))
408 411 }
409 412 DiffItem::Remove(key, value) => {
410 413 right_updates.push((key.clone(), value.clone()))
411 414 }
412 415 DiffItem::Update {
413 416 old: (key, left_value),
414 417 new: (_, right_value),
415 418 } => match merge(key, left_value, right_value) {
416 419 MergeResult::Left => {
417 420 right_updates.push((key.clone(), left_value.clone()))
418 421 }
419 422 MergeResult::Right => {
420 423 left_updates.push((key.clone(), right_value.clone()))
421 424 }
422 425 MergeResult::New(new_value) => {
423 426 left_updates.push((key.clone(), new_value.clone()));
424 427 right_updates.push((key.clone(), new_value))
425 428 }
426 429 },
427 430 }
428 431 }
429 432 if left_updates.len() < right_updates.len() {
430 433 for (key, value) in left_updates {
431 434 left.insert(key, value);
432 435 }
433 436 left
434 437 } else {
435 438 for (key, value) in right_updates {
436 439 right.insert(key, value);
437 440 }
438 441 right
439 442 }
440 443 }
441 444
442 445 /// Join items of the iterable with the given separator, similar to Python’s
443 446 /// `separator.join(iter)`.
444 447 ///
445 448 /// Formatting the return value consumes the iterator.
446 449 /// Formatting it again will produce an empty string.
447 450 pub fn join_display(
448 451 iter: impl IntoIterator<Item = impl fmt::Display>,
449 452 separator: impl fmt::Display,
450 453 ) -> impl fmt::Display {
451 454 JoinDisplay {
452 455 iter: Cell::new(Some(iter.into_iter())),
453 456 separator,
454 457 }
455 458 }
456 459
457 460 struct JoinDisplay<I, S> {
458 461 iter: Cell<Option<I>>,
459 462 separator: S,
460 463 }
461 464
462 465 impl<I, T, S> fmt::Display for JoinDisplay<I, S>
463 466 where
464 467 I: Iterator<Item = T>,
465 468 T: fmt::Display,
466 469 S: fmt::Display,
467 470 {
468 471 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
469 472 if let Some(mut iter) = self.iter.take() {
470 473 if let Some(first) = iter.next() {
471 474 first.fmt(f)?;
472 475 }
473 476 for value in iter {
474 477 self.separator.fmt(f)?;
475 478 value.fmt(f)?;
476 479 }
477 480 }
478 481 Ok(())
479 482 }
480 483 }
481 484
482 485 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
483 486 ///
484 487 /// The callback is only called for incoming `Ok` values. Errors are passed
485 488 /// through as-is. In order to let it use the `?` operator the callback is
486 489 /// expected to return a `Result` of `Option`, instead of an `Option` of
487 490 /// `Result`.
488 491 pub fn filter_map_results<'a, I, F, A, B, E>(
489 492 iter: I,
490 493 f: F,
491 494 ) -> impl Iterator<Item = Result<B, E>> + 'a
492 495 where
493 496 I: Iterator<Item = Result<A, E>> + 'a,
494 497 F: Fn(A) -> Result<Option<B>, E> + 'a,
495 498 {
496 499 iter.filter_map(move |result| match result {
497 500 Ok(node) => f(node).transpose(),
498 501 Err(e) => Some(Err(e)),
499 502 })
500 503 }
501 504
505 /// Like `itertools::merge_join_by`, but merges fallible iterators.
506 ///
507 /// The callback is only used for Ok values. Errors are passed through as-is.
508 /// Errors compare less than Ok values, which makes the error handling
509 /// conservative.
510 pub fn merge_join_results_by<'a, I1, I2, F, A, B, E>(
511 iter1: I1,
512 iter2: I2,
513 f: F,
514 ) -> impl Iterator<Item = Result<EitherOrBoth<A, B>, E>> + 'a
515 where
516 I1: Iterator<Item = Result<A, E>> + 'a,
517 I2: Iterator<Item = Result<B, E>> + 'a,
518 F: FnMut(&A, &B) -> Ordering + 'a,
519 {
520 let mut g = f;
521 iter1
522 .merge_join_by(iter2, move |i1, i2| match i1 {
523 Err(_) => Ordering::Less,
524 Ok(i1) => match i2 {
525 Err(_) => Ordering::Greater,
526 Ok(i2) => g(i1, i2),
527 },
528 })
529 .map(|result| match result {
530 EitherOrBoth::Left(Err(e)) => Err(e),
531 EitherOrBoth::Right(Err(e)) => Err(e),
532 EitherOrBoth::Both(Err(e), _) => Err(e),
533 EitherOrBoth::Both(_, Err(e)) => Err(e),
534 EitherOrBoth::Left(Ok(v)) => Ok(EitherOrBoth::Left(v)),
535 EitherOrBoth::Right(Ok(v)) => Ok(EitherOrBoth::Right(v)),
536 EitherOrBoth::Both(Ok(v1), Ok(v2)) => {
537 Ok(EitherOrBoth::Both(v1, v2))
538 }
539 })
540 }
541
502 542 /// Force the global rayon threadpool to not exceed 16 concurrent threads
503 543 /// unless the user has specified a value.
504 544 /// This is a stop-gap measure until we figure out why using more than 16
505 545 /// threads makes `status` slower for each additional thread.
506 546 ///
507 547 /// TODO find the underlying cause and fix it, then remove this.
508 548 ///
509 549 /// # Errors
510 550 ///
511 551 /// Returns an error if the global threadpool has already been initialized if
512 552 /// we try to initialize it.
513 553 pub fn cap_default_rayon_threads() -> Result<(), rayon::ThreadPoolBuildError> {
514 554 const THREAD_CAP: usize = 16;
515 555
516 556 if std::env::var("RAYON_NUM_THREADS").is_err() {
517 557 let available_parallelism = std::thread::available_parallelism()
518 558 .map(usize::from)
519 559 .unwrap_or(1);
520 560 let new_thread_count = THREAD_CAP.min(available_parallelism);
521 561 let res = rayon::ThreadPoolBuilder::new()
522 562 .num_threads(new_thread_count)
523 563 .build_global();
524 564 if res.is_ok() {
525 565 log::trace!(
526 566 "Capped the rayon threadpool to {new_thread_count} threads",
527 567 );
528 568 }
529 569 return res;
530 570 }
531 571 Ok(())
532 572 }
General Comments 0
You need to be logged in to leave comments. Login now