Show More
@@ -1,532 +1,572 b'' | |||||
1 | // utils module |
|
1 | // utils module | |
2 | // |
|
2 | // | |
3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |
4 | // |
|
4 | // | |
5 | // This software may be used and distributed according to the terms of the |
|
5 | // This software may be used and distributed according to the terms of the | |
6 | // GNU General Public License version 2 or any later version. |
|
6 | // GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | //! Contains useful functions, traits, structs, etc. for use in core. |
|
8 | //! Contains useful functions, traits, structs, etc. for use in core. | |
9 |
|
9 | |||
10 | use crate::errors::{HgError, IoErrorContext}; |
|
10 | use crate::errors::{HgError, IoErrorContext}; | |
11 | use crate::utils::hg_path::HgPath; |
|
11 | use crate::utils::hg_path::HgPath; | |
12 | use im_rc::ordmap::DiffItem; |
|
12 | use im_rc::ordmap::DiffItem; | |
13 | use im_rc::ordmap::OrdMap; |
|
13 | use im_rc::ordmap::OrdMap; | |
|
14 | use itertools::EitherOrBoth; | |||
|
15 | use itertools::Itertools; | |||
14 | use std::cell::Cell; |
|
16 | use std::cell::Cell; | |
|
17 | use std::cmp::Ordering; | |||
15 | use std::fmt; |
|
18 | use std::fmt; | |
16 | use std::{io::Write, ops::Deref}; |
|
19 | use std::{io::Write, ops::Deref}; | |
17 |
|
20 | |||
18 | pub mod debug; |
|
21 | pub mod debug; | |
19 | pub mod files; |
|
22 | pub mod files; | |
20 | pub mod hg_path; |
|
23 | pub mod hg_path; | |
21 | pub mod path_auditor; |
|
24 | pub mod path_auditor; | |
22 |
|
25 | |||
23 | /// Useful until rust/issues/56345 is stable |
|
26 | /// Useful until rust/issues/56345 is stable | |
24 | /// |
|
27 | /// | |
25 | /// # Examples |
|
28 | /// # Examples | |
26 | /// |
|
29 | /// | |
27 | /// ``` |
|
30 | /// ``` | |
28 | /// use crate::hg::utils::find_slice_in_slice; |
|
31 | /// use crate::hg::utils::find_slice_in_slice; | |
29 | /// |
|
32 | /// | |
30 | /// let haystack = b"This is the haystack".to_vec(); |
|
33 | /// let haystack = b"This is the haystack".to_vec(); | |
31 | /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8)); |
|
34 | /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8)); | |
32 | /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None); |
|
35 | /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None); | |
33 | /// ``` |
|
36 | /// ``` | |
34 | pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize> |
|
37 | pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize> | |
35 | where |
|
38 | where | |
36 | for<'a> &'a [T]: PartialEq, |
|
39 | for<'a> &'a [T]: PartialEq, | |
37 | { |
|
40 | { | |
38 | slice |
|
41 | slice | |
39 | .windows(needle.len()) |
|
42 | .windows(needle.len()) | |
40 | .position(|window| window == needle) |
|
43 | .position(|window| window == needle) | |
41 | } |
|
44 | } | |
42 |
|
45 | |||
43 | /// Replaces the `from` slice with the `to` slice inside the `buf` slice. |
|
46 | /// Replaces the `from` slice with the `to` slice inside the `buf` slice. | |
44 | /// |
|
47 | /// | |
45 | /// # Examples |
|
48 | /// # Examples | |
46 | /// |
|
49 | /// | |
47 | /// ``` |
|
50 | /// ``` | |
48 | /// use crate::hg::utils::replace_slice; |
|
51 | /// use crate::hg::utils::replace_slice; | |
49 | /// let mut line = b"I hate writing tests!".to_vec(); |
|
52 | /// let mut line = b"I hate writing tests!".to_vec(); | |
50 | /// replace_slice(&mut line, b"hate", b"love"); |
|
53 | /// replace_slice(&mut line, b"hate", b"love"); | |
51 | /// assert_eq!( |
|
54 | /// assert_eq!( | |
52 | /// line, |
|
55 | /// line, | |
53 | /// b"I love writing tests!".to_vec() |
|
56 | /// b"I love writing tests!".to_vec() | |
54 | /// ); |
|
57 | /// ); | |
55 | /// ``` |
|
58 | /// ``` | |
56 | pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T]) |
|
59 | pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T]) | |
57 | where |
|
60 | where | |
58 | T: Clone + PartialEq, |
|
61 | T: Clone + PartialEq, | |
59 | { |
|
62 | { | |
60 | if buf.len() < from.len() || from.len() != to.len() { |
|
63 | if buf.len() < from.len() || from.len() != to.len() { | |
61 | return; |
|
64 | return; | |
62 | } |
|
65 | } | |
63 | for i in 0..=buf.len() - from.len() { |
|
66 | for i in 0..=buf.len() - from.len() { | |
64 | if buf[i..].starts_with(from) { |
|
67 | if buf[i..].starts_with(from) { | |
65 | buf[i..(i + from.len())].clone_from_slice(to); |
|
68 | buf[i..(i + from.len())].clone_from_slice(to); | |
66 | } |
|
69 | } | |
67 | } |
|
70 | } | |
68 | } |
|
71 | } | |
69 |
|
72 | |||
70 | pub trait SliceExt { |
|
73 | pub trait SliceExt { | |
71 | fn trim_end(&self) -> &Self; |
|
74 | fn trim_end(&self) -> &Self; | |
72 | fn trim_start(&self) -> &Self; |
|
75 | fn trim_start(&self) -> &Self; | |
73 | fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; |
|
76 | fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; | |
74 | fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; |
|
77 | fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; | |
75 | fn trim(&self) -> &Self; |
|
78 | fn trim(&self) -> &Self; | |
76 | fn drop_prefix(&self, needle: &Self) -> Option<&Self>; |
|
79 | fn drop_prefix(&self, needle: &Self) -> Option<&Self>; | |
77 | fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>; |
|
80 | fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>; | |
78 | fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>; |
|
81 | fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>; | |
79 | } |
|
82 | } | |
80 |
|
83 | |||
81 | impl SliceExt for [u8] { |
|
84 | impl SliceExt for [u8] { | |
82 | fn trim_end(&self) -> &[u8] { |
|
85 | fn trim_end(&self) -> &[u8] { | |
83 | self.trim_end_matches(|byte| byte.is_ascii_whitespace()) |
|
86 | self.trim_end_matches(|byte| byte.is_ascii_whitespace()) | |
84 | } |
|
87 | } | |
85 |
|
88 | |||
86 | fn trim_start(&self) -> &[u8] { |
|
89 | fn trim_start(&self) -> &[u8] { | |
87 | self.trim_start_matches(|byte| byte.is_ascii_whitespace()) |
|
90 | self.trim_start_matches(|byte| byte.is_ascii_whitespace()) | |
88 | } |
|
91 | } | |
89 |
|
92 | |||
90 | fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self { |
|
93 | fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self { | |
91 | if let Some(last) = self.iter().rposition(|&byte| !f(byte)) { |
|
94 | if let Some(last) = self.iter().rposition(|&byte| !f(byte)) { | |
92 | &self[..=last] |
|
95 | &self[..=last] | |
93 | } else { |
|
96 | } else { | |
94 | &[] |
|
97 | &[] | |
95 | } |
|
98 | } | |
96 | } |
|
99 | } | |
97 |
|
100 | |||
98 | fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self { |
|
101 | fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self { | |
99 | if let Some(first) = self.iter().position(|&byte| !f(byte)) { |
|
102 | if let Some(first) = self.iter().position(|&byte| !f(byte)) { | |
100 | &self[first..] |
|
103 | &self[first..] | |
101 | } else { |
|
104 | } else { | |
102 | &[] |
|
105 | &[] | |
103 | } |
|
106 | } | |
104 | } |
|
107 | } | |
105 |
|
108 | |||
106 | /// ``` |
|
109 | /// ``` | |
107 | /// use hg::utils::SliceExt; |
|
110 | /// use hg::utils::SliceExt; | |
108 | /// assert_eq!( |
|
111 | /// assert_eq!( | |
109 | /// b" to trim ".trim(), |
|
112 | /// b" to trim ".trim(), | |
110 | /// b"to trim" |
|
113 | /// b"to trim" | |
111 | /// ); |
|
114 | /// ); | |
112 | /// assert_eq!( |
|
115 | /// assert_eq!( | |
113 | /// b"to trim ".trim(), |
|
116 | /// b"to trim ".trim(), | |
114 | /// b"to trim" |
|
117 | /// b"to trim" | |
115 | /// ); |
|
118 | /// ); | |
116 | /// assert_eq!( |
|
119 | /// assert_eq!( | |
117 | /// b" to trim".trim(), |
|
120 | /// b" to trim".trim(), | |
118 | /// b"to trim" |
|
121 | /// b"to trim" | |
119 | /// ); |
|
122 | /// ); | |
120 | /// ``` |
|
123 | /// ``` | |
121 | fn trim(&self) -> &[u8] { |
|
124 | fn trim(&self) -> &[u8] { | |
122 | self.trim_start().trim_end() |
|
125 | self.trim_start().trim_end() | |
123 | } |
|
126 | } | |
124 |
|
127 | |||
125 | fn drop_prefix(&self, needle: &Self) -> Option<&Self> { |
|
128 | fn drop_prefix(&self, needle: &Self) -> Option<&Self> { | |
126 | if self.starts_with(needle) { |
|
129 | if self.starts_with(needle) { | |
127 | Some(&self[needle.len()..]) |
|
130 | Some(&self[needle.len()..]) | |
128 | } else { |
|
131 | } else { | |
129 | None |
|
132 | None | |
130 | } |
|
133 | } | |
131 | } |
|
134 | } | |
132 |
|
135 | |||
133 | fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> { |
|
136 | fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> { | |
134 | let mut iter = self.splitn(2, |&byte| byte == separator); |
|
137 | let mut iter = self.splitn(2, |&byte| byte == separator); | |
135 | let a = iter.next()?; |
|
138 | let a = iter.next()?; | |
136 | let b = iter.next()?; |
|
139 | let b = iter.next()?; | |
137 | Some((a, b)) |
|
140 | Some((a, b)) | |
138 | } |
|
141 | } | |
139 |
|
142 | |||
140 | fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> { |
|
143 | fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> { | |
141 | find_slice_in_slice(self, separator) |
|
144 | find_slice_in_slice(self, separator) | |
142 | .map(|pos| (&self[..pos], &self[pos + separator.len()..])) |
|
145 | .map(|pos| (&self[..pos], &self[pos + separator.len()..])) | |
143 | } |
|
146 | } | |
144 | } |
|
147 | } | |
145 |
|
148 | |||
146 | pub trait Escaped { |
|
149 | pub trait Escaped { | |
147 | /// Return bytes escaped for display to the user |
|
150 | /// Return bytes escaped for display to the user | |
148 | fn escaped_bytes(&self) -> Vec<u8>; |
|
151 | fn escaped_bytes(&self) -> Vec<u8>; | |
149 | } |
|
152 | } | |
150 |
|
153 | |||
151 | impl Escaped for u8 { |
|
154 | impl Escaped for u8 { | |
152 | fn escaped_bytes(&self) -> Vec<u8> { |
|
155 | fn escaped_bytes(&self) -> Vec<u8> { | |
153 | let mut acc = vec![]; |
|
156 | let mut acc = vec![]; | |
154 | match self { |
|
157 | match self { | |
155 | c @ b'\'' | c @ b'\\' => { |
|
158 | c @ b'\'' | c @ b'\\' => { | |
156 | acc.push(b'\\'); |
|
159 | acc.push(b'\\'); | |
157 | acc.push(*c); |
|
160 | acc.push(*c); | |
158 | } |
|
161 | } | |
159 | b'\t' => { |
|
162 | b'\t' => { | |
160 | acc.extend(br"\\t"); |
|
163 | acc.extend(br"\\t"); | |
161 | } |
|
164 | } | |
162 | b'\n' => { |
|
165 | b'\n' => { | |
163 | acc.extend(br"\\n"); |
|
166 | acc.extend(br"\\n"); | |
164 | } |
|
167 | } | |
165 | b'\r' => { |
|
168 | b'\r' => { | |
166 | acc.extend(br"\\r"); |
|
169 | acc.extend(br"\\r"); | |
167 | } |
|
170 | } | |
168 | c if (*c < b' ' || *c >= 127) => { |
|
171 | c if (*c < b' ' || *c >= 127) => { | |
169 | write!(acc, "\\x{:x}", self).unwrap(); |
|
172 | write!(acc, "\\x{:x}", self).unwrap(); | |
170 | } |
|
173 | } | |
171 | c => { |
|
174 | c => { | |
172 | acc.push(*c); |
|
175 | acc.push(*c); | |
173 | } |
|
176 | } | |
174 | } |
|
177 | } | |
175 | acc |
|
178 | acc | |
176 | } |
|
179 | } | |
177 | } |
|
180 | } | |
178 |
|
181 | |||
179 | impl<'a, T: Escaped> Escaped for &'a [T] { |
|
182 | impl<'a, T: Escaped> Escaped for &'a [T] { | |
180 | fn escaped_bytes(&self) -> Vec<u8> { |
|
183 | fn escaped_bytes(&self) -> Vec<u8> { | |
181 | self.iter().flat_map(Escaped::escaped_bytes).collect() |
|
184 | self.iter().flat_map(Escaped::escaped_bytes).collect() | |
182 | } |
|
185 | } | |
183 | } |
|
186 | } | |
184 |
|
187 | |||
185 | impl<T: Escaped> Escaped for Vec<T> { |
|
188 | impl<T: Escaped> Escaped for Vec<T> { | |
186 | fn escaped_bytes(&self) -> Vec<u8> { |
|
189 | fn escaped_bytes(&self) -> Vec<u8> { | |
187 | self.deref().escaped_bytes() |
|
190 | self.deref().escaped_bytes() | |
188 | } |
|
191 | } | |
189 | } |
|
192 | } | |
190 |
|
193 | |||
191 | impl<'a> Escaped for &'a HgPath { |
|
194 | impl<'a> Escaped for &'a HgPath { | |
192 | fn escaped_bytes(&self) -> Vec<u8> { |
|
195 | fn escaped_bytes(&self) -> Vec<u8> { | |
193 | self.as_bytes().escaped_bytes() |
|
196 | self.as_bytes().escaped_bytes() | |
194 | } |
|
197 | } | |
195 | } |
|
198 | } | |
196 |
|
199 | |||
197 | #[cfg(unix)] |
|
200 | #[cfg(unix)] | |
198 | pub fn shell_quote(value: &[u8]) -> Vec<u8> { |
|
201 | pub fn shell_quote(value: &[u8]) -> Vec<u8> { | |
199 | if value.iter().all(|&byte| { |
|
202 | if value.iter().all(|&byte| { | |
200 | matches!( |
|
203 | matches!( | |
201 | byte, |
|
204 | byte, | |
202 | b'a'..=b'z' |
|
205 | b'a'..=b'z' | |
203 | | b'A'..=b'Z' |
|
206 | | b'A'..=b'Z' | |
204 | | b'0'..=b'9' |
|
207 | | b'0'..=b'9' | |
205 | | b'.' |
|
208 | | b'.' | |
206 | | b'_' |
|
209 | | b'_' | |
207 | | b'/' |
|
210 | | b'/' | |
208 | | b'+' |
|
211 | | b'+' | |
209 | | b'-' |
|
212 | | b'-' | |
210 | ) |
|
213 | ) | |
211 | }) { |
|
214 | }) { | |
212 | value.to_owned() |
|
215 | value.to_owned() | |
213 | } else { |
|
216 | } else { | |
214 | let mut quoted = Vec::with_capacity(value.len() + 2); |
|
217 | let mut quoted = Vec::with_capacity(value.len() + 2); | |
215 | quoted.push(b'\''); |
|
218 | quoted.push(b'\''); | |
216 | for &byte in value { |
|
219 | for &byte in value { | |
217 | if byte == b'\'' { |
|
220 | if byte == b'\'' { | |
218 | quoted.push(b'\\'); |
|
221 | quoted.push(b'\\'); | |
219 | } |
|
222 | } | |
220 | quoted.push(byte); |
|
223 | quoted.push(byte); | |
221 | } |
|
224 | } | |
222 | quoted.push(b'\''); |
|
225 | quoted.push(b'\''); | |
223 | quoted |
|
226 | quoted | |
224 | } |
|
227 | } | |
225 | } |
|
228 | } | |
226 |
|
229 | |||
227 | pub fn current_dir() -> Result<std::path::PathBuf, HgError> { |
|
230 | pub fn current_dir() -> Result<std::path::PathBuf, HgError> { | |
228 | std::env::current_dir().map_err(|error| HgError::IoError { |
|
231 | std::env::current_dir().map_err(|error| HgError::IoError { | |
229 | error, |
|
232 | error, | |
230 | context: IoErrorContext::CurrentDir, |
|
233 | context: IoErrorContext::CurrentDir, | |
231 | }) |
|
234 | }) | |
232 | } |
|
235 | } | |
233 |
|
236 | |||
234 | pub fn current_exe() -> Result<std::path::PathBuf, HgError> { |
|
237 | pub fn current_exe() -> Result<std::path::PathBuf, HgError> { | |
235 | std::env::current_exe().map_err(|error| HgError::IoError { |
|
238 | std::env::current_exe().map_err(|error| HgError::IoError { | |
236 | error, |
|
239 | error, | |
237 | context: IoErrorContext::CurrentExe, |
|
240 | context: IoErrorContext::CurrentExe, | |
238 | }) |
|
241 | }) | |
239 | } |
|
242 | } | |
240 |
|
243 | |||
241 | /// Expand `$FOO` and `${FOO}` environment variables in the given byte string |
|
244 | /// Expand `$FOO` and `${FOO}` environment variables in the given byte string | |
242 | pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> { |
|
245 | pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> { | |
243 | lazy_static::lazy_static! { |
|
246 | lazy_static::lazy_static! { | |
244 | /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301 |
|
247 | /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301 | |
245 | /// The `x` makes whitespace ignored. |
|
248 | /// The `x` makes whitespace ignored. | |
246 | /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag. |
|
249 | /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag. | |
247 | static ref VAR_RE: regex::bytes::Regex = |
|
250 | static ref VAR_RE: regex::bytes::Regex = | |
248 | regex::bytes::Regex::new(r"(?x-u) |
|
251 | regex::bytes::Regex::new(r"(?x-u) | |
249 | \$ |
|
252 | \$ | |
250 | (?: |
|
253 | (?: | |
251 | (\w+) |
|
254 | (\w+) | |
252 | | |
|
255 | | | |
253 | \{ |
|
256 | \{ | |
254 | ([^}]*) |
|
257 | ([^}]*) | |
255 | \} |
|
258 | \} | |
256 | ) |
|
259 | ) | |
257 | ").unwrap(); |
|
260 | ").unwrap(); | |
258 | } |
|
261 | } | |
259 | VAR_RE.replace_all(s, |captures: ®ex::bytes::Captures| { |
|
262 | VAR_RE.replace_all(s, |captures: ®ex::bytes::Captures| { | |
260 | let var_name = files::get_os_str_from_bytes( |
|
263 | let var_name = files::get_os_str_from_bytes( | |
261 | captures |
|
264 | captures | |
262 | .get(1) |
|
265 | .get(1) | |
263 | .or_else(|| captures.get(2)) |
|
266 | .or_else(|| captures.get(2)) | |
264 | .expect("either side of `|` must participate in match") |
|
267 | .expect("either side of `|` must participate in match") | |
265 | .as_bytes(), |
|
268 | .as_bytes(), | |
266 | ); |
|
269 | ); | |
267 | std::env::var_os(var_name) |
|
270 | std::env::var_os(var_name) | |
268 | .map(files::get_bytes_from_os_str) |
|
271 | .map(files::get_bytes_from_os_str) | |
269 | .unwrap_or_else(|| { |
|
272 | .unwrap_or_else(|| { | |
270 | // Referencing an environment variable that does not exist. |
|
273 | // Referencing an environment variable that does not exist. | |
271 | // Leave the $FOO reference as-is. |
|
274 | // Leave the $FOO reference as-is. | |
272 | captures[0].to_owned() |
|
275 | captures[0].to_owned() | |
273 | }) |
|
276 | }) | |
274 | }) |
|
277 | }) | |
275 | } |
|
278 | } | |
276 |
|
279 | |||
277 | #[test] |
|
280 | #[test] | |
278 | fn test_expand_vars() { |
|
281 | fn test_expand_vars() { | |
279 | // Modifying process-global state in a test isn’t great, |
|
282 | // Modifying process-global state in a test isn’t great, | |
280 | // but hopefully this won’t collide with anything. |
|
283 | // but hopefully this won’t collide with anything. | |
281 | std::env::set_var("TEST_EXPAND_VAR", "1"); |
|
284 | std::env::set_var("TEST_EXPAND_VAR", "1"); | |
282 | assert_eq!( |
|
285 | assert_eq!( | |
283 | expand_vars(b"before/$TEST_EXPAND_VAR/after"), |
|
286 | expand_vars(b"before/$TEST_EXPAND_VAR/after"), | |
284 | &b"before/1/after"[..] |
|
287 | &b"before/1/after"[..] | |
285 | ); |
|
288 | ); | |
286 | assert_eq!( |
|
289 | assert_eq!( | |
287 | expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"), |
|
290 | expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"), | |
288 | &b"before111after"[..] |
|
291 | &b"before111after"[..] | |
289 | ); |
|
292 | ); | |
290 | let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after"; |
|
293 | let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after"; | |
291 | assert_eq!(expand_vars(s), &s[..]); |
|
294 | assert_eq!(expand_vars(s), &s[..]); | |
292 | } |
|
295 | } | |
293 |
|
296 | |||
294 | pub(crate) enum MergeResult<V> { |
|
297 | pub(crate) enum MergeResult<V> { | |
295 | Left, |
|
298 | Left, | |
296 | Right, |
|
299 | Right, | |
297 | New(V), |
|
300 | New(V), | |
298 | } |
|
301 | } | |
299 |
|
302 | |||
300 | /// Return the union of the two given maps, |
|
303 | /// Return the union of the two given maps, | |
301 | /// calling `merge(key, left_value, right_value)` to resolve keys that exist in |
|
304 | /// calling `merge(key, left_value, right_value)` to resolve keys that exist in | |
302 | /// both. |
|
305 | /// both. | |
303 | /// |
|
306 | /// | |
304 | /// CC <https://github.com/bodil/im-rs/issues/166> |
|
307 | /// CC <https://github.com/bodil/im-rs/issues/166> | |
305 | pub(crate) fn ordmap_union_with_merge<K, V>( |
|
308 | pub(crate) fn ordmap_union_with_merge<K, V>( | |
306 | left: OrdMap<K, V>, |
|
309 | left: OrdMap<K, V>, | |
307 | right: OrdMap<K, V>, |
|
310 | right: OrdMap<K, V>, | |
308 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, |
|
311 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |
309 | ) -> OrdMap<K, V> |
|
312 | ) -> OrdMap<K, V> | |
310 | where |
|
313 | where | |
311 | K: Clone + Ord, |
|
314 | K: Clone + Ord, | |
312 | V: Clone + PartialEq, |
|
315 | V: Clone + PartialEq, | |
313 | { |
|
316 | { | |
314 | if left.ptr_eq(&right) { |
|
317 | if left.ptr_eq(&right) { | |
315 | // One of the two maps is an unmodified clone of the other |
|
318 | // One of the two maps is an unmodified clone of the other | |
316 | left |
|
319 | left | |
317 | } else if left.len() / 2 > right.len() { |
|
320 | } else if left.len() / 2 > right.len() { | |
318 | // When two maps have different sizes, |
|
321 | // When two maps have different sizes, | |
319 | // their size difference is a lower bound on |
|
322 | // their size difference is a lower bound on | |
320 | // how many keys of the larger map are not also in the smaller map. |
|
323 | // how many keys of the larger map are not also in the smaller map. | |
321 | // This in turn is a lower bound on the number of differences in |
|
324 | // This in turn is a lower bound on the number of differences in | |
322 | // `OrdMap::diff` and the "amount of work" that would be done |
|
325 | // `OrdMap::diff` and the "amount of work" that would be done | |
323 | // by `ordmap_union_with_merge_by_diff`. |
|
326 | // by `ordmap_union_with_merge_by_diff`. | |
324 | // |
|
327 | // | |
325 | // Here `left` is more than twice the size of `right`, |
|
328 | // Here `left` is more than twice the size of `right`, | |
326 | // so the number of differences is more than the total size of |
|
329 | // so the number of differences is more than the total size of | |
327 | // `right`. Therefore an algorithm based on iterating `right` |
|
330 | // `right`. Therefore an algorithm based on iterating `right` | |
328 | // is more efficient. |
|
331 | // is more efficient. | |
329 | // |
|
332 | // | |
330 | // This helps a lot when a tiny (or empty) map is merged |
|
333 | // This helps a lot when a tiny (or empty) map is merged | |
331 | // with a large one. |
|
334 | // with a large one. | |
332 | ordmap_union_with_merge_by_iter(left, right, merge) |
|
335 | ordmap_union_with_merge_by_iter(left, right, merge) | |
333 | } else if left.len() < right.len() / 2 { |
|
336 | } else if left.len() < right.len() / 2 { | |
334 | // Same as above but with `left` and `right` swapped |
|
337 | // Same as above but with `left` and `right` swapped | |
335 | ordmap_union_with_merge_by_iter(right, left, |key, a, b| { |
|
338 | ordmap_union_with_merge_by_iter(right, left, |key, a, b| { | |
336 | // Also swapped in `merge` arguments: |
|
339 | // Also swapped in `merge` arguments: | |
337 | match merge(key, b, a) { |
|
340 | match merge(key, b, a) { | |
338 | MergeResult::New(v) => MergeResult::New(v), |
|
341 | MergeResult::New(v) => MergeResult::New(v), | |
339 | // … and swap back in `merge` result: |
|
342 | // … and swap back in `merge` result: | |
340 | MergeResult::Left => MergeResult::Right, |
|
343 | MergeResult::Left => MergeResult::Right, | |
341 | MergeResult::Right => MergeResult::Left, |
|
344 | MergeResult::Right => MergeResult::Left, | |
342 | } |
|
345 | } | |
343 | }) |
|
346 | }) | |
344 | } else { |
|
347 | } else { | |
345 | // For maps of similar size, use the algorithm based on `OrdMap::diff` |
|
348 | // For maps of similar size, use the algorithm based on `OrdMap::diff` | |
346 | ordmap_union_with_merge_by_diff(left, right, merge) |
|
349 | ordmap_union_with_merge_by_diff(left, right, merge) | |
347 | } |
|
350 | } | |
348 | } |
|
351 | } | |
349 |
|
352 | |||
350 | /// Efficient if `right` is much smaller than `left` |
|
353 | /// Efficient if `right` is much smaller than `left` | |
351 | fn ordmap_union_with_merge_by_iter<K, V>( |
|
354 | fn ordmap_union_with_merge_by_iter<K, V>( | |
352 | mut left: OrdMap<K, V>, |
|
355 | mut left: OrdMap<K, V>, | |
353 | right: OrdMap<K, V>, |
|
356 | right: OrdMap<K, V>, | |
354 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, |
|
357 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |
355 | ) -> OrdMap<K, V> |
|
358 | ) -> OrdMap<K, V> | |
356 | where |
|
359 | where | |
357 | K: Clone + Ord, |
|
360 | K: Clone + Ord, | |
358 | V: Clone, |
|
361 | V: Clone, | |
359 | { |
|
362 | { | |
360 | for (key, right_value) in right { |
|
363 | for (key, right_value) in right { | |
361 | match left.get(&key) { |
|
364 | match left.get(&key) { | |
362 | None => { |
|
365 | None => { | |
363 | left.insert(key, right_value); |
|
366 | left.insert(key, right_value); | |
364 | } |
|
367 | } | |
365 | Some(left_value) => match merge(&key, left_value, &right_value) { |
|
368 | Some(left_value) => match merge(&key, left_value, &right_value) { | |
366 | MergeResult::Left => {} |
|
369 | MergeResult::Left => {} | |
367 | MergeResult::Right => { |
|
370 | MergeResult::Right => { | |
368 | left.insert(key, right_value); |
|
371 | left.insert(key, right_value); | |
369 | } |
|
372 | } | |
370 | MergeResult::New(new_value) => { |
|
373 | MergeResult::New(new_value) => { | |
371 | left.insert(key, new_value); |
|
374 | left.insert(key, new_value); | |
372 | } |
|
375 | } | |
373 | }, |
|
376 | }, | |
374 | } |
|
377 | } | |
375 | } |
|
378 | } | |
376 | left |
|
379 | left | |
377 | } |
|
380 | } | |
378 |
|
381 | |||
379 | /// Fallback when both maps are of similar size |
|
382 | /// Fallback when both maps are of similar size | |
380 | fn ordmap_union_with_merge_by_diff<K, V>( |
|
383 | fn ordmap_union_with_merge_by_diff<K, V>( | |
381 | mut left: OrdMap<K, V>, |
|
384 | mut left: OrdMap<K, V>, | |
382 | mut right: OrdMap<K, V>, |
|
385 | mut right: OrdMap<K, V>, | |
383 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, |
|
386 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, | |
384 | ) -> OrdMap<K, V> |
|
387 | ) -> OrdMap<K, V> | |
385 | where |
|
388 | where | |
386 | K: Clone + Ord, |
|
389 | K: Clone + Ord, | |
387 | V: Clone + PartialEq, |
|
390 | V: Clone + PartialEq, | |
388 | { |
|
391 | { | |
389 | // (key, value) pairs that would need to be inserted in either map |
|
392 | // (key, value) pairs that would need to be inserted in either map | |
390 | // in order to turn it into the union. |
|
393 | // in order to turn it into the union. | |
391 | // |
|
394 | // | |
392 | // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted, |
|
395 | // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted, | |
393 | // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>` |
|
396 | // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>` | |
394 | // with `left_updates` only borrowing from `right` and `right_updates` from |
|
397 | // with `left_updates` only borrowing from `right` and `right_updates` from | |
395 | // `left`, and with `Cow::Owned` used for `MergeResult::New`. |
|
398 | // `left`, and with `Cow::Owned` used for `MergeResult::New`. | |
396 | // |
|
399 | // | |
397 | // This would allow moving all `.clone()` calls to after we’ve decided |
|
400 | // This would allow moving all `.clone()` calls to after we’ve decided | |
398 | // which of `right_updates` or `left_updates` to use |
|
401 | // which of `right_updates` or `left_updates` to use | |
399 | // (value ones becoming `Cow::into_owned`), |
|
402 | // (value ones becoming `Cow::into_owned`), | |
400 | // and avoid making clones we don’t end up using. |
|
403 | // and avoid making clones we don’t end up using. | |
401 | let mut left_updates = Vec::new(); |
|
404 | let mut left_updates = Vec::new(); | |
402 | let mut right_updates = Vec::new(); |
|
405 | let mut right_updates = Vec::new(); | |
403 |
|
406 | |||
404 | for difference in left.diff(&right) { |
|
407 | for difference in left.diff(&right) { | |
405 | match difference { |
|
408 | match difference { | |
406 | DiffItem::Add(key, value) => { |
|
409 | DiffItem::Add(key, value) => { | |
407 | left_updates.push((key.clone(), value.clone())) |
|
410 | left_updates.push((key.clone(), value.clone())) | |
408 | } |
|
411 | } | |
409 | DiffItem::Remove(key, value) => { |
|
412 | DiffItem::Remove(key, value) => { | |
410 | right_updates.push((key.clone(), value.clone())) |
|
413 | right_updates.push((key.clone(), value.clone())) | |
411 | } |
|
414 | } | |
412 | DiffItem::Update { |
|
415 | DiffItem::Update { | |
413 | old: (key, left_value), |
|
416 | old: (key, left_value), | |
414 | new: (_, right_value), |
|
417 | new: (_, right_value), | |
415 | } => match merge(key, left_value, right_value) { |
|
418 | } => match merge(key, left_value, right_value) { | |
416 | MergeResult::Left => { |
|
419 | MergeResult::Left => { | |
417 | right_updates.push((key.clone(), left_value.clone())) |
|
420 | right_updates.push((key.clone(), left_value.clone())) | |
418 | } |
|
421 | } | |
419 | MergeResult::Right => { |
|
422 | MergeResult::Right => { | |
420 | left_updates.push((key.clone(), right_value.clone())) |
|
423 | left_updates.push((key.clone(), right_value.clone())) | |
421 | } |
|
424 | } | |
422 | MergeResult::New(new_value) => { |
|
425 | MergeResult::New(new_value) => { | |
423 | left_updates.push((key.clone(), new_value.clone())); |
|
426 | left_updates.push((key.clone(), new_value.clone())); | |
424 | right_updates.push((key.clone(), new_value)) |
|
427 | right_updates.push((key.clone(), new_value)) | |
425 | } |
|
428 | } | |
426 | }, |
|
429 | }, | |
427 | } |
|
430 | } | |
428 | } |
|
431 | } | |
429 | if left_updates.len() < right_updates.len() { |
|
432 | if left_updates.len() < right_updates.len() { | |
430 | for (key, value) in left_updates { |
|
433 | for (key, value) in left_updates { | |
431 | left.insert(key, value); |
|
434 | left.insert(key, value); | |
432 | } |
|
435 | } | |
433 | left |
|
436 | left | |
434 | } else { |
|
437 | } else { | |
435 | for (key, value) in right_updates { |
|
438 | for (key, value) in right_updates { | |
436 | right.insert(key, value); |
|
439 | right.insert(key, value); | |
437 | } |
|
440 | } | |
438 | right |
|
441 | right | |
439 | } |
|
442 | } | |
440 | } |
|
443 | } | |
441 |
|
444 | |||
442 | /// Join items of the iterable with the given separator, similar to Python’s |
|
445 | /// Join items of the iterable with the given separator, similar to Python’s | |
443 | /// `separator.join(iter)`. |
|
446 | /// `separator.join(iter)`. | |
444 | /// |
|
447 | /// | |
445 | /// Formatting the return value consumes the iterator. |
|
448 | /// Formatting the return value consumes the iterator. | |
446 | /// Formatting it again will produce an empty string. |
|
449 | /// Formatting it again will produce an empty string. | |
447 | pub fn join_display( |
|
450 | pub fn join_display( | |
448 | iter: impl IntoIterator<Item = impl fmt::Display>, |
|
451 | iter: impl IntoIterator<Item = impl fmt::Display>, | |
449 | separator: impl fmt::Display, |
|
452 | separator: impl fmt::Display, | |
450 | ) -> impl fmt::Display { |
|
453 | ) -> impl fmt::Display { | |
451 | JoinDisplay { |
|
454 | JoinDisplay { | |
452 | iter: Cell::new(Some(iter.into_iter())), |
|
455 | iter: Cell::new(Some(iter.into_iter())), | |
453 | separator, |
|
456 | separator, | |
454 | } |
|
457 | } | |
455 | } |
|
458 | } | |
456 |
|
459 | |||
457 | struct JoinDisplay<I, S> { |
|
460 | struct JoinDisplay<I, S> { | |
458 | iter: Cell<Option<I>>, |
|
461 | iter: Cell<Option<I>>, | |
459 | separator: S, |
|
462 | separator: S, | |
460 | } |
|
463 | } | |
461 |
|
464 | |||
462 | impl<I, T, S> fmt::Display for JoinDisplay<I, S> |
|
465 | impl<I, T, S> fmt::Display for JoinDisplay<I, S> | |
463 | where |
|
466 | where | |
464 | I: Iterator<Item = T>, |
|
467 | I: Iterator<Item = T>, | |
465 | T: fmt::Display, |
|
468 | T: fmt::Display, | |
466 | S: fmt::Display, |
|
469 | S: fmt::Display, | |
467 | { |
|
470 | { | |
468 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
471 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
469 | if let Some(mut iter) = self.iter.take() { |
|
472 | if let Some(mut iter) = self.iter.take() { | |
470 | if let Some(first) = iter.next() { |
|
473 | if let Some(first) = iter.next() { | |
471 | first.fmt(f)?; |
|
474 | first.fmt(f)?; | |
472 | } |
|
475 | } | |
473 | for value in iter { |
|
476 | for value in iter { | |
474 | self.separator.fmt(f)?; |
|
477 | self.separator.fmt(f)?; | |
475 | value.fmt(f)?; |
|
478 | value.fmt(f)?; | |
476 | } |
|
479 | } | |
477 | } |
|
480 | } | |
478 | Ok(()) |
|
481 | Ok(()) | |
479 | } |
|
482 | } | |
480 | } |
|
483 | } | |
481 |
|
484 | |||
482 | /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s. |
|
485 | /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s. | |
483 | /// |
|
486 | /// | |
484 | /// The callback is only called for incoming `Ok` values. Errors are passed |
|
487 | /// The callback is only called for incoming `Ok` values. Errors are passed | |
485 | /// through as-is. In order to let it use the `?` operator the callback is |
|
488 | /// through as-is. In order to let it use the `?` operator the callback is | |
486 | /// expected to return a `Result` of `Option`, instead of an `Option` of |
|
489 | /// expected to return a `Result` of `Option`, instead of an `Option` of | |
487 | /// `Result`. |
|
490 | /// `Result`. | |
488 | pub fn filter_map_results<'a, I, F, A, B, E>( |
|
491 | pub fn filter_map_results<'a, I, F, A, B, E>( | |
489 | iter: I, |
|
492 | iter: I, | |
490 | f: F, |
|
493 | f: F, | |
491 | ) -> impl Iterator<Item = Result<B, E>> + 'a |
|
494 | ) -> impl Iterator<Item = Result<B, E>> + 'a | |
492 | where |
|
495 | where | |
493 | I: Iterator<Item = Result<A, E>> + 'a, |
|
496 | I: Iterator<Item = Result<A, E>> + 'a, | |
494 | F: Fn(A) -> Result<Option<B>, E> + 'a, |
|
497 | F: Fn(A) -> Result<Option<B>, E> + 'a, | |
495 | { |
|
498 | { | |
496 | iter.filter_map(move |result| match result { |
|
499 | iter.filter_map(move |result| match result { | |
497 | Ok(node) => f(node).transpose(), |
|
500 | Ok(node) => f(node).transpose(), | |
498 | Err(e) => Some(Err(e)), |
|
501 | Err(e) => Some(Err(e)), | |
499 | }) |
|
502 | }) | |
500 | } |
|
503 | } | |
501 |
|
504 | |||
|
505 | /// Like `itertools::merge_join_by`, but merges fallible iterators. | |||
|
506 | /// | |||
|
507 | /// The callback is only used for Ok values. Errors are passed through as-is. | |||
|
508 | /// Errors compare less than Ok values, which makes the error handling | |||
|
509 | /// conservative. | |||
|
510 | pub fn merge_join_results_by<'a, I1, I2, F, A, B, E>( | |||
|
511 | iter1: I1, | |||
|
512 | iter2: I2, | |||
|
513 | f: F, | |||
|
514 | ) -> impl Iterator<Item = Result<EitherOrBoth<A, B>, E>> + 'a | |||
|
515 | where | |||
|
516 | I1: Iterator<Item = Result<A, E>> + 'a, | |||
|
517 | I2: Iterator<Item = Result<B, E>> + 'a, | |||
|
518 | F: FnMut(&A, &B) -> Ordering + 'a, | |||
|
519 | { | |||
|
520 | let mut g = f; | |||
|
521 | iter1 | |||
|
522 | .merge_join_by(iter2, move |i1, i2| match i1 { | |||
|
523 | Err(_) => Ordering::Less, | |||
|
524 | Ok(i1) => match i2 { | |||
|
525 | Err(_) => Ordering::Greater, | |||
|
526 | Ok(i2) => g(i1, i2), | |||
|
527 | }, | |||
|
528 | }) | |||
|
529 | .map(|result| match result { | |||
|
530 | EitherOrBoth::Left(Err(e)) => Err(e), | |||
|
531 | EitherOrBoth::Right(Err(e)) => Err(e), | |||
|
532 | EitherOrBoth::Both(Err(e), _) => Err(e), | |||
|
533 | EitherOrBoth::Both(_, Err(e)) => Err(e), | |||
|
534 | EitherOrBoth::Left(Ok(v)) => Ok(EitherOrBoth::Left(v)), | |||
|
535 | EitherOrBoth::Right(Ok(v)) => Ok(EitherOrBoth::Right(v)), | |||
|
536 | EitherOrBoth::Both(Ok(v1), Ok(v2)) => { | |||
|
537 | Ok(EitherOrBoth::Both(v1, v2)) | |||
|
538 | } | |||
|
539 | }) | |||
|
540 | } | |||
|
541 | ||||
502 | /// Force the global rayon threadpool to not exceed 16 concurrent threads |
|
542 | /// Force the global rayon threadpool to not exceed 16 concurrent threads | |
503 | /// unless the user has specified a value. |
|
543 | /// unless the user has specified a value. | |
504 | /// This is a stop-gap measure until we figure out why using more than 16 |
|
544 | /// This is a stop-gap measure until we figure out why using more than 16 | |
505 | /// threads makes `status` slower for each additional thread. |
|
545 | /// threads makes `status` slower for each additional thread. | |
506 | /// |
|
546 | /// | |
507 | /// TODO find the underlying cause and fix it, then remove this. |
|
547 | /// TODO find the underlying cause and fix it, then remove this. | |
508 | /// |
|
548 | /// | |
509 | /// # Errors |
|
549 | /// # Errors | |
510 | /// |
|
550 | /// | |
511 | /// Returns an error if the global threadpool has already been initialized if |
|
551 | /// Returns an error if the global threadpool has already been initialized if | |
512 | /// we try to initialize it. |
|
552 | /// we try to initialize it. | |
513 | pub fn cap_default_rayon_threads() -> Result<(), rayon::ThreadPoolBuildError> { |
|
553 | pub fn cap_default_rayon_threads() -> Result<(), rayon::ThreadPoolBuildError> { | |
514 | const THREAD_CAP: usize = 16; |
|
554 | const THREAD_CAP: usize = 16; | |
515 |
|
555 | |||
516 | if std::env::var("RAYON_NUM_THREADS").is_err() { |
|
556 | if std::env::var("RAYON_NUM_THREADS").is_err() { | |
517 | let available_parallelism = std::thread::available_parallelism() |
|
557 | let available_parallelism = std::thread::available_parallelism() | |
518 | .map(usize::from) |
|
558 | .map(usize::from) | |
519 | .unwrap_or(1); |
|
559 | .unwrap_or(1); | |
520 | let new_thread_count = THREAD_CAP.min(available_parallelism); |
|
560 | let new_thread_count = THREAD_CAP.min(available_parallelism); | |
521 | let res = rayon::ThreadPoolBuilder::new() |
|
561 | let res = rayon::ThreadPoolBuilder::new() | |
522 | .num_threads(new_thread_count) |
|
562 | .num_threads(new_thread_count) | |
523 | .build_global(); |
|
563 | .build_global(); | |
524 | if res.is_ok() { |
|
564 | if res.is_ok() { | |
525 | log::trace!( |
|
565 | log::trace!( | |
526 | "Capped the rayon threadpool to {new_thread_count} threads", |
|
566 | "Capped the rayon threadpool to {new_thread_count} threads", | |
527 | ); |
|
567 | ); | |
528 | } |
|
568 | } | |
529 | return res; |
|
569 | return res; | |
530 | } |
|
570 | } | |
531 | Ok(()) |
|
571 | Ok(()) | |
532 | } |
|
572 | } |
General Comments 0
You need to be logged in to leave comments.
Login now