##// END OF EJS Templates
rust: Generalize the `trim_end_newlines` utility of byte strings...
Simon Sapin -
r48761:696abab1 default
parent child Browse files
Show More
@@ -1,315 +1,316
1 1 use crate::config::{Config, ConfigError, ConfigParseError};
2 2 use crate::errors::{HgError, IoErrorContext, IoResultExt};
3 3 use crate::exit_codes;
4 4 use crate::requirements;
5 5 use crate::utils::files::get_path_from_bytes;
6 6 use crate::utils::SliceExt;
7 7 use memmap::{Mmap, MmapOptions};
8 8 use std::collections::HashSet;
9 9 use std::io::ErrorKind;
10 10 use std::path::{Path, PathBuf};
11 11
12 12 /// A repository on disk
13 13 pub struct Repo {
14 14 working_directory: PathBuf,
15 15 dot_hg: PathBuf,
16 16 store: PathBuf,
17 17 requirements: HashSet<String>,
18 18 config: Config,
19 19 }
20 20
21 21 #[derive(Debug, derive_more::From)]
22 22 pub enum RepoError {
23 23 NotFound {
24 24 at: PathBuf,
25 25 },
26 26 #[from]
27 27 ConfigParseError(ConfigParseError),
28 28 #[from]
29 29 Other(HgError),
30 30 }
31 31
32 32 impl From<ConfigError> for RepoError {
33 33 fn from(error: ConfigError) -> Self {
34 34 match error {
35 35 ConfigError::Parse(error) => error.into(),
36 36 ConfigError::Other(error) => error.into(),
37 37 }
38 38 }
39 39 }
40 40
41 41 /// Filesystem access abstraction for the contents of a given "base" diretory
42 42 #[derive(Clone, Copy)]
43 43 pub struct Vfs<'a> {
44 44 pub(crate) base: &'a Path,
45 45 }
46 46
47 47 impl Repo {
48 48 /// tries to find nearest repository root in current working directory or
49 49 /// its ancestors
50 50 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
51 51 let current_directory = crate::utils::current_dir()?;
52 52 // ancestors() is inclusive: it first yields `current_directory`
53 53 // as-is.
54 54 for ancestor in current_directory.ancestors() {
55 55 if is_dir(ancestor.join(".hg"))? {
56 56 return Ok(ancestor.to_path_buf());
57 57 }
58 58 }
59 59 return Err(RepoError::NotFound {
60 60 at: current_directory,
61 61 });
62 62 }
63 63
64 64 /// Find a repository, either at the given path (which must contain a `.hg`
65 65 /// sub-directory) or by searching the current directory and its
66 66 /// ancestors.
67 67 ///
68 68 /// A method with two very different "modes" like this usually a code smell
69 69 /// to make two methods instead, but in this case an `Option` is what rhg
70 70 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
71 71 /// Having two methods would just move that `if` to almost all callers.
72 72 pub fn find(
73 73 config: &Config,
74 74 explicit_path: Option<PathBuf>,
75 75 ) -> Result<Self, RepoError> {
76 76 if let Some(root) = explicit_path {
77 77 if is_dir(root.join(".hg"))? {
78 78 Self::new_at_path(root.to_owned(), config)
79 79 } else if is_file(&root)? {
80 80 Err(HgError::unsupported("bundle repository").into())
81 81 } else {
82 82 Err(RepoError::NotFound {
83 83 at: root.to_owned(),
84 84 })
85 85 }
86 86 } else {
87 87 let root = Self::find_repo_root()?;
88 88 Self::new_at_path(root, config)
89 89 }
90 90 }
91 91
92 92 /// To be called after checking that `.hg` is a sub-directory
93 93 fn new_at_path(
94 94 working_directory: PathBuf,
95 95 config: &Config,
96 96 ) -> Result<Self, RepoError> {
97 97 let dot_hg = working_directory.join(".hg");
98 98
99 99 let mut repo_config_files = Vec::new();
100 100 repo_config_files.push(dot_hg.join("hgrc"));
101 101 repo_config_files.push(dot_hg.join("hgrc-not-shared"));
102 102
103 103 let hg_vfs = Vfs { base: &dot_hg };
104 104 let mut reqs = requirements::load_if_exists(hg_vfs)?;
105 105 let relative =
106 106 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
107 107 let shared =
108 108 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
109 109
110 110 // From `mercurial/localrepo.py`:
111 111 //
112 112 // if .hg/requires contains the sharesafe requirement, it means
113 113 // there exists a `.hg/store/requires` too and we should read it
114 114 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
115 115 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
116 116 // is not present, refer checkrequirementscompat() for that
117 117 //
118 118 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
119 119 // repository was shared the old way. We check the share source
120 120 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
121 121 // current repository needs to be reshared
122 122 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
123 123
124 124 let store_path;
125 125 if !shared {
126 126 store_path = dot_hg.join("store");
127 127 } else {
128 128 let bytes = hg_vfs.read("sharedpath")?;
129 129 let mut shared_path =
130 get_path_from_bytes(bytes.trim_end_newlines()).to_owned();
130 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
131 .to_owned();
131 132 if relative {
132 133 shared_path = dot_hg.join(shared_path)
133 134 }
134 135 if !is_dir(&shared_path)? {
135 136 return Err(HgError::corrupted(format!(
136 137 ".hg/sharedpath points to nonexistent directory {}",
137 138 shared_path.display()
138 139 ))
139 140 .into());
140 141 }
141 142
142 143 store_path = shared_path.join("store");
143 144
144 145 let source_is_share_safe =
145 146 requirements::load(Vfs { base: &shared_path })?
146 147 .contains(requirements::SHARESAFE_REQUIREMENT);
147 148
148 149 if share_safe && !source_is_share_safe {
149 150 return Err(match config
150 151 .get(b"share", b"safe-mismatch.source-not-safe")
151 152 {
152 153 Some(b"abort") | None => HgError::abort(
153 154 "abort: share source does not support share-safe requirement\n\
154 155 (see `hg help config.format.use-share-safe` for more information)",
155 156 exit_codes::ABORT,
156 157 ),
157 158 _ => HgError::unsupported("share-safe downgrade"),
158 159 }
159 160 .into());
160 161 } else if source_is_share_safe && !share_safe {
161 162 return Err(
162 163 match config.get(b"share", b"safe-mismatch.source-safe") {
163 164 Some(b"abort") | None => HgError::abort(
164 165 "abort: version mismatch: source uses share-safe \
165 166 functionality while the current share does not\n\
166 167 (see `hg help config.format.use-share-safe` for more information)",
167 168 exit_codes::ABORT,
168 169 ),
169 170 _ => HgError::unsupported("share-safe upgrade"),
170 171 }
171 172 .into(),
172 173 );
173 174 }
174 175
175 176 if share_safe {
176 177 repo_config_files.insert(0, shared_path.join("hgrc"))
177 178 }
178 179 }
179 180 if share_safe {
180 181 reqs.extend(requirements::load(Vfs { base: &store_path })?);
181 182 }
182 183
183 184 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
184 185 config.combine_with_repo(&repo_config_files)?
185 186 } else {
186 187 config.clone()
187 188 };
188 189
189 190 let repo = Self {
190 191 requirements: reqs,
191 192 working_directory,
192 193 store: store_path,
193 194 dot_hg,
194 195 config: repo_config,
195 196 };
196 197
197 198 requirements::check(&repo)?;
198 199
199 200 Ok(repo)
200 201 }
201 202
202 203 pub fn working_directory_path(&self) -> &Path {
203 204 &self.working_directory
204 205 }
205 206
206 207 pub fn requirements(&self) -> &HashSet<String> {
207 208 &self.requirements
208 209 }
209 210
210 211 pub fn config(&self) -> &Config {
211 212 &self.config
212 213 }
213 214
214 215 /// For accessing repository files (in `.hg`), except for the store
215 216 /// (`.hg/store`).
216 217 pub fn hg_vfs(&self) -> Vfs<'_> {
217 218 Vfs { base: &self.dot_hg }
218 219 }
219 220
220 221 /// For accessing repository store files (in `.hg/store`)
221 222 pub fn store_vfs(&self) -> Vfs<'_> {
222 223 Vfs { base: &self.store }
223 224 }
224 225
225 226 /// For accessing the working copy
226 227 pub fn working_directory_vfs(&self) -> Vfs<'_> {
227 228 Vfs {
228 229 base: &self.working_directory,
229 230 }
230 231 }
231 232
232 233 pub fn has_dirstate_v2(&self) -> bool {
233 234 self.requirements
234 235 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
235 236 }
236 237
237 238 pub fn dirstate_parents(
238 239 &self,
239 240 ) -> Result<crate::dirstate::DirstateParents, HgError> {
240 241 let dirstate = self.hg_vfs().mmap_open("dirstate")?;
241 242 if dirstate.is_empty() {
242 243 return Ok(crate::dirstate::DirstateParents::NULL);
243 244 }
244 245 let parents = if self.has_dirstate_v2() {
245 246 crate::dirstate_tree::on_disk::read_docket(&dirstate)?.parents()
246 247 } else {
247 248 crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
248 249 .clone()
249 250 };
250 251 Ok(parents)
251 252 }
252 253 }
253 254
254 255 impl Vfs<'_> {
255 256 pub fn join(&self, relative_path: impl AsRef<Path>) -> PathBuf {
256 257 self.base.join(relative_path)
257 258 }
258 259
259 260 pub fn read(
260 261 &self,
261 262 relative_path: impl AsRef<Path>,
262 263 ) -> Result<Vec<u8>, HgError> {
263 264 let path = self.join(relative_path);
264 265 std::fs::read(&path).when_reading_file(&path)
265 266 }
266 267
267 268 pub fn mmap_open(
268 269 &self,
269 270 relative_path: impl AsRef<Path>,
270 271 ) -> Result<Mmap, HgError> {
271 272 let path = self.base.join(relative_path);
272 273 let file = std::fs::File::open(&path).when_reading_file(&path)?;
273 274 // TODO: what are the safety requirements here?
274 275 let mmap = unsafe { MmapOptions::new().map(&file) }
275 276 .when_reading_file(&path)?;
276 277 Ok(mmap)
277 278 }
278 279
279 280 pub fn rename(
280 281 &self,
281 282 relative_from: impl AsRef<Path>,
282 283 relative_to: impl AsRef<Path>,
283 284 ) -> Result<(), HgError> {
284 285 let from = self.join(relative_from);
285 286 let to = self.join(relative_to);
286 287 std::fs::rename(&from, &to)
287 288 .with_context(|| IoErrorContext::RenamingFile { from, to })
288 289 }
289 290 }
290 291
291 292 fn fs_metadata(
292 293 path: impl AsRef<Path>,
293 294 ) -> Result<Option<std::fs::Metadata>, HgError> {
294 295 let path = path.as_ref();
295 296 match std::fs::metadata(path) {
296 297 Ok(meta) => Ok(Some(meta)),
297 298 Err(error) => match error.kind() {
298 299 // TODO: when we require a Rust version where `NotADirectory` is
299 300 // stable, invert this logic and return None for it and `NotFound`
300 301 // and propagate any other error.
301 302 ErrorKind::PermissionDenied => Err(error).with_context(|| {
302 303 IoErrorContext::ReadingMetadata(path.to_owned())
303 304 }),
304 305 _ => Ok(None),
305 306 },
306 307 }
307 308 }
308 309
309 310 fn is_dir(path: impl AsRef<Path>) -> Result<bool, HgError> {
310 311 Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_dir()))
311 312 }
312 313
313 314 fn is_file(path: impl AsRef<Path>) -> Result<bool, HgError> {
314 315 Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_file()))
315 316 }
@@ -1,483 +1,481
1 1 // utils module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Contains useful functions, traits, structs, etc. for use in core.
9 9
10 10 use crate::errors::{HgError, IoErrorContext};
11 11 use crate::utils::hg_path::HgPath;
12 12 use im_rc::ordmap::DiffItem;
13 13 use im_rc::ordmap::OrdMap;
14 14 use std::cell::Cell;
15 15 use std::fmt;
16 16 use std::{io::Write, ops::Deref};
17 17
18 18 pub mod files;
19 19 pub mod hg_path;
20 20 pub mod path_auditor;
21 21
22 22 /// Useful until rust/issues/56345 is stable
23 23 ///
24 24 /// # Examples
25 25 ///
26 26 /// ```
27 27 /// use crate::hg::utils::find_slice_in_slice;
28 28 ///
29 29 /// let haystack = b"This is the haystack".to_vec();
30 30 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
31 31 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
32 32 /// ```
33 33 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
34 34 where
35 35 for<'a> &'a [T]: PartialEq,
36 36 {
37 37 slice
38 38 .windows(needle.len())
39 39 .position(|window| window == needle)
40 40 }
41 41
42 42 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
43 43 ///
44 44 /// # Examples
45 45 ///
46 46 /// ```
47 47 /// use crate::hg::utils::replace_slice;
48 48 /// let mut line = b"I hate writing tests!".to_vec();
49 49 /// replace_slice(&mut line, b"hate", b"love");
50 50 /// assert_eq!(
51 51 /// line,
52 52 /// b"I love writing tests!".to_vec()
53 53 /// );
54 54 /// ```
55 55 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
56 56 where
57 57 T: Clone + PartialEq,
58 58 {
59 59 if buf.len() < from.len() || from.len() != to.len() {
60 60 return;
61 61 }
62 62 for i in 0..=buf.len() - from.len() {
63 63 if buf[i..].starts_with(from) {
64 64 buf[i..(i + from.len())].clone_from_slice(to);
65 65 }
66 66 }
67 67 }
68 68
69 69 pub trait SliceExt {
70 fn trim_end_newlines(&self) -> &Self;
71 70 fn trim_end(&self) -> &Self;
72 71 fn trim_start(&self) -> &Self;
72 fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
73 fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
73 74 fn trim(&self) -> &Self;
74 75 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
75 76 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
76 77 }
77 78
78 #[allow(clippy::trivially_copy_pass_by_ref)]
79 fn is_not_whitespace(c: &u8) -> bool {
80 !(*c as char).is_whitespace()
81 }
79 impl SliceExt for [u8] {
80 fn trim_end(&self) -> &[u8] {
81 self.trim_end_matches(|byte| byte.is_ascii_whitespace())
82 }
82 83
83 impl SliceExt for [u8] {
84 fn trim_end_newlines(&self) -> &[u8] {
85 if let Some(last) = self.iter().rposition(|&byte| byte != b'\n') {
84 fn trim_start(&self) -> &[u8] {
85 self.trim_start_matches(|byte| byte.is_ascii_whitespace())
86 }
87
88 fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
89 if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
86 90 &self[..=last]
87 91 } else {
88 92 &[]
89 93 }
90 94 }
91 fn trim_end(&self) -> &[u8] {
92 if let Some(last) = self.iter().rposition(is_not_whitespace) {
93 &self[..=last]
94 } else {
95 &[]
96 }
97 }
98 fn trim_start(&self) -> &[u8] {
99 if let Some(first) = self.iter().position(is_not_whitespace) {
95
96 fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
97 if let Some(first) = self.iter().position(|&byte| !f(byte)) {
100 98 &self[first..]
101 99 } else {
102 100 &[]
103 101 }
104 102 }
105 103
106 104 /// ```
107 105 /// use hg::utils::SliceExt;
108 106 /// assert_eq!(
109 107 /// b" to trim ".trim(),
110 108 /// b"to trim"
111 109 /// );
112 110 /// assert_eq!(
113 111 /// b"to trim ".trim(),
114 112 /// b"to trim"
115 113 /// );
116 114 /// assert_eq!(
117 115 /// b" to trim".trim(),
118 116 /// b"to trim"
119 117 /// );
120 118 /// ```
121 119 fn trim(&self) -> &[u8] {
122 120 self.trim_start().trim_end()
123 121 }
124 122
125 123 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
126 124 if self.starts_with(needle) {
127 125 Some(&self[needle.len()..])
128 126 } else {
129 127 None
130 128 }
131 129 }
132 130
133 131 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
134 132 let mut iter = self.splitn(2, |&byte| byte == separator);
135 133 let a = iter.next()?;
136 134 let b = iter.next()?;
137 135 Some((a, b))
138 136 }
139 137 }
140 138
141 139 pub trait Escaped {
142 140 /// Return bytes escaped for display to the user
143 141 fn escaped_bytes(&self) -> Vec<u8>;
144 142 }
145 143
146 144 impl Escaped for u8 {
147 145 fn escaped_bytes(&self) -> Vec<u8> {
148 146 let mut acc = vec![];
149 147 match self {
150 148 c @ b'\'' | c @ b'\\' => {
151 149 acc.push(b'\\');
152 150 acc.push(*c);
153 151 }
154 152 b'\t' => {
155 153 acc.extend(br"\\t");
156 154 }
157 155 b'\n' => {
158 156 acc.extend(br"\\n");
159 157 }
160 158 b'\r' => {
161 159 acc.extend(br"\\r");
162 160 }
163 161 c if (*c < b' ' || *c >= 127) => {
164 162 write!(acc, "\\x{:x}", self).unwrap();
165 163 }
166 164 c => {
167 165 acc.push(*c);
168 166 }
169 167 }
170 168 acc
171 169 }
172 170 }
173 171
174 172 impl<'a, T: Escaped> Escaped for &'a [T] {
175 173 fn escaped_bytes(&self) -> Vec<u8> {
176 174 self.iter().flat_map(Escaped::escaped_bytes).collect()
177 175 }
178 176 }
179 177
180 178 impl<T: Escaped> Escaped for Vec<T> {
181 179 fn escaped_bytes(&self) -> Vec<u8> {
182 180 self.deref().escaped_bytes()
183 181 }
184 182 }
185 183
186 184 impl<'a> Escaped for &'a HgPath {
187 185 fn escaped_bytes(&self) -> Vec<u8> {
188 186 self.as_bytes().escaped_bytes()
189 187 }
190 188 }
191 189
192 190 // TODO: use the str method when we require Rust 1.45
193 191 pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
194 192 if s.ends_with(suffix) {
195 193 Some(&s[..s.len() - suffix.len()])
196 194 } else {
197 195 None
198 196 }
199 197 }
200 198
201 199 #[cfg(unix)]
202 200 pub fn shell_quote(value: &[u8]) -> Vec<u8> {
203 201 // TODO: Use the `matches!` macro when we require Rust 1.42+
204 202 if value.iter().all(|&byte| match byte {
205 203 b'a'..=b'z'
206 204 | b'A'..=b'Z'
207 205 | b'0'..=b'9'
208 206 | b'.'
209 207 | b'_'
210 208 | b'/'
211 209 | b'+'
212 210 | b'-' => true,
213 211 _ => false,
214 212 }) {
215 213 value.to_owned()
216 214 } else {
217 215 let mut quoted = Vec::with_capacity(value.len() + 2);
218 216 quoted.push(b'\'');
219 217 for &byte in value {
220 218 if byte == b'\'' {
221 219 quoted.push(b'\\');
222 220 }
223 221 quoted.push(byte);
224 222 }
225 223 quoted.push(b'\'');
226 224 quoted
227 225 }
228 226 }
229 227
230 228 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
231 229 std::env::current_dir().map_err(|error| HgError::IoError {
232 230 error,
233 231 context: IoErrorContext::CurrentDir,
234 232 })
235 233 }
236 234
237 235 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
238 236 std::env::current_exe().map_err(|error| HgError::IoError {
239 237 error,
240 238 context: IoErrorContext::CurrentExe,
241 239 })
242 240 }
243 241
244 242 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
245 243 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
246 244 lazy_static::lazy_static! {
247 245 /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
248 246 /// The `x` makes whitespace ignored.
249 247 /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
250 248 static ref VAR_RE: regex::bytes::Regex =
251 249 regex::bytes::Regex::new(r"(?x-u)
252 250 \$
253 251 (?:
254 252 (\w+)
255 253 |
256 254 \{
257 255 ([^}]*)
258 256 \}
259 257 )
260 258 ").unwrap();
261 259 }
262 260 VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
263 261 let var_name = files::get_os_str_from_bytes(
264 262 captures
265 263 .get(1)
266 264 .or_else(|| captures.get(2))
267 265 .expect("either side of `|` must participate in match")
268 266 .as_bytes(),
269 267 );
270 268 std::env::var_os(var_name)
271 269 .map(files::get_bytes_from_os_str)
272 270 .unwrap_or_else(|| {
273 271 // Referencing an environment variable that does not exist.
274 272 // Leave the $FOO reference as-is.
275 273 captures[0].to_owned()
276 274 })
277 275 })
278 276 }
279 277
280 278 #[test]
281 279 fn test_expand_vars() {
282 280 // Modifying process-global state in a test isn’t great,
283 281 // but hopefully this won’t collide with anything.
284 282 std::env::set_var("TEST_EXPAND_VAR", "1");
285 283 assert_eq!(
286 284 expand_vars(b"before/$TEST_EXPAND_VAR/after"),
287 285 &b"before/1/after"[..]
288 286 );
289 287 assert_eq!(
290 288 expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
291 289 &b"before111after"[..]
292 290 );
293 291 let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
294 292 assert_eq!(expand_vars(s), &s[..]);
295 293 }
296 294
297 295 pub(crate) enum MergeResult<V> {
298 296 UseLeftValue,
299 297 UseRightValue,
300 298 UseNewValue(V),
301 299 }
302 300
303 301 /// Return the union of the two given maps,
304 302 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
305 303 /// both.
306 304 ///
307 305 /// CC https://github.com/bodil/im-rs/issues/166
308 306 pub(crate) fn ordmap_union_with_merge<K, V>(
309 307 left: OrdMap<K, V>,
310 308 right: OrdMap<K, V>,
311 309 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
312 310 ) -> OrdMap<K, V>
313 311 where
314 312 K: Clone + Ord,
315 313 V: Clone + PartialEq,
316 314 {
317 315 if left.ptr_eq(&right) {
318 316 // One of the two maps is an unmodified clone of the other
319 317 left
320 318 } else if left.len() / 2 > right.len() {
321 319 // When two maps have different sizes,
322 320 // their size difference is a lower bound on
323 321 // how many keys of the larger map are not also in the smaller map.
324 322 // This in turn is a lower bound on the number of differences in
325 323 // `OrdMap::diff` and the "amount of work" that would be done
326 324 // by `ordmap_union_with_merge_by_diff`.
327 325 //
328 326 // Here `left` is more than twice the size of `right`,
329 327 // so the number of differences is more than the total size of
330 328 // `right`. Therefore an algorithm based on iterating `right`
331 329 // is more efficient.
332 330 //
333 331 // This helps a lot when a tiny (or empty) map is merged
334 332 // with a large one.
335 333 ordmap_union_with_merge_by_iter(left, right, merge)
336 334 } else if left.len() < right.len() / 2 {
337 335 // Same as above but with `left` and `right` swapped
338 336 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
339 337 // Also swapped in `merge` arguments:
340 338 match merge(key, b, a) {
341 339 MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
342 340 // … and swap back in `merge` result:
343 341 MergeResult::UseLeftValue => MergeResult::UseRightValue,
344 342 MergeResult::UseRightValue => MergeResult::UseLeftValue,
345 343 }
346 344 })
347 345 } else {
348 346 // For maps of similar size, use the algorithm based on `OrdMap::diff`
349 347 ordmap_union_with_merge_by_diff(left, right, merge)
350 348 }
351 349 }
352 350
353 351 /// Efficient if `right` is much smaller than `left`
354 352 fn ordmap_union_with_merge_by_iter<K, V>(
355 353 mut left: OrdMap<K, V>,
356 354 right: OrdMap<K, V>,
357 355 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
358 356 ) -> OrdMap<K, V>
359 357 where
360 358 K: Clone + Ord,
361 359 V: Clone,
362 360 {
363 361 for (key, right_value) in right {
364 362 match left.get(&key) {
365 363 None => {
366 364 left.insert(key, right_value);
367 365 }
368 366 Some(left_value) => match merge(&key, left_value, &right_value) {
369 367 MergeResult::UseLeftValue => {}
370 368 MergeResult::UseRightValue => {
371 369 left.insert(key, right_value);
372 370 }
373 371 MergeResult::UseNewValue(new_value) => {
374 372 left.insert(key, new_value);
375 373 }
376 374 },
377 375 }
378 376 }
379 377 left
380 378 }
381 379
382 380 /// Fallback when both maps are of similar size
383 381 fn ordmap_union_with_merge_by_diff<K, V>(
384 382 mut left: OrdMap<K, V>,
385 383 mut right: OrdMap<K, V>,
386 384 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
387 385 ) -> OrdMap<K, V>
388 386 where
389 387 K: Clone + Ord,
390 388 V: Clone + PartialEq,
391 389 {
392 390 // (key, value) pairs that would need to be inserted in either map
393 391 // in order to turn it into the union.
394 392 //
395 393 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
396 394 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
397 395 // with `left_updates` only borrowing from `right` and `right_updates` from
398 396 // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
399 397 //
400 398 // This would allow moving all `.clone()` calls to after we’ve decided
401 399 // which of `right_updates` or `left_updates` to use
402 400 // (value ones becoming `Cow::into_owned`),
403 401 // and avoid making clones we don’t end up using.
404 402 let mut left_updates = Vec::new();
405 403 let mut right_updates = Vec::new();
406 404
407 405 for difference in left.diff(&right) {
408 406 match difference {
409 407 DiffItem::Add(key, value) => {
410 408 left_updates.push((key.clone(), value.clone()))
411 409 }
412 410 DiffItem::Remove(key, value) => {
413 411 right_updates.push((key.clone(), value.clone()))
414 412 }
415 413 DiffItem::Update {
416 414 old: (key, left_value),
417 415 new: (_, right_value),
418 416 } => match merge(key, left_value, right_value) {
419 417 MergeResult::UseLeftValue => {
420 418 right_updates.push((key.clone(), left_value.clone()))
421 419 }
422 420 MergeResult::UseRightValue => {
423 421 left_updates.push((key.clone(), right_value.clone()))
424 422 }
425 423 MergeResult::UseNewValue(new_value) => {
426 424 left_updates.push((key.clone(), new_value.clone()));
427 425 right_updates.push((key.clone(), new_value))
428 426 }
429 427 },
430 428 }
431 429 }
432 430 if left_updates.len() < right_updates.len() {
433 431 for (key, value) in left_updates {
434 432 left.insert(key, value);
435 433 }
436 434 left
437 435 } else {
438 436 for (key, value) in right_updates {
439 437 right.insert(key, value);
440 438 }
441 439 right
442 440 }
443 441 }
444 442
445 443 /// Join items of the iterable with the given separator, similar to Python’s
446 444 /// `separator.join(iter)`.
447 445 ///
448 446 /// Formatting the return value consumes the iterator.
449 447 /// Formatting it again will produce an empty string.
450 448 pub fn join_display(
451 449 iter: impl IntoIterator<Item = impl fmt::Display>,
452 450 separator: impl fmt::Display,
453 451 ) -> impl fmt::Display {
454 452 JoinDisplay {
455 453 iter: Cell::new(Some(iter.into_iter())),
456 454 separator,
457 455 }
458 456 }
459 457
460 458 struct JoinDisplay<I, S> {
461 459 iter: Cell<Option<I>>,
462 460 separator: S,
463 461 }
464 462
465 463 impl<I, T, S> fmt::Display for JoinDisplay<I, S>
466 464 where
467 465 I: Iterator<Item = T>,
468 466 T: fmt::Display,
469 467 S: fmt::Display,
470 468 {
471 469 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
472 470 if let Some(mut iter) = self.iter.take() {
473 471 if let Some(first) = iter.next() {
474 472 first.fmt(f)?;
475 473 }
476 474 for value in iter {
477 475 self.separator.fmt(f)?;
478 476 value.fmt(f)?;
479 477 }
480 478 }
481 479 Ok(())
482 480 }
483 481 }
General Comments 0
You need to be logged in to leave comments. Login now