##// END OF EJS Templates
hg-core: remove unneeded util now that we support Rust 1.42+
Raphaël Gomès -
r50527:048f829a default
parent child Browse files
Show More
@@ -1,108 +1,108 b''
1 1 use crate::errors::{HgError, HgResultExt};
2 2 use bytes_cast::{unaligned, BytesCast};
3 3 use memmap2::Mmap;
4 4 use std::path::{Path, PathBuf};
5 5
6 use crate::utils::strip_suffix;
7 6 use crate::vfs::Vfs;
8 7
9 8 const ONDISK_VERSION: u8 = 1;
10 9
11 10 pub(super) struct NodeMapDocket {
12 11 pub data_length: usize,
13 12 // TODO: keep here more of the data from `parse()` when we need it
14 13 }
15 14
16 15 #[derive(BytesCast)]
17 16 #[repr(C)]
18 17 struct DocketHeader {
19 18 uid_size: u8,
20 19 _tip_rev: unaligned::U64Be,
21 20 data_length: unaligned::U64Be,
22 21 _data_unused: unaligned::U64Be,
23 22 tip_node_size: unaligned::U64Be,
24 23 }
25 24
26 25 impl NodeMapDocket {
27 26 /// Return `Ok(None)` when the caller should proceed without a persistent
28 27 /// nodemap:
29 28 ///
30 29 /// * This revlog does not have a `.n` docket file (it is not generated for
31 30 /// small revlogs), or
32 31 /// * The docket has an unsupported version number (repositories created by
33 32 /// later hg, maybe that should be a requirement instead?), or
34 33 /// * The docket file points to a missing (likely deleted) data file (this
35 34 /// can happen in a rare race condition).
36 35 pub fn read_from_file(
37 36 store_vfs: &Vfs,
38 37 index_path: &Path,
39 38 ) -> Result<Option<(Self, Mmap)>, HgError> {
40 39 let docket_path = index_path.with_extension("n");
41 40 let docket_bytes = if let Some(bytes) =
42 41 store_vfs.read(&docket_path).io_not_found_as_none()?
43 42 {
44 43 bytes
45 44 } else {
46 45 return Ok(None);
47 46 };
48 47
49 48 let input = if let Some((&ONDISK_VERSION, rest)) =
50 49 docket_bytes.split_first()
51 50 {
52 51 rest
53 52 } else {
54 53 return Ok(None);
55 54 };
56 55
57 56 /// Treat any error as a parse error
58 57 fn parse<T, E>(result: Result<T, E>) -> Result<T, HgError> {
59 58 result
60 59 .map_err(|_| HgError::corrupted("nodemap docket parse error"))
61 60 }
62 61
63 62 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
64 63 let uid_size = header.uid_size as usize;
65 64 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
66 65 // systems?
67 66 let tip_node_size = header.tip_node_size.get() as usize;
68 67 let data_length = header.data_length.get() as usize;
69 68 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
70 69 let (_tip_node, _rest) =
71 70 parse(u8::slice_from_bytes(rest, tip_node_size))?;
72 71 let uid = parse(std::str::from_utf8(uid))?;
73 72 let docket = NodeMapDocket { data_length };
74 73
75 74 let data_path = rawdata_path(&docket_path, uid);
76 75 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
77 76 // config is false?
78 77 if let Some(mmap) =
79 78 store_vfs.mmap_open(&data_path).io_not_found_as_none()?
80 79 {
81 80 if mmap.len() >= data_length {
82 81 Ok(Some((docket, mmap)))
83 82 } else {
84 83 Err(HgError::corrupted("persistent nodemap too short"))
85 84 }
86 85 } else {
87 86 // Even if .hg/requires opted in, some revlogs are deemed small
88 87 // enough to not need a persistent nodemap.
89 88 Ok(None)
90 89 }
91 90 }
92 91 }
93 92
94 93 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
95 94 let docket_name = docket_path
96 95 .file_name()
97 96 .expect("expected a base name")
98 97 .to_str()
99 98 .expect("expected an ASCII file name in the store");
100 let prefix = strip_suffix(docket_name, ".n.a")
101 .or_else(|| strip_suffix(docket_name, ".n"))
99 let prefix = docket_name
100 .strip_suffix(".n.a")
101 .or_else(|| docket_name.strip_suffix(".n"))
102 102 .expect("expected docket path in .n or .n.a");
103 103 let name = format!("{}-{}.nd", prefix, uid);
104 104 docket_path
105 105 .parent()
106 106 .expect("expected a non-root path")
107 107 .join(name)
108 108 }
@@ -1,490 +1,481 b''
1 1 // utils module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Contains useful functions, traits, structs, etc. for use in core.
9 9
10 10 use crate::errors::{HgError, IoErrorContext};
11 11 use crate::utils::hg_path::HgPath;
12 12 use im_rc::ordmap::DiffItem;
13 13 use im_rc::ordmap::OrdMap;
14 14 use std::cell::Cell;
15 15 use std::fmt;
16 16 use std::{io::Write, ops::Deref};
17 17
18 18 pub mod files;
19 19 pub mod hg_path;
20 20 pub mod path_auditor;
21 21
22 22 /// Useful until rust/issues/56345 is stable
23 23 ///
24 24 /// # Examples
25 25 ///
26 26 /// ```
27 27 /// use crate::hg::utils::find_slice_in_slice;
28 28 ///
29 29 /// let haystack = b"This is the haystack".to_vec();
30 30 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
31 31 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
32 32 /// ```
33 33 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
34 34 where
35 35 for<'a> &'a [T]: PartialEq,
36 36 {
37 37 slice
38 38 .windows(needle.len())
39 39 .position(|window| window == needle)
40 40 }
41 41
42 42 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
43 43 ///
44 44 /// # Examples
45 45 ///
46 46 /// ```
47 47 /// use crate::hg::utils::replace_slice;
48 48 /// let mut line = b"I hate writing tests!".to_vec();
49 49 /// replace_slice(&mut line, b"hate", b"love");
50 50 /// assert_eq!(
51 51 /// line,
52 52 /// b"I love writing tests!".to_vec()
53 53 /// );
54 54 /// ```
55 55 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
56 56 where
57 57 T: Clone + PartialEq,
58 58 {
59 59 if buf.len() < from.len() || from.len() != to.len() {
60 60 return;
61 61 }
62 62 for i in 0..=buf.len() - from.len() {
63 63 if buf[i..].starts_with(from) {
64 64 buf[i..(i + from.len())].clone_from_slice(to);
65 65 }
66 66 }
67 67 }
68 68
69 69 pub trait SliceExt {
70 70 fn trim_end(&self) -> &Self;
71 71 fn trim_start(&self) -> &Self;
72 72 fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
73 73 fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
74 74 fn trim(&self) -> &Self;
75 75 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
76 76 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
77 77 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
78 78 }
79 79
80 80 impl SliceExt for [u8] {
81 81 fn trim_end(&self) -> &[u8] {
82 82 self.trim_end_matches(|byte| byte.is_ascii_whitespace())
83 83 }
84 84
85 85 fn trim_start(&self) -> &[u8] {
86 86 self.trim_start_matches(|byte| byte.is_ascii_whitespace())
87 87 }
88 88
89 89 fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
90 90 if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
91 91 &self[..=last]
92 92 } else {
93 93 &[]
94 94 }
95 95 }
96 96
97 97 fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
98 98 if let Some(first) = self.iter().position(|&byte| !f(byte)) {
99 99 &self[first..]
100 100 } else {
101 101 &[]
102 102 }
103 103 }
104 104
105 105 /// ```
106 106 /// use hg::utils::SliceExt;
107 107 /// assert_eq!(
108 108 /// b" to trim ".trim(),
109 109 /// b"to trim"
110 110 /// );
111 111 /// assert_eq!(
112 112 /// b"to trim ".trim(),
113 113 /// b"to trim"
114 114 /// );
115 115 /// assert_eq!(
116 116 /// b" to trim".trim(),
117 117 /// b"to trim"
118 118 /// );
119 119 /// ```
120 120 fn trim(&self) -> &[u8] {
121 121 self.trim_start().trim_end()
122 122 }
123 123
124 124 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
125 125 if self.starts_with(needle) {
126 126 Some(&self[needle.len()..])
127 127 } else {
128 128 None
129 129 }
130 130 }
131 131
132 132 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
133 133 let mut iter = self.splitn(2, |&byte| byte == separator);
134 134 let a = iter.next()?;
135 135 let b = iter.next()?;
136 136 Some((a, b))
137 137 }
138 138
139 139 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
140 140 if let Some(pos) = find_slice_in_slice(self, separator) {
141 141 Some((&self[..pos], &self[pos + separator.len()..]))
142 142 } else {
143 143 None
144 144 }
145 145 }
146 146 }
147 147
148 148 pub trait Escaped {
149 149 /// Return bytes escaped for display to the user
150 150 fn escaped_bytes(&self) -> Vec<u8>;
151 151 }
152 152
153 153 impl Escaped for u8 {
154 154 fn escaped_bytes(&self) -> Vec<u8> {
155 155 let mut acc = vec![];
156 156 match self {
157 157 c @ b'\'' | c @ b'\\' => {
158 158 acc.push(b'\\');
159 159 acc.push(*c);
160 160 }
161 161 b'\t' => {
162 162 acc.extend(br"\\t");
163 163 }
164 164 b'\n' => {
165 165 acc.extend(br"\\n");
166 166 }
167 167 b'\r' => {
168 168 acc.extend(br"\\r");
169 169 }
170 170 c if (*c < b' ' || *c >= 127) => {
171 171 write!(acc, "\\x{:x}", self).unwrap();
172 172 }
173 173 c => {
174 174 acc.push(*c);
175 175 }
176 176 }
177 177 acc
178 178 }
179 179 }
180 180
181 181 impl<'a, T: Escaped> Escaped for &'a [T] {
182 182 fn escaped_bytes(&self) -> Vec<u8> {
183 183 self.iter().flat_map(Escaped::escaped_bytes).collect()
184 184 }
185 185 }
186 186
187 187 impl<T: Escaped> Escaped for Vec<T> {
188 188 fn escaped_bytes(&self) -> Vec<u8> {
189 189 self.deref().escaped_bytes()
190 190 }
191 191 }
192 192
193 193 impl<'a> Escaped for &'a HgPath {
194 194 fn escaped_bytes(&self) -> Vec<u8> {
195 195 self.as_bytes().escaped_bytes()
196 196 }
197 197 }
198 198
199 // TODO: use the str method when we require Rust 1.45
200 pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
201 if s.ends_with(suffix) {
202 Some(&s[..s.len() - suffix.len()])
203 } else {
204 None
205 }
206 }
207
208 199 #[cfg(unix)]
209 200 pub fn shell_quote(value: &[u8]) -> Vec<u8> {
210 201 // TODO: use the `matches!` macro now that Rust 1.42+ is supported
211 202 if value.iter().all(|&byte| match byte {
212 203 b'a'..=b'z'
213 204 | b'A'..=b'Z'
214 205 | b'0'..=b'9'
215 206 | b'.'
216 207 | b'_'
217 208 | b'/'
218 209 | b'+'
219 210 | b'-' => true,
220 211 _ => false,
221 212 }) {
222 213 value.to_owned()
223 214 } else {
224 215 let mut quoted = Vec::with_capacity(value.len() + 2);
225 216 quoted.push(b'\'');
226 217 for &byte in value {
227 218 if byte == b'\'' {
228 219 quoted.push(b'\\');
229 220 }
230 221 quoted.push(byte);
231 222 }
232 223 quoted.push(b'\'');
233 224 quoted
234 225 }
235 226 }
236 227
237 228 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
238 229 std::env::current_dir().map_err(|error| HgError::IoError {
239 230 error,
240 231 context: IoErrorContext::CurrentDir,
241 232 })
242 233 }
243 234
244 235 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
245 236 std::env::current_exe().map_err(|error| HgError::IoError {
246 237 error,
247 238 context: IoErrorContext::CurrentExe,
248 239 })
249 240 }
250 241
251 242 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
252 243 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
253 244 lazy_static::lazy_static! {
254 245 /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
255 246 /// The `x` makes whitespace ignored.
256 247 /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
257 248 static ref VAR_RE: regex::bytes::Regex =
258 249 regex::bytes::Regex::new(r"(?x-u)
259 250 \$
260 251 (?:
261 252 (\w+)
262 253 |
263 254 \{
264 255 ([^}]*)
265 256 \}
266 257 )
267 258 ").unwrap();
268 259 }
269 260 VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
270 261 let var_name = files::get_os_str_from_bytes(
271 262 captures
272 263 .get(1)
273 264 .or_else(|| captures.get(2))
274 265 .expect("either side of `|` must participate in match")
275 266 .as_bytes(),
276 267 );
277 268 std::env::var_os(var_name)
278 269 .map(files::get_bytes_from_os_str)
279 270 .unwrap_or_else(|| {
280 271 // Referencing an environment variable that does not exist.
281 272 // Leave the $FOO reference as-is.
282 273 captures[0].to_owned()
283 274 })
284 275 })
285 276 }
286 277
287 278 #[test]
288 279 fn test_expand_vars() {
289 280 // Modifying process-global state in a test isn’t great,
290 281 // but hopefully this won’t collide with anything.
291 282 std::env::set_var("TEST_EXPAND_VAR", "1");
292 283 assert_eq!(
293 284 expand_vars(b"before/$TEST_EXPAND_VAR/after"),
294 285 &b"before/1/after"[..]
295 286 );
296 287 assert_eq!(
297 288 expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
298 289 &b"before111after"[..]
299 290 );
300 291 let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
301 292 assert_eq!(expand_vars(s), &s[..]);
302 293 }
303 294
304 295 pub(crate) enum MergeResult<V> {
305 296 UseLeftValue,
306 297 UseRightValue,
307 298 UseNewValue(V),
308 299 }
309 300
310 301 /// Return the union of the two given maps,
311 302 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
312 303 /// both.
313 304 ///
314 305 /// CC https://github.com/bodil/im-rs/issues/166
315 306 pub(crate) fn ordmap_union_with_merge<K, V>(
316 307 left: OrdMap<K, V>,
317 308 right: OrdMap<K, V>,
318 309 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
319 310 ) -> OrdMap<K, V>
320 311 where
321 312 K: Clone + Ord,
322 313 V: Clone + PartialEq,
323 314 {
324 315 if left.ptr_eq(&right) {
325 316 // One of the two maps is an unmodified clone of the other
326 317 left
327 318 } else if left.len() / 2 > right.len() {
328 319 // When two maps have different sizes,
329 320 // their size difference is a lower bound on
330 321 // how many keys of the larger map are not also in the smaller map.
331 322 // This in turn is a lower bound on the number of differences in
332 323 // `OrdMap::diff` and the "amount of work" that would be done
333 324 // by `ordmap_union_with_merge_by_diff`.
334 325 //
335 326 // Here `left` is more than twice the size of `right`,
336 327 // so the number of differences is more than the total size of
337 328 // `right`. Therefore an algorithm based on iterating `right`
338 329 // is more efficient.
339 330 //
340 331 // This helps a lot when a tiny (or empty) map is merged
341 332 // with a large one.
342 333 ordmap_union_with_merge_by_iter(left, right, merge)
343 334 } else if left.len() < right.len() / 2 {
344 335 // Same as above but with `left` and `right` swapped
345 336 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
346 337 // Also swapped in `merge` arguments:
347 338 match merge(key, b, a) {
348 339 MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
349 340 // … and swap back in `merge` result:
350 341 MergeResult::UseLeftValue => MergeResult::UseRightValue,
351 342 MergeResult::UseRightValue => MergeResult::UseLeftValue,
352 343 }
353 344 })
354 345 } else {
355 346 // For maps of similar size, use the algorithm based on `OrdMap::diff`
356 347 ordmap_union_with_merge_by_diff(left, right, merge)
357 348 }
358 349 }
359 350
360 351 /// Efficient if `right` is much smaller than `left`
361 352 fn ordmap_union_with_merge_by_iter<K, V>(
362 353 mut left: OrdMap<K, V>,
363 354 right: OrdMap<K, V>,
364 355 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
365 356 ) -> OrdMap<K, V>
366 357 where
367 358 K: Clone + Ord,
368 359 V: Clone,
369 360 {
370 361 for (key, right_value) in right {
371 362 match left.get(&key) {
372 363 None => {
373 364 left.insert(key, right_value);
374 365 }
375 366 Some(left_value) => match merge(&key, left_value, &right_value) {
376 367 MergeResult::UseLeftValue => {}
377 368 MergeResult::UseRightValue => {
378 369 left.insert(key, right_value);
379 370 }
380 371 MergeResult::UseNewValue(new_value) => {
381 372 left.insert(key, new_value);
382 373 }
383 374 },
384 375 }
385 376 }
386 377 left
387 378 }
388 379
389 380 /// Fallback when both maps are of similar size
390 381 fn ordmap_union_with_merge_by_diff<K, V>(
391 382 mut left: OrdMap<K, V>,
392 383 mut right: OrdMap<K, V>,
393 384 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
394 385 ) -> OrdMap<K, V>
395 386 where
396 387 K: Clone + Ord,
397 388 V: Clone + PartialEq,
398 389 {
399 390 // (key, value) pairs that would need to be inserted in either map
400 391 // in order to turn it into the union.
401 392 //
402 393 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
403 394 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
404 395 // with `left_updates` only borrowing from `right` and `right_updates` from
405 396 // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
406 397 //
407 398 // This would allow moving all `.clone()` calls to after we’ve decided
408 399 // which of `right_updates` or `left_updates` to use
409 400 // (value ones becoming `Cow::into_owned`),
410 401 // and avoid making clones we don’t end up using.
411 402 let mut left_updates = Vec::new();
412 403 let mut right_updates = Vec::new();
413 404
414 405 for difference in left.diff(&right) {
415 406 match difference {
416 407 DiffItem::Add(key, value) => {
417 408 left_updates.push((key.clone(), value.clone()))
418 409 }
419 410 DiffItem::Remove(key, value) => {
420 411 right_updates.push((key.clone(), value.clone()))
421 412 }
422 413 DiffItem::Update {
423 414 old: (key, left_value),
424 415 new: (_, right_value),
425 416 } => match merge(key, left_value, right_value) {
426 417 MergeResult::UseLeftValue => {
427 418 right_updates.push((key.clone(), left_value.clone()))
428 419 }
429 420 MergeResult::UseRightValue => {
430 421 left_updates.push((key.clone(), right_value.clone()))
431 422 }
432 423 MergeResult::UseNewValue(new_value) => {
433 424 left_updates.push((key.clone(), new_value.clone()));
434 425 right_updates.push((key.clone(), new_value))
435 426 }
436 427 },
437 428 }
438 429 }
439 430 if left_updates.len() < right_updates.len() {
440 431 for (key, value) in left_updates {
441 432 left.insert(key, value);
442 433 }
443 434 left
444 435 } else {
445 436 for (key, value) in right_updates {
446 437 right.insert(key, value);
447 438 }
448 439 right
449 440 }
450 441 }
451 442
452 443 /// Join items of the iterable with the given separator, similar to Python’s
453 444 /// `separator.join(iter)`.
454 445 ///
455 446 /// Formatting the return value consumes the iterator.
456 447 /// Formatting it again will produce an empty string.
457 448 pub fn join_display(
458 449 iter: impl IntoIterator<Item = impl fmt::Display>,
459 450 separator: impl fmt::Display,
460 451 ) -> impl fmt::Display {
461 452 JoinDisplay {
462 453 iter: Cell::new(Some(iter.into_iter())),
463 454 separator,
464 455 }
465 456 }
466 457
467 458 struct JoinDisplay<I, S> {
468 459 iter: Cell<Option<I>>,
469 460 separator: S,
470 461 }
471 462
472 463 impl<I, T, S> fmt::Display for JoinDisplay<I, S>
473 464 where
474 465 I: Iterator<Item = T>,
475 466 T: fmt::Display,
476 467 S: fmt::Display,
477 468 {
478 469 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
479 470 if let Some(mut iter) = self.iter.take() {
480 471 if let Some(first) = iter.next() {
481 472 first.fmt(f)?;
482 473 }
483 474 for value in iter {
484 475 self.separator.fmt(f)?;
485 476 value.fmt(f)?;
486 477 }
487 478 }
488 479 Ok(())
489 480 }
490 481 }
General Comments 0
You need to be logged in to leave comments. Login now