Show More
@@ -1,115 +1,115 | |||||
1 | // list_tracked_files.rs |
|
1 | // list_tracked_files.rs | |
2 | // |
|
2 | // | |
3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
|
3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> | |
4 | // |
|
4 | // | |
5 | // This software may be used and distributed according to the terms of the |
|
5 | // This software may be used and distributed according to the terms of the | |
6 | // GNU General Public License version 2 or any later version. |
|
6 | // GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | use crate::repo::Repo; |
|
8 | use crate::repo::Repo; | |
9 | use crate::revlog::Node; |
|
9 | use crate::revlog::Node; | |
10 | use crate::revlog::RevlogError; |
|
10 | use crate::revlog::RevlogError; | |
11 |
|
11 | |||
12 | use crate::utils::hg_path::HgPath; |
|
12 | use crate::utils::hg_path::HgPath; | |
13 |
|
13 | |||
14 | use crate::errors::HgError; |
|
14 | use crate::errors::HgError; | |
15 | use crate::manifest::Manifest; |
|
15 | use crate::manifest::Manifest; | |
16 | use crate::manifest::ManifestEntry; |
|
16 | use crate::manifest::ManifestEntry; | |
17 | use itertools::put_back; |
|
17 | use itertools::put_back; | |
18 | use itertools::PutBack; |
|
18 | use itertools::PutBack; | |
19 | use std::cmp::Ordering; |
|
19 | use std::cmp::Ordering; | |
20 |
|
20 | |||
21 | pub struct CatOutput<'a> { |
|
21 | pub struct CatOutput<'a> { | |
22 | /// Whether any file in the manifest matched the paths given as CLI |
|
22 | /// Whether any file in the manifest matched the paths given as CLI | |
23 | /// arguments |
|
23 | /// arguments | |
24 | pub found_any: bool, |
|
24 | pub found_any: bool, | |
25 | /// The contents of matching files, in manifest order |
|
25 | /// The contents of matching files, in manifest order | |
26 | pub results: Vec<(&'a HgPath, Vec<u8>)>, |
|
26 | pub results: Vec<(&'a HgPath, Vec<u8>)>, | |
27 | /// Which of the CLI arguments did not match any manifest file |
|
27 | /// Which of the CLI arguments did not match any manifest file | |
28 | pub missing: Vec<&'a HgPath>, |
|
28 | pub missing: Vec<&'a HgPath>, | |
29 | /// The node ID that the given revset was resolved to |
|
29 | /// The node ID that the given revset was resolved to | |
30 | pub node: Node, |
|
30 | pub node: Node, | |
31 | } |
|
31 | } | |
32 |
|
32 | |||
33 | // Find an item in an iterator over a sorted collection. |
|
33 | // Find an item in an iterator over a sorted collection. | |
34 | fn find_item<'a>( |
|
34 | fn find_item<'a>( | |
35 | i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>, |
|
35 | i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>, | |
36 | needle: &HgPath, |
|
36 | needle: &HgPath, | |
37 | ) -> Result<Option<Node>, HgError> { |
|
37 | ) -> Result<Option<Node>, HgError> { | |
38 | loop { |
|
38 | loop { | |
39 | match i.next() { |
|
39 | match i.next() { | |
40 | None => return Ok(None), |
|
40 | None => return Ok(None), | |
41 | Some(result) => { |
|
41 | Some(result) => { | |
42 | let entry = result?; |
|
42 | let entry = result?; | |
43 | match needle.as_bytes().cmp(entry.path.as_bytes()) { |
|
43 | match needle.as_bytes().cmp(entry.path.as_bytes()) { | |
44 | Ordering::Less => { |
|
44 | Ordering::Less => { | |
45 | i.put_back(Ok(entry)); |
|
45 | i.put_back(Ok(entry)); | |
46 | return Ok(None); |
|
46 | return Ok(None); | |
47 | } |
|
47 | } | |
48 | Ordering::Greater => continue, |
|
48 | Ordering::Greater => continue, | |
49 | Ordering::Equal => return Ok(Some(entry.node_id()?)), |
|
49 | Ordering::Equal => return Ok(Some(entry.node_id()?)), | |
50 | } |
|
50 | } | |
51 | } |
|
51 | } | |
52 | } |
|
52 | } | |
53 | } |
|
53 | } | |
54 | } |
|
54 | } | |
55 |
|
55 | |||
56 | // Tuple of (missing, found) paths in the manifest |
|
56 | // Tuple of (missing, found) paths in the manifest | |
57 | type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>); |
|
57 | type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>); | |
58 |
|
58 | |||
59 | fn find_files_in_manifest<'query>( |
|
59 | fn find_files_in_manifest<'query>( | |
60 | manifest: &Manifest, |
|
60 | manifest: &Manifest, | |
61 | query: impl Iterator<Item = &'query HgPath>, |
|
61 | query: impl Iterator<Item = &'query HgPath>, | |
62 | ) -> Result<ManifestQueryResponse<'query>, HgError> { |
|
62 | ) -> Result<ManifestQueryResponse<'query>, HgError> { | |
63 | let mut manifest = put_back(manifest.iter()); |
|
63 | let mut manifest = put_back(manifest.iter()); | |
64 | let mut res = vec![]; |
|
64 | let mut res = vec![]; | |
65 | let mut missing = vec![]; |
|
65 | let mut missing = vec![]; | |
66 |
|
66 | |||
67 | for file in query { |
|
67 | for file in query { | |
68 | match find_item(&mut manifest, file)? { |
|
68 | match find_item(&mut manifest, file)? { | |
69 | None => missing.push(file), |
|
69 | None => missing.push(file), | |
70 | Some(item) => res.push((file, item)), |
|
70 | Some(item) => res.push((file, item)), | |
71 | } |
|
71 | } | |
72 | } |
|
72 | } | |
73 | Ok((res, missing)) |
|
73 | Ok((res, missing)) | |
74 | } |
|
74 | } | |
75 |
|
75 | |||
76 | /// Output the given revision of files |
|
76 | /// Output the given revision of files | |
77 | /// |
|
77 | /// | |
78 | /// * `root`: Repository root |
|
78 | /// * `root`: Repository root | |
79 | /// * `rev`: The revision to cat the files from. |
|
79 | /// * `rev`: The revision to cat the files from. | |
80 | /// * `files`: The files to output. |
|
80 | /// * `files`: The files to output. | |
81 | pub fn cat<'a>( |
|
81 | pub fn cat<'a>( | |
82 | repo: &Repo, |
|
82 | repo: &Repo, | |
83 | revset: &str, |
|
83 | revset: &str, | |
84 | mut files: Vec<&'a HgPath>, |
|
84 | mut files: Vec<&'a HgPath>, | |
85 | ) -> Result<CatOutput<'a>, RevlogError> { |
|
85 | ) -> Result<CatOutput<'a>, RevlogError> { | |
86 | let rev = crate::revset::resolve_single(revset, repo)?; |
|
86 | let rev = crate::revset::resolve_single(revset, repo)?; | |
87 | let manifest = repo.manifest_for_rev(rev)?; |
|
87 | let manifest = repo.manifest_for_rev(rev.into())?; | |
88 | let node = *repo |
|
88 | let node = *repo | |
89 | .changelog()? |
|
89 | .changelog()? | |
90 | .node_from_rev(rev) |
|
90 | .node_from_rev(rev.into()) | |
91 | .expect("should succeed when repo.manifest did"); |
|
91 | .expect("should succeed when repo.manifest did"); | |
92 | let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![]; |
|
92 | let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![]; | |
93 | let mut found_any = false; |
|
93 | let mut found_any = false; | |
94 |
|
94 | |||
95 | files.sort_unstable(); |
|
95 | files.sort_unstable(); | |
96 |
|
96 | |||
97 | let (found, missing) = |
|
97 | let (found, missing) = | |
98 | find_files_in_manifest(&manifest, files.into_iter())?; |
|
98 | find_files_in_manifest(&manifest, files.into_iter())?; | |
99 |
|
99 | |||
100 | for (file_path, file_node) in found { |
|
100 | for (file_path, file_node) in found { | |
101 | found_any = true; |
|
101 | found_any = true; | |
102 | let file_log = repo.filelog(file_path)?; |
|
102 | let file_log = repo.filelog(file_path)?; | |
103 | results.push(( |
|
103 | results.push(( | |
104 | file_path, |
|
104 | file_path, | |
105 | file_log.data_for_node(file_node)?.into_file_data()?, |
|
105 | file_log.data_for_node(file_node)?.into_file_data()?, | |
106 | )); |
|
106 | )); | |
107 | } |
|
107 | } | |
108 |
|
108 | |||
109 | Ok(CatOutput { |
|
109 | Ok(CatOutput { | |
110 | found_any, |
|
110 | found_any, | |
111 | results, |
|
111 | results, | |
112 | missing, |
|
112 | missing, | |
113 | node, |
|
113 | node, | |
114 | }) |
|
114 | }) | |
115 | } |
|
115 | } |
@@ -1,38 +1,38 | |||||
1 | // debugdata.rs |
|
1 | // debugdata.rs | |
2 | // |
|
2 | // | |
3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
|
3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> | |
4 | // |
|
4 | // | |
5 | // This software may be used and distributed according to the terms of the |
|
5 | // This software may be used and distributed according to the terms of the | |
6 | // GNU General Public License version 2 or any later version. |
|
6 | // GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | use crate::repo::Repo; |
|
8 | use crate::repo::Repo; | |
9 | use crate::requirements; |
|
9 | use crate::requirements; | |
10 | use crate::revlog::{Revlog, RevlogError}; |
|
10 | use crate::revlog::{Revlog, RevlogError}; | |
11 |
|
11 | |||
12 | /// Kind of data to debug |
|
12 | /// Kind of data to debug | |
13 | #[derive(Debug, Copy, Clone)] |
|
13 | #[derive(Debug, Copy, Clone)] | |
14 | pub enum DebugDataKind { |
|
14 | pub enum DebugDataKind { | |
15 | Changelog, |
|
15 | Changelog, | |
16 | Manifest, |
|
16 | Manifest, | |
17 | } |
|
17 | } | |
18 |
|
18 | |||
19 | /// Dump the contents data of a revision. |
|
19 | /// Dump the contents data of a revision. | |
20 | pub fn debug_data( |
|
20 | pub fn debug_data( | |
21 | repo: &Repo, |
|
21 | repo: &Repo, | |
22 | revset: &str, |
|
22 | revset: &str, | |
23 | kind: DebugDataKind, |
|
23 | kind: DebugDataKind, | |
24 | ) -> Result<Vec<u8>, RevlogError> { |
|
24 | ) -> Result<Vec<u8>, RevlogError> { | |
25 | let index_file = match kind { |
|
25 | let index_file = match kind { | |
26 | DebugDataKind::Changelog => "00changelog.i", |
|
26 | DebugDataKind::Changelog => "00changelog.i", | |
27 | DebugDataKind::Manifest => "00manifest.i", |
|
27 | DebugDataKind::Manifest => "00manifest.i", | |
28 | }; |
|
28 | }; | |
29 | let use_nodemap = repo |
|
29 | let use_nodemap = repo | |
30 | .requirements() |
|
30 | .requirements() | |
31 | .contains(requirements::NODEMAP_REQUIREMENT); |
|
31 | .contains(requirements::NODEMAP_REQUIREMENT); | |
32 | let revlog = |
|
32 | let revlog = | |
33 | Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?; |
|
33 | Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?; | |
34 | let rev = |
|
34 | let rev = | |
35 | crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?; |
|
35 | crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?; | |
36 | let data = revlog.get_rev_data(rev)?; |
|
36 | let data = revlog.get_rev_data_for_checked_rev(rev)?; | |
37 | Ok(data.into_owned()) |
|
37 | Ok(data.into_owned()) | |
38 | } |
|
38 | } |
@@ -1,45 +1,45 | |||||
1 | // list_tracked_files.rs |
|
1 | // list_tracked_files.rs | |
2 | // |
|
2 | // | |
3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
|
3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> | |
4 | // |
|
4 | // | |
5 | // This software may be used and distributed according to the terms of the |
|
5 | // This software may be used and distributed according to the terms of the | |
6 | // GNU General Public License version 2 or any later version. |
|
6 | // GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | use crate::errors::HgError; |
|
8 | use crate::errors::HgError; | |
9 | use crate::matchers::Matcher; |
|
9 | use crate::matchers::Matcher; | |
10 | use crate::repo::Repo; |
|
10 | use crate::repo::Repo; | |
11 | use crate::revlog::manifest::Manifest; |
|
11 | use crate::revlog::manifest::Manifest; | |
12 | use crate::revlog::RevlogError; |
|
12 | use crate::revlog::RevlogError; | |
13 | use crate::utils::filter_map_results; |
|
13 | use crate::utils::filter_map_results; | |
14 | use crate::utils::hg_path::HgPath; |
|
14 | use crate::utils::hg_path::HgPath; | |
15 |
|
15 | |||
16 | /// List files under Mercurial control at a given revision. |
|
16 | /// List files under Mercurial control at a given revision. | |
17 | pub fn list_rev_tracked_files( |
|
17 | pub fn list_rev_tracked_files( | |
18 | repo: &Repo, |
|
18 | repo: &Repo, | |
19 | revset: &str, |
|
19 | revset: &str, | |
20 | narrow_matcher: Box<dyn Matcher>, |
|
20 | narrow_matcher: Box<dyn Matcher>, | |
21 | ) -> Result<FilesForRev, RevlogError> { |
|
21 | ) -> Result<FilesForRev, RevlogError> { | |
22 | let rev = crate::revset::resolve_single(revset, repo)?; |
|
22 | let rev = crate::revset::resolve_single(revset, repo)?; | |
23 | Ok(FilesForRev { |
|
23 | Ok(FilesForRev { | |
24 | manifest: repo.manifest_for_rev(rev)?, |
|
24 | manifest: repo.manifest_for_rev(rev.into())?, | |
25 | narrow_matcher, |
|
25 | narrow_matcher, | |
26 | }) |
|
26 | }) | |
27 | } |
|
27 | } | |
28 |
|
28 | |||
29 | pub struct FilesForRev { |
|
29 | pub struct FilesForRev { | |
30 | manifest: Manifest, |
|
30 | manifest: Manifest, | |
31 | narrow_matcher: Box<dyn Matcher>, |
|
31 | narrow_matcher: Box<dyn Matcher>, | |
32 | } |
|
32 | } | |
33 |
|
33 | |||
34 | impl FilesForRev { |
|
34 | impl FilesForRev { | |
35 | pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> { |
|
35 | pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> { | |
36 | filter_map_results(self.manifest.iter(), |entry| { |
|
36 | filter_map_results(self.manifest.iter(), |entry| { | |
37 | let path = entry.path; |
|
37 | let path = entry.path; | |
38 | Ok(if self.narrow_matcher.matches(path) { |
|
38 | Ok(if self.narrow_matcher.matches(path) { | |
39 | Some(path) |
|
39 | Some(path) | |
40 | } else { |
|
40 | } else { | |
41 | None |
|
41 | None | |
42 | }) |
|
42 | }) | |
43 | }) |
|
43 | }) | |
44 | } |
|
44 | } | |
45 | } |
|
45 | } |
@@ -1,782 +1,782 | |||||
1 | use crate::changelog::Changelog; |
|
1 | use crate::changelog::Changelog; | |
2 | use crate::config::{Config, ConfigError, ConfigParseError}; |
|
2 | use crate::config::{Config, ConfigError, ConfigParseError}; | |
3 | use crate::dirstate::DirstateParents; |
|
3 | use crate::dirstate::DirstateParents; | |
4 | use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode; |
|
4 | use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode; | |
5 | use crate::dirstate_tree::on_disk::Docket as DirstateDocket; |
|
5 | use crate::dirstate_tree::on_disk::Docket as DirstateDocket; | |
6 | use crate::dirstate_tree::owning::OwningDirstateMap; |
|
6 | use crate::dirstate_tree::owning::OwningDirstateMap; | |
7 | use crate::errors::HgResultExt; |
|
7 | use crate::errors::HgResultExt; | |
8 | use crate::errors::{HgError, IoResultExt}; |
|
8 | use crate::errors::{HgError, IoResultExt}; | |
9 | use crate::lock::{try_with_lock_no_wait, LockError}; |
|
9 | use crate::lock::{try_with_lock_no_wait, LockError}; | |
10 | use crate::manifest::{Manifest, Manifestlog}; |
|
10 | use crate::manifest::{Manifest, Manifestlog}; | |
11 | use crate::revlog::filelog::Filelog; |
|
11 | use crate::revlog::filelog::Filelog; | |
12 | use crate::revlog::RevlogError; |
|
12 | use crate::revlog::RevlogError; | |
13 | use crate::utils::debug::debug_wait_for_file_or_print; |
|
13 | use crate::utils::debug::debug_wait_for_file_or_print; | |
14 | use crate::utils::files::get_path_from_bytes; |
|
14 | use crate::utils::files::get_path_from_bytes; | |
15 | use crate::utils::hg_path::HgPath; |
|
15 | use crate::utils::hg_path::HgPath; | |
16 | use crate::utils::SliceExt; |
|
16 | use crate::utils::SliceExt; | |
17 | use crate::vfs::{is_dir, is_file, Vfs}; |
|
17 | use crate::vfs::{is_dir, is_file, Vfs}; | |
18 | use crate::{requirements, NodePrefix}; |
|
18 | use crate::DirstateError; | |
19 | use crate::{DirstateError, Revision}; |
|
19 | use crate::{requirements, NodePrefix, UncheckedRevision}; | |
20 | use std::cell::{Ref, RefCell, RefMut}; |
|
20 | use std::cell::{Ref, RefCell, RefMut}; | |
21 | use std::collections::HashSet; |
|
21 | use std::collections::HashSet; | |
22 | use std::io::Seek; |
|
22 | use std::io::Seek; | |
23 | use std::io::SeekFrom; |
|
23 | use std::io::SeekFrom; | |
24 | use std::io::Write as IoWrite; |
|
24 | use std::io::Write as IoWrite; | |
25 | use std::path::{Path, PathBuf}; |
|
25 | use std::path::{Path, PathBuf}; | |
26 |
|
26 | |||
27 | const V2_MAX_READ_ATTEMPTS: usize = 5; |
|
27 | const V2_MAX_READ_ATTEMPTS: usize = 5; | |
28 |
|
28 | |||
29 | type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize); |
|
29 | type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize); | |
30 |
|
30 | |||
31 | /// A repository on disk |
|
31 | /// A repository on disk | |
32 | pub struct Repo { |
|
32 | pub struct Repo { | |
33 | working_directory: PathBuf, |
|
33 | working_directory: PathBuf, | |
34 | dot_hg: PathBuf, |
|
34 | dot_hg: PathBuf, | |
35 | store: PathBuf, |
|
35 | store: PathBuf, | |
36 | requirements: HashSet<String>, |
|
36 | requirements: HashSet<String>, | |
37 | config: Config, |
|
37 | config: Config, | |
38 | dirstate_parents: LazyCell<DirstateParents>, |
|
38 | dirstate_parents: LazyCell<DirstateParents>, | |
39 | dirstate_map: LazyCell<OwningDirstateMap>, |
|
39 | dirstate_map: LazyCell<OwningDirstateMap>, | |
40 | changelog: LazyCell<Changelog>, |
|
40 | changelog: LazyCell<Changelog>, | |
41 | manifestlog: LazyCell<Manifestlog>, |
|
41 | manifestlog: LazyCell<Manifestlog>, | |
42 | } |
|
42 | } | |
43 |
|
43 | |||
44 | #[derive(Debug, derive_more::From)] |
|
44 | #[derive(Debug, derive_more::From)] | |
45 | pub enum RepoError { |
|
45 | pub enum RepoError { | |
46 | NotFound { |
|
46 | NotFound { | |
47 | at: PathBuf, |
|
47 | at: PathBuf, | |
48 | }, |
|
48 | }, | |
49 | #[from] |
|
49 | #[from] | |
50 | ConfigParseError(ConfigParseError), |
|
50 | ConfigParseError(ConfigParseError), | |
51 | #[from] |
|
51 | #[from] | |
52 | Other(HgError), |
|
52 | Other(HgError), | |
53 | } |
|
53 | } | |
54 |
|
54 | |||
55 | impl From<ConfigError> for RepoError { |
|
55 | impl From<ConfigError> for RepoError { | |
56 | fn from(error: ConfigError) -> Self { |
|
56 | fn from(error: ConfigError) -> Self { | |
57 | match error { |
|
57 | match error { | |
58 | ConfigError::Parse(error) => error.into(), |
|
58 | ConfigError::Parse(error) => error.into(), | |
59 | ConfigError::Other(error) => error.into(), |
|
59 | ConfigError::Other(error) => error.into(), | |
60 | } |
|
60 | } | |
61 | } |
|
61 | } | |
62 | } |
|
62 | } | |
63 |
|
63 | |||
64 | impl Repo { |
|
64 | impl Repo { | |
65 | /// tries to find nearest repository root in current working directory or |
|
65 | /// tries to find nearest repository root in current working directory or | |
66 | /// its ancestors |
|
66 | /// its ancestors | |
67 | pub fn find_repo_root() -> Result<PathBuf, RepoError> { |
|
67 | pub fn find_repo_root() -> Result<PathBuf, RepoError> { | |
68 | let current_directory = crate::utils::current_dir()?; |
|
68 | let current_directory = crate::utils::current_dir()?; | |
69 | // ancestors() is inclusive: it first yields `current_directory` |
|
69 | // ancestors() is inclusive: it first yields `current_directory` | |
70 | // as-is. |
|
70 | // as-is. | |
71 | for ancestor in current_directory.ancestors() { |
|
71 | for ancestor in current_directory.ancestors() { | |
72 | if is_dir(ancestor.join(".hg"))? { |
|
72 | if is_dir(ancestor.join(".hg"))? { | |
73 | return Ok(ancestor.to_path_buf()); |
|
73 | return Ok(ancestor.to_path_buf()); | |
74 | } |
|
74 | } | |
75 | } |
|
75 | } | |
76 | Err(RepoError::NotFound { |
|
76 | Err(RepoError::NotFound { | |
77 | at: current_directory, |
|
77 | at: current_directory, | |
78 | }) |
|
78 | }) | |
79 | } |
|
79 | } | |
80 |
|
80 | |||
81 | /// Find a repository, either at the given path (which must contain a `.hg` |
|
81 | /// Find a repository, either at the given path (which must contain a `.hg` | |
82 | /// sub-directory) or by searching the current directory and its |
|
82 | /// sub-directory) or by searching the current directory and its | |
83 | /// ancestors. |
|
83 | /// ancestors. | |
84 | /// |
|
84 | /// | |
85 | /// A method with two very different "modes" like this usually a code smell |
|
85 | /// A method with two very different "modes" like this usually a code smell | |
86 | /// to make two methods instead, but in this case an `Option` is what rhg |
|
86 | /// to make two methods instead, but in this case an `Option` is what rhg | |
87 | /// sub-commands get from Clap for the `-R` / `--repository` CLI argument. |
|
87 | /// sub-commands get from Clap for the `-R` / `--repository` CLI argument. | |
88 | /// Having two methods would just move that `if` to almost all callers. |
|
88 | /// Having two methods would just move that `if` to almost all callers. | |
89 | pub fn find( |
|
89 | pub fn find( | |
90 | config: &Config, |
|
90 | config: &Config, | |
91 | explicit_path: Option<PathBuf>, |
|
91 | explicit_path: Option<PathBuf>, | |
92 | ) -> Result<Self, RepoError> { |
|
92 | ) -> Result<Self, RepoError> { | |
93 | if let Some(root) = explicit_path { |
|
93 | if let Some(root) = explicit_path { | |
94 | if is_dir(root.join(".hg"))? { |
|
94 | if is_dir(root.join(".hg"))? { | |
95 | Self::new_at_path(root, config) |
|
95 | Self::new_at_path(root, config) | |
96 | } else if is_file(&root)? { |
|
96 | } else if is_file(&root)? { | |
97 | Err(HgError::unsupported("bundle repository").into()) |
|
97 | Err(HgError::unsupported("bundle repository").into()) | |
98 | } else { |
|
98 | } else { | |
99 | Err(RepoError::NotFound { at: root }) |
|
99 | Err(RepoError::NotFound { at: root }) | |
100 | } |
|
100 | } | |
101 | } else { |
|
101 | } else { | |
102 | let root = Self::find_repo_root()?; |
|
102 | let root = Self::find_repo_root()?; | |
103 | Self::new_at_path(root, config) |
|
103 | Self::new_at_path(root, config) | |
104 | } |
|
104 | } | |
105 | } |
|
105 | } | |
106 |
|
106 | |||
107 | /// To be called after checking that `.hg` is a sub-directory |
|
107 | /// To be called after checking that `.hg` is a sub-directory | |
108 | fn new_at_path( |
|
108 | fn new_at_path( | |
109 | working_directory: PathBuf, |
|
109 | working_directory: PathBuf, | |
110 | config: &Config, |
|
110 | config: &Config, | |
111 | ) -> Result<Self, RepoError> { |
|
111 | ) -> Result<Self, RepoError> { | |
112 | let dot_hg = working_directory.join(".hg"); |
|
112 | let dot_hg = working_directory.join(".hg"); | |
113 |
|
113 | |||
114 | let mut repo_config_files = |
|
114 | let mut repo_config_files = | |
115 | vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")]; |
|
115 | vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")]; | |
116 |
|
116 | |||
117 | let hg_vfs = Vfs { base: &dot_hg }; |
|
117 | let hg_vfs = Vfs { base: &dot_hg }; | |
118 | let mut reqs = requirements::load_if_exists(hg_vfs)?; |
|
118 | let mut reqs = requirements::load_if_exists(hg_vfs)?; | |
119 | let relative = |
|
119 | let relative = | |
120 | reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT); |
|
120 | reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT); | |
121 | let shared = |
|
121 | let shared = | |
122 | reqs.contains(requirements::SHARED_REQUIREMENT) || relative; |
|
122 | reqs.contains(requirements::SHARED_REQUIREMENT) || relative; | |
123 |
|
123 | |||
124 | // From `mercurial/localrepo.py`: |
|
124 | // From `mercurial/localrepo.py`: | |
125 | // |
|
125 | // | |
126 | // if .hg/requires contains the sharesafe requirement, it means |
|
126 | // if .hg/requires contains the sharesafe requirement, it means | |
127 | // there exists a `.hg/store/requires` too and we should read it |
|
127 | // there exists a `.hg/store/requires` too and we should read it | |
128 | // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement |
|
128 | // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement | |
129 | // is present. We never write SHARESAFE_REQUIREMENT for a repo if store |
|
129 | // is present. We never write SHARESAFE_REQUIREMENT for a repo if store | |
130 | // is not present, refer checkrequirementscompat() for that |
|
130 | // is not present, refer checkrequirementscompat() for that | |
131 | // |
|
131 | // | |
132 | // However, if SHARESAFE_REQUIREMENT is not present, it means that the |
|
132 | // However, if SHARESAFE_REQUIREMENT is not present, it means that the | |
133 | // repository was shared the old way. We check the share source |
|
133 | // repository was shared the old way. We check the share source | |
134 | // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the |
|
134 | // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the | |
135 | // current repository needs to be reshared |
|
135 | // current repository needs to be reshared | |
136 | let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT); |
|
136 | let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT); | |
137 |
|
137 | |||
138 | let store_path; |
|
138 | let store_path; | |
139 | if !shared { |
|
139 | if !shared { | |
140 | store_path = dot_hg.join("store"); |
|
140 | store_path = dot_hg.join("store"); | |
141 | } else { |
|
141 | } else { | |
142 | let bytes = hg_vfs.read("sharedpath")?; |
|
142 | let bytes = hg_vfs.read("sharedpath")?; | |
143 | let mut shared_path = |
|
143 | let mut shared_path = | |
144 | get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n')) |
|
144 | get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n')) | |
145 | .to_owned(); |
|
145 | .to_owned(); | |
146 | if relative { |
|
146 | if relative { | |
147 | shared_path = dot_hg.join(shared_path) |
|
147 | shared_path = dot_hg.join(shared_path) | |
148 | } |
|
148 | } | |
149 | if !is_dir(&shared_path)? { |
|
149 | if !is_dir(&shared_path)? { | |
150 | return Err(HgError::corrupted(format!( |
|
150 | return Err(HgError::corrupted(format!( | |
151 | ".hg/sharedpath points to nonexistent directory {}", |
|
151 | ".hg/sharedpath points to nonexistent directory {}", | |
152 | shared_path.display() |
|
152 | shared_path.display() | |
153 | )) |
|
153 | )) | |
154 | .into()); |
|
154 | .into()); | |
155 | } |
|
155 | } | |
156 |
|
156 | |||
157 | store_path = shared_path.join("store"); |
|
157 | store_path = shared_path.join("store"); | |
158 |
|
158 | |||
159 | let source_is_share_safe = |
|
159 | let source_is_share_safe = | |
160 | requirements::load(Vfs { base: &shared_path })? |
|
160 | requirements::load(Vfs { base: &shared_path })? | |
161 | .contains(requirements::SHARESAFE_REQUIREMENT); |
|
161 | .contains(requirements::SHARESAFE_REQUIREMENT); | |
162 |
|
162 | |||
163 | if share_safe != source_is_share_safe { |
|
163 | if share_safe != source_is_share_safe { | |
164 | return Err(HgError::unsupported("share-safe mismatch").into()); |
|
164 | return Err(HgError::unsupported("share-safe mismatch").into()); | |
165 | } |
|
165 | } | |
166 |
|
166 | |||
167 | if share_safe { |
|
167 | if share_safe { | |
168 | repo_config_files.insert(0, shared_path.join("hgrc")) |
|
168 | repo_config_files.insert(0, shared_path.join("hgrc")) | |
169 | } |
|
169 | } | |
170 | } |
|
170 | } | |
171 | if share_safe { |
|
171 | if share_safe { | |
172 | reqs.extend(requirements::load(Vfs { base: &store_path })?); |
|
172 | reqs.extend(requirements::load(Vfs { base: &store_path })?); | |
173 | } |
|
173 | } | |
174 |
|
174 | |||
175 | let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() { |
|
175 | let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() { | |
176 | config.combine_with_repo(&repo_config_files)? |
|
176 | config.combine_with_repo(&repo_config_files)? | |
177 | } else { |
|
177 | } else { | |
178 | config.clone() |
|
178 | config.clone() | |
179 | }; |
|
179 | }; | |
180 |
|
180 | |||
181 | let repo = Self { |
|
181 | let repo = Self { | |
182 | requirements: reqs, |
|
182 | requirements: reqs, | |
183 | working_directory, |
|
183 | working_directory, | |
184 | store: store_path, |
|
184 | store: store_path, | |
185 | dot_hg, |
|
185 | dot_hg, | |
186 | config: repo_config, |
|
186 | config: repo_config, | |
187 | dirstate_parents: LazyCell::new(), |
|
187 | dirstate_parents: LazyCell::new(), | |
188 | dirstate_map: LazyCell::new(), |
|
188 | dirstate_map: LazyCell::new(), | |
189 | changelog: LazyCell::new(), |
|
189 | changelog: LazyCell::new(), | |
190 | manifestlog: LazyCell::new(), |
|
190 | manifestlog: LazyCell::new(), | |
191 | }; |
|
191 | }; | |
192 |
|
192 | |||
193 | requirements::check(&repo)?; |
|
193 | requirements::check(&repo)?; | |
194 |
|
194 | |||
195 | Ok(repo) |
|
195 | Ok(repo) | |
196 | } |
|
196 | } | |
197 |
|
197 | |||
198 | pub fn working_directory_path(&self) -> &Path { |
|
198 | pub fn working_directory_path(&self) -> &Path { | |
199 | &self.working_directory |
|
199 | &self.working_directory | |
200 | } |
|
200 | } | |
201 |
|
201 | |||
202 | pub fn requirements(&self) -> &HashSet<String> { |
|
202 | pub fn requirements(&self) -> &HashSet<String> { | |
203 | &self.requirements |
|
203 | &self.requirements | |
204 | } |
|
204 | } | |
205 |
|
205 | |||
206 | pub fn config(&self) -> &Config { |
|
206 | pub fn config(&self) -> &Config { | |
207 | &self.config |
|
207 | &self.config | |
208 | } |
|
208 | } | |
209 |
|
209 | |||
210 | /// For accessing repository files (in `.hg`), except for the store |
|
210 | /// For accessing repository files (in `.hg`), except for the store | |
211 | /// (`.hg/store`). |
|
211 | /// (`.hg/store`). | |
212 | pub fn hg_vfs(&self) -> Vfs<'_> { |
|
212 | pub fn hg_vfs(&self) -> Vfs<'_> { | |
213 | Vfs { base: &self.dot_hg } |
|
213 | Vfs { base: &self.dot_hg } | |
214 | } |
|
214 | } | |
215 |
|
215 | |||
216 | /// For accessing repository store files (in `.hg/store`) |
|
216 | /// For accessing repository store files (in `.hg/store`) | |
217 | pub fn store_vfs(&self) -> Vfs<'_> { |
|
217 | pub fn store_vfs(&self) -> Vfs<'_> { | |
218 | Vfs { base: &self.store } |
|
218 | Vfs { base: &self.store } | |
219 | } |
|
219 | } | |
220 |
|
220 | |||
221 | /// For accessing the working copy |
|
221 | /// For accessing the working copy | |
222 | pub fn working_directory_vfs(&self) -> Vfs<'_> { |
|
222 | pub fn working_directory_vfs(&self) -> Vfs<'_> { | |
223 | Vfs { |
|
223 | Vfs { | |
224 | base: &self.working_directory, |
|
224 | base: &self.working_directory, | |
225 | } |
|
225 | } | |
226 | } |
|
226 | } | |
227 |
|
227 | |||
228 | pub fn try_with_wlock_no_wait<R>( |
|
228 | pub fn try_with_wlock_no_wait<R>( | |
229 | &self, |
|
229 | &self, | |
230 | f: impl FnOnce() -> R, |
|
230 | f: impl FnOnce() -> R, | |
231 | ) -> Result<R, LockError> { |
|
231 | ) -> Result<R, LockError> { | |
232 | try_with_lock_no_wait(self.hg_vfs(), "wlock", f) |
|
232 | try_with_lock_no_wait(self.hg_vfs(), "wlock", f) | |
233 | } |
|
233 | } | |
234 |
|
234 | |||
235 | /// Whether this repo should use dirstate-v2. |
|
235 | /// Whether this repo should use dirstate-v2. | |
236 | /// The presence of `dirstate-v2` in the requirements does not mean that |
|
236 | /// The presence of `dirstate-v2` in the requirements does not mean that | |
237 | /// the on-disk dirstate is necessarily in version 2. In most cases, |
|
237 | /// the on-disk dirstate is necessarily in version 2. In most cases, | |
238 | /// a dirstate-v2 file will indeed be found, but in rare cases (like the |
|
238 | /// a dirstate-v2 file will indeed be found, but in rare cases (like the | |
239 | /// upgrade mechanism being cut short), the on-disk version will be a |
|
239 | /// upgrade mechanism being cut short), the on-disk version will be a | |
240 | /// v1 file. |
|
240 | /// v1 file. | |
241 | /// Semantically, having a requirement only means that a client cannot |
|
241 | /// Semantically, having a requirement only means that a client cannot | |
242 | /// properly understand or properly update the repo if it lacks the support |
|
242 | /// properly understand or properly update the repo if it lacks the support | |
243 | /// for the required feature, but not that that feature is actually used |
|
243 | /// for the required feature, but not that that feature is actually used | |
244 | /// in all occasions. |
|
244 | /// in all occasions. | |
245 | pub fn use_dirstate_v2(&self) -> bool { |
|
245 | pub fn use_dirstate_v2(&self) -> bool { | |
246 | self.requirements |
|
246 | self.requirements | |
247 | .contains(requirements::DIRSTATE_V2_REQUIREMENT) |
|
247 | .contains(requirements::DIRSTATE_V2_REQUIREMENT) | |
248 | } |
|
248 | } | |
249 |
|
249 | |||
250 | pub fn has_sparse(&self) -> bool { |
|
250 | pub fn has_sparse(&self) -> bool { | |
251 | self.requirements.contains(requirements::SPARSE_REQUIREMENT) |
|
251 | self.requirements.contains(requirements::SPARSE_REQUIREMENT) | |
252 | } |
|
252 | } | |
253 |
|
253 | |||
254 | pub fn has_narrow(&self) -> bool { |
|
254 | pub fn has_narrow(&self) -> bool { | |
255 | self.requirements.contains(requirements::NARROW_REQUIREMENT) |
|
255 | self.requirements.contains(requirements::NARROW_REQUIREMENT) | |
256 | } |
|
256 | } | |
257 |
|
257 | |||
258 | pub fn has_nodemap(&self) -> bool { |
|
258 | pub fn has_nodemap(&self) -> bool { | |
259 | self.requirements |
|
259 | self.requirements | |
260 | .contains(requirements::NODEMAP_REQUIREMENT) |
|
260 | .contains(requirements::NODEMAP_REQUIREMENT) | |
261 | } |
|
261 | } | |
262 |
|
262 | |||
263 | fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> { |
|
263 | fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> { | |
264 | Ok(self |
|
264 | Ok(self | |
265 | .hg_vfs() |
|
265 | .hg_vfs() | |
266 | .read("dirstate") |
|
266 | .read("dirstate") | |
267 | .io_not_found_as_none()? |
|
267 | .io_not_found_as_none()? | |
268 | .unwrap_or_default()) |
|
268 | .unwrap_or_default()) | |
269 | } |
|
269 | } | |
270 |
|
270 | |||
271 | fn dirstate_identity(&self) -> Result<Option<u64>, HgError> { |
|
271 | fn dirstate_identity(&self) -> Result<Option<u64>, HgError> { | |
272 | use std::os::unix::fs::MetadataExt; |
|
272 | use std::os::unix::fs::MetadataExt; | |
273 | Ok(self |
|
273 | Ok(self | |
274 | .hg_vfs() |
|
274 | .hg_vfs() | |
275 | .symlink_metadata("dirstate") |
|
275 | .symlink_metadata("dirstate") | |
276 | .io_not_found_as_none()? |
|
276 | .io_not_found_as_none()? | |
277 | .map(|meta| meta.ino())) |
|
277 | .map(|meta| meta.ino())) | |
278 | } |
|
278 | } | |
279 |
|
279 | |||
280 | pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> { |
|
280 | pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> { | |
281 | Ok(*self |
|
281 | Ok(*self | |
282 | .dirstate_parents |
|
282 | .dirstate_parents | |
283 | .get_or_init(|| self.read_dirstate_parents())?) |
|
283 | .get_or_init(|| self.read_dirstate_parents())?) | |
284 | } |
|
284 | } | |
285 |
|
285 | |||
286 | fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> { |
|
286 | fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> { | |
287 | let dirstate = self.dirstate_file_contents()?; |
|
287 | let dirstate = self.dirstate_file_contents()?; | |
288 | let parents = if dirstate.is_empty() { |
|
288 | let parents = if dirstate.is_empty() { | |
289 | DirstateParents::NULL |
|
289 | DirstateParents::NULL | |
290 | } else if self.use_dirstate_v2() { |
|
290 | } else if self.use_dirstate_v2() { | |
291 | let docket_res = |
|
291 | let docket_res = | |
292 | crate::dirstate_tree::on_disk::read_docket(&dirstate); |
|
292 | crate::dirstate_tree::on_disk::read_docket(&dirstate); | |
293 | match docket_res { |
|
293 | match docket_res { | |
294 | Ok(docket) => docket.parents(), |
|
294 | Ok(docket) => docket.parents(), | |
295 | Err(_) => { |
|
295 | Err(_) => { | |
296 | log::info!( |
|
296 | log::info!( | |
297 | "Parsing dirstate docket failed, \ |
|
297 | "Parsing dirstate docket failed, \ | |
298 | falling back to dirstate-v1" |
|
298 | falling back to dirstate-v1" | |
299 | ); |
|
299 | ); | |
300 | *crate::dirstate::parsers::parse_dirstate_parents( |
|
300 | *crate::dirstate::parsers::parse_dirstate_parents( | |
301 | &dirstate, |
|
301 | &dirstate, | |
302 | )? |
|
302 | )? | |
303 | } |
|
303 | } | |
304 | } |
|
304 | } | |
305 | } else { |
|
305 | } else { | |
306 | *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? |
|
306 | *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? | |
307 | }; |
|
307 | }; | |
308 | self.dirstate_parents.set(parents); |
|
308 | self.dirstate_parents.set(parents); | |
309 | Ok(parents) |
|
309 | Ok(parents) | |
310 | } |
|
310 | } | |
311 |
|
311 | |||
312 | /// Returns the information read from the dirstate docket necessary to |
|
312 | /// Returns the information read from the dirstate docket necessary to | |
313 | /// check if the data file has been updated/deleted by another process |
|
313 | /// check if the data file has been updated/deleted by another process | |
314 | /// since we last read the dirstate. |
|
314 | /// since we last read the dirstate. | |
315 | /// Namely, the inode, data file uuid and the data size. |
|
315 | /// Namely, the inode, data file uuid and the data size. | |
316 | fn get_dirstate_data_file_integrity( |
|
316 | fn get_dirstate_data_file_integrity( | |
317 | &self, |
|
317 | &self, | |
318 | ) -> Result<DirstateMapIdentity, HgError> { |
|
318 | ) -> Result<DirstateMapIdentity, HgError> { | |
319 | assert!( |
|
319 | assert!( | |
320 | self.use_dirstate_v2(), |
|
320 | self.use_dirstate_v2(), | |
321 | "accessing dirstate data file ID without dirstate-v2" |
|
321 | "accessing dirstate data file ID without dirstate-v2" | |
322 | ); |
|
322 | ); | |
323 | // Get the identity before the contents since we could have a race |
|
323 | // Get the identity before the contents since we could have a race | |
324 | // between the two. Having an identity that is too old is fine, but |
|
324 | // between the two. Having an identity that is too old is fine, but | |
325 | // one that is younger than the content change is bad. |
|
325 | // one that is younger than the content change is bad. | |
326 | let identity = self.dirstate_identity()?; |
|
326 | let identity = self.dirstate_identity()?; | |
327 | let dirstate = self.dirstate_file_contents()?; |
|
327 | let dirstate = self.dirstate_file_contents()?; | |
328 | if dirstate.is_empty() { |
|
328 | if dirstate.is_empty() { | |
329 | self.dirstate_parents.set(DirstateParents::NULL); |
|
329 | self.dirstate_parents.set(DirstateParents::NULL); | |
330 | Ok((identity, None, 0)) |
|
330 | Ok((identity, None, 0)) | |
331 | } else { |
|
331 | } else { | |
332 | let docket_res = |
|
332 | let docket_res = | |
333 | crate::dirstate_tree::on_disk::read_docket(&dirstate); |
|
333 | crate::dirstate_tree::on_disk::read_docket(&dirstate); | |
334 | match docket_res { |
|
334 | match docket_res { | |
335 | Ok(docket) => { |
|
335 | Ok(docket) => { | |
336 | self.dirstate_parents.set(docket.parents()); |
|
336 | self.dirstate_parents.set(docket.parents()); | |
337 | Ok(( |
|
337 | Ok(( | |
338 | identity, |
|
338 | identity, | |
339 | Some(docket.uuid.to_owned()), |
|
339 | Some(docket.uuid.to_owned()), | |
340 | docket.data_size(), |
|
340 | docket.data_size(), | |
341 | )) |
|
341 | )) | |
342 | } |
|
342 | } | |
343 | Err(_) => { |
|
343 | Err(_) => { | |
344 | log::info!( |
|
344 | log::info!( | |
345 | "Parsing dirstate docket failed, \ |
|
345 | "Parsing dirstate docket failed, \ | |
346 | falling back to dirstate-v1" |
|
346 | falling back to dirstate-v1" | |
347 | ); |
|
347 | ); | |
348 | let parents = |
|
348 | let parents = | |
349 | *crate::dirstate::parsers::parse_dirstate_parents( |
|
349 | *crate::dirstate::parsers::parse_dirstate_parents( | |
350 | &dirstate, |
|
350 | &dirstate, | |
351 | )?; |
|
351 | )?; | |
352 | self.dirstate_parents.set(parents); |
|
352 | self.dirstate_parents.set(parents); | |
353 | Ok((identity, None, 0)) |
|
353 | Ok((identity, None, 0)) | |
354 | } |
|
354 | } | |
355 | } |
|
355 | } | |
356 | } |
|
356 | } | |
357 | } |
|
357 | } | |
358 |
|
358 | |||
359 | fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> { |
|
359 | fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> { | |
360 | if self.use_dirstate_v2() { |
|
360 | if self.use_dirstate_v2() { | |
361 | // The v2 dirstate is split into a docket and a data file. |
|
361 | // The v2 dirstate is split into a docket and a data file. | |
362 | // Since we don't always take the `wlock` to read it |
|
362 | // Since we don't always take the `wlock` to read it | |
363 | // (like in `hg status`), it is susceptible to races. |
|
363 | // (like in `hg status`), it is susceptible to races. | |
364 | // A simple retry method should be enough since full rewrites |
|
364 | // A simple retry method should be enough since full rewrites | |
365 | // only happen when too much garbage data is present and |
|
365 | // only happen when too much garbage data is present and | |
366 | // this race is unlikely. |
|
366 | // this race is unlikely. | |
367 | let mut tries = 0; |
|
367 | let mut tries = 0; | |
368 |
|
368 | |||
369 | while tries < V2_MAX_READ_ATTEMPTS { |
|
369 | while tries < V2_MAX_READ_ATTEMPTS { | |
370 | tries += 1; |
|
370 | tries += 1; | |
371 | match self.read_docket_and_data_file() { |
|
371 | match self.read_docket_and_data_file() { | |
372 | Ok(m) => { |
|
372 | Ok(m) => { | |
373 | return Ok(m); |
|
373 | return Ok(m); | |
374 | } |
|
374 | } | |
375 | Err(e) => match e { |
|
375 | Err(e) => match e { | |
376 | DirstateError::Common(HgError::RaceDetected( |
|
376 | DirstateError::Common(HgError::RaceDetected( | |
377 | context, |
|
377 | context, | |
378 | )) => { |
|
378 | )) => { | |
379 | log::info!( |
|
379 | log::info!( | |
380 | "dirstate read race detected {} (retry {}/{})", |
|
380 | "dirstate read race detected {} (retry {}/{})", | |
381 | context, |
|
381 | context, | |
382 | tries, |
|
382 | tries, | |
383 | V2_MAX_READ_ATTEMPTS, |
|
383 | V2_MAX_READ_ATTEMPTS, | |
384 | ); |
|
384 | ); | |
385 | continue; |
|
385 | continue; | |
386 | } |
|
386 | } | |
387 | _ => { |
|
387 | _ => { | |
388 | log::info!( |
|
388 | log::info!( | |
389 | "Reading dirstate v2 failed, \ |
|
389 | "Reading dirstate v2 failed, \ | |
390 | falling back to v1" |
|
390 | falling back to v1" | |
391 | ); |
|
391 | ); | |
392 | return self.new_dirstate_map_v1(); |
|
392 | return self.new_dirstate_map_v1(); | |
393 | } |
|
393 | } | |
394 | }, |
|
394 | }, | |
395 | } |
|
395 | } | |
396 | } |
|
396 | } | |
397 | let error = HgError::abort( |
|
397 | let error = HgError::abort( | |
398 | format!("dirstate read race happened {tries} times in a row"), |
|
398 | format!("dirstate read race happened {tries} times in a row"), | |
399 | 255, |
|
399 | 255, | |
400 | None, |
|
400 | None, | |
401 | ); |
|
401 | ); | |
402 | Err(DirstateError::Common(error)) |
|
402 | Err(DirstateError::Common(error)) | |
403 | } else { |
|
403 | } else { | |
404 | self.new_dirstate_map_v1() |
|
404 | self.new_dirstate_map_v1() | |
405 | } |
|
405 | } | |
406 | } |
|
406 | } | |
407 |
|
407 | |||
408 | fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> { |
|
408 | fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> { | |
409 | debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); |
|
409 | debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); | |
410 | let identity = self.dirstate_identity()?; |
|
410 | let identity = self.dirstate_identity()?; | |
411 | let dirstate_file_contents = self.dirstate_file_contents()?; |
|
411 | let dirstate_file_contents = self.dirstate_file_contents()?; | |
412 | if dirstate_file_contents.is_empty() { |
|
412 | if dirstate_file_contents.is_empty() { | |
413 | self.dirstate_parents.set(DirstateParents::NULL); |
|
413 | self.dirstate_parents.set(DirstateParents::NULL); | |
414 | Ok(OwningDirstateMap::new_empty(Vec::new())) |
|
414 | Ok(OwningDirstateMap::new_empty(Vec::new())) | |
415 | } else { |
|
415 | } else { | |
416 | let (map, parents) = |
|
416 | let (map, parents) = | |
417 | OwningDirstateMap::new_v1(dirstate_file_contents, identity)?; |
|
417 | OwningDirstateMap::new_v1(dirstate_file_contents, identity)?; | |
418 | self.dirstate_parents.set(parents); |
|
418 | self.dirstate_parents.set(parents); | |
419 | Ok(map) |
|
419 | Ok(map) | |
420 | } |
|
420 | } | |
421 | } |
|
421 | } | |
422 |
|
422 | |||
423 | fn read_docket_and_data_file( |
|
423 | fn read_docket_and_data_file( | |
424 | &self, |
|
424 | &self, | |
425 | ) -> Result<OwningDirstateMap, DirstateError> { |
|
425 | ) -> Result<OwningDirstateMap, DirstateError> { | |
426 | debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); |
|
426 | debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); | |
427 | let dirstate_file_contents = self.dirstate_file_contents()?; |
|
427 | let dirstate_file_contents = self.dirstate_file_contents()?; | |
428 | let identity = self.dirstate_identity()?; |
|
428 | let identity = self.dirstate_identity()?; | |
429 | if dirstate_file_contents.is_empty() { |
|
429 | if dirstate_file_contents.is_empty() { | |
430 | self.dirstate_parents.set(DirstateParents::NULL); |
|
430 | self.dirstate_parents.set(DirstateParents::NULL); | |
431 | return Ok(OwningDirstateMap::new_empty(Vec::new())); |
|
431 | return Ok(OwningDirstateMap::new_empty(Vec::new())); | |
432 | } |
|
432 | } | |
433 | let docket = crate::dirstate_tree::on_disk::read_docket( |
|
433 | let docket = crate::dirstate_tree::on_disk::read_docket( | |
434 | &dirstate_file_contents, |
|
434 | &dirstate_file_contents, | |
435 | )?; |
|
435 | )?; | |
436 | debug_wait_for_file_or_print( |
|
436 | debug_wait_for_file_or_print( | |
437 | self.config(), |
|
437 | self.config(), | |
438 | "dirstate.post-docket-read-file", |
|
438 | "dirstate.post-docket-read-file", | |
439 | ); |
|
439 | ); | |
440 | self.dirstate_parents.set(docket.parents()); |
|
440 | self.dirstate_parents.set(docket.parents()); | |
441 | let uuid = docket.uuid.to_owned(); |
|
441 | let uuid = docket.uuid.to_owned(); | |
442 | let data_size = docket.data_size(); |
|
442 | let data_size = docket.data_size(); | |
443 |
|
443 | |||
444 | let context = "between reading dirstate docket and data file"; |
|
444 | let context = "between reading dirstate docket and data file"; | |
445 | let race_error = HgError::RaceDetected(context.into()); |
|
445 | let race_error = HgError::RaceDetected(context.into()); | |
446 | let metadata = docket.tree_metadata(); |
|
446 | let metadata = docket.tree_metadata(); | |
447 |
|
447 | |||
448 | let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) { |
|
448 | let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) { | |
449 | // Don't mmap on NFS to prevent `SIGBUS` error on deletion |
|
449 | // Don't mmap on NFS to prevent `SIGBUS` error on deletion | |
450 | let contents = self.hg_vfs().read(docket.data_filename()); |
|
450 | let contents = self.hg_vfs().read(docket.data_filename()); | |
451 | let contents = match contents { |
|
451 | let contents = match contents { | |
452 | Ok(c) => c, |
|
452 | Ok(c) => c, | |
453 | Err(HgError::IoError { error, context }) => { |
|
453 | Err(HgError::IoError { error, context }) => { | |
454 | match error.raw_os_error().expect("real os error") { |
|
454 | match error.raw_os_error().expect("real os error") { | |
455 | // 2 = ENOENT, No such file or directory |
|
455 | // 2 = ENOENT, No such file or directory | |
456 | // 116 = ESTALE, Stale NFS file handle |
|
456 | // 116 = ESTALE, Stale NFS file handle | |
457 | // |
|
457 | // | |
458 | // TODO match on `error.kind()` when |
|
458 | // TODO match on `error.kind()` when | |
459 | // `ErrorKind::StaleNetworkFileHandle` is stable. |
|
459 | // `ErrorKind::StaleNetworkFileHandle` is stable. | |
460 | 2 | 116 => { |
|
460 | 2 | 116 => { | |
461 | // Race where the data file was deleted right after |
|
461 | // Race where the data file was deleted right after | |
462 | // we read the docket, try again |
|
462 | // we read the docket, try again | |
463 | return Err(race_error.into()); |
|
463 | return Err(race_error.into()); | |
464 | } |
|
464 | } | |
465 | _ => { |
|
465 | _ => { | |
466 | return Err( |
|
466 | return Err( | |
467 | HgError::IoError { error, context }.into() |
|
467 | HgError::IoError { error, context }.into() | |
468 | ) |
|
468 | ) | |
469 | } |
|
469 | } | |
470 | } |
|
470 | } | |
471 | } |
|
471 | } | |
472 | Err(e) => return Err(e.into()), |
|
472 | Err(e) => return Err(e.into()), | |
473 | }; |
|
473 | }; | |
474 | OwningDirstateMap::new_v2( |
|
474 | OwningDirstateMap::new_v2( | |
475 | contents, data_size, metadata, uuid, identity, |
|
475 | contents, data_size, metadata, uuid, identity, | |
476 | ) |
|
476 | ) | |
477 | } else { |
|
477 | } else { | |
478 | match self |
|
478 | match self | |
479 | .hg_vfs() |
|
479 | .hg_vfs() | |
480 | .mmap_open(docket.data_filename()) |
|
480 | .mmap_open(docket.data_filename()) | |
481 | .io_not_found_as_none() |
|
481 | .io_not_found_as_none() | |
482 | { |
|
482 | { | |
483 | Ok(Some(data_mmap)) => OwningDirstateMap::new_v2( |
|
483 | Ok(Some(data_mmap)) => OwningDirstateMap::new_v2( | |
484 | data_mmap, data_size, metadata, uuid, identity, |
|
484 | data_mmap, data_size, metadata, uuid, identity, | |
485 | ), |
|
485 | ), | |
486 | Ok(None) => { |
|
486 | Ok(None) => { | |
487 | // Race where the data file was deleted right after we |
|
487 | // Race where the data file was deleted right after we | |
488 | // read the docket, try again |
|
488 | // read the docket, try again | |
489 | return Err(race_error.into()); |
|
489 | return Err(race_error.into()); | |
490 | } |
|
490 | } | |
491 | Err(e) => return Err(e.into()), |
|
491 | Err(e) => return Err(e.into()), | |
492 | } |
|
492 | } | |
493 | }?; |
|
493 | }?; | |
494 |
|
494 | |||
495 | let write_mode_config = self |
|
495 | let write_mode_config = self | |
496 | .config() |
|
496 | .config() | |
497 | .get_str(b"devel", b"dirstate.v2.data_update_mode") |
|
497 | .get_str(b"devel", b"dirstate.v2.data_update_mode") | |
498 | .unwrap_or(Some("auto")) |
|
498 | .unwrap_or(Some("auto")) | |
499 | .unwrap_or("auto"); // don't bother for devel options |
|
499 | .unwrap_or("auto"); // don't bother for devel options | |
500 | let write_mode = match write_mode_config { |
|
500 | let write_mode = match write_mode_config { | |
501 | "auto" => DirstateMapWriteMode::Auto, |
|
501 | "auto" => DirstateMapWriteMode::Auto, | |
502 | "force-new" => DirstateMapWriteMode::ForceNewDataFile, |
|
502 | "force-new" => DirstateMapWriteMode::ForceNewDataFile, | |
503 | "force-append" => DirstateMapWriteMode::ForceAppend, |
|
503 | "force-append" => DirstateMapWriteMode::ForceAppend, | |
504 | _ => DirstateMapWriteMode::Auto, |
|
504 | _ => DirstateMapWriteMode::Auto, | |
505 | }; |
|
505 | }; | |
506 |
|
506 | |||
507 | map.with_dmap_mut(|m| m.set_write_mode(write_mode)); |
|
507 | map.with_dmap_mut(|m| m.set_write_mode(write_mode)); | |
508 |
|
508 | |||
509 | Ok(map) |
|
509 | Ok(map) | |
510 | } |
|
510 | } | |
511 |
|
511 | |||
512 | pub fn dirstate_map( |
|
512 | pub fn dirstate_map( | |
513 | &self, |
|
513 | &self, | |
514 | ) -> Result<Ref<OwningDirstateMap>, DirstateError> { |
|
514 | ) -> Result<Ref<OwningDirstateMap>, DirstateError> { | |
515 | self.dirstate_map.get_or_init(|| self.new_dirstate_map()) |
|
515 | self.dirstate_map.get_or_init(|| self.new_dirstate_map()) | |
516 | } |
|
516 | } | |
517 |
|
517 | |||
518 | pub fn dirstate_map_mut( |
|
518 | pub fn dirstate_map_mut( | |
519 | &self, |
|
519 | &self, | |
520 | ) -> Result<RefMut<OwningDirstateMap>, DirstateError> { |
|
520 | ) -> Result<RefMut<OwningDirstateMap>, DirstateError> { | |
521 | self.dirstate_map |
|
521 | self.dirstate_map | |
522 | .get_mut_or_init(|| self.new_dirstate_map()) |
|
522 | .get_mut_or_init(|| self.new_dirstate_map()) | |
523 | } |
|
523 | } | |
524 |
|
524 | |||
525 | fn new_changelog(&self) -> Result<Changelog, HgError> { |
|
525 | fn new_changelog(&self) -> Result<Changelog, HgError> { | |
526 | Changelog::open(&self.store_vfs(), self.has_nodemap()) |
|
526 | Changelog::open(&self.store_vfs(), self.has_nodemap()) | |
527 | } |
|
527 | } | |
528 |
|
528 | |||
529 | pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> { |
|
529 | pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> { | |
530 | self.changelog.get_or_init(|| self.new_changelog()) |
|
530 | self.changelog.get_or_init(|| self.new_changelog()) | |
531 | } |
|
531 | } | |
532 |
|
532 | |||
533 | pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> { |
|
533 | pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> { | |
534 | self.changelog.get_mut_or_init(|| self.new_changelog()) |
|
534 | self.changelog.get_mut_or_init(|| self.new_changelog()) | |
535 | } |
|
535 | } | |
536 |
|
536 | |||
537 | fn new_manifestlog(&self) -> Result<Manifestlog, HgError> { |
|
537 | fn new_manifestlog(&self) -> Result<Manifestlog, HgError> { | |
538 | Manifestlog::open(&self.store_vfs(), self.has_nodemap()) |
|
538 | Manifestlog::open(&self.store_vfs(), self.has_nodemap()) | |
539 | } |
|
539 | } | |
540 |
|
540 | |||
541 | pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> { |
|
541 | pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> { | |
542 | self.manifestlog.get_or_init(|| self.new_manifestlog()) |
|
542 | self.manifestlog.get_or_init(|| self.new_manifestlog()) | |
543 | } |
|
543 | } | |
544 |
|
544 | |||
545 | pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> { |
|
545 | pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> { | |
546 | self.manifestlog.get_mut_or_init(|| self.new_manifestlog()) |
|
546 | self.manifestlog.get_mut_or_init(|| self.new_manifestlog()) | |
547 | } |
|
547 | } | |
548 |
|
548 | |||
549 | /// Returns the manifest of the *changeset* with the given node ID |
|
549 | /// Returns the manifest of the *changeset* with the given node ID | |
550 | pub fn manifest_for_node( |
|
550 | pub fn manifest_for_node( | |
551 | &self, |
|
551 | &self, | |
552 | node: impl Into<NodePrefix>, |
|
552 | node: impl Into<NodePrefix>, | |
553 | ) -> Result<Manifest, RevlogError> { |
|
553 | ) -> Result<Manifest, RevlogError> { | |
554 | self.manifestlog()?.data_for_node( |
|
554 | self.manifestlog()?.data_for_node( | |
555 | self.changelog()? |
|
555 | self.changelog()? | |
556 | .data_for_node(node.into())? |
|
556 | .data_for_node(node.into())? | |
557 | .manifest_node()? |
|
557 | .manifest_node()? | |
558 | .into(), |
|
558 | .into(), | |
559 | ) |
|
559 | ) | |
560 | } |
|
560 | } | |
561 |
|
561 | |||
562 | /// Returns the manifest of the *changeset* with the given revision number |
|
562 | /// Returns the manifest of the *changeset* with the given revision number | |
563 | pub fn manifest_for_rev( |
|
563 | pub fn manifest_for_rev( | |
564 | &self, |
|
564 | &self, | |
565 | revision: Revision, |
|
565 | revision: UncheckedRevision, | |
566 | ) -> Result<Manifest, RevlogError> { |
|
566 | ) -> Result<Manifest, RevlogError> { | |
567 | self.manifestlog()?.data_for_node( |
|
567 | self.manifestlog()?.data_for_node( | |
568 | self.changelog()? |
|
568 | self.changelog()? | |
569 | .data_for_rev(revision)? |
|
569 | .data_for_rev(revision)? | |
570 | .manifest_node()? |
|
570 | .manifest_node()? | |
571 | .into(), |
|
571 | .into(), | |
572 | ) |
|
572 | ) | |
573 | } |
|
573 | } | |
574 |
|
574 | |||
575 | pub fn has_subrepos(&self) -> Result<bool, DirstateError> { |
|
575 | pub fn has_subrepos(&self) -> Result<bool, DirstateError> { | |
576 | if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? { |
|
576 | if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? { | |
577 | Ok(entry.tracked()) |
|
577 | Ok(entry.tracked()) | |
578 | } else { |
|
578 | } else { | |
579 | Ok(false) |
|
579 | Ok(false) | |
580 | } |
|
580 | } | |
581 | } |
|
581 | } | |
582 |
|
582 | |||
583 | pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> { |
|
583 | pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> { | |
584 | Filelog::open(self, path) |
|
584 | Filelog::open(self, path) | |
585 | } |
|
585 | } | |
586 |
|
586 | |||
587 | /// Write to disk any updates that were made through `dirstate_map_mut`. |
|
587 | /// Write to disk any updates that were made through `dirstate_map_mut`. | |
588 | /// |
|
588 | /// | |
589 | /// The "wlock" must be held while calling this. |
|
589 | /// The "wlock" must be held while calling this. | |
590 | /// See for example `try_with_wlock_no_wait`. |
|
590 | /// See for example `try_with_wlock_no_wait`. | |
591 | /// |
|
591 | /// | |
592 | /// TODO: have a `WritableRepo` type only accessible while holding the |
|
592 | /// TODO: have a `WritableRepo` type only accessible while holding the | |
593 | /// lock? |
|
593 | /// lock? | |
594 | pub fn write_dirstate(&self) -> Result<(), DirstateError> { |
|
594 | pub fn write_dirstate(&self) -> Result<(), DirstateError> { | |
595 | let map = self.dirstate_map()?; |
|
595 | let map = self.dirstate_map()?; | |
596 | // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if |
|
596 | // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if | |
597 | // it’s unset |
|
597 | // it’s unset | |
598 | let parents = self.dirstate_parents()?; |
|
598 | let parents = self.dirstate_parents()?; | |
599 | let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() { |
|
599 | let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() { | |
600 | let (identity, uuid, data_size) = |
|
600 | let (identity, uuid, data_size) = | |
601 | self.get_dirstate_data_file_integrity()?; |
|
601 | self.get_dirstate_data_file_integrity()?; | |
602 | let identity_changed = identity != map.old_identity(); |
|
602 | let identity_changed = identity != map.old_identity(); | |
603 | let uuid_changed = uuid.as_deref() != map.old_uuid(); |
|
603 | let uuid_changed = uuid.as_deref() != map.old_uuid(); | |
604 | let data_length_changed = data_size != map.old_data_size(); |
|
604 | let data_length_changed = data_size != map.old_data_size(); | |
605 |
|
605 | |||
606 | if identity_changed || uuid_changed || data_length_changed { |
|
606 | if identity_changed || uuid_changed || data_length_changed { | |
607 | // If any of identity, uuid or length have changed since |
|
607 | // If any of identity, uuid or length have changed since | |
608 | // last disk read, don't write. |
|
608 | // last disk read, don't write. | |
609 | // This is fine because either we're in a command that doesn't |
|
609 | // This is fine because either we're in a command that doesn't | |
610 | // write anything too important (like `hg status`), or we're in |
|
610 | // write anything too important (like `hg status`), or we're in | |
611 | // `hg add` and we're supposed to have taken the lock before |
|
611 | // `hg add` and we're supposed to have taken the lock before | |
612 | // reading anyway. |
|
612 | // reading anyway. | |
613 | // |
|
613 | // | |
614 | // TODO complain loudly if we've changed anything important |
|
614 | // TODO complain loudly if we've changed anything important | |
615 | // without taking the lock. |
|
615 | // without taking the lock. | |
616 | // (see `hg help config.format.use-dirstate-tracked-hint`) |
|
616 | // (see `hg help config.format.use-dirstate-tracked-hint`) | |
617 | log::debug!( |
|
617 | log::debug!( | |
618 | "dirstate has changed since last read, not updating." |
|
618 | "dirstate has changed since last read, not updating." | |
619 | ); |
|
619 | ); | |
620 | return Ok(()); |
|
620 | return Ok(()); | |
621 | } |
|
621 | } | |
622 |
|
622 | |||
623 | let uuid_opt = map.old_uuid(); |
|
623 | let uuid_opt = map.old_uuid(); | |
624 | let write_mode = if uuid_opt.is_some() { |
|
624 | let write_mode = if uuid_opt.is_some() { | |
625 | DirstateMapWriteMode::Auto |
|
625 | DirstateMapWriteMode::Auto | |
626 | } else { |
|
626 | } else { | |
627 | DirstateMapWriteMode::ForceNewDataFile |
|
627 | DirstateMapWriteMode::ForceNewDataFile | |
628 | }; |
|
628 | }; | |
629 | let (data, tree_metadata, append, old_data_size) = |
|
629 | let (data, tree_metadata, append, old_data_size) = | |
630 | map.pack_v2(write_mode)?; |
|
630 | map.pack_v2(write_mode)?; | |
631 |
|
631 | |||
632 | // Reuse the uuid, or generate a new one, keeping the old for |
|
632 | // Reuse the uuid, or generate a new one, keeping the old for | |
633 | // deletion. |
|
633 | // deletion. | |
634 | let (uuid, old_uuid) = match uuid_opt { |
|
634 | let (uuid, old_uuid) = match uuid_opt { | |
635 | Some(uuid) => { |
|
635 | Some(uuid) => { | |
636 | let as_str = std::str::from_utf8(uuid) |
|
636 | let as_str = std::str::from_utf8(uuid) | |
637 | .map_err(|_| { |
|
637 | .map_err(|_| { | |
638 | HgError::corrupted( |
|
638 | HgError::corrupted( | |
639 | "non-UTF-8 dirstate data file ID", |
|
639 | "non-UTF-8 dirstate data file ID", | |
640 | ) |
|
640 | ) | |
641 | })? |
|
641 | })? | |
642 | .to_owned(); |
|
642 | .to_owned(); | |
643 | if append { |
|
643 | if append { | |
644 | (as_str, None) |
|
644 | (as_str, None) | |
645 | } else { |
|
645 | } else { | |
646 | (DirstateDocket::new_uid(), Some(as_str)) |
|
646 | (DirstateDocket::new_uid(), Some(as_str)) | |
647 | } |
|
647 | } | |
648 | } |
|
648 | } | |
649 | None => (DirstateDocket::new_uid(), None), |
|
649 | None => (DirstateDocket::new_uid(), None), | |
650 | }; |
|
650 | }; | |
651 |
|
651 | |||
652 | let data_filename = format!("dirstate.{}", uuid); |
|
652 | let data_filename = format!("dirstate.{}", uuid); | |
653 | let data_filename = self.hg_vfs().join(data_filename); |
|
653 | let data_filename = self.hg_vfs().join(data_filename); | |
654 | let mut options = std::fs::OpenOptions::new(); |
|
654 | let mut options = std::fs::OpenOptions::new(); | |
655 | options.write(true); |
|
655 | options.write(true); | |
656 |
|
656 | |||
657 | // Why are we not using the O_APPEND flag when appending? |
|
657 | // Why are we not using the O_APPEND flag when appending? | |
658 | // |
|
658 | // | |
659 | // - O_APPEND makes it trickier to deal with garbage at the end of |
|
659 | // - O_APPEND makes it trickier to deal with garbage at the end of | |
660 | // the file, left by a previous uncommitted transaction. By |
|
660 | // the file, left by a previous uncommitted transaction. By | |
661 | // starting the write at [old_data_size] we make sure we erase |
|
661 | // starting the write at [old_data_size] we make sure we erase | |
662 | // all such garbage. |
|
662 | // all such garbage. | |
663 | // |
|
663 | // | |
664 | // - O_APPEND requires to special-case 0-byte writes, whereas we |
|
664 | // - O_APPEND requires to special-case 0-byte writes, whereas we | |
665 | // don't need that. |
|
665 | // don't need that. | |
666 | // |
|
666 | // | |
667 | // - Some OSes have bugs in implementation O_APPEND: |
|
667 | // - Some OSes have bugs in implementation O_APPEND: | |
668 | // revlog.py talks about a Solaris bug, but we also saw some ZFS |
|
668 | // revlog.py talks about a Solaris bug, but we also saw some ZFS | |
669 | // bug: https://github.com/openzfs/zfs/pull/3124, |
|
669 | // bug: https://github.com/openzfs/zfs/pull/3124, | |
670 | // https://github.com/openzfs/zfs/issues/13370 |
|
670 | // https://github.com/openzfs/zfs/issues/13370 | |
671 | // |
|
671 | // | |
672 | if !append { |
|
672 | if !append { | |
673 | log::trace!("creating a new dirstate data file"); |
|
673 | log::trace!("creating a new dirstate data file"); | |
674 | options.create_new(true); |
|
674 | options.create_new(true); | |
675 | } else { |
|
675 | } else { | |
676 | log::trace!("appending to the dirstate data file"); |
|
676 | log::trace!("appending to the dirstate data file"); | |
677 | } |
|
677 | } | |
678 |
|
678 | |||
679 | let data_size = (|| { |
|
679 | let data_size = (|| { | |
680 | // TODO: loop and try another random ID if !append and this |
|
680 | // TODO: loop and try another random ID if !append and this | |
681 | // returns `ErrorKind::AlreadyExists`? Collision chance of two |
|
681 | // returns `ErrorKind::AlreadyExists`? Collision chance of two | |
682 | // random IDs is one in 2**32 |
|
682 | // random IDs is one in 2**32 | |
683 | let mut file = options.open(&data_filename)?; |
|
683 | let mut file = options.open(&data_filename)?; | |
684 | if append { |
|
684 | if append { | |
685 | file.seek(SeekFrom::Start(old_data_size as u64))?; |
|
685 | file.seek(SeekFrom::Start(old_data_size as u64))?; | |
686 | } |
|
686 | } | |
687 | file.write_all(&data)?; |
|
687 | file.write_all(&data)?; | |
688 | file.flush()?; |
|
688 | file.flush()?; | |
689 | file.seek(SeekFrom::Current(0)) |
|
689 | file.seek(SeekFrom::Current(0)) | |
690 | })() |
|
690 | })() | |
691 | .when_writing_file(&data_filename)?; |
|
691 | .when_writing_file(&data_filename)?; | |
692 |
|
692 | |||
693 | let packed_dirstate = DirstateDocket::serialize( |
|
693 | let packed_dirstate = DirstateDocket::serialize( | |
694 | parents, |
|
694 | parents, | |
695 | tree_metadata, |
|
695 | tree_metadata, | |
696 | data_size, |
|
696 | data_size, | |
697 | uuid.as_bytes(), |
|
697 | uuid.as_bytes(), | |
698 | ) |
|
698 | ) | |
699 | .map_err(|_: std::num::TryFromIntError| { |
|
699 | .map_err(|_: std::num::TryFromIntError| { | |
700 | HgError::corrupted("overflow in dirstate docket serialization") |
|
700 | HgError::corrupted("overflow in dirstate docket serialization") | |
701 | })?; |
|
701 | })?; | |
702 |
|
702 | |||
703 | (packed_dirstate, old_uuid) |
|
703 | (packed_dirstate, old_uuid) | |
704 | } else { |
|
704 | } else { | |
705 | let identity = self.dirstate_identity()?; |
|
705 | let identity = self.dirstate_identity()?; | |
706 | if identity != map.old_identity() { |
|
706 | if identity != map.old_identity() { | |
707 | // If identity changed since last disk read, don't write. |
|
707 | // If identity changed since last disk read, don't write. | |
708 | // This is fine because either we're in a command that doesn't |
|
708 | // This is fine because either we're in a command that doesn't | |
709 | // write anything too important (like `hg status`), or we're in |
|
709 | // write anything too important (like `hg status`), or we're in | |
710 | // `hg add` and we're supposed to have taken the lock before |
|
710 | // `hg add` and we're supposed to have taken the lock before | |
711 | // reading anyway. |
|
711 | // reading anyway. | |
712 | // |
|
712 | // | |
713 | // TODO complain loudly if we've changed anything important |
|
713 | // TODO complain loudly if we've changed anything important | |
714 | // without taking the lock. |
|
714 | // without taking the lock. | |
715 | // (see `hg help config.format.use-dirstate-tracked-hint`) |
|
715 | // (see `hg help config.format.use-dirstate-tracked-hint`) | |
716 | log::debug!( |
|
716 | log::debug!( | |
717 | "dirstate has changed since last read, not updating." |
|
717 | "dirstate has changed since last read, not updating." | |
718 | ); |
|
718 | ); | |
719 | return Ok(()); |
|
719 | return Ok(()); | |
720 | } |
|
720 | } | |
721 | (map.pack_v1(parents)?, None) |
|
721 | (map.pack_v1(parents)?, None) | |
722 | }; |
|
722 | }; | |
723 |
|
723 | |||
724 | let vfs = self.hg_vfs(); |
|
724 | let vfs = self.hg_vfs(); | |
725 | vfs.atomic_write("dirstate", &packed_dirstate)?; |
|
725 | vfs.atomic_write("dirstate", &packed_dirstate)?; | |
726 | if let Some(uuid) = old_uuid_to_remove { |
|
726 | if let Some(uuid) = old_uuid_to_remove { | |
727 | // Remove the old data file after the new docket pointing to the |
|
727 | // Remove the old data file after the new docket pointing to the | |
728 | // new data file was written. |
|
728 | // new data file was written. | |
729 | vfs.remove_file(format!("dirstate.{}", uuid))?; |
|
729 | vfs.remove_file(format!("dirstate.{}", uuid))?; | |
730 | } |
|
730 | } | |
731 | Ok(()) |
|
731 | Ok(()) | |
732 | } |
|
732 | } | |
733 | } |
|
733 | } | |
734 |
|
734 | |||
735 | /// Lazily-initialized component of `Repo` with interior mutability |
|
735 | /// Lazily-initialized component of `Repo` with interior mutability | |
736 | /// |
|
736 | /// | |
737 | /// This differs from `OnceCell` in that the value can still be "deinitialized" |
|
737 | /// This differs from `OnceCell` in that the value can still be "deinitialized" | |
738 | /// later by setting its inner `Option` to `None`. It also takes the |
|
738 | /// later by setting its inner `Option` to `None`. It also takes the | |
739 | /// initialization function as an argument when the value is requested, not |
|
739 | /// initialization function as an argument when the value is requested, not | |
740 | /// when the instance is created. |
|
740 | /// when the instance is created. | |
741 | struct LazyCell<T> { |
|
741 | struct LazyCell<T> { | |
742 | value: RefCell<Option<T>>, |
|
742 | value: RefCell<Option<T>>, | |
743 | } |
|
743 | } | |
744 |
|
744 | |||
745 | impl<T> LazyCell<T> { |
|
745 | impl<T> LazyCell<T> { | |
746 | fn new() -> Self { |
|
746 | fn new() -> Self { | |
747 | Self { |
|
747 | Self { | |
748 | value: RefCell::new(None), |
|
748 | value: RefCell::new(None), | |
749 | } |
|
749 | } | |
750 | } |
|
750 | } | |
751 |
|
751 | |||
752 | fn set(&self, value: T) { |
|
752 | fn set(&self, value: T) { | |
753 | *self.value.borrow_mut() = Some(value) |
|
753 | *self.value.borrow_mut() = Some(value) | |
754 | } |
|
754 | } | |
755 |
|
755 | |||
756 | fn get_or_init<E>( |
|
756 | fn get_or_init<E>( | |
757 | &self, |
|
757 | &self, | |
758 | init: impl Fn() -> Result<T, E>, |
|
758 | init: impl Fn() -> Result<T, E>, | |
759 | ) -> Result<Ref<T>, E> { |
|
759 | ) -> Result<Ref<T>, E> { | |
760 | let mut borrowed = self.value.borrow(); |
|
760 | let mut borrowed = self.value.borrow(); | |
761 | if borrowed.is_none() { |
|
761 | if borrowed.is_none() { | |
762 | drop(borrowed); |
|
762 | drop(borrowed); | |
763 | // Only use `borrow_mut` if it is really needed to avoid panic in |
|
763 | // Only use `borrow_mut` if it is really needed to avoid panic in | |
764 | // case there is another outstanding borrow but mutation is not |
|
764 | // case there is another outstanding borrow but mutation is not | |
765 | // needed. |
|
765 | // needed. | |
766 | *self.value.borrow_mut() = Some(init()?); |
|
766 | *self.value.borrow_mut() = Some(init()?); | |
767 | borrowed = self.value.borrow() |
|
767 | borrowed = self.value.borrow() | |
768 | } |
|
768 | } | |
769 | Ok(Ref::map(borrowed, |option| option.as_ref().unwrap())) |
|
769 | Ok(Ref::map(borrowed, |option| option.as_ref().unwrap())) | |
770 | } |
|
770 | } | |
771 |
|
771 | |||
772 | fn get_mut_or_init<E>( |
|
772 | fn get_mut_or_init<E>( | |
773 | &self, |
|
773 | &self, | |
774 | init: impl Fn() -> Result<T, E>, |
|
774 | init: impl Fn() -> Result<T, E>, | |
775 | ) -> Result<RefMut<T>, E> { |
|
775 | ) -> Result<RefMut<T>, E> { | |
776 | let mut borrowed = self.value.borrow_mut(); |
|
776 | let mut borrowed = self.value.borrow_mut(); | |
777 | if borrowed.is_none() { |
|
777 | if borrowed.is_none() { | |
778 | *borrowed = Some(init()?); |
|
778 | *borrowed = Some(init()?); | |
779 | } |
|
779 | } | |
780 | Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap())) |
|
780 | Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap())) | |
781 | } |
|
781 | } | |
782 | } |
|
782 | } |
@@ -1,343 +1,353 | |||||
1 | use crate::errors::HgError; |
|
1 | use crate::errors::HgError; | |
2 | use crate::revlog::Revision; |
|
2 | use crate::revlog::Revision; | |
3 | use crate::revlog::{Node, NodePrefix}; |
|
3 | use crate::revlog::{Node, NodePrefix}; | |
4 | use crate::revlog::{Revlog, RevlogEntry, RevlogError}; |
|
4 | use crate::revlog::{Revlog, RevlogEntry, RevlogError}; | |
5 | use crate::utils::hg_path::HgPath; |
|
5 | use crate::utils::hg_path::HgPath; | |
6 | use crate::vfs::Vfs; |
|
6 | use crate::vfs::Vfs; | |
|
7 | use crate::UncheckedRevision; | |||
7 | use itertools::Itertools; |
|
8 | use itertools::Itertools; | |
8 | use std::ascii::escape_default; |
|
9 | use std::ascii::escape_default; | |
9 | use std::borrow::Cow; |
|
10 | use std::borrow::Cow; | |
10 | use std::fmt::{Debug, Formatter}; |
|
11 | use std::fmt::{Debug, Formatter}; | |
11 |
|
12 | |||
12 | /// A specialized `Revlog` to work with changelog data format. |
|
13 | /// A specialized `Revlog` to work with changelog data format. | |
13 | pub struct Changelog { |
|
14 | pub struct Changelog { | |
14 | /// The generic `revlog` format. |
|
15 | /// The generic `revlog` format. | |
15 | pub(crate) revlog: Revlog, |
|
16 | pub(crate) revlog: Revlog, | |
16 | } |
|
17 | } | |
17 |
|
18 | |||
18 | impl Changelog { |
|
19 | impl Changelog { | |
19 | /// Open the `changelog` of a repository given by its root. |
|
20 | /// Open the `changelog` of a repository given by its root. | |
20 | pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { |
|
21 | pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { | |
21 | let revlog = |
|
22 | let revlog = | |
22 | Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?; |
|
23 | Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?; | |
23 | Ok(Self { revlog }) |
|
24 | Ok(Self { revlog }) | |
24 | } |
|
25 | } | |
25 |
|
26 | |||
26 | /// Return the `ChangelogRevisionData` for the given node ID. |
|
27 | /// Return the `ChangelogRevisionData` for the given node ID. | |
27 | pub fn data_for_node( |
|
28 | pub fn data_for_node( | |
28 | &self, |
|
29 | &self, | |
29 | node: NodePrefix, |
|
30 | node: NodePrefix, | |
30 | ) -> Result<ChangelogRevisionData, RevlogError> { |
|
31 | ) -> Result<ChangelogRevisionData, RevlogError> { | |
31 | let rev = self.revlog.rev_from_node(node)?; |
|
32 | let rev = self.revlog.rev_from_node(node)?; | |
32 |
self. |
|
33 | self.entry_for_checked_rev(rev)?.data() | |
33 | } |
|
34 | } | |
34 |
|
35 | |||
35 | /// Return the [`ChangelogEntry`] for the given revision number. |
|
36 | /// Return the [`ChangelogEntry`] for the given revision number. | |
36 | pub fn entry_for_rev( |
|
37 | pub fn entry_for_rev( | |
37 | &self, |
|
38 | &self, | |
|
39 | rev: UncheckedRevision, | |||
|
40 | ) -> Result<ChangelogEntry, RevlogError> { | |||
|
41 | let revlog_entry = self.revlog.get_entry(rev)?; | |||
|
42 | Ok(ChangelogEntry { revlog_entry }) | |||
|
43 | } | |||
|
44 | ||||
|
45 | /// Same as [`Self::entry_for_rev`] for checked revisions. | |||
|
46 | fn entry_for_checked_rev( | |||
|
47 | &self, | |||
38 | rev: Revision, |
|
48 | rev: Revision, | |
39 | ) -> Result<ChangelogEntry, RevlogError> { |
|
49 | ) -> Result<ChangelogEntry, RevlogError> { | |
40 | let revlog_entry = self.revlog.get_entry(rev)?; |
|
50 | let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; | |
41 | Ok(ChangelogEntry { revlog_entry }) |
|
51 | Ok(ChangelogEntry { revlog_entry }) | |
42 | } |
|
52 | } | |
43 |
|
53 | |||
44 | /// Return the [`ChangelogRevisionData`] for the given revision number. |
|
54 | /// Return the [`ChangelogRevisionData`] for the given revision number. | |
45 | /// |
|
55 | /// | |
46 | /// This is a useful shortcut in case the caller does not need the |
|
56 | /// This is a useful shortcut in case the caller does not need the | |
47 | /// generic revlog information (parents, hashes etc). Otherwise |
|
57 | /// generic revlog information (parents, hashes etc). Otherwise | |
48 | /// consider taking a [`ChangelogEntry`] with |
|
58 | /// consider taking a [`ChangelogEntry`] with | |
49 | /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. |
|
59 | /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. | |
50 | pub fn data_for_rev( |
|
60 | pub fn data_for_rev( | |
51 | &self, |
|
61 | &self, | |
52 | rev: Revision, |
|
62 | rev: UncheckedRevision, | |
53 | ) -> Result<ChangelogRevisionData, RevlogError> { |
|
63 | ) -> Result<ChangelogRevisionData, RevlogError> { | |
54 | self.entry_for_rev(rev)?.data() |
|
64 | self.entry_for_rev(rev)?.data() | |
55 | } |
|
65 | } | |
56 |
|
66 | |||
57 | pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> { |
|
67 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { | |
58 | self.revlog.node_from_rev(rev) |
|
68 | self.revlog.node_from_rev(rev) | |
59 | } |
|
69 | } | |
60 |
|
70 | |||
61 | pub fn rev_from_node( |
|
71 | pub fn rev_from_node( | |
62 | &self, |
|
72 | &self, | |
63 | node: NodePrefix, |
|
73 | node: NodePrefix, | |
64 | ) -> Result<Revision, RevlogError> { |
|
74 | ) -> Result<Revision, RevlogError> { | |
65 | self.revlog.rev_from_node(node) |
|
75 | self.revlog.rev_from_node(node) | |
66 | } |
|
76 | } | |
67 | } |
|
77 | } | |
68 |
|
78 | |||
69 | /// A specialized `RevlogEntry` for `changelog` data format |
|
79 | /// A specialized `RevlogEntry` for `changelog` data format | |
70 | /// |
|
80 | /// | |
71 | /// This is a `RevlogEntry` with the added semantics that the associated |
|
81 | /// This is a `RevlogEntry` with the added semantics that the associated | |
72 | /// data should meet the requirements for `changelog`, materialized by |
|
82 | /// data should meet the requirements for `changelog`, materialized by | |
73 | /// the fact that `data()` constructs a `ChangelogRevisionData`. |
|
83 | /// the fact that `data()` constructs a `ChangelogRevisionData`. | |
74 | /// In case that promise would be broken, the `data` method returns an error. |
|
84 | /// In case that promise would be broken, the `data` method returns an error. | |
75 | #[derive(Clone)] |
|
85 | #[derive(Clone)] | |
76 | pub struct ChangelogEntry<'changelog> { |
|
86 | pub struct ChangelogEntry<'changelog> { | |
77 | /// Same data, as a generic `RevlogEntry`. |
|
87 | /// Same data, as a generic `RevlogEntry`. | |
78 | pub(crate) revlog_entry: RevlogEntry<'changelog>, |
|
88 | pub(crate) revlog_entry: RevlogEntry<'changelog>, | |
79 | } |
|
89 | } | |
80 |
|
90 | |||
81 | impl<'changelog> ChangelogEntry<'changelog> { |
|
91 | impl<'changelog> ChangelogEntry<'changelog> { | |
82 | pub fn data<'a>( |
|
92 | pub fn data<'a>( | |
83 | &'a self, |
|
93 | &'a self, | |
84 | ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { |
|
94 | ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { | |
85 | let bytes = self.revlog_entry.data()?; |
|
95 | let bytes = self.revlog_entry.data()?; | |
86 | if bytes.is_empty() { |
|
96 | if bytes.is_empty() { | |
87 | Ok(ChangelogRevisionData::null()) |
|
97 | Ok(ChangelogRevisionData::null()) | |
88 | } else { |
|
98 | } else { | |
89 | Ok(ChangelogRevisionData::new(bytes).map_err(|err| { |
|
99 | Ok(ChangelogRevisionData::new(bytes).map_err(|err| { | |
90 | RevlogError::Other(HgError::CorruptedRepository(format!( |
|
100 | RevlogError::Other(HgError::CorruptedRepository(format!( | |
91 | "Invalid changelog data for revision {}: {:?}", |
|
101 | "Invalid changelog data for revision {}: {:?}", | |
92 | self.revlog_entry.revision(), |
|
102 | self.revlog_entry.revision(), | |
93 | err |
|
103 | err | |
94 | ))) |
|
104 | ))) | |
95 | })?) |
|
105 | })?) | |
96 | } |
|
106 | } | |
97 | } |
|
107 | } | |
98 |
|
108 | |||
99 | /// Obtain a reference to the underlying `RevlogEntry`. |
|
109 | /// Obtain a reference to the underlying `RevlogEntry`. | |
100 | /// |
|
110 | /// | |
101 | /// This allows the caller to access the information that is common |
|
111 | /// This allows the caller to access the information that is common | |
102 | /// to all revlog entries: revision number, node id, parent revisions etc. |
|
112 | /// to all revlog entries: revision number, node id, parent revisions etc. | |
103 | pub fn as_revlog_entry(&self) -> &RevlogEntry { |
|
113 | pub fn as_revlog_entry(&self) -> &RevlogEntry { | |
104 | &self.revlog_entry |
|
114 | &self.revlog_entry | |
105 | } |
|
115 | } | |
106 |
|
116 | |||
107 | pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { |
|
117 | pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { | |
108 | Ok(self |
|
118 | Ok(self | |
109 | .revlog_entry |
|
119 | .revlog_entry | |
110 | .p1_entry()? |
|
120 | .p1_entry()? | |
111 | .map(|revlog_entry| Self { revlog_entry })) |
|
121 | .map(|revlog_entry| Self { revlog_entry })) | |
112 | } |
|
122 | } | |
113 |
|
123 | |||
114 | pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { |
|
124 | pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { | |
115 | Ok(self |
|
125 | Ok(self | |
116 | .revlog_entry |
|
126 | .revlog_entry | |
117 | .p2_entry()? |
|
127 | .p2_entry()? | |
118 | .map(|revlog_entry| Self { revlog_entry })) |
|
128 | .map(|revlog_entry| Self { revlog_entry })) | |
119 | } |
|
129 | } | |
120 | } |
|
130 | } | |
121 |
|
131 | |||
122 | /// `Changelog` entry which knows how to interpret the `changelog` data bytes. |
|
132 | /// `Changelog` entry which knows how to interpret the `changelog` data bytes. | |
123 | #[derive(PartialEq)] |
|
133 | #[derive(PartialEq)] | |
124 | pub struct ChangelogRevisionData<'changelog> { |
|
134 | pub struct ChangelogRevisionData<'changelog> { | |
125 | /// The data bytes of the `changelog` entry. |
|
135 | /// The data bytes of the `changelog` entry. | |
126 | bytes: Cow<'changelog, [u8]>, |
|
136 | bytes: Cow<'changelog, [u8]>, | |
127 | /// The end offset for the hex manifest (not including the newline) |
|
137 | /// The end offset for the hex manifest (not including the newline) | |
128 | manifest_end: usize, |
|
138 | manifest_end: usize, | |
129 | /// The end offset for the user+email (not including the newline) |
|
139 | /// The end offset for the user+email (not including the newline) | |
130 | user_end: usize, |
|
140 | user_end: usize, | |
131 | /// The end offset for the timestamp+timezone+extras (not including the |
|
141 | /// The end offset for the timestamp+timezone+extras (not including the | |
132 | /// newline) |
|
142 | /// newline) | |
133 | timestamp_end: usize, |
|
143 | timestamp_end: usize, | |
134 | /// The end offset for the file list (not including the newline) |
|
144 | /// The end offset for the file list (not including the newline) | |
135 | files_end: usize, |
|
145 | files_end: usize, | |
136 | } |
|
146 | } | |
137 |
|
147 | |||
138 | impl<'changelog> ChangelogRevisionData<'changelog> { |
|
148 | impl<'changelog> ChangelogRevisionData<'changelog> { | |
139 | fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> { |
|
149 | fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> { | |
140 | let mut line_iter = bytes.split(|b| b == &b'\n'); |
|
150 | let mut line_iter = bytes.split(|b| b == &b'\n'); | |
141 | let manifest_end = line_iter |
|
151 | let manifest_end = line_iter | |
142 | .next() |
|
152 | .next() | |
143 | .expect("Empty iterator from split()?") |
|
153 | .expect("Empty iterator from split()?") | |
144 | .len(); |
|
154 | .len(); | |
145 | let user_slice = line_iter.next().ok_or_else(|| { |
|
155 | let user_slice = line_iter.next().ok_or_else(|| { | |
146 | HgError::corrupted("Changeset data truncated after manifest line") |
|
156 | HgError::corrupted("Changeset data truncated after manifest line") | |
147 | })?; |
|
157 | })?; | |
148 | let user_end = manifest_end + 1 + user_slice.len(); |
|
158 | let user_end = manifest_end + 1 + user_slice.len(); | |
149 | let timestamp_slice = line_iter.next().ok_or_else(|| { |
|
159 | let timestamp_slice = line_iter.next().ok_or_else(|| { | |
150 | HgError::corrupted("Changeset data truncated after user line") |
|
160 | HgError::corrupted("Changeset data truncated after user line") | |
151 | })?; |
|
161 | })?; | |
152 | let timestamp_end = user_end + 1 + timestamp_slice.len(); |
|
162 | let timestamp_end = user_end + 1 + timestamp_slice.len(); | |
153 | let mut files_end = timestamp_end + 1; |
|
163 | let mut files_end = timestamp_end + 1; | |
154 | loop { |
|
164 | loop { | |
155 | let line = line_iter.next().ok_or_else(|| { |
|
165 | let line = line_iter.next().ok_or_else(|| { | |
156 | HgError::corrupted("Changeset data truncated in files list") |
|
166 | HgError::corrupted("Changeset data truncated in files list") | |
157 | })?; |
|
167 | })?; | |
158 | if line.is_empty() { |
|
168 | if line.is_empty() { | |
159 | if files_end == bytes.len() { |
|
169 | if files_end == bytes.len() { | |
160 | // The list of files ended with a single newline (there |
|
170 | // The list of files ended with a single newline (there | |
161 | // should be two) |
|
171 | // should be two) | |
162 | return Err(HgError::corrupted( |
|
172 | return Err(HgError::corrupted( | |
163 | "Changeset data truncated after files list", |
|
173 | "Changeset data truncated after files list", | |
164 | )); |
|
174 | )); | |
165 | } |
|
175 | } | |
166 | files_end -= 1; |
|
176 | files_end -= 1; | |
167 | break; |
|
177 | break; | |
168 | } |
|
178 | } | |
169 | files_end += line.len() + 1; |
|
179 | files_end += line.len() + 1; | |
170 | } |
|
180 | } | |
171 |
|
181 | |||
172 | Ok(Self { |
|
182 | Ok(Self { | |
173 | bytes, |
|
183 | bytes, | |
174 | manifest_end, |
|
184 | manifest_end, | |
175 | user_end, |
|
185 | user_end, | |
176 | timestamp_end, |
|
186 | timestamp_end, | |
177 | files_end, |
|
187 | files_end, | |
178 | }) |
|
188 | }) | |
179 | } |
|
189 | } | |
180 |
|
190 | |||
181 | fn null() -> Self { |
|
191 | fn null() -> Self { | |
182 | Self::new(Cow::Borrowed( |
|
192 | Self::new(Cow::Borrowed( | |
183 | b"0000000000000000000000000000000000000000\n\n0 0\n\n", |
|
193 | b"0000000000000000000000000000000000000000\n\n0 0\n\n", | |
184 | )) |
|
194 | )) | |
185 | .unwrap() |
|
195 | .unwrap() | |
186 | } |
|
196 | } | |
187 |
|
197 | |||
188 | /// Return an iterator over the lines of the entry. |
|
198 | /// Return an iterator over the lines of the entry. | |
189 | pub fn lines(&self) -> impl Iterator<Item = &[u8]> { |
|
199 | pub fn lines(&self) -> impl Iterator<Item = &[u8]> { | |
190 | self.bytes.split(|b| b == &b'\n') |
|
200 | self.bytes.split(|b| b == &b'\n') | |
191 | } |
|
201 | } | |
192 |
|
202 | |||
193 | /// Return the node id of the `manifest` referenced by this `changelog` |
|
203 | /// Return the node id of the `manifest` referenced by this `changelog` | |
194 | /// entry. |
|
204 | /// entry. | |
195 | pub fn manifest_node(&self) -> Result<Node, HgError> { |
|
205 | pub fn manifest_node(&self) -> Result<Node, HgError> { | |
196 | let manifest_node_hex = &self.bytes[..self.manifest_end]; |
|
206 | let manifest_node_hex = &self.bytes[..self.manifest_end]; | |
197 | Node::from_hex_for_repo(manifest_node_hex) |
|
207 | Node::from_hex_for_repo(manifest_node_hex) | |
198 | } |
|
208 | } | |
199 |
|
209 | |||
200 | /// The full user string (usually a name followed by an email enclosed in |
|
210 | /// The full user string (usually a name followed by an email enclosed in | |
201 | /// angle brackets) |
|
211 | /// angle brackets) | |
202 | pub fn user(&self) -> &[u8] { |
|
212 | pub fn user(&self) -> &[u8] { | |
203 | &self.bytes[self.manifest_end + 1..self.user_end] |
|
213 | &self.bytes[self.manifest_end + 1..self.user_end] | |
204 | } |
|
214 | } | |
205 |
|
215 | |||
206 | /// The full timestamp line (timestamp in seconds, offset in seconds, and |
|
216 | /// The full timestamp line (timestamp in seconds, offset in seconds, and | |
207 | /// possibly extras) |
|
217 | /// possibly extras) | |
208 | // TODO: We should expose this in a more useful way |
|
218 | // TODO: We should expose this in a more useful way | |
209 | pub fn timestamp_line(&self) -> &[u8] { |
|
219 | pub fn timestamp_line(&self) -> &[u8] { | |
210 | &self.bytes[self.user_end + 1..self.timestamp_end] |
|
220 | &self.bytes[self.user_end + 1..self.timestamp_end] | |
211 | } |
|
221 | } | |
212 |
|
222 | |||
213 | /// The files changed in this revision. |
|
223 | /// The files changed in this revision. | |
214 | pub fn files(&self) -> impl Iterator<Item = &HgPath> { |
|
224 | pub fn files(&self) -> impl Iterator<Item = &HgPath> { | |
215 | self.bytes[self.timestamp_end + 1..self.files_end] |
|
225 | self.bytes[self.timestamp_end + 1..self.files_end] | |
216 | .split(|b| b == &b'\n') |
|
226 | .split(|b| b == &b'\n') | |
217 | .map(HgPath::new) |
|
227 | .map(HgPath::new) | |
218 | } |
|
228 | } | |
219 |
|
229 | |||
220 | /// The change description. |
|
230 | /// The change description. | |
221 | pub fn description(&self) -> &[u8] { |
|
231 | pub fn description(&self) -> &[u8] { | |
222 | &self.bytes[self.files_end + 2..] |
|
232 | &self.bytes[self.files_end + 2..] | |
223 | } |
|
233 | } | |
224 | } |
|
234 | } | |
225 |
|
235 | |||
226 | impl Debug for ChangelogRevisionData<'_> { |
|
236 | impl Debug for ChangelogRevisionData<'_> { | |
227 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
|
237 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | |
228 | f.debug_struct("ChangelogRevisionData") |
|
238 | f.debug_struct("ChangelogRevisionData") | |
229 | .field("bytes", &debug_bytes(&self.bytes)) |
|
239 | .field("bytes", &debug_bytes(&self.bytes)) | |
230 | .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) |
|
240 | .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) | |
231 | .field( |
|
241 | .field( | |
232 | "user", |
|
242 | "user", | |
233 | &debug_bytes( |
|
243 | &debug_bytes( | |
234 | &self.bytes[self.manifest_end + 1..self.user_end], |
|
244 | &self.bytes[self.manifest_end + 1..self.user_end], | |
235 | ), |
|
245 | ), | |
236 | ) |
|
246 | ) | |
237 | .field( |
|
247 | .field( | |
238 | "timestamp", |
|
248 | "timestamp", | |
239 | &debug_bytes( |
|
249 | &debug_bytes( | |
240 | &self.bytes[self.user_end + 1..self.timestamp_end], |
|
250 | &self.bytes[self.user_end + 1..self.timestamp_end], | |
241 | ), |
|
251 | ), | |
242 | ) |
|
252 | ) | |
243 | .field( |
|
253 | .field( | |
244 | "files", |
|
254 | "files", | |
245 | &debug_bytes( |
|
255 | &debug_bytes( | |
246 | &self.bytes[self.timestamp_end + 1..self.files_end], |
|
256 | &self.bytes[self.timestamp_end + 1..self.files_end], | |
247 | ), |
|
257 | ), | |
248 | ) |
|
258 | ) | |
249 | .field( |
|
259 | .field( | |
250 | "description", |
|
260 | "description", | |
251 | &debug_bytes(&self.bytes[self.files_end + 2..]), |
|
261 | &debug_bytes(&self.bytes[self.files_end + 2..]), | |
252 | ) |
|
262 | ) | |
253 | .finish() |
|
263 | .finish() | |
254 | } |
|
264 | } | |
255 | } |
|
265 | } | |
256 |
|
266 | |||
257 | fn debug_bytes(bytes: &[u8]) -> String { |
|
267 | fn debug_bytes(bytes: &[u8]) -> String { | |
258 | String::from_utf8_lossy( |
|
268 | String::from_utf8_lossy( | |
259 | &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(), |
|
269 | &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(), | |
260 | ) |
|
270 | ) | |
261 | .to_string() |
|
271 | .to_string() | |
262 | } |
|
272 | } | |
263 |
|
273 | |||
264 | #[cfg(test)] |
|
274 | #[cfg(test)] | |
265 | mod tests { |
|
275 | mod tests { | |
266 | use super::*; |
|
276 | use super::*; | |
267 | use crate::vfs::Vfs; |
|
277 | use crate::vfs::Vfs; | |
268 | use crate::NULL_REVISION; |
|
278 | use crate::NULL_REVISION; | |
269 | use pretty_assertions::assert_eq; |
|
279 | use pretty_assertions::assert_eq; | |
270 |
|
280 | |||
271 | #[test] |
|
281 | #[test] | |
272 | fn test_create_changelogrevisiondata_invalid() { |
|
282 | fn test_create_changelogrevisiondata_invalid() { | |
273 | // Completely empty |
|
283 | // Completely empty | |
274 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); |
|
284 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); | |
275 | // No newline after manifest |
|
285 | // No newline after manifest | |
276 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); |
|
286 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); | |
277 | // No newline after user |
|
287 | // No newline after user | |
278 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err()); |
|
288 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err()); | |
279 | // No newline after timestamp |
|
289 | // No newline after timestamp | |
280 | assert!( |
|
290 | assert!( | |
281 | ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err() |
|
291 | ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err() | |
282 | ); |
|
292 | ); | |
283 | // Missing newline after files |
|
293 | // Missing newline after files | |
284 | assert!(ChangelogRevisionData::new(Cow::Borrowed( |
|
294 | assert!(ChangelogRevisionData::new(Cow::Borrowed( | |
285 | b"abcd\n\n0 0\nfile1\nfile2" |
|
295 | b"abcd\n\n0 0\nfile1\nfile2" | |
286 | )) |
|
296 | )) | |
287 | .is_err(),); |
|
297 | .is_err(),); | |
288 | // Only one newline after files |
|
298 | // Only one newline after files | |
289 | assert!(ChangelogRevisionData::new(Cow::Borrowed( |
|
299 | assert!(ChangelogRevisionData::new(Cow::Borrowed( | |
290 | b"abcd\n\n0 0\nfile1\nfile2\n" |
|
300 | b"abcd\n\n0 0\nfile1\nfile2\n" | |
291 | )) |
|
301 | )) | |
292 | .is_err(),); |
|
302 | .is_err(),); | |
293 | } |
|
303 | } | |
294 |
|
304 | |||
295 | #[test] |
|
305 | #[test] | |
296 | fn test_create_changelogrevisiondata() { |
|
306 | fn test_create_changelogrevisiondata() { | |
297 | let data = ChangelogRevisionData::new(Cow::Borrowed( |
|
307 | let data = ChangelogRevisionData::new(Cow::Borrowed( | |
298 | b"0123456789abcdef0123456789abcdef01234567 |
|
308 | b"0123456789abcdef0123456789abcdef01234567 | |
299 | Some One <someone@example.com> |
|
309 | Some One <someone@example.com> | |
300 | 0 0 |
|
310 | 0 0 | |
301 | file1 |
|
311 | file1 | |
302 | file2 |
|
312 | file2 | |
303 |
|
313 | |||
304 | some |
|
314 | some | |
305 | commit |
|
315 | commit | |
306 | message", |
|
316 | message", | |
307 | )) |
|
317 | )) | |
308 | .unwrap(); |
|
318 | .unwrap(); | |
309 | assert_eq!( |
|
319 | assert_eq!( | |
310 | data.manifest_node().unwrap(), |
|
320 | data.manifest_node().unwrap(), | |
311 | Node::from_hex("0123456789abcdef0123456789abcdef01234567") |
|
321 | Node::from_hex("0123456789abcdef0123456789abcdef01234567") | |
312 | .unwrap() |
|
322 | .unwrap() | |
313 | ); |
|
323 | ); | |
314 | assert_eq!(data.user(), b"Some One <someone@example.com>"); |
|
324 | assert_eq!(data.user(), b"Some One <someone@example.com>"); | |
315 | assert_eq!(data.timestamp_line(), b"0 0"); |
|
325 | assert_eq!(data.timestamp_line(), b"0 0"); | |
316 | assert_eq!( |
|
326 | assert_eq!( | |
317 | data.files().collect_vec(), |
|
327 | data.files().collect_vec(), | |
318 | vec![HgPath::new("file1"), HgPath::new("file2")] |
|
328 | vec![HgPath::new("file1"), HgPath::new("file2")] | |
319 | ); |
|
329 | ); | |
320 | assert_eq!(data.description(), b"some\ncommit\nmessage"); |
|
330 | assert_eq!(data.description(), b"some\ncommit\nmessage"); | |
321 | } |
|
331 | } | |
322 |
|
332 | |||
323 | #[test] |
|
333 | #[test] | |
324 | fn test_data_from_rev_null() -> Result<(), RevlogError> { |
|
334 | fn test_data_from_rev_null() -> Result<(), RevlogError> { | |
325 | // an empty revlog will be enough for this case |
|
335 | // an empty revlog will be enough for this case | |
326 | let temp = tempfile::tempdir().unwrap(); |
|
336 | let temp = tempfile::tempdir().unwrap(); | |
327 | let vfs = Vfs { base: temp.path() }; |
|
337 | let vfs = Vfs { base: temp.path() }; | |
328 | std::fs::write(temp.path().join("foo.i"), b"").unwrap(); |
|
338 | std::fs::write(temp.path().join("foo.i"), b"").unwrap(); | |
329 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); |
|
339 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); | |
330 |
|
340 | |||
331 | let changelog = Changelog { revlog }; |
|
341 | let changelog = Changelog { revlog }; | |
332 | assert_eq!( |
|
342 | assert_eq!( | |
333 | changelog.data_for_rev(NULL_REVISION)?, |
|
343 | changelog.data_for_rev(NULL_REVISION.into())?, | |
334 | ChangelogRevisionData::null() |
|
344 | ChangelogRevisionData::null() | |
335 | ); |
|
345 | ); | |
336 | // same with the intermediate entry object |
|
346 | // same with the intermediate entry object | |
337 | assert_eq!( |
|
347 | assert_eq!( | |
338 | changelog.entry_for_rev(NULL_REVISION)?.data()?, |
|
348 | changelog.entry_for_rev(NULL_REVISION.into())?.data()?, | |
339 | ChangelogRevisionData::null() |
|
349 | ChangelogRevisionData::null() | |
340 | ); |
|
350 | ); | |
341 | Ok(()) |
|
351 | Ok(()) | |
342 | } |
|
352 | } | |
343 | } |
|
353 | } |
@@ -1,208 +1,231 | |||||
1 | use crate::errors::HgError; |
|
1 | use crate::errors::HgError; | |
|
2 | use crate::exit_codes; | |||
2 | use crate::repo::Repo; |
|
3 | use crate::repo::Repo; | |
3 | use crate::revlog::path_encode::path_encode; |
|
4 | use crate::revlog::path_encode::path_encode; | |
4 | use crate::revlog::NodePrefix; |
|
5 | use crate::revlog::NodePrefix; | |
5 | use crate::revlog::Revision; |
|
6 | use crate::revlog::Revision; | |
6 | use crate::revlog::RevlogEntry; |
|
7 | use crate::revlog::RevlogEntry; | |
7 | use crate::revlog::{Revlog, RevlogError}; |
|
8 | use crate::revlog::{Revlog, RevlogError}; | |
8 | use crate::utils::files::get_path_from_bytes; |
|
9 | use crate::utils::files::get_path_from_bytes; | |
9 | use crate::utils::hg_path::HgPath; |
|
10 | use crate::utils::hg_path::HgPath; | |
10 | use crate::utils::SliceExt; |
|
11 | use crate::utils::SliceExt; | |
|
12 | use crate::UncheckedRevision; | |||
11 | use std::path::PathBuf; |
|
13 | use std::path::PathBuf; | |
12 |
|
14 | |||
/// A specialized `Revlog` to work with file data logs.
pub struct Filelog {
    /// The generic `revlog` format.
    revlog: Revlog,
}
18 |
|
20 | |||
impl Filelog {
    /// Open the filelog for `file_path` through an explicit store vfs.
    ///
    /// The `.i` (index) and `.d` (data) file names are derived from
    /// `file_path` via the store path encoding (see `store_path`).
    pub fn open_vfs(
        store_vfs: &crate::vfs::Vfs<'_>,
        file_path: &HgPath,
    ) -> Result<Self, HgError> {
        let index_path = store_path(file_path, b".i");
        let data_path = store_path(file_path, b".d");
        let revlog =
            Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
        Ok(Self { revlog })
    }

    /// Open the filelog for `file_path` using the repository's store vfs.
    pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
        Self::open_vfs(&repo.store_vfs(), file_path)
    }

    /// The given node ID is that of the file as found in a filelog, not of a
    /// changeset.
    pub fn data_for_node(
        &self,
        file_node: impl Into<NodePrefix>,
    ) -> Result<FilelogRevisionData, RevlogError> {
        let file_rev = self.revlog.rev_from_node(file_node.into())?;
        self.data_for_rev(file_rev.into())
    }

    /// The given revision is that of the file as found in a filelog, not of a
    /// changeset.
    ///
    /// Takes an `UncheckedRevision`; validation against this revlog is left
    /// to the underlying `get_rev_data` call.
    pub fn data_for_rev(
        &self,
        file_rev: UncheckedRevision,
    ) -> Result<FilelogRevisionData, RevlogError> {
        let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
        Ok(FilelogRevisionData(data))
    }

    /// The given node ID is that of the file as found in a filelog, not of a
    /// changeset.
    pub fn entry_for_node(
        &self,
        file_node: impl Into<NodePrefix>,
    ) -> Result<FilelogEntry, RevlogError> {
        // `rev_from_node` yields an already-validated `Revision`, so we can
        // skip the unchecked path.
        let file_rev = self.revlog.rev_from_node(file_node.into())?;
        self.entry_for_checked_rev(file_rev)
    }

    /// The given revision is that of the file as found in a filelog, not of a
    /// changeset.
    pub fn entry_for_rev(
        &self,
        file_rev: UncheckedRevision,
    ) -> Result<FilelogEntry, RevlogError> {
        Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
    }

    /// Like `entry_for_rev`, for a revision already known to be valid for
    /// this revlog.
    fn entry_for_checked_rev(
        &self,
        file_rev: Revision,
    ) -> Result<FilelogEntry, RevlogError> {
        Ok(FilelogEntry(
            self.revlog.get_entry_for_checked_rev(file_rev)?,
        ))
    }
}
74 |
|
85 | |||
75 | fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { |
|
86 | fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { | |
76 | let encoded_bytes = |
|
87 | let encoded_bytes = | |
77 | path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); |
|
88 | path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); | |
78 | get_path_from_bytes(&encoded_bytes).into() |
|
89 | get_path_from_bytes(&encoded_bytes).into() | |
79 | } |
|
90 | } | |
80 |
|
91 | |||
/// A revlog entry of a filelog; gives access to per-revision index
/// information without necessarily resolving the file data.
pub struct FilelogEntry<'a>(RevlogEntry<'a>);
82 |
|
93 | |||
impl FilelogEntry<'_> {
    /// `self.data()` can be expensive, with decompression and delta
    /// resolution.
    ///
    /// *Without* paying this cost, based on revlog index information
    /// including `RevlogEntry::uncompressed_len`:
    ///
    /// * Returns `true` if the length that `self.data().file_data().len()`
    ///   would return is definitely **not equal** to `other_len`.
    /// * Returns `false` if available information is inconclusive.
    pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
        // Relevant code that implement this behavior in Python code:
        // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
        // revlog.size, revlog.rawsize

        // Let’s call `file_data_len` what would be returned by
        // `self.data().file_data().len()`.

        if self.0.is_censored() {
            // A censored revision has no file data at all.
            let file_data_len = 0;
            return other_len != file_data_len;
        }

        if self.0.has_length_affecting_flag_processor() {
            // We can’t conclude anything about `file_data_len`.
            return false;
        }

        // Revlog revisions (usually) have metadata for the size of
        // their data after decompression and delta resolution
        // as would be returned by `Revlog::get_rev_data`.
        //
        // For filelogs this is the file’s contents preceded by an optional
        // metadata block.
        let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
            l as u64
        } else {
            // The field was set to -1, the actual uncompressed len is unknown.
            // We need to decompress to say more.
            return false;
        };
        // `uncompressed_len = file_data_len + optional_metadata_len`,
        // so `file_data_len <= uncompressed_len`.
        if uncompressed_len < other_len {
            // Transitively, `file_data_len < other_len`.
            // So `other_len != file_data_len` definitely.
            return true;
        }

        if uncompressed_len == other_len + 4 {
            // It’s possible that `file_data_len == other_len` with an empty
            // metadata block (2 start marker bytes + 2 end marker bytes).
            // This happens when there wouldn’t otherwise be metadata, but
            // the first 2 bytes of file data happen to match a start marker
            // and would be ambiguous.
            return false;
        }

        if !self.0.has_p1() {
            // There may or may not be copy metadata, so we can’t deduce more
            // about `file_data_len` without computing file data.
            return false;
        }

        // Filelog ancestry is not meaningful in the way changelog ancestry is.
        // It only provides hints to delta generation.
        // p1 and p2 are set to null when making a copy or rename since
        // contents are likely unrelated to what might have previously existed
        // at the destination path.
        //
        // Conversely, since here p1 is non-null, there is no copy metadata.
        // Note that this reasoning may be invalidated in the presence of
        // merges made by some previous versions of Mercurial that
        // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
        // and `tests/test-issue6528.t`.
        //
        // Since copy metadata is currently the only kind of metadata
        // kept in revlog data of filelogs,
        // this `FilelogEntry` does not have such metadata:
        let file_data_len = uncompressed_len;

        file_data_len != other_len
    }

    /// Resolve this entry's data (decompression + delta resolution).
    ///
    /// Revlog-level errors are converted to `HgError`: the revision was
    /// already located, so only data-level failures are expected here.
    pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
        let data = self.0.data();
        match data {
            Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
            // Errors other than `HgError` should not happen at this point
            Err(e) => match e {
                RevlogError::Other(hg_error) => Err(hg_error),
                revlog_error => Err(HgError::abort(
                    revlog_error.to_string(),
                    exit_codes::ABORT,
                    None,
                )),
            },
        }
    }
}
171 |
|
194 | |||
/// The data for one revision in a filelog, uncompressed and delta-resolved.
pub struct FilelogRevisionData(Vec<u8>);
174 |
|
197 | |||
impl FilelogRevisionData {
    /// Split into metadata and data
    ///
    /// A filelog revision may start with a metadata block delimited by
    /// `\x01\n` on both sides; everything after the closing delimiter is
    /// the actual file content.
    pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
        const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];

        if let Some(rest) = self.0.drop_prefix(DELIMITER) {
            if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
                Ok((Some(metadata), data))
            } else {
                // An opening delimiter without a matching closing one.
                Err(HgError::corrupted(
                    "Missing metadata end delimiter in filelog entry",
                ))
            }
        } else {
            // No leading delimiter: the whole payload is file data.
            Ok((None, &self.0))
        }
    }

    /// Returns the file contents at this revision, stripped of any metadata
    pub fn file_data(&self) -> Result<&[u8], HgError> {
        let (_metadata, data) = self.split()?;
        Ok(data)
    }

    /// Consume the entry, and convert it into data, discarding any metadata,
    /// if present.
    pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
        if let (Some(_metadata), data) = self.split()? {
            // Metadata present: `data` borrows from `self.0`, so it must be
            // copied out.
            Ok(data.to_owned())
        } else {
            // No metadata: reuse the existing allocation as-is.
            Ok(self.0)
        }
    }
}
@@ -1,615 +1,622 | |||||
|
1 | use std::fmt::Debug; | |||
1 | use std::ops::Deref; |
|
2 | use std::ops::Deref; | |
2 |
|
3 | |||
3 | use byteorder::{BigEndian, ByteOrder}; |
|
4 | use byteorder::{BigEndian, ByteOrder}; | |
4 |
|
5 | |||
5 | use crate::errors::HgError; |
|
6 | use crate::errors::HgError; | |
6 | use crate::revlog::node::Node; |
|
7 | use crate::revlog::node::Node; | |
7 | use crate::revlog::{Revision, NULL_REVISION}; |
|
8 | use crate::revlog::{Revision, NULL_REVISION}; | |
|
9 | use crate::UncheckedRevision; | |||
8 |
|
10 | |||
/// Byte length of one entry in a RevlogV1 index.
pub const INDEX_ENTRY_SIZE: usize = 64;

/// The first 4 bytes of the index file: 2 bytes of format flags followed by
/// 2 bytes of format version (see `format_flags` / `format_version`).
pub struct IndexHeader {
    header_bytes: [u8; 4],
}
14 |
|
16 | |||
/// Feature flags stored in the high 16 bits of the index header.
#[derive(Copy, Clone)]
pub struct IndexHeaderFlags {
    flags: u16,
}
19 |
|
21 | |||
20 | /// Corresponds to the high bits of `_format_flags` in python |
|
22 | /// Corresponds to the high bits of `_format_flags` in python | |
21 | impl IndexHeaderFlags { |
|
23 | impl IndexHeaderFlags { | |
22 | /// Corresponds to FLAG_INLINE_DATA in python |
|
24 | /// Corresponds to FLAG_INLINE_DATA in python | |
23 | pub fn is_inline(self) -> bool { |
|
25 | pub fn is_inline(self) -> bool { | |
24 | self.flags & 1 != 0 |
|
26 | self.flags & 1 != 0 | |
25 | } |
|
27 | } | |
26 | /// Corresponds to FLAG_GENERALDELTA in python |
|
28 | /// Corresponds to FLAG_GENERALDELTA in python | |
27 | pub fn uses_generaldelta(self) -> bool { |
|
29 | pub fn uses_generaldelta(self) -> bool { | |
28 | self.flags & 2 != 0 |
|
30 | self.flags & 2 != 0 | |
29 | } |
|
31 | } | |
30 | } |
|
32 | } | |
31 |
|
33 | |||
/// Corresponds to the INDEX_HEADER structure,
/// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
impl IndexHeader {
    /// The feature flags stored in the first two header bytes.
    fn format_flags(&self) -> IndexHeaderFlags {
        // No "unknown flags" check here, unlike in python. Maybe there should
        // be.
        IndexHeaderFlags {
            flags: BigEndian::read_u16(&self.header_bytes[0..2]),
        }
    }

    /// The only revlog version currently supported by rhg.
    const REVLOGV1: u16 = 1;

    /// Corresponds to `_format_version` in Python.
    fn format_version(&self) -> u16 {
        BigEndian::read_u16(&self.header_bytes[2..4])
    }

    /// Header assumed for a zero-length index file.
    const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
        // We treat an empty file as a valid index with no entries.
        // Here we make an arbitrary choice of what we assume the format of the
        // index to be (V1, using generaldelta).
        // This doesn't matter too much, since we're only doing read-only
        // access. but the value corresponds to the `new_header` variable in
        // `revlog.py`, `_loadindex`
        header_bytes: [0, 3, 0, 1],
    };

    /// Parse the 4-byte header at the start of `index_bytes`.
    ///
    /// An empty slice yields `EMPTY_INDEX_HEADER`; a non-empty slice shorter
    /// than 4 bytes is reported as corruption.
    fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
        if index_bytes.is_empty() {
            return Ok(IndexHeader::EMPTY_INDEX_HEADER);
        }
        if index_bytes.len() < 4 {
            return Err(HgError::corrupted(
                "corrupted revlog: can't read the index format header",
            ));
        }
        Ok(IndexHeader {
            header_bytes: {
                // Length was just checked, so the conversion cannot fail.
                let bytes: [u8; 4] =
                    index_bytes[0..4].try_into().expect("impossible");
                bytes
            },
        })
    }
}
79 |
|
81 | |||
/// A Revlog index
pub struct Index {
    // Raw index bytes; the concrete owner type is erased behind `Deref`.
    bytes: Box<dyn Deref<Target = [u8]> + Send>,
    /// Offsets of starts of index blocks.
    /// Only needed when the index is interleaved with data.
    offsets: Option<Vec<usize>>,
    // Whether the generaldelta flag was set in the index header.
    uses_generaldelta: bool,
}
88 |
|
90 | |||
|
impl Debug for Index {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `bytes` is omitted: it is raw index data behind a trait object
        // with no `Debug` bound.
        f.debug_struct("Index")
            .field("offsets", &self.offsets)
            .field("uses_generaldelta", &self.uses_generaldelta)
            .finish()
    }
}
99 | ||||
impl Index {
    /// Create an index from bytes.
    /// Calculate the start of each entry when is_inline is true.
    pub fn new(
        bytes: Box<dyn Deref<Target = [u8]> + Send>,
    ) -> Result<Self, HgError> {
        let header = IndexHeader::parse(bytes.as_ref())?;

        if header.format_version() != IndexHeader::REVLOGV1 {
            // A proper new version should have had a repo/store
            // requirement.
            return Err(HgError::corrupted("unsupported revlog version"));
        }

        // This is only correct because we know version is REVLOGV1.
        // In v2 we always use generaldelta, while in v0 we never use
        // generaldelta. Similar for [is_inline] (it's only used in v1).
        let uses_generaldelta = header.format_flags().uses_generaldelta();

        if header.format_flags().is_inline() {
            // Inline revlogs interleave each index entry with its revision
            // data, so entry positions must be computed by walking the file
            // once, advancing by the entry size plus its compressed length.
            let mut offset: usize = 0;
            let mut offsets = Vec::new();

            while offset + INDEX_ENTRY_SIZE <= bytes.len() {
                offsets.push(offset);
                let end = offset + INDEX_ENTRY_SIZE;
                let entry = IndexEntry {
                    bytes: &bytes[offset..end],
                    offset_override: None,
                };

                offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
            }

            // The walk must land exactly on the end of the file, otherwise
            // the data is truncated or garbled.
            if offset == bytes.len() {
                Ok(Self {
                    bytes,
                    offsets: Some(offsets),
                    uses_generaldelta,
                })
            } else {
                Err(HgError::corrupted("unexpected inline revlog length"))
            }
        } else {
            Ok(Self {
                bytes,
                offsets: None,
                uses_generaldelta,
            })
        }
    }

    /// Whether the generaldelta flag was set in the index header.
    pub fn uses_generaldelta(&self) -> bool {
        self.uses_generaldelta
    }

    /// Value of the inline flag.
    pub fn is_inline(&self) -> bool {
        self.offsets.is_some()
    }

    /// Return a slice of bytes if `revlog` is inline. Panic if not.
    pub fn data(&self, start: usize, end: usize) -> &[u8] {
        if !self.is_inline() {
            panic!("tried to access data in the index of a revlog that is not inline");
        }
        &self.bytes[start..end]
    }

    /// Return number of entries of the revlog index.
    pub fn len(&self) -> usize {
        if let Some(offsets) = &self.offsets {
            offsets.len()
        } else {
            self.bytes.len() / INDEX_ENTRY_SIZE
        }
    }

    /// Returns `true` if the `Index` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return the index entry corresponding to the given revision if it
    /// exists.
    pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
        if rev == NULL_REVISION {
            return None;
        }
        Some(if let Some(offsets) = &self.offsets {
            self.get_entry_inline(rev, offsets)
        } else {
            self.get_entry_separated(rev)
        })
    }

    // NOTE(review): indexing `offsets[rev as usize]` panics on an
    // out-of-range revision — callers must pass a validated `Revision`.
    fn get_entry_inline(
        &self,
        rev: Revision,
        offsets: &[usize],
    ) -> IndexEntry {
        let start = offsets[rev as usize];
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        // See IndexEntry for an explanation of this override.
        let offset_override = Some(end);

        IndexEntry {
            bytes,
            offset_override,
        }
    }

    // NOTE(review): no explicit bounds check here; the byte-range slice
    // below panics on an out-of-range revision — callers must pass a
    // validated `Revision`.
    fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
        let start = rev as usize * INDEX_ENTRY_SIZE;
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        // Override the offset of the first revision as its bytes are used
        // for the index's metadata (saving space because it is always 0)
        let offset_override = if rev == 0 { Some(0) } else { None };

        IndexEntry {
            bytes,
            offset_override,
        }
    }
}
222 |
|
229 | |||
223 | impl super::RevlogIndex for Index { |
|
230 | impl super::RevlogIndex for Index { | |
224 | fn len(&self) -> usize { |
|
231 | fn len(&self) -> usize { | |
225 | self.len() |
|
232 | self.len() | |
226 | } |
|
233 | } | |
227 |
|
234 | |||
228 | fn node(&self, rev: Revision) -> Option<&Node> { |
|
235 | fn node(&self, rev: Revision) -> Option<&Node> { | |
229 | self.get_entry(rev).map(|entry| entry.hash()) |
|
236 | self.get_entry(rev).map(|entry| entry.hash()) | |
230 | } |
|
237 | } | |
231 | } |
|
238 | } | |
232 |
|
239 | |||
233 | #[derive(Debug)] |
|
240 | #[derive(Debug)] | |
234 | pub struct IndexEntry<'a> { |
|
241 | pub struct IndexEntry<'a> { | |
235 | bytes: &'a [u8], |
|
242 | bytes: &'a [u8], | |
236 | /// Allows to override the offset value of the entry. |
|
243 | /// Allows to override the offset value of the entry. | |
237 | /// |
|
244 | /// | |
238 | /// For interleaved index and data, the offset stored in the index |
|
245 | /// For interleaved index and data, the offset stored in the index | |
239 | /// corresponds to the separated data offset. |
|
246 | /// corresponds to the separated data offset. | |
240 | /// It has to be overridden with the actual offset in the interleaved |
|
247 | /// It has to be overridden with the actual offset in the interleaved | |
241 | /// index which is just after the index block. |
|
248 | /// index which is just after the index block. | |
242 | /// |
|
249 | /// | |
243 | /// For separated index and data, the offset stored in the first index |
|
250 | /// For separated index and data, the offset stored in the first index | |
244 | /// entry is mixed with the index headers. |
|
251 | /// entry is mixed with the index headers. | |
245 | /// It has to be overridden with 0. |
|
252 | /// It has to be overridden with 0. | |
246 | offset_override: Option<usize>, |
|
253 | offset_override: Option<usize>, | |
247 | } |
|
254 | } | |
248 |
|
255 | |||
249 | impl<'a> IndexEntry<'a> { |
|
256 | impl<'a> IndexEntry<'a> { | |
250 | /// Return the offset of the data. |
|
257 | /// Return the offset of the data. | |
251 | pub fn offset(&self) -> usize { |
|
258 | pub fn offset(&self) -> usize { | |
252 | if let Some(offset_override) = self.offset_override { |
|
259 | if let Some(offset_override) = self.offset_override { | |
253 | offset_override |
|
260 | offset_override | |
254 | } else { |
|
261 | } else { | |
255 | let mut bytes = [0; 8]; |
|
262 | let mut bytes = [0; 8]; | |
256 | bytes[2..8].copy_from_slice(&self.bytes[0..=5]); |
|
263 | bytes[2..8].copy_from_slice(&self.bytes[0..=5]); | |
257 | BigEndian::read_u64(&bytes[..]) as usize |
|
264 | BigEndian::read_u64(&bytes[..]) as usize | |
258 | } |
|
265 | } | |
259 | } |
|
266 | } | |
260 |
|
267 | |||
261 | pub fn flags(&self) -> u16 { |
|
268 | pub fn flags(&self) -> u16 { | |
262 | BigEndian::read_u16(&self.bytes[6..=7]) |
|
269 | BigEndian::read_u16(&self.bytes[6..=7]) | |
263 | } |
|
270 | } | |
264 |
|
271 | |||
265 | /// Return the compressed length of the data. |
|
272 | /// Return the compressed length of the data. | |
266 | pub fn compressed_len(&self) -> u32 { |
|
273 | pub fn compressed_len(&self) -> u32 { | |
267 | BigEndian::read_u32(&self.bytes[8..=11]) |
|
274 | BigEndian::read_u32(&self.bytes[8..=11]) | |
268 | } |
|
275 | } | |
269 |
|
276 | |||
270 | /// Return the uncompressed length of the data. |
|
277 | /// Return the uncompressed length of the data. | |
271 | pub fn uncompressed_len(&self) -> i32 { |
|
278 | pub fn uncompressed_len(&self) -> i32 { | |
272 | BigEndian::read_i32(&self.bytes[12..=15]) |
|
279 | BigEndian::read_i32(&self.bytes[12..=15]) | |
273 | } |
|
280 | } | |
274 |
|
281 | |||
275 | /// Return the revision upon which the data has been derived. |
|
282 | /// Return the revision upon which the data has been derived. | |
276 | pub fn base_revision_or_base_of_delta_chain(&self) -> Revision { |
|
283 | pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision { | |
277 | // TODO Maybe return an Option when base_revision == rev? |
|
284 | // TODO Maybe return an Option when base_revision == rev? | |
278 | // Requires to add rev to IndexEntry |
|
285 | // Requires to add rev to IndexEntry | |
279 |
|
286 | |||
280 | BigEndian::read_i32(&self.bytes[16..]) |
|
287 | BigEndian::read_i32(&self.bytes[16..]).into() | |
281 | } |
|
288 | } | |
282 |
|
289 | |||
283 | pub fn link_revision(&self) -> Revision { |
|
290 | pub fn link_revision(&self) -> UncheckedRevision { | |
284 | BigEndian::read_i32(&self.bytes[20..]) |
|
291 | BigEndian::read_i32(&self.bytes[20..]).into() | |
285 | } |
|
292 | } | |
286 |
|
293 | |||
287 | pub fn p1(&self) -> Revision { |
|
294 | pub fn p1(&self) -> UncheckedRevision { | |
288 | BigEndian::read_i32(&self.bytes[24..]) |
|
295 | BigEndian::read_i32(&self.bytes[24..]).into() | |
289 | } |
|
296 | } | |
290 |
|
297 | |||
291 | pub fn p2(&self) -> Revision { |
|
298 | pub fn p2(&self) -> UncheckedRevision { | |
292 | BigEndian::read_i32(&self.bytes[28..]) |
|
299 | BigEndian::read_i32(&self.bytes[28..]).into() | |
293 | } |
|
300 | } | |
294 |
|
301 | |||
295 | /// Return the hash of revision's full text. |
|
302 | /// Return the hash of revision's full text. | |
296 | /// |
|
303 | /// | |
297 | /// Currently, SHA-1 is used and only the first 20 bytes of this field |
|
304 | /// Currently, SHA-1 is used and only the first 20 bytes of this field | |
298 | /// are used. |
|
305 | /// are used. | |
299 | pub fn hash(&self) -> &'a Node { |
|
306 | pub fn hash(&self) -> &'a Node { | |
300 | (&self.bytes[32..52]).try_into().unwrap() |
|
307 | (&self.bytes[32..52]).try_into().unwrap() | |
301 | } |
|
308 | } | |
302 | } |
|
309 | } | |
303 |
|
310 | |||
304 | #[cfg(test)] |
|
311 | #[cfg(test)] | |
305 | mod tests { |
|
312 | mod tests { | |
306 | use super::*; |
|
313 | use super::*; | |
307 | use crate::node::NULL_NODE; |
|
314 | use crate::node::NULL_NODE; | |
308 |
|
315 | |||
309 | #[cfg(test)] |
|
316 | #[cfg(test)] | |
310 | #[derive(Debug, Copy, Clone)] |
|
317 | #[derive(Debug, Copy, Clone)] | |
311 | pub struct IndexEntryBuilder { |
|
318 | pub struct IndexEntryBuilder { | |
312 | is_first: bool, |
|
319 | is_first: bool, | |
313 | is_inline: bool, |
|
320 | is_inline: bool, | |
314 | is_general_delta: bool, |
|
321 | is_general_delta: bool, | |
315 | version: u16, |
|
322 | version: u16, | |
316 | offset: usize, |
|
323 | offset: usize, | |
317 | compressed_len: usize, |
|
324 | compressed_len: usize, | |
318 | uncompressed_len: usize, |
|
325 | uncompressed_len: usize, | |
319 | base_revision_or_base_of_delta_chain: Revision, |
|
326 | base_revision_or_base_of_delta_chain: Revision, | |
320 | link_revision: Revision, |
|
327 | link_revision: Revision, | |
321 | p1: Revision, |
|
328 | p1: Revision, | |
322 | p2: Revision, |
|
329 | p2: Revision, | |
323 | node: Node, |
|
330 | node: Node, | |
324 | } |
|
331 | } | |
325 |
|
332 | |||
326 | #[cfg(test)] |
|
333 | #[cfg(test)] | |
327 | impl IndexEntryBuilder { |
|
334 | impl IndexEntryBuilder { | |
328 | #[allow(clippy::new_without_default)] |
|
335 | #[allow(clippy::new_without_default)] | |
329 | pub fn new() -> Self { |
|
336 | pub fn new() -> Self { | |
330 | Self { |
|
337 | Self { | |
331 | is_first: false, |
|
338 | is_first: false, | |
332 | is_inline: false, |
|
339 | is_inline: false, | |
333 | is_general_delta: true, |
|
340 | is_general_delta: true, | |
334 | version: 1, |
|
341 | version: 1, | |
335 | offset: 0, |
|
342 | offset: 0, | |
336 | compressed_len: 0, |
|
343 | compressed_len: 0, | |
337 | uncompressed_len: 0, |
|
344 | uncompressed_len: 0, | |
338 | base_revision_or_base_of_delta_chain: 0, |
|
345 | base_revision_or_base_of_delta_chain: 0, | |
339 | link_revision: 0, |
|
346 | link_revision: 0, | |
340 | p1: NULL_REVISION, |
|
347 | p1: NULL_REVISION, | |
341 | p2: NULL_REVISION, |
|
348 | p2: NULL_REVISION, | |
342 | node: NULL_NODE, |
|
349 | node: NULL_NODE, | |
343 | } |
|
350 | } | |
344 | } |
|
351 | } | |
345 |
|
352 | |||
346 | pub fn is_first(&mut self, value: bool) -> &mut Self { |
|
353 | pub fn is_first(&mut self, value: bool) -> &mut Self { | |
347 | self.is_first = value; |
|
354 | self.is_first = value; | |
348 | self |
|
355 | self | |
349 | } |
|
356 | } | |
350 |
|
357 | |||
351 | pub fn with_inline(&mut self, value: bool) -> &mut Self { |
|
358 | pub fn with_inline(&mut self, value: bool) -> &mut Self { | |
352 | self.is_inline = value; |
|
359 | self.is_inline = value; | |
353 | self |
|
360 | self | |
354 | } |
|
361 | } | |
355 |
|
362 | |||
356 | pub fn with_general_delta(&mut self, value: bool) -> &mut Self { |
|
363 | pub fn with_general_delta(&mut self, value: bool) -> &mut Self { | |
357 | self.is_general_delta = value; |
|
364 | self.is_general_delta = value; | |
358 | self |
|
365 | self | |
359 | } |
|
366 | } | |
360 |
|
367 | |||
361 | pub fn with_version(&mut self, value: u16) -> &mut Self { |
|
368 | pub fn with_version(&mut self, value: u16) -> &mut Self { | |
362 | self.version = value; |
|
369 | self.version = value; | |
363 | self |
|
370 | self | |
364 | } |
|
371 | } | |
365 |
|
372 | |||
366 | pub fn with_offset(&mut self, value: usize) -> &mut Self { |
|
373 | pub fn with_offset(&mut self, value: usize) -> &mut Self { | |
367 | self.offset = value; |
|
374 | self.offset = value; | |
368 | self |
|
375 | self | |
369 | } |
|
376 | } | |
370 |
|
377 | |||
371 | pub fn with_compressed_len(&mut self, value: usize) -> &mut Self { |
|
378 | pub fn with_compressed_len(&mut self, value: usize) -> &mut Self { | |
372 | self.compressed_len = value; |
|
379 | self.compressed_len = value; | |
373 | self |
|
380 | self | |
374 | } |
|
381 | } | |
375 |
|
382 | |||
376 | pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self { |
|
383 | pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self { | |
377 | self.uncompressed_len = value; |
|
384 | self.uncompressed_len = value; | |
378 | self |
|
385 | self | |
379 | } |
|
386 | } | |
380 |
|
387 | |||
381 | pub fn with_base_revision_or_base_of_delta_chain( |
|
388 | pub fn with_base_revision_or_base_of_delta_chain( | |
382 | &mut self, |
|
389 | &mut self, | |
383 | value: Revision, |
|
390 | value: Revision, | |
384 | ) -> &mut Self { |
|
391 | ) -> &mut Self { | |
385 | self.base_revision_or_base_of_delta_chain = value; |
|
392 | self.base_revision_or_base_of_delta_chain = value; | |
386 | self |
|
393 | self | |
387 | } |
|
394 | } | |
388 |
|
395 | |||
389 | pub fn with_link_revision(&mut self, value: Revision) -> &mut Self { |
|
396 | pub fn with_link_revision(&mut self, value: Revision) -> &mut Self { | |
390 | self.link_revision = value; |
|
397 | self.link_revision = value; | |
391 | self |
|
398 | self | |
392 | } |
|
399 | } | |
393 |
|
400 | |||
394 | pub fn with_p1(&mut self, value: Revision) -> &mut Self { |
|
401 | pub fn with_p1(&mut self, value: Revision) -> &mut Self { | |
395 | self.p1 = value; |
|
402 | self.p1 = value; | |
396 | self |
|
403 | self | |
397 | } |
|
404 | } | |
398 |
|
405 | |||
399 | pub fn with_p2(&mut self, value: Revision) -> &mut Self { |
|
406 | pub fn with_p2(&mut self, value: Revision) -> &mut Self { | |
400 | self.p2 = value; |
|
407 | self.p2 = value; | |
401 | self |
|
408 | self | |
402 | } |
|
409 | } | |
403 |
|
410 | |||
404 | pub fn with_node(&mut self, value: Node) -> &mut Self { |
|
411 | pub fn with_node(&mut self, value: Node) -> &mut Self { | |
405 | self.node = value; |
|
412 | self.node = value; | |
406 | self |
|
413 | self | |
407 | } |
|
414 | } | |
408 |
|
415 | |||
409 | pub fn build(&self) -> Vec<u8> { |
|
416 | pub fn build(&self) -> Vec<u8> { | |
410 | let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); |
|
417 | let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); | |
411 | if self.is_first { |
|
418 | if self.is_first { | |
412 | bytes.extend(&match (self.is_general_delta, self.is_inline) { |
|
419 | bytes.extend(&match (self.is_general_delta, self.is_inline) { | |
413 | (false, false) => [0u8, 0], |
|
420 | (false, false) => [0u8, 0], | |
414 | (false, true) => [0u8, 1], |
|
421 | (false, true) => [0u8, 1], | |
415 | (true, false) => [0u8, 2], |
|
422 | (true, false) => [0u8, 2], | |
416 | (true, true) => [0u8, 3], |
|
423 | (true, true) => [0u8, 3], | |
417 | }); |
|
424 | }); | |
418 | bytes.extend(&self.version.to_be_bytes()); |
|
425 | bytes.extend(&self.version.to_be_bytes()); | |
419 | // Remaining offset bytes. |
|
426 | // Remaining offset bytes. | |
420 | bytes.extend(&[0u8; 2]); |
|
427 | bytes.extend(&[0u8; 2]); | |
421 | } else { |
|
428 | } else { | |
422 | // Offset stored on 48 bits (6 bytes) |
|
429 | // Offset stored on 48 bits (6 bytes) | |
423 | bytes.extend(&(self.offset as u64).to_be_bytes()[2..]); |
|
430 | bytes.extend(&(self.offset as u64).to_be_bytes()[2..]); | |
424 | } |
|
431 | } | |
425 | bytes.extend(&[0u8; 2]); // Revision flags. |
|
432 | bytes.extend(&[0u8; 2]); // Revision flags. | |
426 | bytes.extend(&(self.compressed_len as u32).to_be_bytes()); |
|
433 | bytes.extend(&(self.compressed_len as u32).to_be_bytes()); | |
427 | bytes.extend(&(self.uncompressed_len as u32).to_be_bytes()); |
|
434 | bytes.extend(&(self.uncompressed_len as u32).to_be_bytes()); | |
428 | bytes.extend( |
|
435 | bytes.extend( | |
429 | &self.base_revision_or_base_of_delta_chain.to_be_bytes(), |
|
436 | &self.base_revision_or_base_of_delta_chain.to_be_bytes(), | |
430 | ); |
|
437 | ); | |
431 | bytes.extend(&self.link_revision.to_be_bytes()); |
|
438 | bytes.extend(&self.link_revision.to_be_bytes()); | |
432 | bytes.extend(&self.p1.to_be_bytes()); |
|
439 | bytes.extend(&self.p1.to_be_bytes()); | |
433 | bytes.extend(&self.p2.to_be_bytes()); |
|
440 | bytes.extend(&self.p2.to_be_bytes()); | |
434 | bytes.extend(self.node.as_bytes()); |
|
441 | bytes.extend(self.node.as_bytes()); | |
435 | bytes.extend(vec![0u8; 12]); |
|
442 | bytes.extend(vec![0u8; 12]); | |
436 | bytes |
|
443 | bytes | |
437 | } |
|
444 | } | |
438 | } |
|
445 | } | |
439 |
|
446 | |||
440 | pub fn is_inline(index_bytes: &[u8]) -> bool { |
|
447 | pub fn is_inline(index_bytes: &[u8]) -> bool { | |
441 | IndexHeader::parse(index_bytes) |
|
448 | IndexHeader::parse(index_bytes) | |
442 | .expect("too short") |
|
449 | .expect("too short") | |
443 | .format_flags() |
|
450 | .format_flags() | |
444 | .is_inline() |
|
451 | .is_inline() | |
445 | } |
|
452 | } | |
446 |
|
453 | |||
447 | pub fn uses_generaldelta(index_bytes: &[u8]) -> bool { |
|
454 | pub fn uses_generaldelta(index_bytes: &[u8]) -> bool { | |
448 | IndexHeader::parse(index_bytes) |
|
455 | IndexHeader::parse(index_bytes) | |
449 | .expect("too short") |
|
456 | .expect("too short") | |
450 | .format_flags() |
|
457 | .format_flags() | |
451 | .uses_generaldelta() |
|
458 | .uses_generaldelta() | |
452 | } |
|
459 | } | |
453 |
|
460 | |||
454 | pub fn get_version(index_bytes: &[u8]) -> u16 { |
|
461 | pub fn get_version(index_bytes: &[u8]) -> u16 { | |
455 | IndexHeader::parse(index_bytes) |
|
462 | IndexHeader::parse(index_bytes) | |
456 | .expect("too short") |
|
463 | .expect("too short") | |
457 | .format_version() |
|
464 | .format_version() | |
458 | } |
|
465 | } | |
459 |
|
466 | |||
460 | #[test] |
|
467 | #[test] | |
461 | fn flags_when_no_inline_flag_test() { |
|
468 | fn flags_when_no_inline_flag_test() { | |
462 | let bytes = IndexEntryBuilder::new() |
|
469 | let bytes = IndexEntryBuilder::new() | |
463 | .is_first(true) |
|
470 | .is_first(true) | |
464 | .with_general_delta(false) |
|
471 | .with_general_delta(false) | |
465 | .with_inline(false) |
|
472 | .with_inline(false) | |
466 | .build(); |
|
473 | .build(); | |
467 |
|
474 | |||
468 | assert!(!is_inline(&bytes)); |
|
475 | assert!(!is_inline(&bytes)); | |
469 | assert!(!uses_generaldelta(&bytes)); |
|
476 | assert!(!uses_generaldelta(&bytes)); | |
470 | } |
|
477 | } | |
471 |
|
478 | |||
472 | #[test] |
|
479 | #[test] | |
473 | fn flags_when_inline_flag_test() { |
|
480 | fn flags_when_inline_flag_test() { | |
474 | let bytes = IndexEntryBuilder::new() |
|
481 | let bytes = IndexEntryBuilder::new() | |
475 | .is_first(true) |
|
482 | .is_first(true) | |
476 | .with_general_delta(false) |
|
483 | .with_general_delta(false) | |
477 | .with_inline(true) |
|
484 | .with_inline(true) | |
478 | .build(); |
|
485 | .build(); | |
479 |
|
486 | |||
480 | assert!(is_inline(&bytes)); |
|
487 | assert!(is_inline(&bytes)); | |
481 | assert!(!uses_generaldelta(&bytes)); |
|
488 | assert!(!uses_generaldelta(&bytes)); | |
482 | } |
|
489 | } | |
483 |
|
490 | |||
484 | #[test] |
|
491 | #[test] | |
485 | fn flags_when_inline_and_generaldelta_flags_test() { |
|
492 | fn flags_when_inline_and_generaldelta_flags_test() { | |
486 | let bytes = IndexEntryBuilder::new() |
|
493 | let bytes = IndexEntryBuilder::new() | |
487 | .is_first(true) |
|
494 | .is_first(true) | |
488 | .with_general_delta(true) |
|
495 | .with_general_delta(true) | |
489 | .with_inline(true) |
|
496 | .with_inline(true) | |
490 | .build(); |
|
497 | .build(); | |
491 |
|
498 | |||
492 | assert!(is_inline(&bytes)); |
|
499 | assert!(is_inline(&bytes)); | |
493 | assert!(uses_generaldelta(&bytes)); |
|
500 | assert!(uses_generaldelta(&bytes)); | |
494 | } |
|
501 | } | |
495 |
|
502 | |||
496 | #[test] |
|
503 | #[test] | |
497 | fn test_offset() { |
|
504 | fn test_offset() { | |
498 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); |
|
505 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); | |
499 | let entry = IndexEntry { |
|
506 | let entry = IndexEntry { | |
500 | bytes: &bytes, |
|
507 | bytes: &bytes, | |
501 | offset_override: None, |
|
508 | offset_override: None, | |
502 | }; |
|
509 | }; | |
503 |
|
510 | |||
504 | assert_eq!(entry.offset(), 1) |
|
511 | assert_eq!(entry.offset(), 1) | |
505 | } |
|
512 | } | |
506 |
|
513 | |||
507 | #[test] |
|
514 | #[test] | |
508 | fn test_with_overridden_offset() { |
|
515 | fn test_with_overridden_offset() { | |
509 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); |
|
516 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); | |
510 | let entry = IndexEntry { |
|
517 | let entry = IndexEntry { | |
511 | bytes: &bytes, |
|
518 | bytes: &bytes, | |
512 | offset_override: Some(2), |
|
519 | offset_override: Some(2), | |
513 | }; |
|
520 | }; | |
514 |
|
521 | |||
515 | assert_eq!(entry.offset(), 2) |
|
522 | assert_eq!(entry.offset(), 2) | |
516 | } |
|
523 | } | |
517 |
|
524 | |||
518 | #[test] |
|
525 | #[test] | |
519 | fn test_compressed_len() { |
|
526 | fn test_compressed_len() { | |
520 | let bytes = IndexEntryBuilder::new().with_compressed_len(1).build(); |
|
527 | let bytes = IndexEntryBuilder::new().with_compressed_len(1).build(); | |
521 | let entry = IndexEntry { |
|
528 | let entry = IndexEntry { | |
522 | bytes: &bytes, |
|
529 | bytes: &bytes, | |
523 | offset_override: None, |
|
530 | offset_override: None, | |
524 | }; |
|
531 | }; | |
525 |
|
532 | |||
526 | assert_eq!(entry.compressed_len(), 1) |
|
533 | assert_eq!(entry.compressed_len(), 1) | |
527 | } |
|
534 | } | |
528 |
|
535 | |||
529 | #[test] |
|
536 | #[test] | |
530 | fn test_uncompressed_len() { |
|
537 | fn test_uncompressed_len() { | |
531 | let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build(); |
|
538 | let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build(); | |
532 | let entry = IndexEntry { |
|
539 | let entry = IndexEntry { | |
533 | bytes: &bytes, |
|
540 | bytes: &bytes, | |
534 | offset_override: None, |
|
541 | offset_override: None, | |
535 | }; |
|
542 | }; | |
536 |
|
543 | |||
537 | assert_eq!(entry.uncompressed_len(), 1) |
|
544 | assert_eq!(entry.uncompressed_len(), 1) | |
538 | } |
|
545 | } | |
539 |
|
546 | |||
540 | #[test] |
|
547 | #[test] | |
541 | fn test_base_revision_or_base_of_delta_chain() { |
|
548 | fn test_base_revision_or_base_of_delta_chain() { | |
542 | let bytes = IndexEntryBuilder::new() |
|
549 | let bytes = IndexEntryBuilder::new() | |
543 | .with_base_revision_or_base_of_delta_chain(1) |
|
550 | .with_base_revision_or_base_of_delta_chain(1) | |
544 | .build(); |
|
551 | .build(); | |
545 | let entry = IndexEntry { |
|
552 | let entry = IndexEntry { | |
546 | bytes: &bytes, |
|
553 | bytes: &bytes, | |
547 | offset_override: None, |
|
554 | offset_override: None, | |
548 | }; |
|
555 | }; | |
549 |
|
556 | |||
550 | assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1) |
|
557 | assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into()) | |
551 | } |
|
558 | } | |
552 |
|
559 | |||
553 | #[test] |
|
560 | #[test] | |
554 | fn link_revision_test() { |
|
561 | fn link_revision_test() { | |
555 | let bytes = IndexEntryBuilder::new().with_link_revision(123).build(); |
|
562 | let bytes = IndexEntryBuilder::new().with_link_revision(123).build(); | |
556 |
|
563 | |||
557 | let entry = IndexEntry { |
|
564 | let entry = IndexEntry { | |
558 | bytes: &bytes, |
|
565 | bytes: &bytes, | |
559 | offset_override: None, |
|
566 | offset_override: None, | |
560 | }; |
|
567 | }; | |
561 |
|
568 | |||
562 | assert_eq!(entry.link_revision(), 123); |
|
569 | assert_eq!(entry.link_revision(), 123.into()); | |
563 | } |
|
570 | } | |
564 |
|
571 | |||
565 | #[test] |
|
572 | #[test] | |
566 | fn p1_test() { |
|
573 | fn p1_test() { | |
567 | let bytes = IndexEntryBuilder::new().with_p1(123).build(); |
|
574 | let bytes = IndexEntryBuilder::new().with_p1(123).build(); | |
568 |
|
575 | |||
569 | let entry = IndexEntry { |
|
576 | let entry = IndexEntry { | |
570 | bytes: &bytes, |
|
577 | bytes: &bytes, | |
571 | offset_override: None, |
|
578 | offset_override: None, | |
572 | }; |
|
579 | }; | |
573 |
|
580 | |||
574 | assert_eq!(entry.p1(), 123); |
|
581 | assert_eq!(entry.p1(), 123.into()); | |
575 | } |
|
582 | } | |
576 |
|
583 | |||
577 | #[test] |
|
584 | #[test] | |
578 | fn p2_test() { |
|
585 | fn p2_test() { | |
579 | let bytes = IndexEntryBuilder::new().with_p2(123).build(); |
|
586 | let bytes = IndexEntryBuilder::new().with_p2(123).build(); | |
580 |
|
587 | |||
581 | let entry = IndexEntry { |
|
588 | let entry = IndexEntry { | |
582 | bytes: &bytes, |
|
589 | bytes: &bytes, | |
583 | offset_override: None, |
|
590 | offset_override: None, | |
584 | }; |
|
591 | }; | |
585 |
|
592 | |||
586 | assert_eq!(entry.p2(), 123); |
|
593 | assert_eq!(entry.p2(), 123.into()); | |
587 | } |
|
594 | } | |
588 |
|
595 | |||
589 | #[test] |
|
596 | #[test] | |
590 | fn node_test() { |
|
597 | fn node_test() { | |
591 | let node = Node::from_hex("0123456789012345678901234567890123456789") |
|
598 | let node = Node::from_hex("0123456789012345678901234567890123456789") | |
592 | .unwrap(); |
|
599 | .unwrap(); | |
593 | let bytes = IndexEntryBuilder::new().with_node(node).build(); |
|
600 | let bytes = IndexEntryBuilder::new().with_node(node).build(); | |
594 |
|
601 | |||
595 | let entry = IndexEntry { |
|
602 | let entry = IndexEntry { | |
596 | bytes: &bytes, |
|
603 | bytes: &bytes, | |
597 | offset_override: None, |
|
604 | offset_override: None, | |
598 | }; |
|
605 | }; | |
599 |
|
606 | |||
600 | assert_eq!(*entry.hash(), node); |
|
607 | assert_eq!(*entry.hash(), node); | |
601 | } |
|
608 | } | |
602 |
|
609 | |||
603 | #[test] |
|
610 | #[test] | |
604 | fn version_test() { |
|
611 | fn version_test() { | |
605 | let bytes = IndexEntryBuilder::new() |
|
612 | let bytes = IndexEntryBuilder::new() | |
606 | .is_first(true) |
|
613 | .is_first(true) | |
607 | .with_version(2) |
|
614 | .with_version(2) | |
608 | .build(); |
|
615 | .build(); | |
609 |
|
616 | |||
610 | assert_eq!(get_version(&bytes), 2) |
|
617 | assert_eq!(get_version(&bytes), 2) | |
611 | } |
|
618 | } | |
612 | } |
|
619 | } | |
613 |
|
620 | |||
614 | #[cfg(test)] |
|
621 | #[cfg(test)] | |
615 | pub use tests::IndexEntryBuilder; |
|
622 | pub use tests::IndexEntryBuilder; |
@@ -1,194 +1,203 | |||||
1 | use crate::errors::HgError; |
|
1 | use crate::errors::HgError; | |
2 | use crate::revlog::Revision; |
|
|||
3 | use crate::revlog::{Node, NodePrefix}; |
|
2 | use crate::revlog::{Node, NodePrefix}; | |
4 | use crate::revlog::{Revlog, RevlogError}; |
|
3 | use crate::revlog::{Revlog, RevlogError}; | |
5 | use crate::utils::hg_path::HgPath; |
|
4 | use crate::utils::hg_path::HgPath; | |
6 | use crate::utils::SliceExt; |
|
5 | use crate::utils::SliceExt; | |
7 | use crate::vfs::Vfs; |
|
6 | use crate::vfs::Vfs; | |
|
7 | use crate::{Revision, UncheckedRevision}; | |||
8 |
|
8 | |||
9 | /// A specialized `Revlog` to work with `manifest` data format. |
|
9 | /// A specialized `Revlog` to work with `manifest` data format. | |
10 | pub struct Manifestlog { |
|
10 | pub struct Manifestlog { | |
11 | /// The generic `revlog` format. |
|
11 | /// The generic `revlog` format. | |
12 | revlog: Revlog, |
|
12 | revlog: Revlog, | |
13 | } |
|
13 | } | |
14 |
|
14 | |||
15 | impl Manifestlog { |
|
15 | impl Manifestlog { | |
16 | /// Open the `manifest` of a repository given by its root. |
|
16 | /// Open the `manifest` of a repository given by its root. | |
17 | pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { |
|
17 | pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { | |
18 | let revlog = |
|
18 | let revlog = | |
19 | Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?; |
|
19 | Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?; | |
20 | Ok(Self { revlog }) |
|
20 | Ok(Self { revlog }) | |
21 | } |
|
21 | } | |
22 |
|
22 | |||
23 | /// Return the `Manifest` for the given node ID. |
|
23 | /// Return the `Manifest` for the given node ID. | |
24 | /// |
|
24 | /// | |
25 | /// Note: this is a node ID in the manifestlog, typically found through |
|
25 | /// Note: this is a node ID in the manifestlog, typically found through | |
26 | /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any |
|
26 | /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any | |
27 | /// changeset. |
|
27 | /// changeset. | |
28 | /// |
|
28 | /// | |
29 | /// See also `Repo::manifest_for_node` |
|
29 | /// See also `Repo::manifest_for_node` | |
30 | pub fn data_for_node( |
|
30 | pub fn data_for_node( | |
31 | &self, |
|
31 | &self, | |
32 | node: NodePrefix, |
|
32 | node: NodePrefix, | |
33 | ) -> Result<Manifest, RevlogError> { |
|
33 | ) -> Result<Manifest, RevlogError> { | |
34 | let rev = self.revlog.rev_from_node(node)?; |
|
34 | let rev = self.revlog.rev_from_node(node)?; | |
35 | self.data_for_rev(rev) |
|
35 | self.data_for_checked_rev(rev) | |
36 | } |
|
36 | } | |
37 |
|
37 | |||
38 | /// Return the `Manifest` of a given revision number. |
|
38 | /// Return the `Manifest` of a given revision number. | |
39 | /// |
|
39 | /// | |
40 | /// Note: this is a revision number in the manifestlog, *not* of any |
|
40 | /// Note: this is a revision number in the manifestlog, *not* of any | |
41 | /// changeset. |
|
41 | /// changeset. | |
42 | /// |
|
42 | /// | |
43 | /// See also `Repo::manifest_for_rev` |
|
43 | /// See also `Repo::manifest_for_rev` | |
44 | pub fn data_for_rev( |
|
44 | pub fn data_for_rev( | |
45 | &self, |
|
45 | &self, | |
|
46 | rev: UncheckedRevision, | |||
|
47 | ) -> Result<Manifest, RevlogError> { | |||
|
48 | let bytes = self.revlog.get_rev_data(rev)?.into_owned(); | |||
|
49 | Ok(Manifest { bytes }) | |||
|
50 | } | |||
|
51 | ||||
|
52 | pub fn data_for_checked_rev( | |||
|
53 | &self, | |||
46 | rev: Revision, |
|
54 | rev: Revision, | |
47 | ) -> Result<Manifest, RevlogError> { |
|
55 | ) -> Result<Manifest, RevlogError> { | |
48 | let bytes = self.revlog.get_rev_data(rev)?.into_owned(); |
|
56 | let bytes = | |
|
57 | self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned(); | |||
49 | Ok(Manifest { bytes }) |
|
58 | Ok(Manifest { bytes }) | |
50 | } |
|
59 | } | |
51 | } |
|
60 | } | |
52 |
|
61 | |||
53 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
|
62 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. | |
54 | #[derive(Debug)] |
|
63 | #[derive(Debug)] | |
55 | pub struct Manifest { |
|
64 | pub struct Manifest { | |
56 | /// Format for a manifest: flat sequence of variable-size entries, |
|
65 | /// Format for a manifest: flat sequence of variable-size entries, | |
57 | /// sorted by path, each as: |
|
66 | /// sorted by path, each as: | |
58 | /// |
|
67 | /// | |
59 | /// ```text |
|
68 | /// ```text | |
60 | /// <path> \0 <hex_node_id> <flags> \n |
|
69 | /// <path> \0 <hex_node_id> <flags> \n | |
61 | /// ``` |
|
70 | /// ``` | |
62 | /// |
|
71 | /// | |
63 | /// The last entry is also terminated by a newline character. |
|
72 | /// The last entry is also terminated by a newline character. | |
64 | /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`. |
|
73 | /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`. | |
65 | bytes: Vec<u8>, |
|
74 | bytes: Vec<u8>, | |
66 | } |
|
75 | } | |
67 |
|
76 | |||
68 | impl Manifest { |
|
77 | impl Manifest { | |
69 | pub fn iter( |
|
78 | pub fn iter( | |
70 | &self, |
|
79 | &self, | |
71 | ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> { |
|
80 | ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> { | |
72 | self.bytes |
|
81 | self.bytes | |
73 | .split(|b| b == &b'\n') |
|
82 | .split(|b| b == &b'\n') | |
74 | .filter(|line| !line.is_empty()) |
|
83 | .filter(|line| !line.is_empty()) | |
75 | .map(ManifestEntry::from_raw) |
|
84 | .map(ManifestEntry::from_raw) | |
76 | } |
|
85 | } | |
77 |
|
86 | |||
78 | /// If the given path is in this manifest, return its filelog node ID |
|
87 | /// If the given path is in this manifest, return its filelog node ID | |
79 | pub fn find_by_path( |
|
88 | pub fn find_by_path( | |
80 | &self, |
|
89 | &self, | |
81 | path: &HgPath, |
|
90 | path: &HgPath, | |
82 | ) -> Result<Option<ManifestEntry>, HgError> { |
|
91 | ) -> Result<Option<ManifestEntry>, HgError> { | |
83 | use std::cmp::Ordering::*; |
|
92 | use std::cmp::Ordering::*; | |
84 | let path = path.as_bytes(); |
|
93 | let path = path.as_bytes(); | |
85 | // Both boundaries of this `&[u8]` slice are always at the boundary of |
|
94 | // Both boundaries of this `&[u8]` slice are always at the boundary of | |
86 | // an entry |
|
95 | // an entry | |
87 | let mut bytes = &*self.bytes; |
|
96 | let mut bytes = &*self.bytes; | |
88 |
|
97 | |||
89 | // Binary search algorithm derived from `[T]::binary_search_by` |
|
98 | // Binary search algorithm derived from `[T]::binary_search_by` | |
90 | // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221> |
|
99 | // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221> | |
91 | // except we don’t have a slice of entries. Instead we jump to the |
|
100 | // except we don’t have a slice of entries. Instead we jump to the | |
92 | // middle of the byte slice and look around for entry delimiters |
|
101 | // middle of the byte slice and look around for entry delimiters | |
93 | // (newlines). |
|
102 | // (newlines). | |
94 | while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? { |
|
103 | while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? { | |
95 | let (entry_path, rest) = |
|
104 | let (entry_path, rest) = | |
96 | ManifestEntry::split_path(&bytes[entry_range.clone()])?; |
|
105 | ManifestEntry::split_path(&bytes[entry_range.clone()])?; | |
97 | let cmp = entry_path.cmp(path); |
|
106 | let cmp = entry_path.cmp(path); | |
98 | if cmp == Less { |
|
107 | if cmp == Less { | |
99 | let after_newline = entry_range.end + 1; |
|
108 | let after_newline = entry_range.end + 1; | |
100 | bytes = &bytes[after_newline..]; |
|
109 | bytes = &bytes[after_newline..]; | |
101 | } else if cmp == Greater { |
|
110 | } else if cmp == Greater { | |
102 | bytes = &bytes[..entry_range.start]; |
|
111 | bytes = &bytes[..entry_range.start]; | |
103 | } else { |
|
112 | } else { | |
104 | return Ok(Some(ManifestEntry::from_path_and_rest( |
|
113 | return Ok(Some(ManifestEntry::from_path_and_rest( | |
105 | entry_path, rest, |
|
114 | entry_path, rest, | |
106 | ))); |
|
115 | ))); | |
107 | } |
|
116 | } | |
108 | } |
|
117 | } | |
109 | Ok(None) |
|
118 | Ok(None) | |
110 | } |
|
119 | } | |
111 |
|
120 | |||
112 | /// If there is at least one, return the byte range of an entry *excluding* |
|
121 | /// If there is at least one, return the byte range of an entry *excluding* | |
113 | /// the final newline. |
|
122 | /// the final newline. | |
114 | fn find_entry_near_middle_of( |
|
123 | fn find_entry_near_middle_of( | |
115 | bytes: &[u8], |
|
124 | bytes: &[u8], | |
116 | ) -> Result<Option<std::ops::Range<usize>>, HgError> { |
|
125 | ) -> Result<Option<std::ops::Range<usize>>, HgError> { | |
117 | let len = bytes.len(); |
|
126 | let len = bytes.len(); | |
118 | if len > 0 { |
|
127 | if len > 0 { | |
119 | let middle = bytes.len() / 2; |
|
128 | let middle = bytes.len() / 2; | |
120 | // Integer division rounds down, so `middle < len`. |
|
129 | // Integer division rounds down, so `middle < len`. | |
121 | let (before, after) = bytes.split_at(middle); |
|
130 | let (before, after) = bytes.split_at(middle); | |
122 | let is_newline = |&byte: &u8| byte == b'\n'; |
|
131 | let is_newline = |&byte: &u8| byte == b'\n'; | |
123 | let entry_start = match before.iter().rposition(is_newline) { |
|
132 | let entry_start = match before.iter().rposition(is_newline) { | |
124 | Some(i) => i + 1, |
|
133 | Some(i) => i + 1, | |
125 | None => 0, // We choose the first entry in `bytes` |
|
134 | None => 0, // We choose the first entry in `bytes` | |
126 | }; |
|
135 | }; | |
127 | let entry_end = match after.iter().position(is_newline) { |
|
136 | let entry_end = match after.iter().position(is_newline) { | |
128 | Some(i) => { |
|
137 | Some(i) => { | |
129 | // No `+ 1` here to exclude this newline from the range |
|
138 | // No `+ 1` here to exclude this newline from the range | |
130 | middle + i |
|
139 | middle + i | |
131 | } |
|
140 | } | |
132 | None => { |
|
141 | None => { | |
133 | // In a well-formed manifest: |
|
142 | // In a well-formed manifest: | |
134 | // |
|
143 | // | |
135 | // * Since `len > 0`, `bytes` contains at least one entry |
|
144 | // * Since `len > 0`, `bytes` contains at least one entry | |
136 | // * Every entry ends with a newline |
|
145 | // * Every entry ends with a newline | |
137 | // * Since `middle < len`, `after` contains at least the |
|
146 | // * Since `middle < len`, `after` contains at least the | |
138 | // newline at the end of the last entry of `bytes`. |
|
147 | // newline at the end of the last entry of `bytes`. | |
139 | // |
|
148 | // | |
140 | // We didn’t find a newline, so this manifest is not |
|
149 | // We didn’t find a newline, so this manifest is not | |
141 | // well-formed. |
|
150 | // well-formed. | |
142 | return Err(HgError::corrupted( |
|
151 | return Err(HgError::corrupted( | |
143 | "manifest entry without \\n delimiter", |
|
152 | "manifest entry without \\n delimiter", | |
144 | )); |
|
153 | )); | |
145 | } |
|
154 | } | |
146 | }; |
|
155 | }; | |
147 | Ok(Some(entry_start..entry_end)) |
|
156 | Ok(Some(entry_start..entry_end)) | |
148 | } else { |
|
157 | } else { | |
149 | // len == 0 |
|
158 | // len == 0 | |
150 | Ok(None) |
|
159 | Ok(None) | |
151 | } |
|
160 | } | |
152 | } |
|
161 | } | |
153 | } |
|
162 | } | |
154 |
|
163 | |||
155 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
|
164 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. | |
156 | #[derive(Debug)] |
|
165 | #[derive(Debug)] | |
157 | pub struct ManifestEntry<'manifest> { |
|
166 | pub struct ManifestEntry<'manifest> { | |
158 | pub path: &'manifest HgPath, |
|
167 | pub path: &'manifest HgPath, | |
159 | pub hex_node_id: &'manifest [u8], |
|
168 | pub hex_node_id: &'manifest [u8], | |
160 |
|
169 | |||
161 | /// `Some` values are b'x', b'l', or 't' |
|
170 | /// `Some` values are b'x', b'l', or 't' | |
162 | pub flags: Option<u8>, |
|
171 | pub flags: Option<u8>, | |
163 | } |
|
172 | } | |
164 |
|
173 | |||
165 | impl<'a> ManifestEntry<'a> { |
|
174 | impl<'a> ManifestEntry<'a> { | |
166 | fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> { |
|
175 | fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> { | |
167 | bytes.split_2(b'\0').ok_or_else(|| { |
|
176 | bytes.split_2(b'\0').ok_or_else(|| { | |
168 | HgError::corrupted("manifest entry without \\0 delimiter") |
|
177 | HgError::corrupted("manifest entry without \\0 delimiter") | |
169 | }) |
|
178 | }) | |
170 | } |
|
179 | } | |
171 |
|
180 | |||
172 | fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self { |
|
181 | fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self { | |
173 | let (hex_node_id, flags) = match rest.split_last() { |
|
182 | let (hex_node_id, flags) = match rest.split_last() { | |
174 | Some((&b'x', rest)) => (rest, Some(b'x')), |
|
183 | Some((&b'x', rest)) => (rest, Some(b'x')), | |
175 | Some((&b'l', rest)) => (rest, Some(b'l')), |
|
184 | Some((&b'l', rest)) => (rest, Some(b'l')), | |
176 | Some((&b't', rest)) => (rest, Some(b't')), |
|
185 | Some((&b't', rest)) => (rest, Some(b't')), | |
177 | _ => (rest, None), |
|
186 | _ => (rest, None), | |
178 | }; |
|
187 | }; | |
179 | Self { |
|
188 | Self { | |
180 | path: HgPath::new(path), |
|
189 | path: HgPath::new(path), | |
181 | hex_node_id, |
|
190 | hex_node_id, | |
182 | flags, |
|
191 | flags, | |
183 | } |
|
192 | } | |
184 | } |
|
193 | } | |
185 |
|
194 | |||
186 | fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> { |
|
195 | fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> { | |
187 | let (path, rest) = Self::split_path(bytes)?; |
|
196 | let (path, rest) = Self::split_path(bytes)?; | |
188 | Ok(Self::from_path_and_rest(path, rest)) |
|
197 | Ok(Self::from_path_and_rest(path, rest)) | |
189 | } |
|
198 | } | |
190 |
|
199 | |||
191 | pub fn node_id(&self) -> Result<Node, HgError> { |
|
200 | pub fn node_id(&self) -> Result<Node, HgError> { | |
192 | Node::from_hex_for_repo(self.hex_node_id) |
|
201 | Node::from_hex_for_repo(self.hex_node_id) | |
193 | } |
|
202 | } | |
194 | } |
|
203 | } |
@@ -1,849 +1,904 | |||||
1 | // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net> |
|
1 | // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net> | |
2 | // and Mercurial contributors |
|
2 | // and Mercurial contributors | |
3 | // |
|
3 | // | |
4 | // This software may be used and distributed according to the terms of the |
|
4 | // This software may be used and distributed according to the terms of the | |
5 | // GNU General Public License version 2 or any later version. |
|
5 | // GNU General Public License version 2 or any later version. | |
6 | //! Mercurial concepts for handling revision history |
|
6 | //! Mercurial concepts for handling revision history | |
7 |
|
7 | |||
8 | pub mod node; |
|
8 | pub mod node; | |
9 | pub mod nodemap; |
|
9 | pub mod nodemap; | |
10 | mod nodemap_docket; |
|
10 | mod nodemap_docket; | |
11 | pub mod path_encode; |
|
11 | pub mod path_encode; | |
12 | pub use node::{FromHexError, Node, NodePrefix}; |
|
12 | pub use node::{FromHexError, Node, NodePrefix}; | |
13 | pub mod changelog; |
|
13 | pub mod changelog; | |
14 | pub mod filelog; |
|
14 | pub mod filelog; | |
15 | pub mod index; |
|
15 | pub mod index; | |
16 | pub mod manifest; |
|
16 | pub mod manifest; | |
17 | pub mod patch; |
|
17 | pub mod patch; | |
18 |
|
18 | |||
19 | use std::borrow::Cow; |
|
19 | use std::borrow::Cow; | |
20 | use std::io::Read; |
|
20 | use std::io::Read; | |
21 | use std::ops::Deref; |
|
21 | use std::ops::Deref; | |
22 | use std::path::Path; |
|
22 | use std::path::Path; | |
23 |
|
23 | |||
24 | use flate2::read::ZlibDecoder; |
|
24 | use flate2::read::ZlibDecoder; | |
25 | use sha1::{Digest, Sha1}; |
|
25 | use sha1::{Digest, Sha1}; | |
26 | use std::cell::RefCell; |
|
26 | use std::cell::RefCell; | |
27 | use zstd; |
|
27 | use zstd; | |
28 |
|
28 | |||
29 | use self::node::{NODE_BYTES_LENGTH, NULL_NODE}; |
|
29 | use self::node::{NODE_BYTES_LENGTH, NULL_NODE}; | |
30 | use self::nodemap_docket::NodeMapDocket; |
|
30 | use self::nodemap_docket::NodeMapDocket; | |
31 | use super::index::Index; |
|
31 | use super::index::Index; | |
32 | use super::nodemap::{NodeMap, NodeMapError}; |
|
32 | use super::nodemap::{NodeMap, NodeMapError}; | |
33 | use crate::errors::HgError; |
|
33 | use crate::errors::HgError; | |
34 | use crate::vfs::Vfs; |
|
34 | use crate::vfs::Vfs; | |
35 |
|
35 | |||
36 | /// Mercurial revision numbers |
|
36 | /// Mercurial revision numbers | |
37 | /// |
|
37 | /// | |
38 | /// As noted in revlog.c, revision numbers are actually encoded in |
|
38 | /// As noted in revlog.c, revision numbers are actually encoded in | |
39 | /// 4 bytes, and are liberally converted to ints, whence the i32 |
|
39 | /// 4 bytes, and are liberally converted to ints, whence the i32 | |
40 | pub type Revision = i32; |
|
40 | pub type Revision = i32; | |
41 |
|
41 | |||
42 | /// Unchecked Mercurial revision numbers. |
|
42 | /// Unchecked Mercurial revision numbers. | |
43 | /// |
|
43 | /// | |
44 | /// Values of this type have no guarantee of being a valid revision number |
|
44 | /// Values of this type have no guarantee of being a valid revision number | |
45 | /// in any context. Use method `check_revision` to get a valid revision within |
|
45 | /// in any context. Use method `check_revision` to get a valid revision within | |
46 | /// the appropriate index object. |
|
46 | /// the appropriate index object. | |
47 | /// |
|
47 | /// | |
48 | /// As noted in revlog.c, revision numbers are actually encoded in |
|
48 | /// As noted in revlog.c, revision numbers are actually encoded in | |
49 | /// 4 bytes, and are liberally converted to ints, whence the i32 |
|
49 | /// 4 bytes, and are liberally converted to ints, whence the i32 | |
50 | pub type UncheckedRevision = i32; |
|
50 | #[derive( | |
|
51 | Debug, | |||
|
52 | derive_more::Display, | |||
|
53 | Clone, | |||
|
54 | Copy, | |||
|
55 | Hash, | |||
|
56 | PartialEq, | |||
|
57 | Eq, | |||
|
58 | PartialOrd, | |||
|
59 | Ord, | |||
|
60 | )] | |||
|
61 | pub struct UncheckedRevision(i32); | |||
|
62 | ||||
|
63 | impl From<Revision> for UncheckedRevision { | |||
|
64 | fn from(value: Revision) -> Self { | |||
|
65 | Self(value) | |||
|
66 | } | |||
|
67 | } | |||
51 |
|
68 | |||
52 | /// Marker expressing the absence of a parent |
|
69 | /// Marker expressing the absence of a parent | |
53 | /// |
|
70 | /// | |
54 | /// Independently of the actual representation, `NULL_REVISION` is guaranteed |
|
71 | /// Independently of the actual representation, `NULL_REVISION` is guaranteed | |
55 | /// to be smaller than all existing revisions. |
|
72 | /// to be smaller than all existing revisions. | |
56 | pub const NULL_REVISION: Revision = -1; |
|
73 | pub const NULL_REVISION: Revision = -1; | |
57 |
|
74 | |||
58 | /// Same as `mercurial.node.wdirrev` |
|
75 | /// Same as `mercurial.node.wdirrev` | |
59 | /// |
|
76 | /// | |
60 | /// This is also equal to `i32::max_value()`, but it's better to spell |
|
77 | /// This is also equal to `i32::max_value()`, but it's better to spell | |
61 | /// it out explicitely, same as in `mercurial.node` |
|
78 | /// it out explicitely, same as in `mercurial.node` | |
62 | #[allow(clippy::unreadable_literal)] |
|
79 | #[allow(clippy::unreadable_literal)] | |
63 |
pub const WORKING_DIRECTORY_REVISION: Revision = |
|
80 | pub const WORKING_DIRECTORY_REVISION: UncheckedRevision = | |
|
81 | UncheckedRevision(0x7fffffff); | |||
64 |
|
82 | |||
65 | pub const WORKING_DIRECTORY_HEX: &str = |
|
83 | pub const WORKING_DIRECTORY_HEX: &str = | |
66 | "ffffffffffffffffffffffffffffffffffffffff"; |
|
84 | "ffffffffffffffffffffffffffffffffffffffff"; | |
67 |
|
85 | |||
68 | /// The simplest expression of what we need of Mercurial DAGs. |
|
86 | /// The simplest expression of what we need of Mercurial DAGs. | |
69 | pub trait Graph { |
|
87 | pub trait Graph { | |
70 | /// Return the two parents of the given `Revision`. |
|
88 | /// Return the two parents of the given `Revision`. | |
71 | /// |
|
89 | /// | |
72 | /// Each of the parents can be independently `NULL_REVISION` |
|
90 | /// Each of the parents can be independently `NULL_REVISION` | |
73 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>; |
|
91 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>; | |
74 | } |
|
92 | } | |
75 |
|
93 | |||
76 | #[derive(Clone, Debug, PartialEq)] |
|
94 | #[derive(Clone, Debug, PartialEq)] | |
77 | pub enum GraphError { |
|
95 | pub enum GraphError { | |
78 | ParentOutOfRange(Revision), |
|
96 | ParentOutOfRange(Revision), | |
79 | } |
|
97 | } | |
80 |
|
98 | |||
81 | /// The Mercurial Revlog Index |
|
99 | /// The Mercurial Revlog Index | |
82 | /// |
|
100 | /// | |
83 | /// This is currently limited to the minimal interface that is needed for |
|
101 | /// This is currently limited to the minimal interface that is needed for | |
84 | /// the [`nodemap`](nodemap/index.html) module |
|
102 | /// the [`nodemap`](nodemap/index.html) module | |
85 | pub trait RevlogIndex { |
|
103 | pub trait RevlogIndex { | |
86 | /// Total number of Revisions referenced in this index |
|
104 | /// Total number of Revisions referenced in this index | |
87 | fn len(&self) -> usize; |
|
105 | fn len(&self) -> usize; | |
88 |
|
106 | |||
89 | fn is_empty(&self) -> bool { |
|
107 | fn is_empty(&self) -> bool { | |
90 | self.len() == 0 |
|
108 | self.len() == 0 | |
91 | } |
|
109 | } | |
92 |
|
110 | |||
93 |
/// Return a reference to the Node or `None` |
|
111 | /// Return a reference to the Node or `None` for `NULL_REVISION` | |
94 | /// |
|
|||
95 | /// `NULL_REVISION` is not considered to be out of bounds. |
|
|||
96 | fn node(&self, rev: Revision) -> Option<&Node>; |
|
112 | fn node(&self, rev: Revision) -> Option<&Node>; | |
97 |
|
113 | |||
98 | /// Return a [`Revision`] if `rev` is a valid revision number for this |
|
114 | /// Return a [`Revision`] if `rev` is a valid revision number for this | |
99 | /// index |
|
115 | /// index | |
100 | fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { |
|
116 | fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { | |
|
117 | let rev = rev.0; | |||
|
118 | ||||
101 | if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) { |
|
119 | if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) { | |
102 | Some(rev) |
|
120 | Some(rev) | |
103 | } else { |
|
121 | } else { | |
104 | None |
|
122 | None | |
105 | } |
|
123 | } | |
106 | } |
|
124 | } | |
107 | } |
|
125 | } | |
108 |
|
126 | |||
109 | const REVISION_FLAG_CENSORED: u16 = 1 << 15; |
|
127 | const REVISION_FLAG_CENSORED: u16 = 1 << 15; | |
110 | const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14; |
|
128 | const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14; | |
111 | const REVISION_FLAG_EXTSTORED: u16 = 1 << 13; |
|
129 | const REVISION_FLAG_EXTSTORED: u16 = 1 << 13; | |
112 | const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12; |
|
130 | const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12; | |
113 |
|
131 | |||
114 | // Keep this in sync with REVIDX_KNOWN_FLAGS in |
|
132 | // Keep this in sync with REVIDX_KNOWN_FLAGS in | |
115 | // mercurial/revlogutils/flagutil.py |
|
133 | // mercurial/revlogutils/flagutil.py | |
116 | const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED |
|
134 | const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED | |
117 | | REVISION_FLAG_ELLIPSIS |
|
135 | | REVISION_FLAG_ELLIPSIS | |
118 | | REVISION_FLAG_EXTSTORED |
|
136 | | REVISION_FLAG_EXTSTORED | |
119 | | REVISION_FLAG_HASCOPIESINFO; |
|
137 | | REVISION_FLAG_HASCOPIESINFO; | |
120 |
|
138 | |||
121 | const NULL_REVLOG_ENTRY_FLAGS: u16 = 0; |
|
139 | const NULL_REVLOG_ENTRY_FLAGS: u16 = 0; | |
122 |
|
140 | |||
123 | #[derive(Debug, derive_more::From)] |
|
141 | #[derive(Debug, derive_more::From, derive_more::Display)] | |
124 | pub enum RevlogError { |
|
142 | pub enum RevlogError { | |
125 | InvalidRevision, |
|
143 | InvalidRevision, | |
126 | /// Working directory is not supported |
|
144 | /// Working directory is not supported | |
127 | WDirUnsupported, |
|
145 | WDirUnsupported, | |
128 | /// Found more than one entry whose ID match the requested prefix |
|
146 | /// Found more than one entry whose ID match the requested prefix | |
129 | AmbiguousPrefix, |
|
147 | AmbiguousPrefix, | |
130 | #[from] |
|
148 | #[from] | |
131 | Other(HgError), |
|
149 | Other(HgError), | |
132 | } |
|
150 | } | |
133 |
|
151 | |||
134 | impl From<NodeMapError> for RevlogError { |
|
152 | impl From<NodeMapError> for RevlogError { | |
135 | fn from(error: NodeMapError) -> Self { |
|
153 | fn from(error: NodeMapError) -> Self { | |
136 | match error { |
|
154 | match error { | |
137 | NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix, |
|
155 | NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix, | |
138 | NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted( |
|
156 | NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted( | |
139 | format!("nodemap point to revision {} not in index", rev), |
|
157 | format!("nodemap point to revision {} not in index", rev), | |
140 | ), |
|
158 | ), | |
141 | } |
|
159 | } | |
142 | } |
|
160 | } | |
143 | } |
|
161 | } | |
144 |
|
162 | |||
145 | fn corrupted<S: AsRef<str>>(context: S) -> HgError { |
|
163 | fn corrupted<S: AsRef<str>>(context: S) -> HgError { | |
146 | HgError::corrupted(format!("corrupted revlog, {}", context.as_ref())) |
|
164 | HgError::corrupted(format!("corrupted revlog, {}", context.as_ref())) | |
147 | } |
|
165 | } | |
148 |
|
166 | |||
149 | impl RevlogError { |
|
167 | impl RevlogError { | |
150 | fn corrupted<S: AsRef<str>>(context: S) -> Self { |
|
168 | fn corrupted<S: AsRef<str>>(context: S) -> Self { | |
151 | RevlogError::Other(corrupted(context)) |
|
169 | RevlogError::Other(corrupted(context)) | |
152 | } |
|
170 | } | |
153 | } |
|
171 | } | |
154 |
|
172 | |||
155 | /// Read only implementation of revlog. |
|
173 | /// Read only implementation of revlog. | |
156 | pub struct Revlog { |
|
174 | pub struct Revlog { | |
157 | /// When index and data are not interleaved: bytes of the revlog index. |
|
175 | /// When index and data are not interleaved: bytes of the revlog index. | |
158 | /// When index and data are interleaved: bytes of the revlog index and |
|
176 | /// When index and data are interleaved: bytes of the revlog index and | |
159 | /// data. |
|
177 | /// data. | |
160 | index: Index, |
|
178 | index: Index, | |
161 | /// When index and data are not interleaved: bytes of the revlog data |
|
179 | /// When index and data are not interleaved: bytes of the revlog data | |
162 | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, |
|
180 | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, | |
163 | /// When present on disk: the persistent nodemap for this revlog |
|
181 | /// When present on disk: the persistent nodemap for this revlog | |
164 | nodemap: Option<nodemap::NodeTree>, |
|
182 | nodemap: Option<nodemap::NodeTree>, | |
165 | } |
|
183 | } | |
166 |
|
184 | |||
167 | impl Revlog { |
|
185 | impl Revlog { | |
168 | /// Open a revlog index file. |
|
186 | /// Open a revlog index file. | |
169 | /// |
|
187 | /// | |
170 | /// It will also open the associated data file if index and data are not |
|
188 | /// It will also open the associated data file if index and data are not | |
171 | /// interleaved. |
|
189 | /// interleaved. | |
172 | pub fn open( |
|
190 | pub fn open( | |
173 | store_vfs: &Vfs, |
|
191 | store_vfs: &Vfs, | |
174 | index_path: impl AsRef<Path>, |
|
192 | index_path: impl AsRef<Path>, | |
175 | data_path: Option<&Path>, |
|
193 | data_path: Option<&Path>, | |
176 | use_nodemap: bool, |
|
194 | use_nodemap: bool, | |
177 | ) -> Result<Self, HgError> { |
|
195 | ) -> Result<Self, HgError> { | |
178 | let index_path = index_path.as_ref(); |
|
196 | let index_path = index_path.as_ref(); | |
179 | let index = { |
|
197 | let index = { | |
180 | match store_vfs.mmap_open_opt(&index_path)? { |
|
198 | match store_vfs.mmap_open_opt(&index_path)? { | |
181 | None => Index::new(Box::new(vec![])), |
|
199 | None => Index::new(Box::new(vec![])), | |
182 | Some(index_mmap) => { |
|
200 | Some(index_mmap) => { | |
183 | let index = Index::new(Box::new(index_mmap))?; |
|
201 | let index = Index::new(Box::new(index_mmap))?; | |
184 | Ok(index) |
|
202 | Ok(index) | |
185 | } |
|
203 | } | |
186 | } |
|
204 | } | |
187 | }?; |
|
205 | }?; | |
188 |
|
206 | |||
189 | let default_data_path = index_path.with_extension("d"); |
|
207 | let default_data_path = index_path.with_extension("d"); | |
190 |
|
208 | |||
191 | // type annotation required |
|
209 | // type annotation required | |
192 | // won't recognize Mmap as Deref<Target = [u8]> |
|
210 | // won't recognize Mmap as Deref<Target = [u8]> | |
193 | let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = |
|
211 | let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = | |
194 | if index.is_inline() { |
|
212 | if index.is_inline() { | |
195 | None |
|
213 | None | |
196 | } else { |
|
214 | } else { | |
197 | let data_path = data_path.unwrap_or(&default_data_path); |
|
215 | let data_path = data_path.unwrap_or(&default_data_path); | |
198 | let data_mmap = store_vfs.mmap_open(data_path)?; |
|
216 | let data_mmap = store_vfs.mmap_open(data_path)?; | |
199 | Some(Box::new(data_mmap)) |
|
217 | Some(Box::new(data_mmap)) | |
200 | }; |
|
218 | }; | |
201 |
|
219 | |||
202 | let nodemap = if index.is_inline() || !use_nodemap { |
|
220 | let nodemap = if index.is_inline() || !use_nodemap { | |
203 | None |
|
221 | None | |
204 | } else { |
|
222 | } else { | |
205 | NodeMapDocket::read_from_file(store_vfs, index_path)?.map( |
|
223 | NodeMapDocket::read_from_file(store_vfs, index_path)?.map( | |
206 | |(docket, data)| { |
|
224 | |(docket, data)| { | |
207 | nodemap::NodeTree::load_bytes( |
|
225 | nodemap::NodeTree::load_bytes( | |
208 | Box::new(data), |
|
226 | Box::new(data), | |
209 | docket.data_length, |
|
227 | docket.data_length, | |
210 | ) |
|
228 | ) | |
211 | }, |
|
229 | }, | |
212 | ) |
|
230 | ) | |
213 | }; |
|
231 | }; | |
214 |
|
232 | |||
215 | Ok(Revlog { |
|
233 | Ok(Revlog { | |
216 | index, |
|
234 | index, | |
217 | data_bytes, |
|
235 | data_bytes, | |
218 | nodemap, |
|
236 | nodemap, | |
219 | }) |
|
237 | }) | |
220 | } |
|
238 | } | |
221 |
|
239 | |||
222 | /// Return number of entries of the `Revlog`. |
|
240 | /// Return number of entries of the `Revlog`. | |
223 | pub fn len(&self) -> usize { |
|
241 | pub fn len(&self) -> usize { | |
224 | self.index.len() |
|
242 | self.index.len() | |
225 | } |
|
243 | } | |
226 |
|
244 | |||
227 | /// Returns `true` if the `Revlog` has zero `entries`. |
|
245 | /// Returns `true` if the `Revlog` has zero `entries`. | |
228 | pub fn is_empty(&self) -> bool { |
|
246 | pub fn is_empty(&self) -> bool { | |
229 | self.index.is_empty() |
|
247 | self.index.is_empty() | |
230 | } |
|
248 | } | |
231 |
|
249 | |||
232 | /// Returns the node ID for the given revision number, if it exists in this |
|
250 | /// Returns the node ID for the given revision number, if it exists in this | |
233 | /// revlog |
|
251 | /// revlog | |
234 | pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> { |
|
252 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { | |
235 | if rev == NULL_REVISION { |
|
253 | if rev == NULL_REVISION.into() { | |
236 | return Some(&NULL_NODE); |
|
254 | return Some(&NULL_NODE); | |
237 | } |
|
255 | } | |
|
256 | let rev = self.index.check_revision(rev)?; | |||
238 | Some(self.index.get_entry(rev)?.hash()) |
|
257 | Some(self.index.get_entry(rev)?.hash()) | |
239 | } |
|
258 | } | |
240 |
|
259 | |||
241 | /// Return the revision number for the given node ID, if it exists in this |
|
260 | /// Return the revision number for the given node ID, if it exists in this | |
242 | /// revlog |
|
261 | /// revlog | |
243 | pub fn rev_from_node( |
|
262 | pub fn rev_from_node( | |
244 | &self, |
|
263 | &self, | |
245 | node: NodePrefix, |
|
264 | node: NodePrefix, | |
246 | ) -> Result<Revision, RevlogError> { |
|
265 | ) -> Result<Revision, RevlogError> { | |
247 | let looked_up = if let Some(nodemap) = &self.nodemap { |
|
266 | let looked_up = if let Some(nodemap) = &self.nodemap { | |
248 | nodemap |
|
267 | nodemap | |
249 | .find_bin(&self.index, node)? |
|
268 | .find_bin(&self.index, node)? | |
250 | .ok_or(RevlogError::InvalidRevision) |
|
269 | .ok_or(RevlogError::InvalidRevision) | |
251 | } else { |
|
270 | } else { | |
252 | self.rev_from_node_no_persistent_nodemap(node) |
|
271 | self.rev_from_node_no_persistent_nodemap(node) | |
253 | }; |
|
272 | }; | |
254 |
|
273 | |||
255 | if node.is_prefix_of(&NULL_NODE) { |
|
274 | if node.is_prefix_of(&NULL_NODE) { | |
256 | return match looked_up { |
|
275 | return match looked_up { | |
257 | Ok(_) => Err(RevlogError::AmbiguousPrefix), |
|
276 | Ok(_) => Err(RevlogError::AmbiguousPrefix), | |
258 | Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION), |
|
277 | Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION), | |
259 | res => res, |
|
278 | res => res, | |
260 | }; |
|
279 | }; | |
261 | }; |
|
280 | }; | |
262 |
|
281 | |||
263 | looked_up |
|
282 | looked_up | |
264 | } |
|
283 | } | |
265 |
|
284 | |||
266 | /// Same as `rev_from_node`, without using a persistent nodemap |
|
285 | /// Same as `rev_from_node`, without using a persistent nodemap | |
267 | /// |
|
286 | /// | |
268 | /// This is used as fallback when a persistent nodemap is not present. |
|
287 | /// This is used as fallback when a persistent nodemap is not present. | |
269 | /// This happens when the persistent-nodemap experimental feature is not |
|
288 | /// This happens when the persistent-nodemap experimental feature is not | |
270 | /// enabled, or for small revlogs. |
|
289 | /// enabled, or for small revlogs. | |
271 | fn rev_from_node_no_persistent_nodemap( |
|
290 | fn rev_from_node_no_persistent_nodemap( | |
272 | &self, |
|
291 | &self, | |
273 | node: NodePrefix, |
|
292 | node: NodePrefix, | |
274 | ) -> Result<Revision, RevlogError> { |
|
293 | ) -> Result<Revision, RevlogError> { | |
275 | // Linear scan of the revlog |
|
294 | // Linear scan of the revlog | |
276 | // TODO: consider building a non-persistent nodemap in memory to |
|
295 | // TODO: consider building a non-persistent nodemap in memory to | |
277 | // optimize these cases. |
|
296 | // optimize these cases. | |
278 | let mut found_by_prefix = None; |
|
297 | let mut found_by_prefix = None; | |
279 | for rev in (0..self.len() as Revision).rev() { |
|
298 | for rev in (0..self.len() as Revision).rev() { | |
280 | let index_entry = self.index.get_entry(rev).ok_or_else(|| { |
|
299 | let index_entry = self.index.get_entry(rev).ok_or_else(|| { | |
281 | HgError::corrupted( |
|
300 | HgError::corrupted( | |
282 | "revlog references a revision not in the index", |
|
301 | "revlog references a revision not in the index", | |
283 | ) |
|
302 | ) | |
284 | })?; |
|
303 | })?; | |
285 | if node == *index_entry.hash() { |
|
304 | if node == *index_entry.hash() { | |
286 | return Ok(rev); |
|
305 | return Ok(rev); | |
287 | } |
|
306 | } | |
288 | if node.is_prefix_of(index_entry.hash()) { |
|
307 | if node.is_prefix_of(index_entry.hash()) { | |
289 | if found_by_prefix.is_some() { |
|
308 | if found_by_prefix.is_some() { | |
290 | return Err(RevlogError::AmbiguousPrefix); |
|
309 | return Err(RevlogError::AmbiguousPrefix); | |
291 | } |
|
310 | } | |
292 | found_by_prefix = Some(rev) |
|
311 | found_by_prefix = Some(rev) | |
293 | } |
|
312 | } | |
294 | } |
|
313 | } | |
295 | found_by_prefix.ok_or(RevlogError::InvalidRevision) |
|
314 | found_by_prefix.ok_or(RevlogError::InvalidRevision) | |
296 | } |
|
315 | } | |
297 |
|
316 | |||
298 | /// Returns whether the given revision exists in this revlog. |
|
317 | /// Returns whether the given revision exists in this revlog. | |
299 | pub fn has_rev(&self, rev: Revision) -> bool { |
|
318 | pub fn has_rev(&self, rev: UncheckedRevision) -> bool { | |
300 |
self.index. |
|
319 | self.index.check_revision(rev).is_some() | |
301 | } |
|
320 | } | |
302 |
|
321 | |||
303 | /// Return the full data associated to a revision. |
|
322 | /// Return the full data associated to a revision. | |
304 | /// |
|
323 | /// | |
305 | /// All entries required to build the final data out of deltas will be |
|
324 | /// All entries required to build the final data out of deltas will be | |
306 | /// retrieved as needed, and the deltas will be applied to the inital |
|
325 | /// retrieved as needed, and the deltas will be applied to the inital | |
307 | /// snapshot to rebuild the final data. |
|
326 | /// snapshot to rebuild the final data. | |
308 | pub fn get_rev_data( |
|
327 | pub fn get_rev_data( | |
309 | &self, |
|
328 | &self, | |
|
329 | rev: UncheckedRevision, | |||
|
330 | ) -> Result<Cow<[u8]>, RevlogError> { | |||
|
331 | if rev == NULL_REVISION.into() { | |||
|
332 | return Ok(Cow::Borrowed(&[])); | |||
|
333 | }; | |||
|
334 | self.get_entry(rev)?.data() | |||
|
335 | } | |||
|
336 | ||||
|
337 | /// [`Self::get_rev_data`] for checked revisions. | |||
|
338 | pub fn get_rev_data_for_checked_rev( | |||
|
339 | &self, | |||
310 | rev: Revision, |
|
340 | rev: Revision, | |
311 | ) -> Result<Cow<[u8]>, RevlogError> { |
|
341 | ) -> Result<Cow<[u8]>, RevlogError> { | |
312 | if rev == NULL_REVISION { |
|
342 | if rev == NULL_REVISION { | |
313 | return Ok(Cow::Borrowed(&[])); |
|
343 | return Ok(Cow::Borrowed(&[])); | |
314 | }; |
|
344 | }; | |
315 |
|
|
345 | self.get_entry_for_checked_rev(rev)?.data() | |
316 | } |
|
346 | } | |
317 |
|
347 | |||
318 | /// Check the hash of some given data against the recorded hash. |
|
348 | /// Check the hash of some given data against the recorded hash. | |
319 | pub fn check_hash( |
|
349 | pub fn check_hash( | |
320 | &self, |
|
350 | &self, | |
321 | p1: Revision, |
|
351 | p1: Revision, | |
322 | p2: Revision, |
|
352 | p2: Revision, | |
323 | expected: &[u8], |
|
353 | expected: &[u8], | |
324 | data: &[u8], |
|
354 | data: &[u8], | |
325 | ) -> bool { |
|
355 | ) -> bool { | |
326 | let e1 = self.index.get_entry(p1); |
|
356 | let e1 = self.index.get_entry(p1); | |
327 | let h1 = match e1 { |
|
357 | let h1 = match e1 { | |
328 | Some(ref entry) => entry.hash(), |
|
358 | Some(ref entry) => entry.hash(), | |
329 | None => &NULL_NODE, |
|
359 | None => &NULL_NODE, | |
330 | }; |
|
360 | }; | |
331 | let e2 = self.index.get_entry(p2); |
|
361 | let e2 = self.index.get_entry(p2); | |
332 | let h2 = match e2 { |
|
362 | let h2 = match e2 { | |
333 | Some(ref entry) => entry.hash(), |
|
363 | Some(ref entry) => entry.hash(), | |
334 | None => &NULL_NODE, |
|
364 | None => &NULL_NODE, | |
335 | }; |
|
365 | }; | |
336 |
|
366 | |||
337 | hash(data, h1.as_bytes(), h2.as_bytes()) == expected |
|
367 | hash(data, h1.as_bytes(), h2.as_bytes()) == expected | |
338 | } |
|
368 | } | |
339 |
|
369 | |||
340 | /// Build the full data of a revision out its snapshot |
|
370 | /// Build the full data of a revision out its snapshot | |
341 | /// and its deltas. |
|
371 | /// and its deltas. | |
342 | fn build_data_from_deltas( |
|
372 | fn build_data_from_deltas( | |
343 | snapshot: RevlogEntry, |
|
373 | snapshot: RevlogEntry, | |
344 | deltas: &[RevlogEntry], |
|
374 | deltas: &[RevlogEntry], | |
345 | ) -> Result<Vec<u8>, HgError> { |
|
375 | ) -> Result<Vec<u8>, HgError> { | |
346 | let snapshot = snapshot.data_chunk()?; |
|
376 | let snapshot = snapshot.data_chunk()?; | |
347 | let deltas = deltas |
|
377 | let deltas = deltas | |
348 | .iter() |
|
378 | .iter() | |
349 | .rev() |
|
379 | .rev() | |
350 | .map(RevlogEntry::data_chunk) |
|
380 | .map(RevlogEntry::data_chunk) | |
351 | .collect::<Result<Vec<_>, _>>()?; |
|
381 | .collect::<Result<Vec<_>, _>>()?; | |
352 | let patches: Vec<_> = |
|
382 | let patches: Vec<_> = | |
353 | deltas.iter().map(|d| patch::PatchList::new(d)).collect(); |
|
383 | deltas.iter().map(|d| patch::PatchList::new(d)).collect(); | |
354 | let patch = patch::fold_patch_lists(&patches); |
|
384 | let patch = patch::fold_patch_lists(&patches); | |
355 | Ok(patch.apply(&snapshot)) |
|
385 | Ok(patch.apply(&snapshot)) | |
356 | } |
|
386 | } | |
357 |
|
387 | |||
358 | /// Return the revlog data. |
|
388 | /// Return the revlog data. | |
359 | fn data(&self) -> &[u8] { |
|
389 | fn data(&self) -> &[u8] { | |
360 | match &self.data_bytes { |
|
390 | match &self.data_bytes { | |
361 | Some(data_bytes) => data_bytes, |
|
391 | Some(data_bytes) => data_bytes, | |
362 | None => panic!( |
|
392 | None => panic!( | |
363 | "forgot to load the data or trying to access inline data" |
|
393 | "forgot to load the data or trying to access inline data" | |
364 | ), |
|
394 | ), | |
365 | } |
|
395 | } | |
366 | } |
|
396 | } | |
367 |
|
397 | |||
368 | pub fn make_null_entry(&self) -> RevlogEntry { |
|
398 | pub fn make_null_entry(&self) -> RevlogEntry { | |
369 | RevlogEntry { |
|
399 | RevlogEntry { | |
370 | revlog: self, |
|
400 | revlog: self, | |
371 | rev: NULL_REVISION, |
|
401 | rev: NULL_REVISION, | |
372 | bytes: b"", |
|
402 | bytes: b"", | |
373 | compressed_len: 0, |
|
403 | compressed_len: 0, | |
374 | uncompressed_len: 0, |
|
404 | uncompressed_len: 0, | |
375 | base_rev_or_base_of_delta_chain: None, |
|
405 | base_rev_or_base_of_delta_chain: None, | |
376 | p1: NULL_REVISION, |
|
406 | p1: NULL_REVISION, | |
377 | p2: NULL_REVISION, |
|
407 | p2: NULL_REVISION, | |
378 | flags: NULL_REVLOG_ENTRY_FLAGS, |
|
408 | flags: NULL_REVLOG_ENTRY_FLAGS, | |
379 | hash: NULL_NODE, |
|
409 | hash: NULL_NODE, | |
380 | } |
|
410 | } | |
381 | } |
|
411 | } | |
382 |
|
412 | |||
383 | /// Get an entry of the revlog. |
|
413 | fn get_entry_for_checked_rev( | |
384 | pub fn get_entry( |
|
|||
385 | &self, |
|
414 | &self, | |
386 | rev: Revision, |
|
415 | rev: Revision, | |
387 | ) -> Result<RevlogEntry, RevlogError> { |
|
416 | ) -> Result<RevlogEntry, RevlogError> { | |
388 | if rev == NULL_REVISION { |
|
417 | if rev == NULL_REVISION { | |
389 | return Ok(self.make_null_entry()); |
|
418 | return Ok(self.make_null_entry()); | |
390 | } |
|
419 | } | |
391 | let index_entry = self |
|
420 | let index_entry = self | |
392 | .index |
|
421 | .index | |
393 | .get_entry(rev) |
|
422 | .get_entry(rev) | |
394 | .ok_or(RevlogError::InvalidRevision)?; |
|
423 | .ok_or(RevlogError::InvalidRevision)?; | |
395 | let start = index_entry.offset(); |
|
424 | let start = index_entry.offset(); | |
396 | let end = start + index_entry.compressed_len() as usize; |
|
425 | let end = start + index_entry.compressed_len() as usize; | |
397 | let data = if self.index.is_inline() { |
|
426 | let data = if self.index.is_inline() { | |
398 | self.index.data(start, end) |
|
427 | self.index.data(start, end) | |
399 | } else { |
|
428 | } else { | |
400 | &self.data()[start..end] |
|
429 | &self.data()[start..end] | |
401 | }; |
|
430 | }; | |
|
431 | let base_rev = self | |||
|
432 | .index | |||
|
433 | .check_revision(index_entry.base_revision_or_base_of_delta_chain()) | |||
|
434 | .ok_or_else(|| { | |||
|
435 | RevlogError::corrupted(format!( | |||
|
436 | "base revision for rev {} is invalid", | |||
|
437 | rev | |||
|
438 | )) | |||
|
439 | })?; | |||
|
440 | let p1 = | |||
|
441 | self.index.check_revision(index_entry.p1()).ok_or_else(|| { | |||
|
442 | RevlogError::corrupted(format!( | |||
|
443 | "p1 for rev {} is invalid", | |||
|
444 | rev | |||
|
445 | )) | |||
|
446 | })?; | |||
|
447 | let p2 = | |||
|
448 | self.index.check_revision(index_entry.p2()).ok_or_else(|| { | |||
|
449 | RevlogError::corrupted(format!( | |||
|
450 | "p2 for rev {} is invalid", | |||
|
451 | rev | |||
|
452 | )) | |||
|
453 | })?; | |||
402 | let entry = RevlogEntry { |
|
454 | let entry = RevlogEntry { | |
403 | revlog: self, |
|
455 | revlog: self, | |
404 | rev, |
|
456 | rev, | |
405 | bytes: data, |
|
457 | bytes: data, | |
406 | compressed_len: index_entry.compressed_len(), |
|
458 | compressed_len: index_entry.compressed_len(), | |
407 | uncompressed_len: index_entry.uncompressed_len(), |
|
459 | uncompressed_len: index_entry.uncompressed_len(), | |
408 |
base_rev_or_base_of_delta_chain: if |
|
460 | base_rev_or_base_of_delta_chain: if base_rev == rev { | |
409 | .base_revision_or_base_of_delta_chain() |
|
|||
410 | == rev |
|
|||
411 | { |
|
|||
412 | None |
|
461 | None | |
413 | } else { |
|
462 | } else { | |
414 | Some(index_entry.base_revision_or_base_of_delta_chain()) |
|
463 | Some(base_rev) | |
415 | }, |
|
464 | }, | |
416 |
p1 |
|
465 | p1, | |
417 |
p2 |
|
466 | p2, | |
418 | flags: index_entry.flags(), |
|
467 | flags: index_entry.flags(), | |
419 | hash: *index_entry.hash(), |
|
468 | hash: *index_entry.hash(), | |
420 | }; |
|
469 | }; | |
421 | Ok(entry) |
|
470 | Ok(entry) | |
422 | } |
|
471 | } | |
423 |
|
472 | |||
424 | /// when resolving internal references within revlog, any errors |
|
473 | /// Get an entry of the revlog. | |
425 | /// should be reported as corruption, instead of e.g. "invalid revision" |
|
474 | pub fn get_entry( | |
426 | fn get_entry_internal( |
|
|||
427 | &self, |
|
475 | &self, | |
428 | rev: Revision, |
|
476 | rev: UncheckedRevision, | |
429 |
) -> Result<RevlogEntry, |
|
477 | ) -> Result<RevlogEntry, RevlogError> { | |
430 | self.get_entry(rev) |
|
478 | if rev == NULL_REVISION.into() { | |
431 | .map_err(|_| corrupted(format!("revision {} out of range", rev))) |
|
479 | return Ok(self.make_null_entry()); | |
|
480 | } | |||
|
481 | let rev = self.index.check_revision(rev).ok_or_else(|| { | |||
|
482 | RevlogError::corrupted(format!("rev {} is invalid", rev)) | |||
|
483 | })?; | |||
|
484 | self.get_entry_for_checked_rev(rev) | |||
432 | } |
|
485 | } | |
433 | } |
|
486 | } | |
434 |
|
487 | |||
435 | /// The revlog entry's bytes and the necessary informations to extract |
|
488 | /// The revlog entry's bytes and the necessary informations to extract | |
436 | /// the entry's data. |
|
489 | /// the entry's data. | |
437 | #[derive(Clone)] |
|
490 | #[derive(Clone)] | |
438 | pub struct RevlogEntry<'revlog> { |
|
491 | pub struct RevlogEntry<'revlog> { | |
439 | revlog: &'revlog Revlog, |
|
492 | revlog: &'revlog Revlog, | |
440 | rev: Revision, |
|
493 | rev: Revision, | |
441 | bytes: &'revlog [u8], |
|
494 | bytes: &'revlog [u8], | |
442 | compressed_len: u32, |
|
495 | compressed_len: u32, | |
443 | uncompressed_len: i32, |
|
496 | uncompressed_len: i32, | |
444 | base_rev_or_base_of_delta_chain: Option<Revision>, |
|
497 | base_rev_or_base_of_delta_chain: Option<Revision>, | |
445 | p1: Revision, |
|
498 | p1: Revision, | |
446 | p2: Revision, |
|
499 | p2: Revision, | |
447 | flags: u16, |
|
500 | flags: u16, | |
448 | hash: Node, |
|
501 | hash: Node, | |
449 | } |
|
502 | } | |
450 |
|
503 | |||
451 | thread_local! { |
|
504 | thread_local! { | |
452 | // seems fine to [unwrap] here: this can only fail due to memory allocation |
|
505 | // seems fine to [unwrap] here: this can only fail due to memory allocation | |
453 | // failing, and it's normal for that to cause panic. |
|
506 | // failing, and it's normal for that to cause panic. | |
454 | static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> = |
|
507 | static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> = | |
455 | RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap()); |
|
508 | RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap()); | |
456 | } |
|
509 | } | |
457 |
|
510 | |||
458 | fn zstd_decompress_to_buffer( |
|
511 | fn zstd_decompress_to_buffer( | |
459 | bytes: &[u8], |
|
512 | bytes: &[u8], | |
460 | buf: &mut Vec<u8>, |
|
513 | buf: &mut Vec<u8>, | |
461 | ) -> Result<usize, std::io::Error> { |
|
514 | ) -> Result<usize, std::io::Error> { | |
462 | ZSTD_DECODER |
|
515 | ZSTD_DECODER | |
463 | .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf)) |
|
516 | .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf)) | |
464 | } |
|
517 | } | |
465 |
|
518 | |||
466 | impl<'revlog> RevlogEntry<'revlog> { |
|
519 | impl<'revlog> RevlogEntry<'revlog> { | |
467 | pub fn revision(&self) -> Revision { |
|
520 | pub fn revision(&self) -> Revision { | |
468 | self.rev |
|
521 | self.rev | |
469 | } |
|
522 | } | |
470 |
|
523 | |||
471 | pub fn node(&self) -> &Node { |
|
524 | pub fn node(&self) -> &Node { | |
472 | &self.hash |
|
525 | &self.hash | |
473 | } |
|
526 | } | |
474 |
|
527 | |||
475 | pub fn uncompressed_len(&self) -> Option<u32> { |
|
528 | pub fn uncompressed_len(&self) -> Option<u32> { | |
476 | u32::try_from(self.uncompressed_len).ok() |
|
529 | u32::try_from(self.uncompressed_len).ok() | |
477 | } |
|
530 | } | |
478 |
|
531 | |||
479 | pub fn has_p1(&self) -> bool { |
|
532 | pub fn has_p1(&self) -> bool { | |
480 | self.p1 != NULL_REVISION |
|
533 | self.p1 != NULL_REVISION | |
481 | } |
|
534 | } | |
482 |
|
535 | |||
483 | pub fn p1_entry( |
|
536 | pub fn p1_entry( | |
484 | &self, |
|
537 | &self, | |
485 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { |
|
538 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { | |
486 | if self.p1 == NULL_REVISION { |
|
539 | if self.p1 == NULL_REVISION { | |
487 | Ok(None) |
|
540 | Ok(None) | |
488 | } else { |
|
541 | } else { | |
489 | Ok(Some(self.revlog.get_entry(self.p1)?)) |
|
542 | Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?)) | |
490 | } |
|
543 | } | |
491 | } |
|
544 | } | |
492 |
|
545 | |||
493 | pub fn p2_entry( |
|
546 | pub fn p2_entry( | |
494 | &self, |
|
547 | &self, | |
495 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { |
|
548 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { | |
496 | if self.p2 == NULL_REVISION { |
|
549 | if self.p2 == NULL_REVISION { | |
497 | Ok(None) |
|
550 | Ok(None) | |
498 | } else { |
|
551 | } else { | |
499 | Ok(Some(self.revlog.get_entry(self.p2)?)) |
|
552 | Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?)) | |
500 | } |
|
553 | } | |
501 | } |
|
554 | } | |
502 |
|
555 | |||
503 | pub fn p1(&self) -> Option<Revision> { |
|
556 | pub fn p1(&self) -> Option<Revision> { | |
504 | if self.p1 == NULL_REVISION { |
|
557 | if self.p1 == NULL_REVISION { | |
505 | None |
|
558 | None | |
506 | } else { |
|
559 | } else { | |
507 | Some(self.p1) |
|
560 | Some(self.p1) | |
508 | } |
|
561 | } | |
509 | } |
|
562 | } | |
510 |
|
563 | |||
511 | pub fn p2(&self) -> Option<Revision> { |
|
564 | pub fn p2(&self) -> Option<Revision> { | |
512 | if self.p2 == NULL_REVISION { |
|
565 | if self.p2 == NULL_REVISION { | |
513 | None |
|
566 | None | |
514 | } else { |
|
567 | } else { | |
515 | Some(self.p2) |
|
568 | Some(self.p2) | |
516 | } |
|
569 | } | |
517 | } |
|
570 | } | |
518 |
|
571 | |||
519 | pub fn is_censored(&self) -> bool { |
|
572 | pub fn is_censored(&self) -> bool { | |
520 | (self.flags & REVISION_FLAG_CENSORED) != 0 |
|
573 | (self.flags & REVISION_FLAG_CENSORED) != 0 | |
521 | } |
|
574 | } | |
522 |
|
575 | |||
523 | pub fn has_length_affecting_flag_processor(&self) -> bool { |
|
576 | pub fn has_length_affecting_flag_processor(&self) -> bool { | |
524 | // Relevant Python code: revlog.size() |
|
577 | // Relevant Python code: revlog.size() | |
525 | // note: ELLIPSIS is known to not change the content |
|
578 | // note: ELLIPSIS is known to not change the content | |
526 | (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 |
|
579 | (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 | |
527 | } |
|
580 | } | |
528 |
|
581 | |||
529 | /// The data for this entry, after resolving deltas if any. |
|
582 | /// The data for this entry, after resolving deltas if any. | |
530 |
pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, |
|
583 | pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
531 | let mut entry = self.clone(); |
|
584 | let mut entry = self.clone(); | |
532 | let mut delta_chain = vec![]; |
|
585 | let mut delta_chain = vec![]; | |
533 |
|
586 | |||
534 | // The meaning of `base_rev_or_base_of_delta_chain` depends on |
|
587 | // The meaning of `base_rev_or_base_of_delta_chain` depends on | |
535 | // generaldelta. See the doc on `ENTRY_DELTA_BASE` in |
|
588 | // generaldelta. See the doc on `ENTRY_DELTA_BASE` in | |
536 | // `mercurial/revlogutils/constants.py` and the code in |
|
589 | // `mercurial/revlogutils/constants.py` and the code in | |
537 | // [_chaininfo] and in [index_deltachain]. |
|
590 | // [_chaininfo] and in [index_deltachain]. | |
538 | let uses_generaldelta = self.revlog.index.uses_generaldelta(); |
|
591 | let uses_generaldelta = self.revlog.index.uses_generaldelta(); | |
539 | while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain { |
|
592 | while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain { | |
540 | entry = if uses_generaldelta { |
|
593 | entry = if uses_generaldelta { | |
541 | delta_chain.push(entry); |
|
594 | delta_chain.push(entry); | |
542 |
self.revlog.get_entry_ |
|
595 | self.revlog.get_entry_for_checked_rev(base_rev)? | |
543 | } else { |
|
596 | } else { | |
544 | let base_rev = entry.rev - 1; |
|
597 | let base_rev = UncheckedRevision(entry.rev - 1); | |
545 | delta_chain.push(entry); |
|
598 | delta_chain.push(entry); | |
546 |
self.revlog.get_entry |
|
599 | self.revlog.get_entry(base_rev)? | |
547 | }; |
|
600 | }; | |
548 | } |
|
601 | } | |
549 |
|
602 | |||
550 | let data = if delta_chain.is_empty() { |
|
603 | let data = if delta_chain.is_empty() { | |
551 | entry.data_chunk()? |
|
604 | entry.data_chunk()? | |
552 | } else { |
|
605 | } else { | |
553 | Revlog::build_data_from_deltas(entry, &delta_chain)?.into() |
|
606 | Revlog::build_data_from_deltas(entry, &delta_chain)?.into() | |
554 | }; |
|
607 | }; | |
555 |
|
608 | |||
556 | Ok(data) |
|
609 | Ok(data) | |
557 | } |
|
610 | } | |
558 |
|
611 | |||
559 | fn check_data( |
|
612 | fn check_data( | |
560 | &self, |
|
613 | &self, | |
561 | data: Cow<'revlog, [u8]>, |
|
614 | data: Cow<'revlog, [u8]>, | |
562 |
) -> Result<Cow<'revlog, [u8]>, |
|
615 | ) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
563 | if self.revlog.check_hash( |
|
616 | if self.revlog.check_hash( | |
564 | self.p1, |
|
617 | self.p1, | |
565 | self.p2, |
|
618 | self.p2, | |
566 | self.hash.as_bytes(), |
|
619 | self.hash.as_bytes(), | |
567 | &data, |
|
620 | &data, | |
568 | ) { |
|
621 | ) { | |
569 | Ok(data) |
|
622 | Ok(data) | |
570 | } else { |
|
623 | } else { | |
571 | if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 { |
|
624 | if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 { | |
572 | return Err(HgError::unsupported( |
|
625 | return Err(HgError::unsupported( | |
573 | "ellipsis revisions are not supported by rhg", |
|
626 | "ellipsis revisions are not supported by rhg", | |
574 |
) |
|
627 | ) | |
|
628 | .into()); | |||
575 | } |
|
629 | } | |
576 | Err(corrupted(format!( |
|
630 | Err(corrupted(format!( | |
577 | "hash check failed for revision {}", |
|
631 | "hash check failed for revision {}", | |
578 | self.rev |
|
632 | self.rev | |
579 |
)) |
|
633 | )) | |
|
634 | .into()) | |||
580 | } |
|
635 | } | |
581 | } |
|
636 | } | |
582 |
|
637 | |||
583 |
pub fn data(&self) -> Result<Cow<'revlog, [u8]>, |
|
638 | pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
584 | let data = self.rawdata()?; |
|
639 | let data = self.rawdata()?; | |
585 | if self.rev == NULL_REVISION { |
|
640 | if self.rev == NULL_REVISION { | |
586 | return Ok(data); |
|
641 | return Ok(data); | |
587 | } |
|
642 | } | |
588 | if self.is_censored() { |
|
643 | if self.is_censored() { | |
589 | return Err(HgError::CensoredNodeError); |
|
644 | return Err(HgError::CensoredNodeError.into()); | |
590 | } |
|
645 | } | |
591 | self.check_data(data) |
|
646 | self.check_data(data) | |
592 | } |
|
647 | } | |
593 |
|
648 | |||
594 | /// Extract the data contained in the entry. |
|
649 | /// Extract the data contained in the entry. | |
595 | /// This may be a delta. (See `is_delta`.) |
|
650 | /// This may be a delta. (See `is_delta`.) | |
596 | fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> { |
|
651 | fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> { | |
597 | if self.bytes.is_empty() { |
|
652 | if self.bytes.is_empty() { | |
598 | return Ok(Cow::Borrowed(&[])); |
|
653 | return Ok(Cow::Borrowed(&[])); | |
599 | } |
|
654 | } | |
600 | match self.bytes[0] { |
|
655 | match self.bytes[0] { | |
601 | // Revision data is the entirety of the entry, including this |
|
656 | // Revision data is the entirety of the entry, including this | |
602 | // header. |
|
657 | // header. | |
603 | b'\0' => Ok(Cow::Borrowed(self.bytes)), |
|
658 | b'\0' => Ok(Cow::Borrowed(self.bytes)), | |
604 | // Raw revision data follows. |
|
659 | // Raw revision data follows. | |
605 | b'u' => Ok(Cow::Borrowed(&self.bytes[1..])), |
|
660 | b'u' => Ok(Cow::Borrowed(&self.bytes[1..])), | |
606 | // zlib (RFC 1950) data. |
|
661 | // zlib (RFC 1950) data. | |
607 | b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)), |
|
662 | b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)), | |
608 | // zstd data. |
|
663 | // zstd data. | |
609 | b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)), |
|
664 | b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)), | |
610 | // A proper new format should have had a repo/store requirement. |
|
665 | // A proper new format should have had a repo/store requirement. | |
611 | format_type => Err(corrupted(format!( |
|
666 | format_type => Err(corrupted(format!( | |
612 | "unknown compression header '{}'", |
|
667 | "unknown compression header '{}'", | |
613 | format_type |
|
668 | format_type | |
614 | ))), |
|
669 | ))), | |
615 | } |
|
670 | } | |
616 | } |
|
671 | } | |
617 |
|
672 | |||
618 | fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> { |
|
673 | fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> { | |
619 | let mut decoder = ZlibDecoder::new(self.bytes); |
|
674 | let mut decoder = ZlibDecoder::new(self.bytes); | |
620 | if self.is_delta() { |
|
675 | if self.is_delta() { | |
621 | let mut buf = Vec::with_capacity(self.compressed_len as usize); |
|
676 | let mut buf = Vec::with_capacity(self.compressed_len as usize); | |
622 | decoder |
|
677 | decoder | |
623 | .read_to_end(&mut buf) |
|
678 | .read_to_end(&mut buf) | |
624 | .map_err(|e| corrupted(e.to_string()))?; |
|
679 | .map_err(|e| corrupted(e.to_string()))?; | |
625 | Ok(buf) |
|
680 | Ok(buf) | |
626 | } else { |
|
681 | } else { | |
627 | let cap = self.uncompressed_len.max(0) as usize; |
|
682 | let cap = self.uncompressed_len.max(0) as usize; | |
628 | let mut buf = vec![0; cap]; |
|
683 | let mut buf = vec![0; cap]; | |
629 | decoder |
|
684 | decoder | |
630 | .read_exact(&mut buf) |
|
685 | .read_exact(&mut buf) | |
631 | .map_err(|e| corrupted(e.to_string()))?; |
|
686 | .map_err(|e| corrupted(e.to_string()))?; | |
632 | Ok(buf) |
|
687 | Ok(buf) | |
633 | } |
|
688 | } | |
634 | } |
|
689 | } | |
635 |
|
690 | |||
636 | fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> { |
|
691 | fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> { | |
637 | let cap = self.uncompressed_len.max(0) as usize; |
|
692 | let cap = self.uncompressed_len.max(0) as usize; | |
638 | if self.is_delta() { |
|
693 | if self.is_delta() { | |
639 | // [cap] is usually an over-estimate of the space needed because |
|
694 | // [cap] is usually an over-estimate of the space needed because | |
640 | // it's the length of delta-decoded data, but we're interested |
|
695 | // it's the length of delta-decoded data, but we're interested | |
641 | // in the size of the delta. |
|
696 | // in the size of the delta. | |
642 | // This means we have to [shrink_to_fit] to avoid holding on |
|
697 | // This means we have to [shrink_to_fit] to avoid holding on | |
643 | // to a large chunk of memory, but it also means we must have a |
|
698 | // to a large chunk of memory, but it also means we must have a | |
644 | // fallback branch, for the case when the delta is longer than |
|
699 | // fallback branch, for the case when the delta is longer than | |
645 | // the original data (surprisingly, this does happen in practice) |
|
700 | // the original data (surprisingly, this does happen in practice) | |
646 | let mut buf = Vec::with_capacity(cap); |
|
701 | let mut buf = Vec::with_capacity(cap); | |
647 | match zstd_decompress_to_buffer(self.bytes, &mut buf) { |
|
702 | match zstd_decompress_to_buffer(self.bytes, &mut buf) { | |
648 | Ok(_) => buf.shrink_to_fit(), |
|
703 | Ok(_) => buf.shrink_to_fit(), | |
649 | Err(_) => { |
|
704 | Err(_) => { | |
650 | buf.clear(); |
|
705 | buf.clear(); | |
651 | zstd::stream::copy_decode(self.bytes, &mut buf) |
|
706 | zstd::stream::copy_decode(self.bytes, &mut buf) | |
652 | .map_err(|e| corrupted(e.to_string()))?; |
|
707 | .map_err(|e| corrupted(e.to_string()))?; | |
653 | } |
|
708 | } | |
654 | }; |
|
709 | }; | |
655 | Ok(buf) |
|
710 | Ok(buf) | |
656 | } else { |
|
711 | } else { | |
657 | let mut buf = Vec::with_capacity(cap); |
|
712 | let mut buf = Vec::with_capacity(cap); | |
658 | let len = zstd_decompress_to_buffer(self.bytes, &mut buf) |
|
713 | let len = zstd_decompress_to_buffer(self.bytes, &mut buf) | |
659 | .map_err(|e| corrupted(e.to_string()))?; |
|
714 | .map_err(|e| corrupted(e.to_string()))?; | |
660 | if len != self.uncompressed_len as usize { |
|
715 | if len != self.uncompressed_len as usize { | |
661 | Err(corrupted("uncompressed length does not match")) |
|
716 | Err(corrupted("uncompressed length does not match")) | |
662 | } else { |
|
717 | } else { | |
663 | Ok(buf) |
|
718 | Ok(buf) | |
664 | } |
|
719 | } | |
665 | } |
|
720 | } | |
666 | } |
|
721 | } | |
667 |
|
722 | |||
668 | /// Tell if the entry is a snapshot or a delta |
|
723 | /// Tell if the entry is a snapshot or a delta | |
669 | /// (influences on decompression). |
|
724 | /// (influences on decompression). | |
670 | fn is_delta(&self) -> bool { |
|
725 | fn is_delta(&self) -> bool { | |
671 | self.base_rev_or_base_of_delta_chain.is_some() |
|
726 | self.base_rev_or_base_of_delta_chain.is_some() | |
672 | } |
|
727 | } | |
673 | } |
|
728 | } | |
674 |
|
729 | |||
675 | /// Calculate the hash of a revision given its data and its parents. |
|
730 | /// Calculate the hash of a revision given its data and its parents. | |
676 | fn hash( |
|
731 | fn hash( | |
677 | data: &[u8], |
|
732 | data: &[u8], | |
678 | p1_hash: &[u8], |
|
733 | p1_hash: &[u8], | |
679 | p2_hash: &[u8], |
|
734 | p2_hash: &[u8], | |
680 | ) -> [u8; NODE_BYTES_LENGTH] { |
|
735 | ) -> [u8; NODE_BYTES_LENGTH] { | |
681 | let mut hasher = Sha1::new(); |
|
736 | let mut hasher = Sha1::new(); | |
682 | let (a, b) = (p1_hash, p2_hash); |
|
737 | let (a, b) = (p1_hash, p2_hash); | |
683 | if a > b { |
|
738 | if a > b { | |
684 | hasher.update(b); |
|
739 | hasher.update(b); | |
685 | hasher.update(a); |
|
740 | hasher.update(a); | |
686 | } else { |
|
741 | } else { | |
687 | hasher.update(a); |
|
742 | hasher.update(a); | |
688 | hasher.update(b); |
|
743 | hasher.update(b); | |
689 | } |
|
744 | } | |
690 | hasher.update(data); |
|
745 | hasher.update(data); | |
691 | *hasher.finalize().as_ref() |
|
746 | *hasher.finalize().as_ref() | |
692 | } |
|
747 | } | |
693 |
|
748 | |||
694 | #[cfg(test)] |
|
749 | #[cfg(test)] | |
695 | mod tests { |
|
750 | mod tests { | |
696 | use super::*; |
|
751 | use super::*; | |
697 | use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE}; |
|
752 | use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE}; | |
698 | use itertools::Itertools; |
|
753 | use itertools::Itertools; | |
699 |
|
754 | |||
700 | #[test] |
|
755 | #[test] | |
701 | fn test_empty() { |
|
756 | fn test_empty() { | |
702 | let temp = tempfile::tempdir().unwrap(); |
|
757 | let temp = tempfile::tempdir().unwrap(); | |
703 | let vfs = Vfs { base: temp.path() }; |
|
758 | let vfs = Vfs { base: temp.path() }; | |
704 | std::fs::write(temp.path().join("foo.i"), b"").unwrap(); |
|
759 | std::fs::write(temp.path().join("foo.i"), b"").unwrap(); | |
705 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); |
|
760 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); | |
706 | assert!(revlog.is_empty()); |
|
761 | assert!(revlog.is_empty()); | |
707 | assert_eq!(revlog.len(), 0); |
|
762 | assert_eq!(revlog.len(), 0); | |
708 | assert!(revlog.get_entry(0).is_err()); |
|
763 | assert!(revlog.get_entry(0.into()).is_err()); | |
709 | assert!(!revlog.has_rev(0)); |
|
764 | assert!(!revlog.has_rev(0.into())); | |
710 | assert_eq!( |
|
765 | assert_eq!( | |
711 | revlog.rev_from_node(NULL_NODE.into()).unwrap(), |
|
766 | revlog.rev_from_node(NULL_NODE.into()).unwrap(), | |
712 | NULL_REVISION |
|
767 | NULL_REVISION | |
713 | ); |
|
768 | ); | |
714 | let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap(); |
|
769 | let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap(); | |
715 | assert_eq!(null_entry.revision(), NULL_REVISION); |
|
770 | assert_eq!(null_entry.revision(), NULL_REVISION); | |
716 | assert!(null_entry.data().unwrap().is_empty()); |
|
771 | assert!(null_entry.data().unwrap().is_empty()); | |
717 | } |
|
772 | } | |
718 |
|
773 | |||
719 | #[test] |
|
774 | #[test] | |
720 | fn test_inline() { |
|
775 | fn test_inline() { | |
721 | let temp = tempfile::tempdir().unwrap(); |
|
776 | let temp = tempfile::tempdir().unwrap(); | |
722 | let vfs = Vfs { base: temp.path() }; |
|
777 | let vfs = Vfs { base: temp.path() }; | |
723 | let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd") |
|
778 | let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd") | |
724 | .unwrap(); |
|
779 | .unwrap(); | |
725 | let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12") |
|
780 | let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12") | |
726 | .unwrap(); |
|
781 | .unwrap(); | |
727 | let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582") |
|
782 | let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582") | |
728 | .unwrap(); |
|
783 | .unwrap(); | |
729 | let entry0_bytes = IndexEntryBuilder::new() |
|
784 | let entry0_bytes = IndexEntryBuilder::new() | |
730 | .is_first(true) |
|
785 | .is_first(true) | |
731 | .with_version(1) |
|
786 | .with_version(1) | |
732 | .with_inline(true) |
|
787 | .with_inline(true) | |
733 | .with_offset(INDEX_ENTRY_SIZE) |
|
788 | .with_offset(INDEX_ENTRY_SIZE) | |
734 | .with_node(node0) |
|
789 | .with_node(node0) | |
735 | .build(); |
|
790 | .build(); | |
736 | let entry1_bytes = IndexEntryBuilder::new() |
|
791 | let entry1_bytes = IndexEntryBuilder::new() | |
737 | .with_offset(INDEX_ENTRY_SIZE) |
|
792 | .with_offset(INDEX_ENTRY_SIZE) | |
738 | .with_node(node1) |
|
793 | .with_node(node1) | |
739 | .build(); |
|
794 | .build(); | |
740 | let entry2_bytes = IndexEntryBuilder::new() |
|
795 | let entry2_bytes = IndexEntryBuilder::new() | |
741 | .with_offset(INDEX_ENTRY_SIZE) |
|
796 | .with_offset(INDEX_ENTRY_SIZE) | |
742 | .with_p1(0) |
|
797 | .with_p1(0) | |
743 | .with_p2(1) |
|
798 | .with_p2(1) | |
744 | .with_node(node2) |
|
799 | .with_node(node2) | |
745 | .build(); |
|
800 | .build(); | |
746 | let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes] |
|
801 | let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes] | |
747 | .into_iter() |
|
802 | .into_iter() | |
748 | .flatten() |
|
803 | .flatten() | |
749 | .collect_vec(); |
|
804 | .collect_vec(); | |
750 | std::fs::write(temp.path().join("foo.i"), contents).unwrap(); |
|
805 | std::fs::write(temp.path().join("foo.i"), contents).unwrap(); | |
751 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); |
|
806 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); | |
752 |
|
807 | |||
753 | let entry0 = revlog.get_entry(0).ok().unwrap(); |
|
808 | let entry0 = revlog.get_entry(0.into()).ok().unwrap(); | |
754 | assert_eq!(entry0.revision(), 0); |
|
809 | assert_eq!(entry0.revision(), 0); | |
755 | assert_eq!(*entry0.node(), node0); |
|
810 | assert_eq!(*entry0.node(), node0); | |
756 | assert!(!entry0.has_p1()); |
|
811 | assert!(!entry0.has_p1()); | |
757 | assert_eq!(entry0.p1(), None); |
|
812 | assert_eq!(entry0.p1(), None); | |
758 | assert_eq!(entry0.p2(), None); |
|
813 | assert_eq!(entry0.p2(), None); | |
759 | let p1_entry = entry0.p1_entry().unwrap(); |
|
814 | let p1_entry = entry0.p1_entry().unwrap(); | |
760 | assert!(p1_entry.is_none()); |
|
815 | assert!(p1_entry.is_none()); | |
761 | let p2_entry = entry0.p2_entry().unwrap(); |
|
816 | let p2_entry = entry0.p2_entry().unwrap(); | |
762 | assert!(p2_entry.is_none()); |
|
817 | assert!(p2_entry.is_none()); | |
763 |
|
818 | |||
764 | let entry1 = revlog.get_entry(1).ok().unwrap(); |
|
819 | let entry1 = revlog.get_entry(1.into()).ok().unwrap(); | |
765 | assert_eq!(entry1.revision(), 1); |
|
820 | assert_eq!(entry1.revision(), 1); | |
766 | assert_eq!(*entry1.node(), node1); |
|
821 | assert_eq!(*entry1.node(), node1); | |
767 | assert!(!entry1.has_p1()); |
|
822 | assert!(!entry1.has_p1()); | |
768 | assert_eq!(entry1.p1(), None); |
|
823 | assert_eq!(entry1.p1(), None); | |
769 | assert_eq!(entry1.p2(), None); |
|
824 | assert_eq!(entry1.p2(), None); | |
770 | let p1_entry = entry1.p1_entry().unwrap(); |
|
825 | let p1_entry = entry1.p1_entry().unwrap(); | |
771 | assert!(p1_entry.is_none()); |
|
826 | assert!(p1_entry.is_none()); | |
772 | let p2_entry = entry1.p2_entry().unwrap(); |
|
827 | let p2_entry = entry1.p2_entry().unwrap(); | |
773 | assert!(p2_entry.is_none()); |
|
828 | assert!(p2_entry.is_none()); | |
774 |
|
829 | |||
775 | let entry2 = revlog.get_entry(2).ok().unwrap(); |
|
830 | let entry2 = revlog.get_entry(2.into()).ok().unwrap(); | |
776 | assert_eq!(entry2.revision(), 2); |
|
831 | assert_eq!(entry2.revision(), 2); | |
777 | assert_eq!(*entry2.node(), node2); |
|
832 | assert_eq!(*entry2.node(), node2); | |
778 | assert!(entry2.has_p1()); |
|
833 | assert!(entry2.has_p1()); | |
779 | assert_eq!(entry2.p1(), Some(0)); |
|
834 | assert_eq!(entry2.p1(), Some(0)); | |
780 | assert_eq!(entry2.p2(), Some(1)); |
|
835 | assert_eq!(entry2.p2(), Some(1)); | |
781 | let p1_entry = entry2.p1_entry().unwrap(); |
|
836 | let p1_entry = entry2.p1_entry().unwrap(); | |
782 | assert!(p1_entry.is_some()); |
|
837 | assert!(p1_entry.is_some()); | |
783 | assert_eq!(p1_entry.unwrap().revision(), 0); |
|
838 | assert_eq!(p1_entry.unwrap().revision(), 0); | |
784 | let p2_entry = entry2.p2_entry().unwrap(); |
|
839 | let p2_entry = entry2.p2_entry().unwrap(); | |
785 | assert!(p2_entry.is_some()); |
|
840 | assert!(p2_entry.is_some()); | |
786 | assert_eq!(p2_entry.unwrap().revision(), 1); |
|
841 | assert_eq!(p2_entry.unwrap().revision(), 1); | |
787 | } |
|
842 | } | |
788 |
|
843 | |||
789 | #[test] |
|
844 | #[test] | |
790 | fn test_nodemap() { |
|
845 | fn test_nodemap() { | |
791 | let temp = tempfile::tempdir().unwrap(); |
|
846 | let temp = tempfile::tempdir().unwrap(); | |
792 | let vfs = Vfs { base: temp.path() }; |
|
847 | let vfs = Vfs { base: temp.path() }; | |
793 |
|
848 | |||
794 | // building a revlog with a forced Node starting with zeros |
|
849 | // building a revlog with a forced Node starting with zeros | |
795 | // This is a corruption, but it does not preclude using the nodemap |
|
850 | // This is a corruption, but it does not preclude using the nodemap | |
796 | // if we don't try and access the data |
|
851 | // if we don't try and access the data | |
797 | let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd") |
|
852 | let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd") | |
798 | .unwrap(); |
|
853 | .unwrap(); | |
799 | let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12") |
|
854 | let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12") | |
800 | .unwrap(); |
|
855 | .unwrap(); | |
801 | let entry0_bytes = IndexEntryBuilder::new() |
|
856 | let entry0_bytes = IndexEntryBuilder::new() | |
802 | .is_first(true) |
|
857 | .is_first(true) | |
803 | .with_version(1) |
|
858 | .with_version(1) | |
804 | .with_inline(true) |
|
859 | .with_inline(true) | |
805 | .with_offset(INDEX_ENTRY_SIZE) |
|
860 | .with_offset(INDEX_ENTRY_SIZE) | |
806 | .with_node(node0) |
|
861 | .with_node(node0) | |
807 | .build(); |
|
862 | .build(); | |
808 | let entry1_bytes = IndexEntryBuilder::new() |
|
863 | let entry1_bytes = IndexEntryBuilder::new() | |
809 | .with_offset(INDEX_ENTRY_SIZE) |
|
864 | .with_offset(INDEX_ENTRY_SIZE) | |
810 | .with_node(node1) |
|
865 | .with_node(node1) | |
811 | .build(); |
|
866 | .build(); | |
812 | let contents = vec![entry0_bytes, entry1_bytes] |
|
867 | let contents = vec![entry0_bytes, entry1_bytes] | |
813 | .into_iter() |
|
868 | .into_iter() | |
814 | .flatten() |
|
869 | .flatten() | |
815 | .collect_vec(); |
|
870 | .collect_vec(); | |
816 | std::fs::write(temp.path().join("foo.i"), contents).unwrap(); |
|
871 | std::fs::write(temp.path().join("foo.i"), contents).unwrap(); | |
817 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); |
|
872 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); | |
818 |
|
873 | |||
819 | // accessing the data shows the corruption |
|
874 | // accessing the data shows the corruption | |
820 | revlog.get_entry(0).unwrap().data().unwrap_err(); |
|
875 | revlog.get_entry(0.into()).unwrap().data().unwrap_err(); | |
821 |
|
876 | |||
822 | assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1); |
|
877 | assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1); | |
823 | assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0); |
|
878 | assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0); | |
824 | assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1); |
|
879 | assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1); | |
825 | assert_eq!( |
|
880 | assert_eq!( | |
826 | revlog |
|
881 | revlog | |
827 | .rev_from_node(NodePrefix::from_hex("000").unwrap()) |
|
882 | .rev_from_node(NodePrefix::from_hex("000").unwrap()) | |
828 | .unwrap(), |
|
883 | .unwrap(), | |
829 | -1 |
|
884 | -1 | |
830 | ); |
|
885 | ); | |
831 | assert_eq!( |
|
886 | assert_eq!( | |
832 | revlog |
|
887 | revlog | |
833 | .rev_from_node(NodePrefix::from_hex("b00").unwrap()) |
|
888 | .rev_from_node(NodePrefix::from_hex("b00").unwrap()) | |
834 | .unwrap(), |
|
889 | .unwrap(), | |
835 | 1 |
|
890 | 1 | |
836 | ); |
|
891 | ); | |
837 | // RevlogError does not implement PartialEq |
|
892 | // RevlogError does not implement PartialEq | |
838 | // (ultimately because io::Error does not) |
|
893 | // (ultimately because io::Error does not) | |
839 | match revlog |
|
894 | match revlog | |
840 | .rev_from_node(NodePrefix::from_hex("00").unwrap()) |
|
895 | .rev_from_node(NodePrefix::from_hex("00").unwrap()) | |
841 | .expect_err("Expected to give AmbiguousPrefix error") |
|
896 | .expect_err("Expected to give AmbiguousPrefix error") | |
842 | { |
|
897 | { | |
843 | RevlogError::AmbiguousPrefix => (), |
|
898 | RevlogError::AmbiguousPrefix => (), | |
844 | e => { |
|
899 | e => { | |
845 | panic!("Got another error than AmbiguousPrefix: {:?}", e); |
|
900 | panic!("Got another error than AmbiguousPrefix: {:?}", e); | |
846 | } |
|
901 | } | |
847 | }; |
|
902 | }; | |
848 | } |
|
903 | } | |
849 | } |
|
904 | } |
@@ -1,1067 +1,1082 | |||||
1 | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> |
|
1 | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> | |
2 | // and Mercurial contributors |
|
2 | // and Mercurial contributors | |
3 | // |
|
3 | // | |
4 | // This software may be used and distributed according to the terms of the |
|
4 | // This software may be used and distributed according to the terms of the | |
5 | // GNU General Public License version 2 or any later version. |
|
5 | // GNU General Public License version 2 or any later version. | |
6 | //! Indexing facilities for fast retrieval of `Revision` from `Node` |
|
6 | //! Indexing facilities for fast retrieval of `Revision` from `Node` | |
7 | //! |
|
7 | //! | |
8 | //! This provides a variation on the 16-ary radix tree that is |
|
8 | //! This provides a variation on the 16-ary radix tree that is | |
9 | //! provided as "nodetree" in revlog.c, ready for append-only persistence |
|
9 | //! provided as "nodetree" in revlog.c, ready for append-only persistence | |
10 | //! on disk. |
|
10 | //! on disk. | |
11 | //! |
|
11 | //! | |
12 | //! Following existing implicit conventions, the "nodemap" terminology |
|
12 | //! Following existing implicit conventions, the "nodemap" terminology | |
13 | //! is used in a more abstract context. |
|
13 | //! is used in a more abstract context. | |
14 |
|
14 | |||
|
15 | use crate::UncheckedRevision; | |||
|
16 | ||||
15 | use super::{ |
|
17 | use super::{ | |
16 | node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION, |
|
18 | node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION, | |
17 | }; |
|
19 | }; | |
18 |
|
20 | |||
19 | use bytes_cast::{unaligned, BytesCast}; |
|
21 | use bytes_cast::{unaligned, BytesCast}; | |
20 | use std::cmp::max; |
|
22 | use std::cmp::max; | |
21 | use std::fmt; |
|
23 | use std::fmt; | |
22 | use std::mem::{self, align_of, size_of}; |
|
24 | use std::mem::{self, align_of, size_of}; | |
23 | use std::ops::Deref; |
|
25 | use std::ops::Deref; | |
24 | use std::ops::Index; |
|
26 | use std::ops::Index; | |
25 |
|
27 | |||
26 | #[derive(Debug, PartialEq)] |
|
28 | #[derive(Debug, PartialEq)] | |
27 | pub enum NodeMapError { |
|
29 | pub enum NodeMapError { | |
28 | /// A `NodePrefix` matches several [`Revision`]s. |
|
30 | /// A `NodePrefix` matches several [`Revision`]s. | |
29 | /// |
|
31 | /// | |
30 | /// This can be returned by methods meant for (at most) one match. |
|
32 | /// This can be returned by methods meant for (at most) one match. | |
31 | MultipleResults, |
|
33 | MultipleResults, | |
32 | /// A `Revision` stored in the nodemap could not be found in the index |
|
34 | /// A `Revision` stored in the nodemap could not be found in the index | |
33 | RevisionNotInIndex(Revision), |
|
35 | RevisionNotInIndex(UncheckedRevision), | |
34 | } |
|
36 | } | |
35 |
|
37 | |||
36 | /// Mapping system from Mercurial nodes to revision numbers. |
|
38 | /// Mapping system from Mercurial nodes to revision numbers. | |
37 | /// |
|
39 | /// | |
38 | /// ## `RevlogIndex` and `NodeMap` |
|
40 | /// ## `RevlogIndex` and `NodeMap` | |
39 | /// |
|
41 | /// | |
40 | /// One way to think about their relationship is that |
|
42 | /// One way to think about their relationship is that | |
41 | /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`] |
|
43 | /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`] | |
42 | /// information carried by a [`RevlogIndex`]. |
|
44 | /// information carried by a [`RevlogIndex`]. | |
43 | /// |
|
45 | /// | |
44 | /// Many of the methods in this trait take a `RevlogIndex` argument |
|
46 | /// Many of the methods in this trait take a `RevlogIndex` argument | |
45 | /// which is used for validation of their results. This index must naturally |
|
47 | /// which is used for validation of their results. This index must naturally | |
46 | /// be the one the `NodeMap` is about, and it must be consistent. |
|
48 | /// be the one the `NodeMap` is about, and it must be consistent. | |
47 | /// |
|
49 | /// | |
48 | /// Notably, the `NodeMap` must not store |
|
50 | /// Notably, the `NodeMap` must not store | |
49 | /// information about more `Revision` values than there are in the index. |
|
51 | /// information about more `Revision` values than there are in the index. | |
50 | /// In these methods, an encountered `Revision` is not in the index, a |
|
52 | /// In these methods, an encountered `Revision` is not in the index, a | |
51 | /// [RevisionNotInIndex](NodeMapError) error is returned. |
|
53 | /// [RevisionNotInIndex](NodeMapError) error is returned. | |
52 | /// |
|
54 | /// | |
53 | /// In insert operations, the rule is thus that the `NodeMap` must always |
|
55 | /// In insert operations, the rule is thus that the `NodeMap` must always | |
54 | /// be updated after the `RevlogIndex` it is about. |
|
56 | /// be updated after the `RevlogIndex` it is about. | |
55 | pub trait NodeMap { |
|
57 | pub trait NodeMap { | |
56 | /// Find the unique `Revision` having the given `Node` |
|
58 | /// Find the unique `Revision` having the given `Node` | |
57 | /// |
|
59 | /// | |
58 | /// If no Revision matches the given `Node`, `Ok(None)` is returned. |
|
60 | /// If no Revision matches the given `Node`, `Ok(None)` is returned. | |
59 | fn find_node( |
|
61 | fn find_node( | |
60 | &self, |
|
62 | &self, | |
61 | index: &impl RevlogIndex, |
|
63 | index: &impl RevlogIndex, | |
62 | node: &Node, |
|
64 | node: &Node, | |
63 | ) -> Result<Option<Revision>, NodeMapError> { |
|
65 | ) -> Result<Option<Revision>, NodeMapError> { | |
64 | self.find_bin(index, node.into()) |
|
66 | self.find_bin(index, node.into()) | |
65 | } |
|
67 | } | |
66 |
|
68 | |||
67 | /// Find the unique Revision whose `Node` starts with a given binary prefix |
|
69 | /// Find the unique Revision whose `Node` starts with a given binary prefix | |
68 | /// |
|
70 | /// | |
69 | /// If no Revision matches the given prefix, `Ok(None)` is returned. |
|
71 | /// If no Revision matches the given prefix, `Ok(None)` is returned. | |
70 | /// |
|
72 | /// | |
71 | /// If several Revisions match the given prefix, a |
|
73 | /// If several Revisions match the given prefix, a | |
72 | /// [MultipleResults](NodeMapError) error is returned. |
|
74 | /// [MultipleResults](NodeMapError) error is returned. | |
73 | fn find_bin( |
|
75 | fn find_bin( | |
74 | &self, |
|
76 | &self, | |
75 | idx: &impl RevlogIndex, |
|
77 | idx: &impl RevlogIndex, | |
76 | prefix: NodePrefix, |
|
78 | prefix: NodePrefix, | |
77 | ) -> Result<Option<Revision>, NodeMapError>; |
|
79 | ) -> Result<Option<Revision>, NodeMapError>; | |
78 |
|
80 | |||
79 | /// Give the size of the shortest node prefix that determines |
|
81 | /// Give the size of the shortest node prefix that determines | |
80 | /// the revision uniquely. |
|
82 | /// the revision uniquely. | |
81 | /// |
|
83 | /// | |
82 | /// From a binary node prefix, if it is matched in the node map, this |
|
84 | /// From a binary node prefix, if it is matched in the node map, this | |
83 | /// returns the number of hexadecimal digits that would had sufficed |
|
85 | /// returns the number of hexadecimal digits that would had sufficed | |
84 | /// to find the revision uniquely. |
|
86 | /// to find the revision uniquely. | |
85 | /// |
|
87 | /// | |
86 | /// Returns `None` if no [`Revision`] could be found for the prefix. |
|
88 | /// Returns `None` if no [`Revision`] could be found for the prefix. | |
87 | /// |
|
89 | /// | |
88 | /// If several Revisions match the given prefix, a |
|
90 | /// If several Revisions match the given prefix, a | |
89 | /// [MultipleResults](NodeMapError) error is returned. |
|
91 | /// [MultipleResults](NodeMapError) error is returned. | |
90 | fn unique_prefix_len_bin( |
|
92 | fn unique_prefix_len_bin( | |
91 | &self, |
|
93 | &self, | |
92 | idx: &impl RevlogIndex, |
|
94 | idx: &impl RevlogIndex, | |
93 | node_prefix: NodePrefix, |
|
95 | node_prefix: NodePrefix, | |
94 | ) -> Result<Option<usize>, NodeMapError>; |
|
96 | ) -> Result<Option<usize>, NodeMapError>; | |
95 |
|
97 | |||
96 | /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with |
|
98 | /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with | |
97 | /// a full [`Node`] as input |
|
99 | /// a full [`Node`] as input | |
98 | fn unique_prefix_len_node( |
|
100 | fn unique_prefix_len_node( | |
99 | &self, |
|
101 | &self, | |
100 | idx: &impl RevlogIndex, |
|
102 | idx: &impl RevlogIndex, | |
101 | node: &Node, |
|
103 | node: &Node, | |
102 | ) -> Result<Option<usize>, NodeMapError> { |
|
104 | ) -> Result<Option<usize>, NodeMapError> { | |
103 | self.unique_prefix_len_bin(idx, node.into()) |
|
105 | self.unique_prefix_len_bin(idx, node.into()) | |
104 | } |
|
106 | } | |
105 | } |
|
107 | } | |
106 |
|
108 | |||
107 | pub trait MutableNodeMap: NodeMap { |
|
109 | pub trait MutableNodeMap: NodeMap { | |
108 | fn insert<I: RevlogIndex>( |
|
110 | fn insert<I: RevlogIndex>( | |
109 | &mut self, |
|
111 | &mut self, | |
110 | index: &I, |
|
112 | index: &I, | |
111 | node: &Node, |
|
113 | node: &Node, | |
112 | rev: Revision, |
|
114 | rev: Revision, | |
113 | ) -> Result<(), NodeMapError>; |
|
115 | ) -> Result<(), NodeMapError>; | |
114 | } |
|
116 | } | |
115 |
|
117 | |||
116 | /// Low level NodeTree [`Block`] elements |
|
118 | /// Low level NodeTree [`Block`] elements | |
117 | /// |
|
119 | /// | |
118 | /// These are exactly as for instance on persistent storage. |
|
120 | /// These are exactly as for instance on persistent storage. | |
119 | type RawElement = unaligned::I32Be; |
|
121 | type RawElement = unaligned::I32Be; | |
120 |
|
122 | |||
121 | /// High level representation of values in NodeTree |
|
123 | /// High level representation of values in NodeTree | |
122 | /// [`Blocks`](struct.Block.html) |
|
124 | /// [`Blocks`](struct.Block.html) | |
123 | /// |
|
125 | /// | |
124 | /// This is the high level representation that most algorithms should |
|
126 | /// This is the high level representation that most algorithms should | |
125 | /// use. |
|
127 | /// use. | |
126 | #[derive(Clone, Debug, Eq, PartialEq)] |
|
128 | #[derive(Clone, Debug, Eq, PartialEq)] | |
127 | enum Element { |
|
129 | enum Element { | |
128 | Rev(Revision), |
|
130 | // This is not a Mercurial revision. It's a `i32` because this is the | |
|
131 | // right type for this structure. | |||
|
132 | Rev(i32), | |||
129 | Block(usize), |
|
133 | Block(usize), | |
130 | None, |
|
134 | None, | |
131 | } |
|
135 | } | |
132 |
|
136 | |||
133 | impl From<RawElement> for Element { |
|
137 | impl From<RawElement> for Element { | |
134 | /// Conversion from low level representation, after endianness conversion. |
|
138 | /// Conversion from low level representation, after endianness conversion. | |
135 | /// |
|
139 | /// | |
136 | /// See [`Block`](struct.Block.html) for explanation about the encoding. |
|
140 | /// See [`Block`](struct.Block.html) for explanation about the encoding. | |
137 | fn from(raw: RawElement) -> Element { |
|
141 | fn from(raw: RawElement) -> Element { | |
138 | let int = raw.get(); |
|
142 | let int = raw.get(); | |
139 | if int >= 0 { |
|
143 | if int >= 0 { | |
140 | Element::Block(int as usize) |
|
144 | Element::Block(int as usize) | |
141 | } else if int == -1 { |
|
145 | } else if int == -1 { | |
142 | Element::None |
|
146 | Element::None | |
143 | } else { |
|
147 | } else { | |
144 | Element::Rev(-int - 2) |
|
148 | Element::Rev(-int - 2) | |
145 | } |
|
149 | } | |
146 | } |
|
150 | } | |
147 | } |
|
151 | } | |
148 |
|
152 | |||
149 | impl From<Element> for RawElement { |
|
153 | impl From<Element> for RawElement { | |
150 | fn from(element: Element) -> RawElement { |
|
154 | fn from(element: Element) -> RawElement { | |
151 | RawElement::from(match element { |
|
155 | RawElement::from(match element { | |
152 | Element::None => 0, |
|
156 | Element::None => 0, | |
153 | Element::Block(i) => i as i32, |
|
157 | Element::Block(i) => i as i32, | |
154 | Element::Rev(rev) => -rev - 2, |
|
158 | Element::Rev(rev) => -rev - 2, | |
155 | }) |
|
159 | }) | |
156 | } |
|
160 | } | |
157 | } |
|
161 | } | |
158 |
|
162 | |||
159 | const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble |
|
163 | const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble | |
160 |
|
164 | |||
161 | /// A logical block of the [`NodeTree`], packed with a fixed size. |
|
165 | /// A logical block of the [`NodeTree`], packed with a fixed size. | |
162 | /// |
|
166 | /// | |
163 | /// These are always used in container types implementing `Index<Block>`, |
|
167 | /// These are always used in container types implementing `Index<Block>`, | |
164 | /// such as `&Block` |
|
168 | /// such as `&Block` | |
165 | /// |
|
169 | /// | |
166 | /// As an array of integers, its ith element encodes that the |
|
170 | /// As an array of integers, its ith element encodes that the | |
167 | /// ith potential edge from the block, representing the ith hexadecimal digit |
|
171 | /// ith potential edge from the block, representing the ith hexadecimal digit | |
168 | /// (nybble) `i` is either: |
|
172 | /// (nybble) `i` is either: | |
169 | /// |
|
173 | /// | |
170 | /// - absent (value -1) |
|
174 | /// - absent (value -1) | |
171 | /// - another `Block` in the same indexable container (value ≥ 0) |
|
175 | /// - another `Block` in the same indexable container (value ≥ 0) | |
172 | /// - a [`Revision`] leaf (value ≤ -2) |
|
176 | /// - a [`Revision`] leaf (value ≤ -2) | |
173 | /// |
|
177 | /// | |
174 | /// Endianness has to be fixed for consistency on shared storage across |
|
178 | /// Endianness has to be fixed for consistency on shared storage across | |
175 | /// different architectures. |
|
179 | /// different architectures. | |
176 | /// |
|
180 | /// | |
177 | /// A key difference with the C `nodetree` is that we need to be |
|
181 | /// A key difference with the C `nodetree` is that we need to be | |
178 | /// able to represent the [`Block`] at index 0, hence -1 is the empty marker |
|
182 | /// able to represent the [`Block`] at index 0, hence -1 is the empty marker | |
179 | /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1. |
|
183 | /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1. | |
180 | /// |
|
184 | /// | |
181 | /// Another related difference is that `NULL_REVISION` (-1) is not |
|
185 | /// Another related difference is that `NULL_REVISION` (-1) is not | |
182 | /// represented at all, because we want an immutable empty nodetree |
|
186 | /// represented at all, because we want an immutable empty nodetree | |
183 | /// to be valid. |
|
187 | /// to be valid. | |
184 | #[derive(Copy, Clone, BytesCast, PartialEq)] |
|
188 | #[derive(Copy, Clone, BytesCast, PartialEq)] | |
185 | #[repr(transparent)] |
|
189 | #[repr(transparent)] | |
186 | pub struct Block([RawElement; ELEMENTS_PER_BLOCK]); |
|
190 | pub struct Block([RawElement; ELEMENTS_PER_BLOCK]); | |
187 |
|
191 | |||
188 | impl Block { |
|
192 | impl Block { | |
189 | fn new() -> Self { |
|
193 | fn new() -> Self { | |
190 | let absent_node = RawElement::from(-1); |
|
194 | let absent_node = RawElement::from(-1); | |
191 | Block([absent_node; ELEMENTS_PER_BLOCK]) |
|
195 | Block([absent_node; ELEMENTS_PER_BLOCK]) | |
192 | } |
|
196 | } | |
193 |
|
197 | |||
194 | fn get(&self, nybble: u8) -> Element { |
|
198 | fn get(&self, nybble: u8) -> Element { | |
195 | self.0[nybble as usize].into() |
|
199 | self.0[nybble as usize].into() | |
196 | } |
|
200 | } | |
197 |
|
201 | |||
198 | fn set(&mut self, nybble: u8, element: Element) { |
|
202 | fn set(&mut self, nybble: u8, element: Element) { | |
199 | self.0[nybble as usize] = element.into() |
|
203 | self.0[nybble as usize] = element.into() | |
200 | } |
|
204 | } | |
201 | } |
|
205 | } | |
202 |
|
206 | |||
203 | impl fmt::Debug for Block { |
|
207 | impl fmt::Debug for Block { | |
204 | /// sparse representation for testing and debugging purposes |
|
208 | /// sparse representation for testing and debugging purposes | |
205 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
209 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
206 | f.debug_map() |
|
210 | f.debug_map() | |
207 | .entries((0..16).filter_map(|i| match self.get(i) { |
|
211 | .entries((0..16).filter_map(|i| match self.get(i) { | |
208 | Element::None => None, |
|
212 | Element::None => None, | |
209 | element => Some((i, element)), |
|
213 | element => Some((i, element)), | |
210 | })) |
|
214 | })) | |
211 | .finish() |
|
215 | .finish() | |
212 | } |
|
216 | } | |
213 | } |
|
217 | } | |
214 |
|
218 | |||
215 | /// A mutable 16-radix tree with the root block logically at the end |
|
219 | /// A mutable 16-radix tree with the root block logically at the end | |
216 | /// |
|
220 | /// | |
217 | /// Because of the append only nature of our node trees, we need to |
|
221 | /// Because of the append only nature of our node trees, we need to | |
218 | /// keep the original untouched and store new blocks separately. |
|
222 | /// keep the original untouched and store new blocks separately. | |
219 | /// |
|
223 | /// | |
220 | /// The mutable root [`Block`] is kept apart so that we don't have to rebump |
|
224 | /// The mutable root [`Block`] is kept apart so that we don't have to rebump | |
221 | /// it on each insertion. |
|
225 | /// it on each insertion. | |
222 | pub struct NodeTree { |
|
226 | pub struct NodeTree { | |
223 | readonly: Box<dyn Deref<Target = [Block]> + Send>, |
|
227 | readonly: Box<dyn Deref<Target = [Block]> + Send>, | |
224 | growable: Vec<Block>, |
|
228 | growable: Vec<Block>, | |
225 | root: Block, |
|
229 | root: Block, | |
226 | masked_inner_blocks: usize, |
|
230 | masked_inner_blocks: usize, | |
227 | } |
|
231 | } | |
228 |
|
232 | |||
229 | impl Index<usize> for NodeTree { |
|
233 | impl Index<usize> for NodeTree { | |
230 | type Output = Block; |
|
234 | type Output = Block; | |
231 |
|
235 | |||
232 | fn index(&self, i: usize) -> &Block { |
|
236 | fn index(&self, i: usize) -> &Block { | |
233 | let ro_len = self.readonly.len(); |
|
237 | let ro_len = self.readonly.len(); | |
234 | if i < ro_len { |
|
238 | if i < ro_len { | |
235 | &self.readonly[i] |
|
239 | &self.readonly[i] | |
236 | } else if i == ro_len + self.growable.len() { |
|
240 | } else if i == ro_len + self.growable.len() { | |
237 | &self.root |
|
241 | &self.root | |
238 | } else { |
|
242 | } else { | |
239 | &self.growable[i - ro_len] |
|
243 | &self.growable[i - ro_len] | |
240 | } |
|
244 | } | |
241 | } |
|
245 | } | |
242 | } |
|
246 | } | |
243 |
|
247 | |||
244 | /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`. |
|
248 | /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`. | |
245 | fn has_prefix_or_none( |
|
249 | fn has_prefix_or_none( | |
246 | idx: &impl RevlogIndex, |
|
250 | idx: &impl RevlogIndex, | |
247 | prefix: NodePrefix, |
|
251 | prefix: NodePrefix, | |
248 | rev: Revision, |
|
252 | rev: UncheckedRevision, | |
249 | ) -> Result<Option<Revision>, NodeMapError> { |
|
253 | ) -> Result<Option<Revision>, NodeMapError> { | |
250 | idx.node(rev) |
|
254 | match idx.check_revision(rev) { | |
251 | .ok_or(NodeMapError::RevisionNotInIndex(rev)) |
|
255 | Some(checked) => idx | |
252 | .map(|node| { |
|
256 | .node(checked) | |
253 | if prefix.is_prefix_of(node) { |
|
257 | .ok_or(NodeMapError::RevisionNotInIndex(rev)) | |
254 | Some(rev) |
|
258 | .map(|node| { | |
255 | } else { |
|
259 | if prefix.is_prefix_of(node) { | |
256 |
|
|
260 | Some(checked) | |
257 | } |
|
261 | } else { | |
258 | }) |
|
262 | None | |
|
263 | } | |||
|
264 | }), | |||
|
265 | None => Err(NodeMapError::RevisionNotInIndex(rev)), | |||
|
266 | } | |||
259 | } |
|
267 | } | |
260 |
|
268 | |||
261 | /// validate that the candidate's node starts indeed with given prefix, |
|
269 | /// validate that the candidate's node starts indeed with given prefix, | |
262 | /// and treat ambiguities related to [`NULL_REVISION`]. |
|
270 | /// and treat ambiguities related to [`NULL_REVISION`]. | |
263 | /// |
|
271 | /// | |
264 | /// From the data in the NodeTree, one can only conclude that some |
|
272 | /// From the data in the NodeTree, one can only conclude that some | |
265 | /// revision is the only one for a *subprefix* of the one being looked up. |
|
273 | /// revision is the only one for a *subprefix* of the one being looked up. | |
266 | fn validate_candidate( |
|
274 | fn validate_candidate( | |
267 | idx: &impl RevlogIndex, |
|
275 | idx: &impl RevlogIndex, | |
268 | prefix: NodePrefix, |
|
276 | prefix: NodePrefix, | |
269 | candidate: (Option<Revision>, usize), |
|
277 | candidate: (Option<UncheckedRevision>, usize), | |
270 | ) -> Result<(Option<Revision>, usize), NodeMapError> { |
|
278 | ) -> Result<(Option<Revision>, usize), NodeMapError> { | |
271 | let (rev, steps) = candidate; |
|
279 | let (rev, steps) = candidate; | |
272 | if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) { |
|
280 | if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) { | |
273 | rev.map_or(Ok((None, steps)), |r| { |
|
281 | rev.map_or(Ok((None, steps)), |r| { | |
274 | has_prefix_or_none(idx, prefix, r) |
|
282 | has_prefix_or_none(idx, prefix, r) | |
275 | .map(|opt| (opt, max(steps, nz_nybble + 1))) |
|
283 | .map(|opt| (opt, max(steps, nz_nybble + 1))) | |
276 | }) |
|
284 | }) | |
277 | } else { |
|
285 | } else { | |
278 | // the prefix is only made of zeros; NULL_REVISION always matches it |
|
286 | // the prefix is only made of zeros; NULL_REVISION always matches it | |
279 | // and any other *valid* result is an ambiguity |
|
287 | // and any other *valid* result is an ambiguity | |
280 | match rev { |
|
288 | match rev { | |
281 | None => Ok((Some(NULL_REVISION), steps + 1)), |
|
289 | None => Ok((Some(NULL_REVISION), steps + 1)), | |
282 | Some(r) => match has_prefix_or_none(idx, prefix, r)? { |
|
290 | Some(r) => match has_prefix_or_none(idx, prefix, r)? { | |
283 | None => Ok((Some(NULL_REVISION), steps + 1)), |
|
291 | None => Ok((Some(NULL_REVISION), steps + 1)), | |
284 | _ => Err(NodeMapError::MultipleResults), |
|
292 | _ => Err(NodeMapError::MultipleResults), | |
285 | }, |
|
293 | }, | |
286 | } |
|
294 | } | |
287 | } |
|
295 | } | |
288 | } |
|
296 | } | |
289 |
|
297 | |||
290 | impl NodeTree { |
|
298 | impl NodeTree { | |
291 | /// Initiate a NodeTree from an immutable slice-like of `Block` |
|
299 | /// Initiate a NodeTree from an immutable slice-like of `Block` | |
292 | /// |
|
300 | /// | |
293 | /// We keep `readonly` and clone its root block if it isn't empty. |
|
301 | /// We keep `readonly` and clone its root block if it isn't empty. | |
294 | fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self { |
|
302 | fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self { | |
295 | let root = readonly.last().cloned().unwrap_or_else(Block::new); |
|
303 | let root = readonly.last().cloned().unwrap_or_else(Block::new); | |
296 | NodeTree { |
|
304 | NodeTree { | |
297 | readonly, |
|
305 | readonly, | |
298 | growable: Vec::new(), |
|
306 | growable: Vec::new(), | |
299 | root, |
|
307 | root, | |
300 | masked_inner_blocks: 0, |
|
308 | masked_inner_blocks: 0, | |
301 | } |
|
309 | } | |
302 | } |
|
310 | } | |
303 |
|
311 | |||
304 | /// Create from an opaque bunch of bytes |
|
312 | /// Create from an opaque bunch of bytes | |
305 | /// |
|
313 | /// | |
306 | /// The created [`NodeTreeBytes`] from `bytes`, |
|
314 | /// The created [`NodeTreeBytes`] from `bytes`, | |
307 | /// of which exactly `amount` bytes are used. |
|
315 | /// of which exactly `amount` bytes are used. | |
308 | /// |
|
316 | /// | |
309 | /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects. |
|
317 | /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects. | |
310 | /// - `amount` is expressed in bytes, and is not automatically derived from |
|
318 | /// - `amount` is expressed in bytes, and is not automatically derived from | |
311 | /// `bytes`, so that a caller that manages them atomically can perform |
|
319 | /// `bytes`, so that a caller that manages them atomically can perform | |
312 | /// temporary disk serializations and still rollback easily if needed. |
|
320 | /// temporary disk serializations and still rollback easily if needed. | |
313 | /// First use-case for this would be to support Mercurial shell hooks. |
|
321 | /// First use-case for this would be to support Mercurial shell hooks. | |
314 | /// |
|
322 | /// | |
315 | /// panics if `buffer` is smaller than `amount` |
|
323 | /// panics if `buffer` is smaller than `amount` | |
316 | pub fn load_bytes( |
|
324 | pub fn load_bytes( | |
317 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
325 | bytes: Box<dyn Deref<Target = [u8]> + Send>, | |
318 | amount: usize, |
|
326 | amount: usize, | |
319 | ) -> Self { |
|
327 | ) -> Self { | |
320 | NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount))) |
|
328 | NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount))) | |
321 | } |
|
329 | } | |
322 |
|
330 | |||
323 | /// Retrieve added [`Block`]s and the original immutable data |
|
331 | /// Retrieve added [`Block`]s and the original immutable data | |
324 | pub fn into_readonly_and_added( |
|
332 | pub fn into_readonly_and_added( | |
325 | self, |
|
333 | self, | |
326 | ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) { |
|
334 | ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) { | |
327 | let mut vec = self.growable; |
|
335 | let mut vec = self.growable; | |
328 | let readonly = self.readonly; |
|
336 | let readonly = self.readonly; | |
329 | if readonly.last() != Some(&self.root) { |
|
337 | if readonly.last() != Some(&self.root) { | |
330 | vec.push(self.root); |
|
338 | vec.push(self.root); | |
331 | } |
|
339 | } | |
332 | (readonly, vec) |
|
340 | (readonly, vec) | |
333 | } |
|
341 | } | |
334 |
|
342 | |||
335 | /// Retrieve added [`Block]s as bytes, ready to be written to persistent |
|
343 | /// Retrieve added [`Block]s as bytes, ready to be written to persistent | |
336 | /// storage |
|
344 | /// storage | |
337 | pub fn into_readonly_and_added_bytes( |
|
345 | pub fn into_readonly_and_added_bytes( | |
338 | self, |
|
346 | self, | |
339 | ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) { |
|
347 | ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) { | |
340 | let (readonly, vec) = self.into_readonly_and_added(); |
|
348 | let (readonly, vec) = self.into_readonly_and_added(); | |
341 | // Prevent running `v`'s destructor so we are in complete control |
|
349 | // Prevent running `v`'s destructor so we are in complete control | |
342 | // of the allocation. |
|
350 | // of the allocation. | |
343 | let vec = mem::ManuallyDrop::new(vec); |
|
351 | let vec = mem::ManuallyDrop::new(vec); | |
344 |
|
352 | |||
345 | // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous |
|
353 | // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous | |
346 | // bytes, so this is perfectly safe. |
|
354 | // bytes, so this is perfectly safe. | |
347 | let bytes = unsafe { |
|
355 | let bytes = unsafe { | |
348 | // Check for compatible allocation layout. |
|
356 | // Check for compatible allocation layout. | |
349 | // (Optimized away by constant-folding + dead code elimination.) |
|
357 | // (Optimized away by constant-folding + dead code elimination.) | |
350 | assert_eq!(size_of::<Block>(), 64); |
|
358 | assert_eq!(size_of::<Block>(), 64); | |
351 | assert_eq!(align_of::<Block>(), 1); |
|
359 | assert_eq!(align_of::<Block>(), 1); | |
352 |
|
360 | |||
353 | // /!\ Any use of `vec` after this is use-after-free. |
|
361 | // /!\ Any use of `vec` after this is use-after-free. | |
354 | // TODO: use `into_raw_parts` once stabilized |
|
362 | // TODO: use `into_raw_parts` once stabilized | |
355 | Vec::from_raw_parts( |
|
363 | Vec::from_raw_parts( | |
356 | vec.as_ptr() as *mut u8, |
|
364 | vec.as_ptr() as *mut u8, | |
357 | vec.len() * size_of::<Block>(), |
|
365 | vec.len() * size_of::<Block>(), | |
358 | vec.capacity() * size_of::<Block>(), |
|
366 | vec.capacity() * size_of::<Block>(), | |
359 | ) |
|
367 | ) | |
360 | }; |
|
368 | }; | |
361 | (readonly, bytes) |
|
369 | (readonly, bytes) | |
362 | } |
|
370 | } | |
363 |
|
371 | |||
364 | /// Total number of blocks |
|
372 | /// Total number of blocks | |
365 | fn len(&self) -> usize { |
|
373 | fn len(&self) -> usize { | |
366 | self.readonly.len() + self.growable.len() + 1 |
|
374 | self.readonly.len() + self.growable.len() + 1 | |
367 | } |
|
375 | } | |
368 |
|
376 | |||
369 | /// Implemented for completeness |
|
377 | /// Implemented for completeness | |
370 | /// |
|
378 | /// | |
371 | /// A `NodeTree` always has at least the mutable root block. |
|
379 | /// A `NodeTree` always has at least the mutable root block. | |
372 | #[allow(dead_code)] |
|
380 | #[allow(dead_code)] | |
373 | fn is_empty(&self) -> bool { |
|
381 | fn is_empty(&self) -> bool { | |
374 | false |
|
382 | false | |
375 | } |
|
383 | } | |
376 |
|
384 | |||
377 | /// Main working method for `NodeTree` searches |
|
385 | /// Main working method for `NodeTree` searches | |
378 | /// |
|
386 | /// | |
379 | /// The first returned value is the result of analysing `NodeTree` data |
|
387 | /// The first returned value is the result of analysing `NodeTree` data | |
380 | /// *alone*: whereas `None` guarantees that the given prefix is absent |
|
388 | /// *alone*: whereas `None` guarantees that the given prefix is absent | |
381 | /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with |
|
389 | /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with | |
382 | /// `Some(rev)`, it is to be understood that `rev` is the unique |
|
390 | /// `Some(rev)`, it is to be understood that `rev` is the unique | |
383 | /// [`Revision`] that could match the prefix. Actually, all that can |
|
391 | /// [`Revision`] that could match the prefix. Actually, all that can | |
384 | /// be inferred from |
|
392 | /// be inferred from | |
385 | /// the `NodeTree` data is that `rev` is the revision with the longest |
|
393 | /// the `NodeTree` data is that `rev` is the revision with the longest | |
386 | /// common node prefix with the given prefix. |
|
394 | /// common node prefix with the given prefix. | |
|
395 | /// We return an [`UncheckedRevision`] because we have no guarantee that | |||
|
396 | /// the revision we found is valid for the index. | |||
387 | /// |
|
397 | /// | |
388 | /// The second returned value is the size of the smallest subprefix |
|
398 | /// The second returned value is the size of the smallest subprefix | |
389 | /// of `prefix` that would give the same result, i.e. not the |
|
399 | /// of `prefix` that would give the same result, i.e. not the | |
390 | /// [MultipleResults](NodeMapError) error variant (again, using only the |
|
400 | /// [MultipleResults](NodeMapError) error variant (again, using only the | |
391 | /// data of the [`NodeTree`]). |
|
401 | /// data of the [`NodeTree`]). | |
392 | fn lookup( |
|
402 | fn lookup( | |
393 | &self, |
|
403 | &self, | |
394 | prefix: NodePrefix, |
|
404 | prefix: NodePrefix, | |
395 | ) -> Result<(Option<Revision>, usize), NodeMapError> { |
|
405 | ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> { | |
396 | for (i, visit_item) in self.visit(prefix).enumerate() { |
|
406 | for (i, visit_item) in self.visit(prefix).enumerate() { | |
397 | if let Some(opt) = visit_item.final_revision() { |
|
407 | if let Some(opt) = visit_item.final_revision() { | |
398 | return Ok((opt, i + 1)); |
|
408 | return Ok((opt, i + 1)); | |
399 | } |
|
409 | } | |
400 | } |
|
410 | } | |
401 | Err(NodeMapError::MultipleResults) |
|
411 | Err(NodeMapError::MultipleResults) | |
402 | } |
|
412 | } | |
403 |
|
413 | |||
404 | fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor { |
|
414 | fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor { | |
405 | NodeTreeVisitor { |
|
415 | NodeTreeVisitor { | |
406 | nt: self, |
|
416 | nt: self, | |
407 | prefix, |
|
417 | prefix, | |
408 | visit: self.len() - 1, |
|
418 | visit: self.len() - 1, | |
409 | nybble_idx: 0, |
|
419 | nybble_idx: 0, | |
410 | done: false, |
|
420 | done: false, | |
411 | } |
|
421 | } | |
412 | } |
|
422 | } | |
413 | /// Return a mutable reference for `Block` at index `idx`. |
|
423 | /// Return a mutable reference for `Block` at index `idx`. | |
414 | /// |
|
424 | /// | |
415 | /// If `idx` lies in the immutable area, then the reference is to |
|
425 | /// If `idx` lies in the immutable area, then the reference is to | |
416 | /// a newly appended copy. |
|
426 | /// a newly appended copy. | |
417 | /// |
|
427 | /// | |
418 | /// Returns (new_idx, glen, mut_ref) where |
|
428 | /// Returns (new_idx, glen, mut_ref) where | |
419 | /// |
|
429 | /// | |
420 | /// - `new_idx` is the index of the mutable `Block` |
|
430 | /// - `new_idx` is the index of the mutable `Block` | |
421 | /// - `mut_ref` is a mutable reference to the mutable Block. |
|
431 | /// - `mut_ref` is a mutable reference to the mutable Block. | |
422 | /// - `glen` is the new length of `self.growable` |
|
432 | /// - `glen` is the new length of `self.growable` | |
423 | /// |
|
433 | /// | |
424 | /// Note: the caller wouldn't be allowed to query `self.growable.len()` |
|
434 | /// Note: the caller wouldn't be allowed to query `self.growable.len()` | |
425 | /// itself because of the mutable borrow taken with the returned `Block` |
|
435 | /// itself because of the mutable borrow taken with the returned `Block` | |
426 | fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) { |
|
436 | fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) { | |
427 | let ro_blocks = &self.readonly; |
|
437 | let ro_blocks = &self.readonly; | |
428 | let ro_len = ro_blocks.len(); |
|
438 | let ro_len = ro_blocks.len(); | |
429 | let glen = self.growable.len(); |
|
439 | let glen = self.growable.len(); | |
430 | if idx < ro_len { |
|
440 | if idx < ro_len { | |
431 | self.masked_inner_blocks += 1; |
|
441 | self.masked_inner_blocks += 1; | |
432 | self.growable.push(ro_blocks[idx]); |
|
442 | self.growable.push(ro_blocks[idx]); | |
433 | (glen + ro_len, &mut self.growable[glen], glen + 1) |
|
443 | (glen + ro_len, &mut self.growable[glen], glen + 1) | |
434 | } else if glen + ro_len == idx { |
|
444 | } else if glen + ro_len == idx { | |
435 | (idx, &mut self.root, glen) |
|
445 | (idx, &mut self.root, glen) | |
436 | } else { |
|
446 | } else { | |
437 | (idx, &mut self.growable[idx - ro_len], glen) |
|
447 | (idx, &mut self.growable[idx - ro_len], glen) | |
438 | } |
|
448 | } | |
439 | } |
|
449 | } | |
440 |
|
450 | |||
441 | /// Main insertion method |
|
451 | /// Main insertion method | |
442 | /// |
|
452 | /// | |
443 | /// This will dive in the node tree to find the deepest `Block` for |
|
453 | /// This will dive in the node tree to find the deepest `Block` for | |
444 | /// `node`, split it as much as needed and record `node` in there. |
|
454 | /// `node`, split it as much as needed and record `node` in there. | |
445 | /// The method then backtracks, updating references in all the visited |
|
455 | /// The method then backtracks, updating references in all the visited | |
446 | /// blocks from the root. |
|
456 | /// blocks from the root. | |
447 | /// |
|
457 | /// | |
448 | /// All the mutated `Block` are copied first to the growable part if |
|
458 | /// All the mutated `Block` are copied first to the growable part if | |
449 | /// needed. That happens for those in the immutable part except the root. |
|
459 | /// needed. That happens for those in the immutable part except the root. | |
450 | pub fn insert<I: RevlogIndex>( |
|
460 | pub fn insert<I: RevlogIndex>( | |
451 | &mut self, |
|
461 | &mut self, | |
452 | index: &I, |
|
462 | index: &I, | |
453 | node: &Node, |
|
463 | node: &Node, | |
454 | rev: Revision, |
|
464 | rev: Revision, | |
455 | ) -> Result<(), NodeMapError> { |
|
465 | ) -> Result<(), NodeMapError> { | |
456 | let ro_len = &self.readonly.len(); |
|
466 | let ro_len = &self.readonly.len(); | |
457 |
|
467 | |||
458 | let mut visit_steps: Vec<_> = self.visit(node.into()).collect(); |
|
468 | let mut visit_steps: Vec<_> = self.visit(node.into()).collect(); | |
459 | let read_nybbles = visit_steps.len(); |
|
469 | let read_nybbles = visit_steps.len(); | |
460 | // visit_steps cannot be empty, since we always visit the root block |
|
470 | // visit_steps cannot be empty, since we always visit the root block | |
461 | let deepest = visit_steps.pop().unwrap(); |
|
471 | let deepest = visit_steps.pop().unwrap(); | |
462 |
|
472 | |||
463 | let (mut block_idx, mut block, mut glen) = |
|
473 | let (mut block_idx, mut block, mut glen) = | |
464 | self.mutable_block(deepest.block_idx); |
|
474 | self.mutable_block(deepest.block_idx); | |
465 |
|
475 | |||
466 | if let Element::Rev(old_rev) = deepest.element { |
|
476 | if let Element::Rev(old_rev) = deepest.element { | |
467 | let old_node = index |
|
477 | let old_node = index.node(old_rev).ok_or_else(|| { | |
468 | .node(old_rev) |
|
478 | NodeMapError::RevisionNotInIndex(old_rev.into()) | |
469 | .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?; |
|
479 | })?; | |
470 | if old_node == node { |
|
480 | if old_node == node { | |
471 | return Ok(()); // avoid creating lots of useless blocks |
|
481 | return Ok(()); // avoid creating lots of useless blocks | |
472 | } |
|
482 | } | |
473 |
|
483 | |||
474 | // Looping over the tail of nybbles in both nodes, creating |
|
484 | // Looping over the tail of nybbles in both nodes, creating | |
475 | // new blocks until we find the difference |
|
485 | // new blocks until we find the difference | |
476 | let mut new_block_idx = ro_len + glen; |
|
486 | let mut new_block_idx = ro_len + glen; | |
477 | let mut nybble = deepest.nybble; |
|
487 | let mut nybble = deepest.nybble; | |
478 | for nybble_pos in read_nybbles..node.nybbles_len() { |
|
488 | for nybble_pos in read_nybbles..node.nybbles_len() { | |
479 | block.set(nybble, Element::Block(new_block_idx)); |
|
489 | block.set(nybble, Element::Block(new_block_idx)); | |
480 |
|
490 | |||
481 | let new_nybble = node.get_nybble(nybble_pos); |
|
491 | let new_nybble = node.get_nybble(nybble_pos); | |
482 | let old_nybble = old_node.get_nybble(nybble_pos); |
|
492 | let old_nybble = old_node.get_nybble(nybble_pos); | |
483 |
|
493 | |||
484 | if old_nybble == new_nybble { |
|
494 | if old_nybble == new_nybble { | |
485 | self.growable.push(Block::new()); |
|
495 | self.growable.push(Block::new()); | |
486 | block = &mut self.growable[glen]; |
|
496 | block = &mut self.growable[glen]; | |
487 | glen += 1; |
|
497 | glen += 1; | |
488 | new_block_idx += 1; |
|
498 | new_block_idx += 1; | |
489 | nybble = new_nybble; |
|
499 | nybble = new_nybble; | |
490 | } else { |
|
500 | } else { | |
491 | let mut new_block = Block::new(); |
|
501 | let mut new_block = Block::new(); | |
492 | new_block.set(old_nybble, Element::Rev(old_rev)); |
|
502 | new_block.set(old_nybble, Element::Rev(old_rev)); | |
493 | new_block.set(new_nybble, Element::Rev(rev)); |
|
503 | new_block.set(new_nybble, Element::Rev(rev)); | |
494 | self.growable.push(new_block); |
|
504 | self.growable.push(new_block); | |
495 | break; |
|
505 | break; | |
496 | } |
|
506 | } | |
497 | } |
|
507 | } | |
498 | } else { |
|
508 | } else { | |
499 | // Free slot in the deepest block: no splitting has to be done |
|
509 | // Free slot in the deepest block: no splitting has to be done | |
500 | block.set(deepest.nybble, Element::Rev(rev)); |
|
510 | block.set(deepest.nybble, Element::Rev(rev)); | |
501 | } |
|
511 | } | |
502 |
|
512 | |||
503 | // Backtrack over visit steps to update references |
|
513 | // Backtrack over visit steps to update references | |
504 | while let Some(visited) = visit_steps.pop() { |
|
514 | while let Some(visited) = visit_steps.pop() { | |
505 | let to_write = Element::Block(block_idx); |
|
515 | let to_write = Element::Block(block_idx); | |
506 | if visit_steps.is_empty() { |
|
516 | if visit_steps.is_empty() { | |
507 | self.root.set(visited.nybble, to_write); |
|
517 | self.root.set(visited.nybble, to_write); | |
508 | break; |
|
518 | break; | |
509 | } |
|
519 | } | |
510 | let (new_idx, block, _) = self.mutable_block(visited.block_idx); |
|
520 | let (new_idx, block, _) = self.mutable_block(visited.block_idx); | |
511 | if block.get(visited.nybble) == to_write { |
|
521 | if block.get(visited.nybble) == to_write { | |
512 | break; |
|
522 | break; | |
513 | } |
|
523 | } | |
514 | block.set(visited.nybble, to_write); |
|
524 | block.set(visited.nybble, to_write); | |
515 | block_idx = new_idx; |
|
525 | block_idx = new_idx; | |
516 | } |
|
526 | } | |
517 | Ok(()) |
|
527 | Ok(()) | |
518 | } |
|
528 | } | |
519 |
|
529 | |||
520 | /// Make the whole `NodeTree` logically empty, without touching the |
|
530 | /// Make the whole `NodeTree` logically empty, without touching the | |
521 | /// immutable part. |
|
531 | /// immutable part. | |
522 | pub fn invalidate_all(&mut self) { |
|
532 | pub fn invalidate_all(&mut self) { | |
523 | self.root = Block::new(); |
|
533 | self.root = Block::new(); | |
524 | self.growable = Vec::new(); |
|
534 | self.growable = Vec::new(); | |
525 | self.masked_inner_blocks = self.readonly.len(); |
|
535 | self.masked_inner_blocks = self.readonly.len(); | |
526 | } |
|
536 | } | |
527 |
|
537 | |||
528 | /// Return the number of blocks in the readonly part that are currently |
|
538 | /// Return the number of blocks in the readonly part that are currently | |
529 | /// masked in the mutable part. |
|
539 | /// masked in the mutable part. | |
530 | /// |
|
540 | /// | |
531 | /// The `NodeTree` structure has no efficient way to know how many blocks |
|
541 | /// The `NodeTree` structure has no efficient way to know how many blocks | |
532 | /// are already unreachable in the readonly part. |
|
542 | /// are already unreachable in the readonly part. | |
533 | /// |
|
543 | /// | |
534 | /// After a call to `invalidate_all()`, the returned number can be actually |
|
544 | /// After a call to `invalidate_all()`, the returned number can be actually | |
535 | /// bigger than the whole readonly part, a conventional way to mean that |
|
545 | /// bigger than the whole readonly part, a conventional way to mean that | |
536 | /// all the readonly blocks have been masked. This is what is really |
|
546 | /// all the readonly blocks have been masked. This is what is really | |
537 | /// useful to the caller and does not require to know how many were |
|
547 | /// useful to the caller and does not require to know how many were | |
538 | /// actually unreachable to begin with. |
|
548 | /// actually unreachable to begin with. | |
539 | pub fn masked_readonly_blocks(&self) -> usize { |
|
549 | pub fn masked_readonly_blocks(&self) -> usize { | |
540 | if let Some(readonly_root) = self.readonly.last() { |
|
550 | if let Some(readonly_root) = self.readonly.last() { | |
541 | if readonly_root == &self.root { |
|
551 | if readonly_root == &self.root { | |
542 | return 0; |
|
552 | return 0; | |
543 | } |
|
553 | } | |
544 | } else { |
|
554 | } else { | |
545 | return 0; |
|
555 | return 0; | |
546 | } |
|
556 | } | |
547 | self.masked_inner_blocks + 1 |
|
557 | self.masked_inner_blocks + 1 | |
548 | } |
|
558 | } | |
549 | } |
|
559 | } | |
550 |
|
560 | |||
551 | pub struct NodeTreeBytes { |
|
561 | pub struct NodeTreeBytes { | |
552 | buffer: Box<dyn Deref<Target = [u8]> + Send>, |
|
562 | buffer: Box<dyn Deref<Target = [u8]> + Send>, | |
553 | len_in_blocks: usize, |
|
563 | len_in_blocks: usize, | |
554 | } |
|
564 | } | |
555 |
|
565 | |||
556 | impl NodeTreeBytes { |
|
566 | impl NodeTreeBytes { | |
557 | fn new( |
|
567 | fn new( | |
558 | buffer: Box<dyn Deref<Target = [u8]> + Send>, |
|
568 | buffer: Box<dyn Deref<Target = [u8]> + Send>, | |
559 | amount: usize, |
|
569 | amount: usize, | |
560 | ) -> Self { |
|
570 | ) -> Self { | |
561 | assert!(buffer.len() >= amount); |
|
571 | assert!(buffer.len() >= amount); | |
562 | let len_in_blocks = amount / size_of::<Block>(); |
|
572 | let len_in_blocks = amount / size_of::<Block>(); | |
563 | NodeTreeBytes { |
|
573 | NodeTreeBytes { | |
564 | buffer, |
|
574 | buffer, | |
565 | len_in_blocks, |
|
575 | len_in_blocks, | |
566 | } |
|
576 | } | |
567 | } |
|
577 | } | |
568 | } |
|
578 | } | |
569 |
|
579 | |||
570 | impl Deref for NodeTreeBytes { |
|
580 | impl Deref for NodeTreeBytes { | |
571 | type Target = [Block]; |
|
581 | type Target = [Block]; | |
572 |
|
582 | |||
573 | fn deref(&self) -> &[Block] { |
|
583 | fn deref(&self) -> &[Block] { | |
574 | Block::slice_from_bytes(&self.buffer, self.len_in_blocks) |
|
584 | Block::slice_from_bytes(&self.buffer, self.len_in_blocks) | |
575 | // `NodeTreeBytes::new` already asserted that `self.buffer` is |
|
585 | // `NodeTreeBytes::new` already asserted that `self.buffer` is | |
576 | // large enough. |
|
586 | // large enough. | |
577 | .unwrap() |
|
587 | .unwrap() | |
578 | .0 |
|
588 | .0 | |
579 | } |
|
589 | } | |
580 | } |
|
590 | } | |
581 |
|
591 | |||
582 | struct NodeTreeVisitor<'n> { |
|
592 | struct NodeTreeVisitor<'n> { | |
583 | nt: &'n NodeTree, |
|
593 | nt: &'n NodeTree, | |
584 | prefix: NodePrefix, |
|
594 | prefix: NodePrefix, | |
585 | visit: usize, |
|
595 | visit: usize, | |
586 | nybble_idx: usize, |
|
596 | nybble_idx: usize, | |
587 | done: bool, |
|
597 | done: bool, | |
588 | } |
|
598 | } | |
589 |
|
599 | |||
590 | #[derive(Debug, PartialEq, Clone)] |
|
600 | #[derive(Debug, PartialEq, Clone)] | |
591 | struct NodeTreeVisitItem { |
|
601 | struct NodeTreeVisitItem { | |
592 | block_idx: usize, |
|
602 | block_idx: usize, | |
593 | nybble: u8, |
|
603 | nybble: u8, | |
594 | element: Element, |
|
604 | element: Element, | |
595 | } |
|
605 | } | |
596 |
|
606 | |||
597 | impl<'n> Iterator for NodeTreeVisitor<'n> { |
|
607 | impl<'n> Iterator for NodeTreeVisitor<'n> { | |
598 | type Item = NodeTreeVisitItem; |
|
608 | type Item = NodeTreeVisitItem; | |
599 |
|
609 | |||
600 | fn next(&mut self) -> Option<Self::Item> { |
|
610 | fn next(&mut self) -> Option<Self::Item> { | |
601 | if self.done || self.nybble_idx >= self.prefix.nybbles_len() { |
|
611 | if self.done || self.nybble_idx >= self.prefix.nybbles_len() { | |
602 | return None; |
|
612 | return None; | |
603 | } |
|
613 | } | |
604 |
|
614 | |||
605 | let nybble = self.prefix.get_nybble(self.nybble_idx); |
|
615 | let nybble = self.prefix.get_nybble(self.nybble_idx); | |
606 | self.nybble_idx += 1; |
|
616 | self.nybble_idx += 1; | |
607 |
|
617 | |||
608 | let visit = self.visit; |
|
618 | let visit = self.visit; | |
609 | let element = self.nt[visit].get(nybble); |
|
619 | let element = self.nt[visit].get(nybble); | |
610 | if let Element::Block(idx) = element { |
|
620 | if let Element::Block(idx) = element { | |
611 | self.visit = idx; |
|
621 | self.visit = idx; | |
612 | } else { |
|
622 | } else { | |
613 | self.done = true; |
|
623 | self.done = true; | |
614 | } |
|
624 | } | |
615 |
|
625 | |||
616 | Some(NodeTreeVisitItem { |
|
626 | Some(NodeTreeVisitItem { | |
617 | block_idx: visit, |
|
627 | block_idx: visit, | |
618 | nybble, |
|
628 | nybble, | |
619 | element, |
|
629 | element, | |
620 | }) |
|
630 | }) | |
621 | } |
|
631 | } | |
622 | } |
|
632 | } | |
623 |
|
633 | |||
624 | impl NodeTreeVisitItem { |
|
634 | impl NodeTreeVisitItem { | |
625 | // Return `Some(opt)` if this item is final, with `opt` being the |
|
635 | // Return `Some(opt)` if this item is final, with `opt` being the | |
626 | // `Revision` that it may represent. |
|
636 | // `UncheckedRevision` that it may represent. | |
627 | // |
|
637 | // | |
628 | // If the item is not terminal, return `None` |
|
638 | // If the item is not terminal, return `None` | |
629 | fn final_revision(&self) -> Option<Option<Revision>> { |
|
639 | fn final_revision(&self) -> Option<Option<UncheckedRevision>> { | |
630 | match self.element { |
|
640 | match self.element { | |
631 | Element::Block(_) => None, |
|
641 | Element::Block(_) => None, | |
632 | Element::Rev(r) => Some(Some(r)), |
|
642 | Element::Rev(r) => Some(Some(r.into())), | |
633 | Element::None => Some(None), |
|
643 | Element::None => Some(None), | |
634 | } |
|
644 | } | |
635 | } |
|
645 | } | |
636 | } |
|
646 | } | |
637 |
|
647 | |||
638 | impl From<Vec<Block>> for NodeTree { |
|
648 | impl From<Vec<Block>> for NodeTree { | |
639 | fn from(vec: Vec<Block>) -> Self { |
|
649 | fn from(vec: Vec<Block>) -> Self { | |
640 | Self::new(Box::new(vec)) |
|
650 | Self::new(Box::new(vec)) | |
641 | } |
|
651 | } | |
642 | } |
|
652 | } | |
643 |
|
653 | |||
644 | impl fmt::Debug for NodeTree { |
|
654 | impl fmt::Debug for NodeTree { | |
645 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
655 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
646 | let readonly: &[Block] = &*self.readonly; |
|
656 | let readonly: &[Block] = &*self.readonly; | |
647 | write!( |
|
657 | write!( | |
648 | f, |
|
658 | f, | |
649 | "readonly: {:?}, growable: {:?}, root: {:?}", |
|
659 | "readonly: {:?}, growable: {:?}, root: {:?}", | |
650 | readonly, self.growable, self.root |
|
660 | readonly, self.growable, self.root | |
651 | ) |
|
661 | ) | |
652 | } |
|
662 | } | |
653 | } |
|
663 | } | |
654 |
|
664 | |||
655 | impl Default for NodeTree { |
|
665 | impl Default for NodeTree { | |
656 | /// Create a fully mutable empty NodeTree |
|
666 | /// Create a fully mutable empty NodeTree | |
657 | fn default() -> Self { |
|
667 | fn default() -> Self { | |
658 | NodeTree::new(Box::new(Vec::new())) |
|
668 | NodeTree::new(Box::new(Vec::new())) | |
659 | } |
|
669 | } | |
660 | } |
|
670 | } | |
661 |
|
671 | |||
662 | impl NodeMap for NodeTree { |
|
672 | impl NodeMap for NodeTree { | |
663 | fn find_bin<'a>( |
|
673 | fn find_bin<'a>( | |
664 | &self, |
|
674 | &self, | |
665 | idx: &impl RevlogIndex, |
|
675 | idx: &impl RevlogIndex, | |
666 | prefix: NodePrefix, |
|
676 | prefix: NodePrefix, | |
667 | ) -> Result<Option<Revision>, NodeMapError> { |
|
677 | ) -> Result<Option<Revision>, NodeMapError> { | |
668 | validate_candidate(idx, prefix, self.lookup(prefix)?) |
|
678 | validate_candidate(idx, prefix, self.lookup(prefix)?) | |
669 | .map(|(opt, _shortest)| opt) |
|
679 | .map(|(opt, _shortest)| opt) | |
670 | } |
|
680 | } | |
671 |
|
681 | |||
672 | fn unique_prefix_len_bin<'a>( |
|
682 | fn unique_prefix_len_bin<'a>( | |
673 | &self, |
|
683 | &self, | |
674 | idx: &impl RevlogIndex, |
|
684 | idx: &impl RevlogIndex, | |
675 | prefix: NodePrefix, |
|
685 | prefix: NodePrefix, | |
676 | ) -> Result<Option<usize>, NodeMapError> { |
|
686 | ) -> Result<Option<usize>, NodeMapError> { | |
677 | validate_candidate(idx, prefix, self.lookup(prefix)?) |
|
687 | validate_candidate(idx, prefix, self.lookup(prefix)?) | |
678 | .map(|(opt, shortest)| opt.map(|_rev| shortest)) |
|
688 | .map(|(opt, shortest)| opt.map(|_rev| shortest)) | |
679 | } |
|
689 | } | |
680 | } |
|
690 | } | |
681 |
|
691 | |||
682 | #[cfg(test)] |
|
692 | #[cfg(test)] | |
683 | mod tests { |
|
693 | mod tests { | |
684 | use super::NodeMapError::*; |
|
694 | use super::NodeMapError::*; | |
685 | use super::*; |
|
695 | use super::*; | |
686 | use crate::revlog::node::{hex_pad_right, Node}; |
|
696 | use crate::revlog::node::{hex_pad_right, Node}; | |
687 | use std::collections::HashMap; |
|
697 | use std::collections::HashMap; | |
688 |
|
698 | |||
689 | /// Creates a `Block` using a syntax close to the `Debug` output |
|
699 | /// Creates a `Block` using a syntax close to the `Debug` output | |
690 | macro_rules! block { |
|
700 | macro_rules! block { | |
691 | {$($nybble:tt : $variant:ident($val:tt)),*} => ( |
|
701 | {$($nybble:tt : $variant:ident($val:tt)),*} => ( | |
692 | { |
|
702 | { | |
693 | let mut block = Block::new(); |
|
703 | let mut block = Block::new(); | |
694 | $(block.set($nybble, Element::$variant($val)));*; |
|
704 | $(block.set($nybble, Element::$variant($val)));*; | |
695 | block |
|
705 | block | |
696 | } |
|
706 | } | |
697 | ) |
|
707 | ) | |
698 | } |
|
708 | } | |
699 |
|
709 | |||
700 | #[test] |
|
710 | #[test] | |
701 | fn test_block_debug() { |
|
711 | fn test_block_debug() { | |
702 | let mut block = Block::new(); |
|
712 | let mut block = Block::new(); | |
703 | block.set(1, Element::Rev(3)); |
|
713 | block.set(1, Element::Rev(3)); | |
704 | block.set(10, Element::Block(0)); |
|
714 | block.set(10, Element::Block(0)); | |
705 | assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}"); |
|
715 | assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}"); | |
706 | } |
|
716 | } | |
707 |
|
717 | |||
708 | #[test] |
|
718 | #[test] | |
709 | fn test_block_macro() { |
|
719 | fn test_block_macro() { | |
710 | let block = block! {5: Block(2)}; |
|
720 | let block = block! {5: Block(2)}; | |
711 | assert_eq!(format!("{:?}", block), "{5: Block(2)}"); |
|
721 | assert_eq!(format!("{:?}", block), "{5: Block(2)}"); | |
712 |
|
722 | |||
713 | let block = block! {13: Rev(15), 5: Block(2)}; |
|
723 | let block = block! {13: Rev(15), 5: Block(2)}; | |
714 | assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}"); |
|
724 | assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}"); | |
715 | } |
|
725 | } | |
716 |
|
726 | |||
717 | #[test] |
|
727 | #[test] | |
718 | fn test_raw_block() { |
|
728 | fn test_raw_block() { | |
719 | let mut raw = [255u8; 64]; |
|
729 | let mut raw = [255u8; 64]; | |
720 |
|
730 | |||
721 | let mut counter = 0; |
|
731 | let mut counter = 0; | |
722 | for val in [0_i32, 15, -2, -1, -3].iter() { |
|
732 | for val in [0_i32, 15, -2, -1, -3].iter() { | |
723 | for byte in val.to_be_bytes().iter() { |
|
733 | for byte in val.to_be_bytes().iter() { | |
724 | raw[counter] = *byte; |
|
734 | raw[counter] = *byte; | |
725 | counter += 1; |
|
735 | counter += 1; | |
726 | } |
|
736 | } | |
727 | } |
|
737 | } | |
728 | let (block, _) = Block::from_bytes(&raw).unwrap(); |
|
738 | let (block, _) = Block::from_bytes(&raw).unwrap(); | |
729 | assert_eq!(block.get(0), Element::Block(0)); |
|
739 | assert_eq!(block.get(0), Element::Block(0)); | |
730 | assert_eq!(block.get(1), Element::Block(15)); |
|
740 | assert_eq!(block.get(1), Element::Block(15)); | |
731 | assert_eq!(block.get(3), Element::None); |
|
741 | assert_eq!(block.get(3), Element::None); | |
732 | assert_eq!(block.get(2), Element::Rev(0)); |
|
742 | assert_eq!(block.get(2), Element::Rev(0)); | |
733 | assert_eq!(block.get(4), Element::Rev(1)); |
|
743 | assert_eq!(block.get(4), Element::Rev(1)); | |
734 | } |
|
744 | } | |
735 |
|
745 | |||
736 | type TestIndex = HashMap<Revision, Node>; |
|
746 | type TestIndex = HashMap<UncheckedRevision, Node>; | |
737 |
|
747 | |||
738 | impl RevlogIndex for TestIndex { |
|
748 | impl RevlogIndex for TestIndex { | |
739 | fn node(&self, rev: Revision) -> Option<&Node> { |
|
749 | fn node(&self, rev: Revision) -> Option<&Node> { | |
740 | self.get(&rev) |
|
750 | self.get(&rev.into()) | |
741 | } |
|
751 | } | |
742 |
|
752 | |||
743 | fn len(&self) -> usize { |
|
753 | fn len(&self) -> usize { | |
744 | self.len() |
|
754 | self.len() | |
745 | } |
|
755 | } | |
|
756 | ||||
|
757 | fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { | |||
|
758 | self.get(&rev).map(|_| rev.0) | |||
|
759 | } | |||
746 | } |
|
760 | } | |
747 |
|
761 | |||
748 | /// Pad hexadecimal Node prefix with zeros on the right |
|
762 | /// Pad hexadecimal Node prefix with zeros on the right | |
749 | /// |
|
763 | /// | |
750 | /// This avoids having to repeatedly write very long hexadecimal |
|
764 | /// This avoids having to repeatedly write very long hexadecimal | |
751 | /// strings for test data, and brings actual hash size independency. |
|
765 | /// strings for test data, and brings actual hash size independency. | |
752 | #[cfg(test)] |
|
766 | #[cfg(test)] | |
753 | fn pad_node(hex: &str) -> Node { |
|
767 | fn pad_node(hex: &str) -> Node { | |
754 | Node::from_hex(&hex_pad_right(hex)).unwrap() |
|
768 | Node::from_hex(&hex_pad_right(hex)).unwrap() | |
755 | } |
|
769 | } | |
756 |
|
770 | |||
757 | /// Pad hexadecimal Node prefix with zeros on the right, then insert |
|
771 | /// Pad hexadecimal Node prefix with zeros on the right, then insert | |
758 | fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) { |
|
772 | fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) { | |
759 | idx.insert(rev, pad_node(hex)); |
|
773 | idx.insert(rev.into(), pad_node(hex)); | |
760 | } |
|
774 | } | |
761 |
|
775 | |||
762 | fn sample_nodetree() -> NodeTree { |
|
776 | fn sample_nodetree() -> NodeTree { | |
763 | NodeTree::from(vec![ |
|
777 | NodeTree::from(vec![ | |
764 | block![0: Rev(9)], |
|
778 | block![0: Rev(9)], | |
765 | block![0: Rev(0), 1: Rev(9)], |
|
779 | block![0: Rev(0), 1: Rev(9)], | |
766 | block![0: Block(1), 1:Rev(1)], |
|
780 | block![0: Block(1), 1:Rev(1)], | |
767 | ]) |
|
781 | ]) | |
768 | } |
|
782 | } | |
769 |
|
783 | |||
770 | fn hex(s: &str) -> NodePrefix { |
|
784 | fn hex(s: &str) -> NodePrefix { | |
771 | NodePrefix::from_hex(s).unwrap() |
|
785 | NodePrefix::from_hex(s).unwrap() | |
772 | } |
|
786 | } | |
773 |
|
787 | |||
774 | #[test] |
|
788 | #[test] | |
775 | fn test_nt_debug() { |
|
789 | fn test_nt_debug() { | |
776 | let nt = sample_nodetree(); |
|
790 | let nt = sample_nodetree(); | |
777 | assert_eq!( |
|
791 | assert_eq!( | |
778 | format!("{:?}", nt), |
|
792 | format!("{:?}", nt), | |
779 | "readonly: \ |
|
793 | "readonly: \ | |
780 | [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \ |
|
794 | [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \ | |
781 | growable: [], \ |
|
795 | growable: [], \ | |
782 | root: {0: Block(1), 1: Rev(1)}", |
|
796 | root: {0: Block(1), 1: Rev(1)}", | |
783 | ); |
|
797 | ); | |
784 | } |
|
798 | } | |
785 |
|
799 | |||
786 | #[test] |
|
800 | #[test] | |
787 | fn test_immutable_find_simplest() -> Result<(), NodeMapError> { |
|
801 | fn test_immutable_find_simplest() -> Result<(), NodeMapError> { | |
788 | let mut idx: TestIndex = HashMap::new(); |
|
802 | let mut idx: TestIndex = HashMap::new(); | |
789 | pad_insert(&mut idx, 1, "1234deadcafe"); |
|
803 | pad_insert(&mut idx, 1, "1234deadcafe"); | |
790 |
|
804 | |||
791 | let nt = NodeTree::from(vec![block! {1: Rev(1)}]); |
|
805 | let nt = NodeTree::from(vec![block! {1: Rev(1)}]); | |
792 | assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1)); |
|
806 | assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1)); | |
793 | assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1)); |
|
807 | assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1)); | |
794 | assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1)); |
|
808 | assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1)); | |
795 | assert_eq!(nt.find_bin(&idx, hex("1a"))?, None); |
|
809 | assert_eq!(nt.find_bin(&idx, hex("1a"))?, None); | |
796 | assert_eq!(nt.find_bin(&idx, hex("ab"))?, None); |
|
810 | assert_eq!(nt.find_bin(&idx, hex("ab"))?, None); | |
797 |
|
811 | |||
798 | // and with full binary Nodes |
|
812 | // and with full binary Nodes | |
799 | assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1)); |
|
813 | assert_eq!(nt.find_node(&idx, idx.get(&1.into()).unwrap())?, Some(1)); | |
800 | let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap(); |
|
814 | let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap(); | |
801 | assert_eq!(nt.find_node(&idx, &unknown)?, None); |
|
815 | assert_eq!(nt.find_node(&idx, &unknown)?, None); | |
802 | Ok(()) |
|
816 | Ok(()) | |
803 | } |
|
817 | } | |
804 |
|
818 | |||
805 | #[test] |
|
819 | #[test] | |
806 | fn test_immutable_find_one_jump() { |
|
820 | fn test_immutable_find_one_jump() { | |
807 | let mut idx = TestIndex::new(); |
|
821 | let mut idx = TestIndex::new(); | |
808 | pad_insert(&mut idx, 9, "012"); |
|
822 | pad_insert(&mut idx, 9, "012"); | |
809 | pad_insert(&mut idx, 0, "00a"); |
|
823 | pad_insert(&mut idx, 0, "00a"); | |
810 |
|
824 | |||
811 | let nt = sample_nodetree(); |
|
825 | let nt = sample_nodetree(); | |
812 |
|
826 | |||
813 | assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults)); |
|
827 | assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults)); | |
814 | assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9))); |
|
828 | assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9))); | |
815 | assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults)); |
|
829 | assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults)); | |
816 | assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0))); |
|
830 | assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0))); | |
817 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3))); |
|
831 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3))); | |
818 | assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION))); |
|
832 | assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION))); | |
819 | } |
|
833 | } | |
820 |
|
834 | |||
821 | #[test] |
|
835 | #[test] | |
822 | fn test_mutated_find() -> Result<(), NodeMapError> { |
|
836 | fn test_mutated_find() -> Result<(), NodeMapError> { | |
823 | let mut idx = TestIndex::new(); |
|
837 | let mut idx = TestIndex::new(); | |
824 | pad_insert(&mut idx, 9, "012"); |
|
838 | pad_insert(&mut idx, 9, "012"); | |
825 | pad_insert(&mut idx, 0, "00a"); |
|
839 | pad_insert(&mut idx, 0, "00a"); | |
826 | pad_insert(&mut idx, 2, "cafe"); |
|
840 | pad_insert(&mut idx, 2, "cafe"); | |
827 | pad_insert(&mut idx, 3, "15"); |
|
841 | pad_insert(&mut idx, 3, "15"); | |
828 | pad_insert(&mut idx, 1, "10"); |
|
842 | pad_insert(&mut idx, 1, "10"); | |
829 |
|
843 | |||
830 | let nt = NodeTree { |
|
844 | let nt = NodeTree { | |
831 | readonly: sample_nodetree().readonly, |
|
845 | readonly: sample_nodetree().readonly, | |
832 | growable: vec![block![0: Rev(1), 5: Rev(3)]], |
|
846 | growable: vec![block![0: Rev(1), 5: Rev(3)]], | |
833 | root: block![0: Block(1), 1:Block(3), 12: Rev(2)], |
|
847 | root: block![0: Block(1), 1:Block(3), 12: Rev(2)], | |
834 | masked_inner_blocks: 1, |
|
848 | masked_inner_blocks: 1, | |
835 | }; |
|
849 | }; | |
836 | assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1)); |
|
850 | assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1)); | |
837 | assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2)); |
|
851 | assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2)); | |
838 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1)); |
|
852 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1)); | |
839 | assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults)); |
|
853 | assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults)); | |
840 | assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION)); |
|
854 | assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION)); | |
841 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3)); |
|
855 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3)); | |
842 | assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9)); |
|
856 | assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9)); | |
843 | assert_eq!(nt.masked_readonly_blocks(), 2); |
|
857 | assert_eq!(nt.masked_readonly_blocks(), 2); | |
844 | Ok(()) |
|
858 | Ok(()) | |
845 | } |
|
859 | } | |
846 |
|
860 | |||
847 | struct TestNtIndex { |
|
861 | struct TestNtIndex { | |
848 | index: TestIndex, |
|
862 | index: TestIndex, | |
849 | nt: NodeTree, |
|
863 | nt: NodeTree, | |
850 | } |
|
864 | } | |
851 |
|
865 | |||
852 | impl TestNtIndex { |
|
866 | impl TestNtIndex { | |
853 | fn new() -> Self { |
|
867 | fn new() -> Self { | |
854 | TestNtIndex { |
|
868 | TestNtIndex { | |
855 | index: HashMap::new(), |
|
869 | index: HashMap::new(), | |
856 | nt: NodeTree::default(), |
|
870 | nt: NodeTree::default(), | |
857 | } |
|
871 | } | |
858 | } |
|
872 | } | |
859 |
|
873 | |||
860 | fn insert( |
|
874 | fn insert(&mut self, rev: i32, hex: &str) -> Result<(), NodeMapError> { | |
861 | &mut self, |
|
|||
862 | rev: Revision, |
|
|||
863 | hex: &str, |
|
|||
864 | ) -> Result<(), NodeMapError> { |
|
|||
865 | let node = pad_node(hex); |
|
875 | let node = pad_node(hex); | |
|
876 | let rev: UncheckedRevision = rev.into(); | |||
866 | self.index.insert(rev, node); |
|
877 | self.index.insert(rev, node); | |
867 |
self.nt.insert( |
|
878 | self.nt.insert( | |
|
879 | &self.index, | |||
|
880 | &node, | |||
|
881 | self.index.check_revision(rev).unwrap(), | |||
|
882 | )?; | |||
868 | Ok(()) |
|
883 | Ok(()) | |
869 | } |
|
884 | } | |
870 |
|
885 | |||
871 | fn find_hex( |
|
886 | fn find_hex( | |
872 | &self, |
|
887 | &self, | |
873 | prefix: &str, |
|
888 | prefix: &str, | |
874 | ) -> Result<Option<Revision>, NodeMapError> { |
|
889 | ) -> Result<Option<Revision>, NodeMapError> { | |
875 | self.nt.find_bin(&self.index, hex(prefix)) |
|
890 | self.nt.find_bin(&self.index, hex(prefix)) | |
876 | } |
|
891 | } | |
877 |
|
892 | |||
878 | fn unique_prefix_len_hex( |
|
893 | fn unique_prefix_len_hex( | |
879 | &self, |
|
894 | &self, | |
880 | prefix: &str, |
|
895 | prefix: &str, | |
881 | ) -> Result<Option<usize>, NodeMapError> { |
|
896 | ) -> Result<Option<usize>, NodeMapError> { | |
882 | self.nt.unique_prefix_len_bin(&self.index, hex(prefix)) |
|
897 | self.nt.unique_prefix_len_bin(&self.index, hex(prefix)) | |
883 | } |
|
898 | } | |
884 |
|
899 | |||
885 | /// Drain `added` and restart a new one |
|
900 | /// Drain `added` and restart a new one | |
886 | fn commit(self) -> Self { |
|
901 | fn commit(self) -> Self { | |
887 | let mut as_vec: Vec<Block> = |
|
902 | let mut as_vec: Vec<Block> = | |
888 | self.nt.readonly.iter().copied().collect(); |
|
903 | self.nt.readonly.iter().copied().collect(); | |
889 | as_vec.extend(self.nt.growable); |
|
904 | as_vec.extend(self.nt.growable); | |
890 | as_vec.push(self.nt.root); |
|
905 | as_vec.push(self.nt.root); | |
891 |
|
906 | |||
892 | Self { |
|
907 | Self { | |
893 | index: self.index, |
|
908 | index: self.index, | |
894 | nt: NodeTree::from(as_vec), |
|
909 | nt: NodeTree::from(as_vec), | |
895 | } |
|
910 | } | |
896 | } |
|
911 | } | |
897 | } |
|
912 | } | |
898 |
|
913 | |||
899 | #[test] |
|
914 | #[test] | |
900 | fn test_insert_full_mutable() -> Result<(), NodeMapError> { |
|
915 | fn test_insert_full_mutable() -> Result<(), NodeMapError> { | |
901 | let mut idx = TestNtIndex::new(); |
|
916 | let mut idx = TestNtIndex::new(); | |
902 | idx.insert(0, "1234")?; |
|
917 | idx.insert(0, "1234")?; | |
903 | assert_eq!(idx.find_hex("1")?, Some(0)); |
|
918 | assert_eq!(idx.find_hex("1")?, Some(0)); | |
904 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
919 | assert_eq!(idx.find_hex("12")?, Some(0)); | |
905 |
|
920 | |||
906 | // let's trigger a simple split |
|
921 | // let's trigger a simple split | |
907 | idx.insert(1, "1a34")?; |
|
922 | idx.insert(1, "1a34")?; | |
908 | assert_eq!(idx.nt.growable.len(), 1); |
|
923 | assert_eq!(idx.nt.growable.len(), 1); | |
909 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
924 | assert_eq!(idx.find_hex("12")?, Some(0)); | |
910 | assert_eq!(idx.find_hex("1a")?, Some(1)); |
|
925 | assert_eq!(idx.find_hex("1a")?, Some(1)); | |
911 |
|
926 | |||
912 | // reinserting is a no_op |
|
927 | // reinserting is a no_op | |
913 | idx.insert(1, "1a34")?; |
|
928 | idx.insert(1, "1a34")?; | |
914 | assert_eq!(idx.nt.growable.len(), 1); |
|
929 | assert_eq!(idx.nt.growable.len(), 1); | |
915 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
930 | assert_eq!(idx.find_hex("12")?, Some(0)); | |
916 | assert_eq!(idx.find_hex("1a")?, Some(1)); |
|
931 | assert_eq!(idx.find_hex("1a")?, Some(1)); | |
917 |
|
932 | |||
918 | idx.insert(2, "1a01")?; |
|
933 | idx.insert(2, "1a01")?; | |
919 | assert_eq!(idx.nt.growable.len(), 2); |
|
934 | assert_eq!(idx.nt.growable.len(), 2); | |
920 | assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults)); |
|
935 | assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults)); | |
921 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
936 | assert_eq!(idx.find_hex("12")?, Some(0)); | |
922 | assert_eq!(idx.find_hex("1a3")?, Some(1)); |
|
937 | assert_eq!(idx.find_hex("1a3")?, Some(1)); | |
923 | assert_eq!(idx.find_hex("1a0")?, Some(2)); |
|
938 | assert_eq!(idx.find_hex("1a0")?, Some(2)); | |
924 | assert_eq!(idx.find_hex("1a12")?, None); |
|
939 | assert_eq!(idx.find_hex("1a12")?, None); | |
925 |
|
940 | |||
926 | // now let's make it split and create more than one additional block |
|
941 | // now let's make it split and create more than one additional block | |
927 | idx.insert(3, "1a345")?; |
|
942 | idx.insert(3, "1a345")?; | |
928 | assert_eq!(idx.nt.growable.len(), 4); |
|
943 | assert_eq!(idx.nt.growable.len(), 4); | |
929 | assert_eq!(idx.find_hex("1a340")?, Some(1)); |
|
944 | assert_eq!(idx.find_hex("1a340")?, Some(1)); | |
930 | assert_eq!(idx.find_hex("1a345")?, Some(3)); |
|
945 | assert_eq!(idx.find_hex("1a345")?, Some(3)); | |
931 | assert_eq!(idx.find_hex("1a341")?, None); |
|
946 | assert_eq!(idx.find_hex("1a341")?, None); | |
932 |
|
947 | |||
933 | // there's no readonly block to mask |
|
948 | // there's no readonly block to mask | |
934 | assert_eq!(idx.nt.masked_readonly_blocks(), 0); |
|
949 | assert_eq!(idx.nt.masked_readonly_blocks(), 0); | |
935 | Ok(()) |
|
950 | Ok(()) | |
936 | } |
|
951 | } | |
937 |
|
952 | |||
938 | #[test] |
|
953 | #[test] | |
939 | fn test_unique_prefix_len_zero_prefix() { |
|
954 | fn test_unique_prefix_len_zero_prefix() { | |
940 | let mut idx = TestNtIndex::new(); |
|
955 | let mut idx = TestNtIndex::new(); | |
941 | idx.insert(0, "00000abcd").unwrap(); |
|
956 | idx.insert(0, "00000abcd").unwrap(); | |
942 |
|
957 | |||
943 | assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults)); |
|
958 | assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults)); | |
944 | // in the nodetree proper, this will be found at the first nybble |
|
959 | // in the nodetree proper, this will be found at the first nybble | |
945 | // yet the correct answer for unique_prefix_len is not 1, nor 1+1, |
|
960 | // yet the correct answer for unique_prefix_len is not 1, nor 1+1, | |
946 | // but the first difference with `NULL_NODE` |
|
961 | // but the first difference with `NULL_NODE` | |
947 | assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); |
|
962 | assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); | |
948 | assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); |
|
963 | assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); | |
949 |
|
964 | |||
950 | // same with odd result |
|
965 | // same with odd result | |
951 | idx.insert(1, "00123").unwrap(); |
|
966 | idx.insert(1, "00123").unwrap(); | |
952 | assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3))); |
|
967 | assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3))); | |
953 | assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3))); |
|
968 | assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3))); | |
954 |
|
969 | |||
955 | // these are unchanged of course |
|
970 | // these are unchanged of course | |
956 | assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); |
|
971 | assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); | |
957 | assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); |
|
972 | assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); | |
958 | } |
|
973 | } | |
959 |
|
974 | |||
960 | #[test] |
|
975 | #[test] | |
961 | fn test_insert_extreme_splitting() -> Result<(), NodeMapError> { |
|
976 | fn test_insert_extreme_splitting() -> Result<(), NodeMapError> { | |
962 | // check that the splitting loop is long enough |
|
977 | // check that the splitting loop is long enough | |
963 | let mut nt_idx = TestNtIndex::new(); |
|
978 | let mut nt_idx = TestNtIndex::new(); | |
964 | let nt = &mut nt_idx.nt; |
|
979 | let nt = &mut nt_idx.nt; | |
965 | let idx = &mut nt_idx.index; |
|
980 | let idx = &mut nt_idx.index; | |
966 |
|
981 | |||
967 | let node0_hex = hex_pad_right("444444"); |
|
982 | let node0_hex = hex_pad_right("444444"); | |
968 | let mut node1_hex = hex_pad_right("444444"); |
|
983 | let mut node1_hex = hex_pad_right("444444"); | |
969 | node1_hex.pop(); |
|
984 | node1_hex.pop(); | |
970 | node1_hex.push('5'); |
|
985 | node1_hex.push('5'); | |
971 | let node0 = Node::from_hex(&node0_hex).unwrap(); |
|
986 | let node0 = Node::from_hex(&node0_hex).unwrap(); | |
972 | let node1 = Node::from_hex(&node1_hex).unwrap(); |
|
987 | let node1 = Node::from_hex(&node1_hex).unwrap(); | |
973 |
|
988 | |||
974 | idx.insert(0, node0); |
|
989 | idx.insert(0.into(), node0); | |
975 | nt.insert(idx, &node0, 0)?; |
|
990 | nt.insert(idx, &node0, 0)?; | |
976 | idx.insert(1, node1); |
|
991 | idx.insert(1.into(), node1); | |
977 | nt.insert(idx, &node1, 1)?; |
|
992 | nt.insert(idx, &node1, 1)?; | |
978 |
|
993 | |||
979 | assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0)); |
|
994 | assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0)); | |
980 | assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1)); |
|
995 | assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1)); | |
981 | Ok(()) |
|
996 | Ok(()) | |
982 | } |
|
997 | } | |
983 |
|
998 | |||
984 | #[test] |
|
999 | #[test] | |
985 | fn test_insert_partly_immutable() -> Result<(), NodeMapError> { |
|
1000 | fn test_insert_partly_immutable() -> Result<(), NodeMapError> { | |
986 | let mut idx = TestNtIndex::new(); |
|
1001 | let mut idx = TestNtIndex::new(); | |
987 | idx.insert(0, "1234")?; |
|
1002 | idx.insert(0, "1234")?; | |
988 | idx.insert(1, "1235")?; |
|
1003 | idx.insert(1, "1235")?; | |
989 | idx.insert(2, "131")?; |
|
1004 | idx.insert(2, "131")?; | |
990 | idx.insert(3, "cafe")?; |
|
1005 | idx.insert(3, "cafe")?; | |
991 | let mut idx = idx.commit(); |
|
1006 | let mut idx = idx.commit(); | |
992 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
1007 | assert_eq!(idx.find_hex("1234")?, Some(0)); | |
993 | assert_eq!(idx.find_hex("1235")?, Some(1)); |
|
1008 | assert_eq!(idx.find_hex("1235")?, Some(1)); | |
994 | assert_eq!(idx.find_hex("131")?, Some(2)); |
|
1009 | assert_eq!(idx.find_hex("131")?, Some(2)); | |
995 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
1010 | assert_eq!(idx.find_hex("cafe")?, Some(3)); | |
996 | // we did not add anything since init from readonly |
|
1011 | // we did not add anything since init from readonly | |
997 | assert_eq!(idx.nt.masked_readonly_blocks(), 0); |
|
1012 | assert_eq!(idx.nt.masked_readonly_blocks(), 0); | |
998 |
|
1013 | |||
999 | idx.insert(4, "123A")?; |
|
1014 | idx.insert(4, "123A")?; | |
1000 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
1015 | assert_eq!(idx.find_hex("1234")?, Some(0)); | |
1001 | assert_eq!(idx.find_hex("1235")?, Some(1)); |
|
1016 | assert_eq!(idx.find_hex("1235")?, Some(1)); | |
1002 | assert_eq!(idx.find_hex("131")?, Some(2)); |
|
1017 | assert_eq!(idx.find_hex("131")?, Some(2)); | |
1003 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
1018 | assert_eq!(idx.find_hex("cafe")?, Some(3)); | |
1004 | assert_eq!(idx.find_hex("123A")?, Some(4)); |
|
1019 | assert_eq!(idx.find_hex("123A")?, Some(4)); | |
1005 | // we masked blocks for all prefixes of "123", including the root |
|
1020 | // we masked blocks for all prefixes of "123", including the root | |
1006 | assert_eq!(idx.nt.masked_readonly_blocks(), 4); |
|
1021 | assert_eq!(idx.nt.masked_readonly_blocks(), 4); | |
1007 |
|
1022 | |||
1008 | eprintln!("{:?}", idx.nt); |
|
1023 | eprintln!("{:?}", idx.nt); | |
1009 | idx.insert(5, "c0")?; |
|
1024 | idx.insert(5, "c0")?; | |
1010 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
1025 | assert_eq!(idx.find_hex("cafe")?, Some(3)); | |
1011 | assert_eq!(idx.find_hex("c0")?, Some(5)); |
|
1026 | assert_eq!(idx.find_hex("c0")?, Some(5)); | |
1012 | assert_eq!(idx.find_hex("c1")?, None); |
|
1027 | assert_eq!(idx.find_hex("c1")?, None); | |
1013 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
1028 | assert_eq!(idx.find_hex("1234")?, Some(0)); | |
1014 | // inserting "c0" is just splitting the 'c' slot of the mutable root, |
|
1029 | // inserting "c0" is just splitting the 'c' slot of the mutable root, | |
1015 | // it doesn't mask anything |
|
1030 | // it doesn't mask anything | |
1016 | assert_eq!(idx.nt.masked_readonly_blocks(), 4); |
|
1031 | assert_eq!(idx.nt.masked_readonly_blocks(), 4); | |
1017 |
|
1032 | |||
1018 | Ok(()) |
|
1033 | Ok(()) | |
1019 | } |
|
1034 | } | |
1020 |
|
1035 | |||
1021 | #[test] |
|
1036 | #[test] | |
1022 | fn test_invalidate_all() -> Result<(), NodeMapError> { |
|
1037 | fn test_invalidate_all() -> Result<(), NodeMapError> { | |
1023 | let mut idx = TestNtIndex::new(); |
|
1038 | let mut idx = TestNtIndex::new(); | |
1024 | idx.insert(0, "1234")?; |
|
1039 | idx.insert(0, "1234")?; | |
1025 | idx.insert(1, "1235")?; |
|
1040 | idx.insert(1, "1235")?; | |
1026 | idx.insert(2, "131")?; |
|
1041 | idx.insert(2, "131")?; | |
1027 | idx.insert(3, "cafe")?; |
|
1042 | idx.insert(3, "cafe")?; | |
1028 | let mut idx = idx.commit(); |
|
1043 | let mut idx = idx.commit(); | |
1029 |
|
1044 | |||
1030 | idx.nt.invalidate_all(); |
|
1045 | idx.nt.invalidate_all(); | |
1031 |
|
1046 | |||
1032 | assert_eq!(idx.find_hex("1234")?, None); |
|
1047 | assert_eq!(idx.find_hex("1234")?, None); | |
1033 | assert_eq!(idx.find_hex("1235")?, None); |
|
1048 | assert_eq!(idx.find_hex("1235")?, None); | |
1034 | assert_eq!(idx.find_hex("131")?, None); |
|
1049 | assert_eq!(idx.find_hex("131")?, None); | |
1035 | assert_eq!(idx.find_hex("cafe")?, None); |
|
1050 | assert_eq!(idx.find_hex("cafe")?, None); | |
1036 | // all the readonly blocks have been masked, this is the |
|
1051 | // all the readonly blocks have been masked, this is the | |
1037 | // conventional expected response |
|
1052 | // conventional expected response | |
1038 | assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1); |
|
1053 | assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1); | |
1039 | Ok(()) |
|
1054 | Ok(()) | |
1040 | } |
|
1055 | } | |
1041 |
|
1056 | |||
1042 | #[test] |
|
1057 | #[test] | |
1043 | fn test_into_added_empty() { |
|
1058 | fn test_into_added_empty() { | |
1044 | assert!(sample_nodetree().into_readonly_and_added().1.is_empty()); |
|
1059 | assert!(sample_nodetree().into_readonly_and_added().1.is_empty()); | |
1045 | assert!(sample_nodetree() |
|
1060 | assert!(sample_nodetree() | |
1046 | .into_readonly_and_added_bytes() |
|
1061 | .into_readonly_and_added_bytes() | |
1047 | .1 |
|
1062 | .1 | |
1048 | .is_empty()); |
|
1063 | .is_empty()); | |
1049 | } |
|
1064 | } | |
1050 |
|
1065 | |||
1051 | #[test] |
|
1066 | #[test] | |
1052 | fn test_into_added_bytes() -> Result<(), NodeMapError> { |
|
1067 | fn test_into_added_bytes() -> Result<(), NodeMapError> { | |
1053 | let mut idx = TestNtIndex::new(); |
|
1068 | let mut idx = TestNtIndex::new(); | |
1054 | idx.insert(0, "1234")?; |
|
1069 | idx.insert(0, "1234")?; | |
1055 | let mut idx = idx.commit(); |
|
1070 | let mut idx = idx.commit(); | |
1056 | idx.insert(4, "cafe")?; |
|
1071 | idx.insert(4, "cafe")?; | |
1057 | let (_, bytes) = idx.nt.into_readonly_and_added_bytes(); |
|
1072 | let (_, bytes) = idx.nt.into_readonly_and_added_bytes(); | |
1058 |
|
1073 | |||
1059 | // only the root block has been changed |
|
1074 | // only the root block has been changed | |
1060 | assert_eq!(bytes.len(), size_of::<Block>()); |
|
1075 | assert_eq!(bytes.len(), size_of::<Block>()); | |
1061 | // big endian for -2 |
|
1076 | // big endian for -2 | |
1062 | assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]); |
|
1077 | assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]); | |
1063 | // big endian for -6 |
|
1078 | // big endian for -6 | |
1064 | assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]); |
|
1079 | assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]); | |
1065 | Ok(()) |
|
1080 | Ok(()) | |
1066 | } |
|
1081 | } | |
1067 | } |
|
1082 | } |
@@ -1,69 +1,69 | |||||
1 | //! The revset query language |
|
1 | //! The revset query language | |
2 | //! |
|
2 | //! | |
3 | //! <https://www.mercurial-scm.org/repo/hg/help/revsets> |
|
3 | //! <https://www.mercurial-scm.org/repo/hg/help/revsets> | |
4 |
|
4 | |||
5 | use crate::errors::HgError; |
|
5 | use crate::errors::HgError; | |
6 | use crate::repo::Repo; |
|
6 | use crate::repo::Repo; | |
7 | use crate::revlog::NodePrefix; |
|
7 | use crate::revlog::NodePrefix; | |
8 | use crate::revlog::{Revision, NULL_REVISION, WORKING_DIRECTORY_HEX}; |
|
8 | use crate::revlog::{Revision, NULL_REVISION, WORKING_DIRECTORY_HEX}; | |
9 | use crate::revlog::{Revlog, RevlogError}; |
|
9 | use crate::revlog::{Revlog, RevlogError}; | |
10 | use crate::Node; |
|
10 | use crate::Node; | |
11 |
|
11 | |||
12 | /// Resolve a query string into a single revision. |
|
12 | /// Resolve a query string into a single revision. | |
13 | /// |
|
13 | /// | |
14 | /// Only some of the revset language is implemented yet. |
|
14 | /// Only some of the revset language is implemented yet. | |
15 | pub fn resolve_single( |
|
15 | pub fn resolve_single( | |
16 | input: &str, |
|
16 | input: &str, | |
17 | repo: &Repo, |
|
17 | repo: &Repo, | |
18 | ) -> Result<Revision, RevlogError> { |
|
18 | ) -> Result<Revision, RevlogError> { | |
19 | let changelog = repo.changelog()?; |
|
19 | let changelog = repo.changelog()?; | |
20 |
|
20 | |||
21 | match input { |
|
21 | match input { | |
22 | "." => { |
|
22 | "." => { | |
23 | let p1 = repo.dirstate_parents()?.p1; |
|
23 | let p1 = repo.dirstate_parents()?.p1; | |
24 | return changelog.revlog.rev_from_node(p1.into()); |
|
24 | return changelog.revlog.rev_from_node(p1.into()); | |
25 | } |
|
25 | } | |
26 | "null" => return Ok(NULL_REVISION), |
|
26 | "null" => return Ok(NULL_REVISION), | |
27 | _ => {} |
|
27 | _ => {} | |
28 | } |
|
28 | } | |
29 |
|
29 | |||
30 | match resolve_rev_number_or_hex_prefix(input, &changelog.revlog) { |
|
30 | match resolve_rev_number_or_hex_prefix(input, &changelog.revlog) { | |
31 | Err(RevlogError::InvalidRevision) => { |
|
31 | Err(RevlogError::InvalidRevision) => { | |
32 | // TODO: support for the rest of the language here. |
|
32 | // TODO: support for the rest of the language here. | |
33 | let msg = format!("cannot parse revset '{}'", input); |
|
33 | let msg = format!("cannot parse revset '{}'", input); | |
34 | Err(HgError::unsupported(msg).into()) |
|
34 | Err(HgError::unsupported(msg).into()) | |
35 | } |
|
35 | } | |
36 | result => result, |
|
36 | result => result, | |
37 | } |
|
37 | } | |
38 | } |
|
38 | } | |
39 |
|
39 | |||
40 | /// Resolve the small subset of the language suitable for revlogs other than |
|
40 | /// Resolve the small subset of the language suitable for revlogs other than | |
41 | /// the changelog, such as in `hg debugdata --manifest` CLI argument. |
|
41 | /// the changelog, such as in `hg debugdata --manifest` CLI argument. | |
42 | /// |
|
42 | /// | |
43 | /// * A non-negative decimal integer for a revision number, or |
|
43 | /// * A non-negative decimal integer for a revision number, or | |
44 | /// * An hexadecimal string, for the unique node ID that starts with this |
|
44 | /// * An hexadecimal string, for the unique node ID that starts with this | |
45 | /// prefix |
|
45 | /// prefix | |
46 | pub fn resolve_rev_number_or_hex_prefix( |
|
46 | pub fn resolve_rev_number_or_hex_prefix( | |
47 | input: &str, |
|
47 | input: &str, | |
48 | revlog: &Revlog, |
|
48 | revlog: &Revlog, | |
49 | ) -> Result<Revision, RevlogError> { |
|
49 | ) -> Result<Revision, RevlogError> { | |
50 | // The Python equivalent of this is part of `revsymbol` in |
|
50 | // The Python equivalent of this is part of `revsymbol` in | |
51 | // `mercurial/scmutil.py` |
|
51 | // `mercurial/scmutil.py` | |
52 |
|
52 | |||
53 | if let Ok(integer) = input.parse::<i32>() { |
|
53 | if let Ok(integer) = input.parse::<i32>() { | |
54 | if integer.to_string() == input |
|
54 | if integer.to_string() == input | |
55 | && integer >= 0 |
|
55 | && integer >= 0 | |
56 | && revlog.has_rev(integer) |
|
56 | && revlog.has_rev(integer.into()) | |
57 | { |
|
57 | { | |
58 | return Ok(integer); |
|
58 | return Ok(integer); | |
59 | } |
|
59 | } | |
60 | } |
|
60 | } | |
61 | if let Ok(prefix) = NodePrefix::from_hex(input) { |
|
61 | if let Ok(prefix) = NodePrefix::from_hex(input) { | |
62 | if prefix.is_prefix_of(&Node::from_hex(WORKING_DIRECTORY_HEX).unwrap()) |
|
62 | if prefix.is_prefix_of(&Node::from_hex(WORKING_DIRECTORY_HEX).unwrap()) | |
63 | { |
|
63 | { | |
64 | return Err(RevlogError::WDirUnsupported); |
|
64 | return Err(RevlogError::WDirUnsupported); | |
65 | } |
|
65 | } | |
66 | return revlog.rev_from_node(prefix); |
|
66 | return revlog.rev_from_node(prefix); | |
67 | } |
|
67 | } | |
68 | Err(RevlogError::InvalidRevision) |
|
68 | Err(RevlogError::InvalidRevision) | |
69 | } |
|
69 | } |
@@ -1,515 +1,518 | |||||
1 | // revlog.rs |
|
1 | // revlog.rs | |
2 | // |
|
2 | // | |
3 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> |
|
3 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> | |
4 | // |
|
4 | // | |
5 | // This software may be used and distributed according to the terms of the |
|
5 | // This software may be used and distributed according to the terms of the | |
6 | // GNU General Public License version 2 or any later version. |
|
6 | // GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | use crate::{ |
|
8 | use crate::{ | |
9 | cindex, |
|
9 | cindex, | |
10 | utils::{node_from_py_bytes, node_from_py_object}, |
|
10 | utils::{node_from_py_bytes, node_from_py_object}, | |
11 | }; |
|
11 | }; | |
12 | use cpython::{ |
|
12 | use cpython::{ | |
13 | buffer::{Element, PyBuffer}, |
|
13 | buffer::{Element, PyBuffer}, | |
14 | exc::{IndexError, ValueError}, |
|
14 | exc::{IndexError, ValueError}, | |
15 | ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule, |
|
15 | ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule, | |
16 | PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, |
|
16 | PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, | |
17 | }; |
|
17 | }; | |
18 | use hg::{ |
|
18 | use hg::{ | |
19 | nodemap::{Block, NodeMapError, NodeTree}, |
|
19 | nodemap::{Block, NodeMapError, NodeTree}, | |
20 | revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, |
|
20 | revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, | |
21 | Revision, |
|
21 | Revision, UncheckedRevision, | |
22 | }; |
|
22 | }; | |
23 | use std::cell::RefCell; |
|
23 | use std::cell::RefCell; | |
24 |
|
24 | |||
25 | /// Return a Struct implementing the Graph trait |
|
25 | /// Return a Struct implementing the Graph trait | |
26 | pub(crate) fn pyindex_to_graph( |
|
26 | pub(crate) fn pyindex_to_graph( | |
27 | py: Python, |
|
27 | py: Python, | |
28 | index: PyObject, |
|
28 | index: PyObject, | |
29 | ) -> PyResult<cindex::Index> { |
|
29 | ) -> PyResult<cindex::Index> { | |
30 | match index.extract::<MixedIndex>(py) { |
|
30 | match index.extract::<MixedIndex>(py) { | |
31 | Ok(midx) => Ok(midx.clone_cindex(py)), |
|
31 | Ok(midx) => Ok(midx.clone_cindex(py)), | |
32 | Err(_) => cindex::Index::new(py, index), |
|
32 | Err(_) => cindex::Index::new(py, index), | |
33 | } |
|
33 | } | |
34 | } |
|
34 | } | |
35 |
|
35 | |||
36 | py_class!(pub class MixedIndex |py| { |
|
36 | py_class!(pub class MixedIndex |py| { | |
37 | data cindex: RefCell<cindex::Index>; |
|
37 | data cindex: RefCell<cindex::Index>; | |
38 | data nt: RefCell<Option<NodeTree>>; |
|
38 | data nt: RefCell<Option<NodeTree>>; | |
39 | data docket: RefCell<Option<PyObject>>; |
|
39 | data docket: RefCell<Option<PyObject>>; | |
40 | // Holds a reference to the mmap'ed persistent nodemap data |
|
40 | // Holds a reference to the mmap'ed persistent nodemap data | |
41 | data mmap: RefCell<Option<PyBuffer>>; |
|
41 | data mmap: RefCell<Option<PyBuffer>>; | |
42 |
|
42 | |||
43 | def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> { |
|
43 | def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> { | |
44 | Self::new(py, cindex) |
|
44 | Self::new(py, cindex) | |
45 | } |
|
45 | } | |
46 |
|
46 | |||
47 | /// Compatibility layer used for Python consumers needing access to the C index |
|
47 | /// Compatibility layer used for Python consumers needing access to the C index | |
48 | /// |
|
48 | /// | |
49 | /// Only use case so far is `scmutil.shortesthexnodeidprefix`, |
|
49 | /// Only use case so far is `scmutil.shortesthexnodeidprefix`, | |
50 | /// that may need to build a custom `nodetree`, based on a specified revset. |
|
50 | /// that may need to build a custom `nodetree`, based on a specified revset. | |
51 | /// With a Rust implementation of the nodemap, we will be able to get rid of |
|
51 | /// With a Rust implementation of the nodemap, we will be able to get rid of | |
52 | /// this, by exposing our own standalone nodemap class, |
|
52 | /// this, by exposing our own standalone nodemap class, | |
53 | /// ready to accept `MixedIndex`. |
|
53 | /// ready to accept `MixedIndex`. | |
54 | def get_cindex(&self) -> PyResult<PyObject> { |
|
54 | def get_cindex(&self) -> PyResult<PyObject> { | |
55 | Ok(self.cindex(py).borrow().inner().clone_ref(py)) |
|
55 | Ok(self.cindex(py).borrow().inner().clone_ref(py)) | |
56 | } |
|
56 | } | |
57 |
|
57 | |||
58 | // Index API involving nodemap, as defined in mercurial/pure/parsers.py |
|
58 | // Index API involving nodemap, as defined in mercurial/pure/parsers.py | |
59 |
|
59 | |||
60 | /// Return Revision if found, raises a bare `error.RevlogError` |
|
60 | /// Return Revision if found, raises a bare `error.RevlogError` | |
61 | /// in case of ambiguity, same as C version does |
|
61 | /// in case of ambiguity, same as C version does | |
62 | def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> { |
|
62 | def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> { | |
63 | let opt = self.get_nodetree(py)?.borrow(); |
|
63 | let opt = self.get_nodetree(py)?.borrow(); | |
64 | let nt = opt.as_ref().unwrap(); |
|
64 | let nt = opt.as_ref().unwrap(); | |
65 | let idx = &*self.cindex(py).borrow(); |
|
65 | let idx = &*self.cindex(py).borrow(); | |
66 | let node = node_from_py_bytes(py, &node)?; |
|
66 | let node = node_from_py_bytes(py, &node)?; | |
67 | nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e)) |
|
67 | nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e)) | |
68 | } |
|
68 | } | |
69 |
|
69 | |||
70 | /// same as `get_rev()` but raises a bare `error.RevlogError` if node |
|
70 | /// same as `get_rev()` but raises a bare `error.RevlogError` if node | |
71 | /// is not found. |
|
71 | /// is not found. | |
72 | /// |
|
72 | /// | |
73 | /// No need to repeat `node` in the exception, `mercurial/revlog.py` |
|
73 | /// No need to repeat `node` in the exception, `mercurial/revlog.py` | |
74 | /// will catch and rewrap with it |
|
74 | /// will catch and rewrap with it | |
75 | def rev(&self, node: PyBytes) -> PyResult<Revision> { |
|
75 | def rev(&self, node: PyBytes) -> PyResult<Revision> { | |
76 | self.get_rev(py, node)?.ok_or_else(|| revlog_error(py)) |
|
76 | self.get_rev(py, node)?.ok_or_else(|| revlog_error(py)) | |
77 | } |
|
77 | } | |
78 |
|
78 | |||
79 | /// return True if the node exist in the index |
|
79 | /// return True if the node exist in the index | |
80 | def has_node(&self, node: PyBytes) -> PyResult<bool> { |
|
80 | def has_node(&self, node: PyBytes) -> PyResult<bool> { | |
81 | self.get_rev(py, node).map(|opt| opt.is_some()) |
|
81 | self.get_rev(py, node).map(|opt| opt.is_some()) | |
82 | } |
|
82 | } | |
83 |
|
83 | |||
84 | /// find length of shortest hex nodeid of a binary ID |
|
84 | /// find length of shortest hex nodeid of a binary ID | |
85 | def shortest(&self, node: PyBytes) -> PyResult<usize> { |
|
85 | def shortest(&self, node: PyBytes) -> PyResult<usize> { | |
86 | let opt = self.get_nodetree(py)?.borrow(); |
|
86 | let opt = self.get_nodetree(py)?.borrow(); | |
87 | let nt = opt.as_ref().unwrap(); |
|
87 | let nt = opt.as_ref().unwrap(); | |
88 | let idx = &*self.cindex(py).borrow(); |
|
88 | let idx = &*self.cindex(py).borrow(); | |
89 | match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) |
|
89 | match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) | |
90 | { |
|
90 | { | |
91 | Ok(Some(l)) => Ok(l), |
|
91 | Ok(Some(l)) => Ok(l), | |
92 | Ok(None) => Err(revlog_error(py)), |
|
92 | Ok(None) => Err(revlog_error(py)), | |
93 | Err(e) => Err(nodemap_error(py, e)), |
|
93 | Err(e) => Err(nodemap_error(py, e)), | |
94 | } |
|
94 | } | |
95 | } |
|
95 | } | |
96 |
|
96 | |||
97 | def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> { |
|
97 | def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> { | |
98 | let opt = self.get_nodetree(py)?.borrow(); |
|
98 | let opt = self.get_nodetree(py)?.borrow(); | |
99 | let nt = opt.as_ref().unwrap(); |
|
99 | let nt = opt.as_ref().unwrap(); | |
100 | let idx = &*self.cindex(py).borrow(); |
|
100 | let idx = &*self.cindex(py).borrow(); | |
101 |
|
101 | |||
102 | let node_as_string = if cfg!(feature = "python3-sys") { |
|
102 | let node_as_string = if cfg!(feature = "python3-sys") { | |
103 | node.cast_as::<PyString>(py)?.to_string(py)?.to_string() |
|
103 | node.cast_as::<PyString>(py)?.to_string(py)?.to_string() | |
104 | } |
|
104 | } | |
105 | else { |
|
105 | else { | |
106 | let node = node.extract::<PyBytes>(py)?; |
|
106 | let node = node.extract::<PyBytes>(py)?; | |
107 | String::from_utf8_lossy(node.data(py)).to_string() |
|
107 | String::from_utf8_lossy(node.data(py)).to_string() | |
108 | }; |
|
108 | }; | |
109 |
|
109 | |||
110 | let prefix = NodePrefix::from_hex(&node_as_string) |
|
110 | let prefix = NodePrefix::from_hex(&node_as_string) | |
111 | .map_err(|_| PyErr::new::<ValueError, _>( |
|
111 | .map_err(|_| PyErr::new::<ValueError, _>( | |
112 | py, format!("Invalid node or prefix '{}'", node_as_string)) |
|
112 | py, format!("Invalid node or prefix '{}'", node_as_string)) | |
113 | )?; |
|
113 | )?; | |
114 |
|
114 | |||
115 | nt.find_bin(idx, prefix) |
|
115 | nt.find_bin(idx, prefix) | |
116 | // TODO make an inner API returning the node directly |
|
116 | // TODO make an inner API returning the node directly | |
117 | .map(|opt| opt.map( |
|
117 | .map(|opt| opt.map( | |
118 | |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))) |
|
118 | |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))) | |
119 | .map_err(|e| nodemap_error(py, e)) |
|
119 | .map_err(|e| nodemap_error(py, e)) | |
120 |
|
120 | |||
121 | } |
|
121 | } | |
122 |
|
122 | |||
123 | /// append an index entry |
|
123 | /// append an index entry | |
124 | def append(&self, tup: PyTuple) -> PyResult<PyObject> { |
|
124 | def append(&self, tup: PyTuple) -> PyResult<PyObject> { | |
125 | if tup.len(py) < 8 { |
|
125 | if tup.len(py) < 8 { | |
126 | // this is better than the panic promised by tup.get_item() |
|
126 | // this is better than the panic promised by tup.get_item() | |
127 | return Err( |
|
127 | return Err( | |
128 | PyErr::new::<IndexError, _>(py, "tuple index out of range")) |
|
128 | PyErr::new::<IndexError, _>(py, "tuple index out of range")) | |
129 | } |
|
129 | } | |
130 | let node_bytes = tup.get_item(py, 7).extract(py)?; |
|
130 | let node_bytes = tup.get_item(py, 7).extract(py)?; | |
131 | let node = node_from_py_object(py, &node_bytes)?; |
|
131 | let node = node_from_py_object(py, &node_bytes)?; | |
132 |
|
132 | |||
133 | let mut idx = self.cindex(py).borrow_mut(); |
|
133 | let mut idx = self.cindex(py).borrow_mut(); | |
134 | let rev = idx.len() as Revision; |
|
134 | let rev = idx.len() as Revision; | |
135 |
|
135 | |||
136 | idx.append(py, tup)?; |
|
136 | idx.append(py, tup)?; | |
137 | self.get_nodetree(py)?.borrow_mut().as_mut().unwrap() |
|
137 | self.get_nodetree(py)?.borrow_mut().as_mut().unwrap() | |
138 | .insert(&*idx, &node, rev) |
|
138 | .insert(&*idx, &node, rev) | |
139 | .map_err(|e| nodemap_error(py, e))?; |
|
139 | .map_err(|e| nodemap_error(py, e))?; | |
140 | Ok(py.None()) |
|
140 | Ok(py.None()) | |
141 | } |
|
141 | } | |
142 |
|
142 | |||
143 | def __delitem__(&self, key: PyObject) -> PyResult<()> { |
|
143 | def __delitem__(&self, key: PyObject) -> PyResult<()> { | |
144 | // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]` |
|
144 | // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]` | |
145 | self.cindex(py).borrow().inner().del_item(py, key)?; |
|
145 | self.cindex(py).borrow().inner().del_item(py, key)?; | |
146 | let mut opt = self.get_nodetree(py)?.borrow_mut(); |
|
146 | let mut opt = self.get_nodetree(py)?.borrow_mut(); | |
147 | let nt = opt.as_mut().unwrap(); |
|
147 | let nt = opt.as_mut().unwrap(); | |
148 | nt.invalidate_all(); |
|
148 | nt.invalidate_all(); | |
149 | self.fill_nodemap(py, nt)?; |
|
149 | self.fill_nodemap(py, nt)?; | |
150 | Ok(()) |
|
150 | Ok(()) | |
151 | } |
|
151 | } | |
152 |
|
152 | |||
153 | // |
|
153 | // | |
154 | // Reforwarded C index API |
|
154 | // Reforwarded C index API | |
155 | // |
|
155 | // | |
156 |
|
156 | |||
157 | // index_methods (tp_methods). Same ordering as in revlog.c |
|
157 | // index_methods (tp_methods). Same ordering as in revlog.c | |
158 |
|
158 | |||
159 | /// return the gca set of the given revs |
|
159 | /// return the gca set of the given revs | |
160 | def ancestors(&self, *args, **kw) -> PyResult<PyObject> { |
|
160 | def ancestors(&self, *args, **kw) -> PyResult<PyObject> { | |
161 | self.call_cindex(py, "ancestors", args, kw) |
|
161 | self.call_cindex(py, "ancestors", args, kw) | |
162 | } |
|
162 | } | |
163 |
|
163 | |||
164 | /// return the heads of the common ancestors of the given revs |
|
164 | /// return the heads of the common ancestors of the given revs | |
165 | def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> { |
|
165 | def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> { | |
166 | self.call_cindex(py, "commonancestorsheads", args, kw) |
|
166 | self.call_cindex(py, "commonancestorsheads", args, kw) | |
167 | } |
|
167 | } | |
168 |
|
168 | |||
169 | /// Clear the index caches and inner py_class data. |
|
169 | /// Clear the index caches and inner py_class data. | |
170 | /// It is Python's responsibility to call `update_nodemap_data` again. |
|
170 | /// It is Python's responsibility to call `update_nodemap_data` again. | |
171 | def clearcaches(&self, *args, **kw) -> PyResult<PyObject> { |
|
171 | def clearcaches(&self, *args, **kw) -> PyResult<PyObject> { | |
172 | self.nt(py).borrow_mut().take(); |
|
172 | self.nt(py).borrow_mut().take(); | |
173 | self.docket(py).borrow_mut().take(); |
|
173 | self.docket(py).borrow_mut().take(); | |
174 | self.mmap(py).borrow_mut().take(); |
|
174 | self.mmap(py).borrow_mut().take(); | |
175 | self.call_cindex(py, "clearcaches", args, kw) |
|
175 | self.call_cindex(py, "clearcaches", args, kw) | |
176 | } |
|
176 | } | |
177 |
|
177 | |||
178 | /// return the raw binary string representing a revision |
|
178 | /// return the raw binary string representing a revision | |
179 | def entry_binary(&self, *args, **kw) -> PyResult<PyObject> { |
|
179 | def entry_binary(&self, *args, **kw) -> PyResult<PyObject> { | |
180 | self.call_cindex(py, "entry_binary", args, kw) |
|
180 | self.call_cindex(py, "entry_binary", args, kw) | |
181 | } |
|
181 | } | |
182 |
|
182 | |||
183 | /// return a binary packed version of the header |
|
183 | /// return a binary packed version of the header | |
184 | def pack_header(&self, *args, **kw) -> PyResult<PyObject> { |
|
184 | def pack_header(&self, *args, **kw) -> PyResult<PyObject> { | |
185 | self.call_cindex(py, "pack_header", args, kw) |
|
185 | self.call_cindex(py, "pack_header", args, kw) | |
186 | } |
|
186 | } | |
187 |
|
187 | |||
188 | /// get an index entry |
|
188 | /// get an index entry | |
189 | def get(&self, *args, **kw) -> PyResult<PyObject> { |
|
189 | def get(&self, *args, **kw) -> PyResult<PyObject> { | |
190 | self.call_cindex(py, "get", args, kw) |
|
190 | self.call_cindex(py, "get", args, kw) | |
191 | } |
|
191 | } | |
192 |
|
192 | |||
193 | /// compute phases |
|
193 | /// compute phases | |
194 | def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> { |
|
194 | def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> { | |
195 | self.call_cindex(py, "computephasesmapsets", args, kw) |
|
195 | self.call_cindex(py, "computephasesmapsets", args, kw) | |
196 | } |
|
196 | } | |
197 |
|
197 | |||
198 | /// reachableroots |
|
198 | /// reachableroots | |
199 | def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> { |
|
199 | def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> { | |
200 | self.call_cindex(py, "reachableroots2", args, kw) |
|
200 | self.call_cindex(py, "reachableroots2", args, kw) | |
201 | } |
|
201 | } | |
202 |
|
202 | |||
203 | /// get head revisions |
|
203 | /// get head revisions | |
204 | def headrevs(&self, *args, **kw) -> PyResult<PyObject> { |
|
204 | def headrevs(&self, *args, **kw) -> PyResult<PyObject> { | |
205 | self.call_cindex(py, "headrevs", args, kw) |
|
205 | self.call_cindex(py, "headrevs", args, kw) | |
206 | } |
|
206 | } | |
207 |
|
207 | |||
208 | /// get filtered head revisions |
|
208 | /// get filtered head revisions | |
209 | def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> { |
|
209 | def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> { | |
210 | self.call_cindex(py, "headrevsfiltered", args, kw) |
|
210 | self.call_cindex(py, "headrevsfiltered", args, kw) | |
211 | } |
|
211 | } | |
212 |
|
212 | |||
213 | /// True if the object is a snapshot |
|
213 | /// True if the object is a snapshot | |
214 | def issnapshot(&self, *args, **kw) -> PyResult<PyObject> { |
|
214 | def issnapshot(&self, *args, **kw) -> PyResult<PyObject> { | |
215 | self.call_cindex(py, "issnapshot", args, kw) |
|
215 | self.call_cindex(py, "issnapshot", args, kw) | |
216 | } |
|
216 | } | |
217 |
|
217 | |||
218 | /// Gather snapshot data in a cache dict |
|
218 | /// Gather snapshot data in a cache dict | |
219 | def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> { |
|
219 | def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> { | |
220 | self.call_cindex(py, "findsnapshots", args, kw) |
|
220 | self.call_cindex(py, "findsnapshots", args, kw) | |
221 | } |
|
221 | } | |
222 |
|
222 | |||
223 | /// determine revisions with deltas to reconstruct fulltext |
|
223 | /// determine revisions with deltas to reconstruct fulltext | |
224 | def deltachain(&self, *args, **kw) -> PyResult<PyObject> { |
|
224 | def deltachain(&self, *args, **kw) -> PyResult<PyObject> { | |
225 | self.call_cindex(py, "deltachain", args, kw) |
|
225 | self.call_cindex(py, "deltachain", args, kw) | |
226 | } |
|
226 | } | |
227 |
|
227 | |||
228 | /// slice planned chunk read to reach a density threshold |
|
228 | /// slice planned chunk read to reach a density threshold | |
229 | def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> { |
|
229 | def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> { | |
230 | self.call_cindex(py, "slicechunktodensity", args, kw) |
|
230 | self.call_cindex(py, "slicechunktodensity", args, kw) | |
231 | } |
|
231 | } | |
232 |
|
232 | |||
233 | /// stats for the index |
|
233 | /// stats for the index | |
234 | def stats(&self, *args, **kw) -> PyResult<PyObject> { |
|
234 | def stats(&self, *args, **kw) -> PyResult<PyObject> { | |
235 | self.call_cindex(py, "stats", args, kw) |
|
235 | self.call_cindex(py, "stats", args, kw) | |
236 | } |
|
236 | } | |
237 |
|
237 | |||
238 | // index_sequence_methods and index_mapping_methods. |
|
238 | // index_sequence_methods and index_mapping_methods. | |
239 | // |
|
239 | // | |
240 | // Since we call back through the high level Python API, |
|
240 | // Since we call back through the high level Python API, | |
241 | // there's no point making a distinction between index_get |
|
241 | // there's no point making a distinction between index_get | |
242 | // and index_getitem. |
|
242 | // and index_getitem. | |
243 |
|
243 | |||
244 | def __len__(&self) -> PyResult<usize> { |
|
244 | def __len__(&self) -> PyResult<usize> { | |
245 | self.cindex(py).borrow().inner().len(py) |
|
245 | self.cindex(py).borrow().inner().len(py) | |
246 | } |
|
246 | } | |
247 |
|
247 | |||
248 | def __getitem__(&self, key: PyObject) -> PyResult<PyObject> { |
|
248 | def __getitem__(&self, key: PyObject) -> PyResult<PyObject> { | |
249 | // this conversion seems needless, but that's actually because |
|
249 | // this conversion seems needless, but that's actually because | |
250 | // `index_getitem` does not handle conversion from PyLong, |
|
250 | // `index_getitem` does not handle conversion from PyLong, | |
251 | // which expressions such as [e for e in index] internally use. |
|
251 | // which expressions such as [e for e in index] internally use. | |
252 | // Note that we don't seem to have a direct way to call |
|
252 | // Note that we don't seem to have a direct way to call | |
253 | // PySequence_GetItem (does the job), which would possibly be better |
|
253 | // PySequence_GetItem (does the job), which would possibly be better | |
254 | // for performance |
|
254 | // for performance | |
255 |
let key = match key.extract::< |
|
255 | let key = match key.extract::<i32>(py) { | |
256 | Ok(rev) => rev.to_py_object(py).into_object(), |
|
256 | Ok(rev) => rev.to_py_object(py).into_object(), | |
257 | Err(_) => key, |
|
257 | Err(_) => key, | |
258 | }; |
|
258 | }; | |
259 | self.cindex(py).borrow().inner().get_item(py, key) |
|
259 | self.cindex(py).borrow().inner().get_item(py, key) | |
260 | } |
|
260 | } | |
261 |
|
261 | |||
262 | def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> { |
|
262 | def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> { | |
263 | self.cindex(py).borrow().inner().set_item(py, key, value) |
|
263 | self.cindex(py).borrow().inner().set_item(py, key, value) | |
264 | } |
|
264 | } | |
265 |
|
265 | |||
266 | def __contains__(&self, item: PyObject) -> PyResult<bool> { |
|
266 | def __contains__(&self, item: PyObject) -> PyResult<bool> { | |
267 | // ObjectProtocol does not seem to provide contains(), so |
|
267 | // ObjectProtocol does not seem to provide contains(), so | |
268 | // this is an equivalent implementation of the index_contains() |
|
268 | // this is an equivalent implementation of the index_contains() | |
269 | // defined in revlog.c |
|
269 | // defined in revlog.c | |
270 | let cindex = self.cindex(py).borrow(); |
|
270 | let cindex = self.cindex(py).borrow(); | |
271 |
match item.extract::< |
|
271 | match item.extract::<i32>(py) { | |
272 | Ok(rev) => { |
|
272 | Ok(rev) => { | |
273 | Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision) |
|
273 | Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision) | |
274 | } |
|
274 | } | |
275 | Err(_) => { |
|
275 | Err(_) => { | |
276 | cindex.inner().call_method( |
|
276 | cindex.inner().call_method( | |
277 | py, |
|
277 | py, | |
278 | "has_node", |
|
278 | "has_node", | |
279 | PyTuple::new(py, &[item]), |
|
279 | PyTuple::new(py, &[item]), | |
280 | None)? |
|
280 | None)? | |
281 | .extract(py) |
|
281 | .extract(py) | |
282 | } |
|
282 | } | |
283 | } |
|
283 | } | |
284 | } |
|
284 | } | |
285 |
|
285 | |||
286 | def nodemap_data_all(&self) -> PyResult<PyBytes> { |
|
286 | def nodemap_data_all(&self) -> PyResult<PyBytes> { | |
287 | self.inner_nodemap_data_all(py) |
|
287 | self.inner_nodemap_data_all(py) | |
288 | } |
|
288 | } | |
289 |
|
289 | |||
290 | def nodemap_data_incremental(&self) -> PyResult<PyObject> { |
|
290 | def nodemap_data_incremental(&self) -> PyResult<PyObject> { | |
291 | self.inner_nodemap_data_incremental(py) |
|
291 | self.inner_nodemap_data_incremental(py) | |
292 | } |
|
292 | } | |
293 | def update_nodemap_data( |
|
293 | def update_nodemap_data( | |
294 | &self, |
|
294 | &self, | |
295 | docket: PyObject, |
|
295 | docket: PyObject, | |
296 | nm_data: PyObject |
|
296 | nm_data: PyObject | |
297 | ) -> PyResult<PyObject> { |
|
297 | ) -> PyResult<PyObject> { | |
298 | self.inner_update_nodemap_data(py, docket, nm_data) |
|
298 | self.inner_update_nodemap_data(py, docket, nm_data) | |
299 | } |
|
299 | } | |
300 |
|
300 | |||
301 | @property |
|
301 | @property | |
302 | def entry_size(&self) -> PyResult<PyInt> { |
|
302 | def entry_size(&self) -> PyResult<PyInt> { | |
303 | self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py) |
|
303 | self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py) | |
304 | } |
|
304 | } | |
305 |
|
305 | |||
306 | @property |
|
306 | @property | |
307 | def rust_ext_compat(&self) -> PyResult<PyInt> { |
|
307 | def rust_ext_compat(&self) -> PyResult<PyInt> { | |
308 | self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py) |
|
308 | self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py) | |
309 | } |
|
309 | } | |
310 |
|
310 | |||
311 | }); |
|
311 | }); | |
312 |
|
312 | |||
313 | impl MixedIndex { |
|
313 | impl MixedIndex { | |
314 | fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> { |
|
314 | fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> { | |
315 | Self::create_instance( |
|
315 | Self::create_instance( | |
316 | py, |
|
316 | py, | |
317 | RefCell::new(cindex::Index::new(py, cindex)?), |
|
317 | RefCell::new(cindex::Index::new(py, cindex)?), | |
318 | RefCell::new(None), |
|
318 | RefCell::new(None), | |
319 | RefCell::new(None), |
|
319 | RefCell::new(None), | |
320 | RefCell::new(None), |
|
320 | RefCell::new(None), | |
321 | ) |
|
321 | ) | |
322 | } |
|
322 | } | |
323 |
|
323 | |||
324 | /// This is scaffolding at this point, but it could also become |
|
324 | /// This is scaffolding at this point, but it could also become | |
325 | /// a way to start a persistent nodemap or perform a |
|
325 | /// a way to start a persistent nodemap or perform a | |
326 | /// vacuum / repack operation |
|
326 | /// vacuum / repack operation | |
327 | fn fill_nodemap( |
|
327 | fn fill_nodemap( | |
328 | &self, |
|
328 | &self, | |
329 | py: Python, |
|
329 | py: Python, | |
330 | nt: &mut NodeTree, |
|
330 | nt: &mut NodeTree, | |
331 | ) -> PyResult<PyObject> { |
|
331 | ) -> PyResult<PyObject> { | |
332 | let index = self.cindex(py).borrow(); |
|
332 | let index = self.cindex(py).borrow(); | |
333 | for r in 0..index.len() { |
|
333 | for r in 0..index.len() { | |
334 | let rev = r as Revision; |
|
334 | let rev = r as Revision; | |
335 | // in this case node() won't ever return None |
|
335 | // in this case node() won't ever return None | |
336 | nt.insert(&*index, index.node(rev).unwrap(), rev) |
|
336 | nt.insert(&*index, index.node(rev).unwrap(), rev) | |
337 | .map_err(|e| nodemap_error(py, e))? |
|
337 | .map_err(|e| nodemap_error(py, e))? | |
338 | } |
|
338 | } | |
339 | Ok(py.None()) |
|
339 | Ok(py.None()) | |
340 | } |
|
340 | } | |
341 |
|
341 | |||
342 | fn get_nodetree<'a>( |
|
342 | fn get_nodetree<'a>( | |
343 | &'a self, |
|
343 | &'a self, | |
344 | py: Python<'a>, |
|
344 | py: Python<'a>, | |
345 | ) -> PyResult<&'a RefCell<Option<NodeTree>>> { |
|
345 | ) -> PyResult<&'a RefCell<Option<NodeTree>>> { | |
346 | if self.nt(py).borrow().is_none() { |
|
346 | if self.nt(py).borrow().is_none() { | |
347 | let readonly = Box::new(Vec::new()); |
|
347 | let readonly = Box::new(Vec::new()); | |
348 | let mut nt = NodeTree::load_bytes(readonly, 0); |
|
348 | let mut nt = NodeTree::load_bytes(readonly, 0); | |
349 | self.fill_nodemap(py, &mut nt)?; |
|
349 | self.fill_nodemap(py, &mut nt)?; | |
350 | self.nt(py).borrow_mut().replace(nt); |
|
350 | self.nt(py).borrow_mut().replace(nt); | |
351 | } |
|
351 | } | |
352 | Ok(self.nt(py)) |
|
352 | Ok(self.nt(py)) | |
353 | } |
|
353 | } | |
354 |
|
354 | |||
355 | /// forward a method call to the underlying C index |
|
355 | /// forward a method call to the underlying C index | |
356 | fn call_cindex( |
|
356 | fn call_cindex( | |
357 | &self, |
|
357 | &self, | |
358 | py: Python, |
|
358 | py: Python, | |
359 | name: &str, |
|
359 | name: &str, | |
360 | args: &PyTuple, |
|
360 | args: &PyTuple, | |
361 | kwargs: Option<&PyDict>, |
|
361 | kwargs: Option<&PyDict>, | |
362 | ) -> PyResult<PyObject> { |
|
362 | ) -> PyResult<PyObject> { | |
363 | self.cindex(py) |
|
363 | self.cindex(py) | |
364 | .borrow() |
|
364 | .borrow() | |
365 | .inner() |
|
365 | .inner() | |
366 | .call_method(py, name, args, kwargs) |
|
366 | .call_method(py, name, args, kwargs) | |
367 | } |
|
367 | } | |
368 |
|
368 | |||
369 | pub fn clone_cindex(&self, py: Python) -> cindex::Index { |
|
369 | pub fn clone_cindex(&self, py: Python) -> cindex::Index { | |
370 | self.cindex(py).borrow().clone_ref(py) |
|
370 | self.cindex(py).borrow().clone_ref(py) | |
371 | } |
|
371 | } | |
372 |
|
372 | |||
373 | /// Returns the full nodemap bytes to be written as-is to disk |
|
373 | /// Returns the full nodemap bytes to be written as-is to disk | |
374 | fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> { |
|
374 | fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> { | |
375 | let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap(); |
|
375 | let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap(); | |
376 | let (readonly, bytes) = nodemap.into_readonly_and_added_bytes(); |
|
376 | let (readonly, bytes) = nodemap.into_readonly_and_added_bytes(); | |
377 |
|
377 | |||
378 | // If there's anything readonly, we need to build the data again from |
|
378 | // If there's anything readonly, we need to build the data again from | |
379 | // scratch |
|
379 | // scratch | |
380 | let bytes = if readonly.len() > 0 { |
|
380 | let bytes = if readonly.len() > 0 { | |
381 | let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0); |
|
381 | let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0); | |
382 | self.fill_nodemap(py, &mut nt)?; |
|
382 | self.fill_nodemap(py, &mut nt)?; | |
383 |
|
383 | |||
384 | let (readonly, bytes) = nt.into_readonly_and_added_bytes(); |
|
384 | let (readonly, bytes) = nt.into_readonly_and_added_bytes(); | |
385 | assert_eq!(readonly.len(), 0); |
|
385 | assert_eq!(readonly.len(), 0); | |
386 |
|
386 | |||
387 | bytes |
|
387 | bytes | |
388 | } else { |
|
388 | } else { | |
389 | bytes |
|
389 | bytes | |
390 | }; |
|
390 | }; | |
391 |
|
391 | |||
392 | let bytes = PyBytes::new(py, &bytes); |
|
392 | let bytes = PyBytes::new(py, &bytes); | |
393 | Ok(bytes) |
|
393 | Ok(bytes) | |
394 | } |
|
394 | } | |
395 |
|
395 | |||
396 | /// Returns the last saved docket along with the size of any changed data |
|
396 | /// Returns the last saved docket along with the size of any changed data | |
397 | /// (in number of blocks), and said data as bytes. |
|
397 | /// (in number of blocks), and said data as bytes. | |
398 | fn inner_nodemap_data_incremental( |
|
398 | fn inner_nodemap_data_incremental( | |
399 | &self, |
|
399 | &self, | |
400 | py: Python, |
|
400 | py: Python, | |
401 | ) -> PyResult<PyObject> { |
|
401 | ) -> PyResult<PyObject> { | |
402 | let docket = self.docket(py).borrow(); |
|
402 | let docket = self.docket(py).borrow(); | |
403 | let docket = match docket.as_ref() { |
|
403 | let docket = match docket.as_ref() { | |
404 | Some(d) => d, |
|
404 | Some(d) => d, | |
405 | None => return Ok(py.None()), |
|
405 | None => return Ok(py.None()), | |
406 | }; |
|
406 | }; | |
407 |
|
407 | |||
408 | let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap(); |
|
408 | let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap(); | |
409 | let masked_blocks = node_tree.masked_readonly_blocks(); |
|
409 | let masked_blocks = node_tree.masked_readonly_blocks(); | |
410 | let (_, data) = node_tree.into_readonly_and_added_bytes(); |
|
410 | let (_, data) = node_tree.into_readonly_and_added_bytes(); | |
411 | let changed = masked_blocks * std::mem::size_of::<Block>(); |
|
411 | let changed = masked_blocks * std::mem::size_of::<Block>(); | |
412 |
|
412 | |||
413 | Ok((docket, changed, PyBytes::new(py, &data)) |
|
413 | Ok((docket, changed, PyBytes::new(py, &data)) | |
414 | .to_py_object(py) |
|
414 | .to_py_object(py) | |
415 | .into_object()) |
|
415 | .into_object()) | |
416 | } |
|
416 | } | |
417 |
|
417 | |||
418 | /// Update the nodemap from the new (mmaped) data. |
|
418 | /// Update the nodemap from the new (mmaped) data. | |
419 | /// The docket is kept as a reference for later incremental calls. |
|
419 | /// The docket is kept as a reference for later incremental calls. | |
420 | fn inner_update_nodemap_data( |
|
420 | fn inner_update_nodemap_data( | |
421 | &self, |
|
421 | &self, | |
422 | py: Python, |
|
422 | py: Python, | |
423 | docket: PyObject, |
|
423 | docket: PyObject, | |
424 | nm_data: PyObject, |
|
424 | nm_data: PyObject, | |
425 | ) -> PyResult<PyObject> { |
|
425 | ) -> PyResult<PyObject> { | |
426 | let buf = PyBuffer::get(py, &nm_data)?; |
|
426 | let buf = PyBuffer::get(py, &nm_data)?; | |
427 | let len = buf.item_count(); |
|
427 | let len = buf.item_count(); | |
428 |
|
428 | |||
429 | // Build a slice from the mmap'ed buffer data |
|
429 | // Build a slice from the mmap'ed buffer data | |
430 | let cbuf = buf.buf_ptr(); |
|
430 | let cbuf = buf.buf_ptr(); | |
431 | let bytes = if std::mem::size_of::<u8>() == buf.item_size() |
|
431 | let bytes = if std::mem::size_of::<u8>() == buf.item_size() | |
432 | && buf.is_c_contiguous() |
|
432 | && buf.is_c_contiguous() | |
433 | && u8::is_compatible_format(buf.format()) |
|
433 | && u8::is_compatible_format(buf.format()) | |
434 | { |
|
434 | { | |
435 | unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) } |
|
435 | unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) } | |
436 | } else { |
|
436 | } else { | |
437 | return Err(PyErr::new::<ValueError, _>( |
|
437 | return Err(PyErr::new::<ValueError, _>( | |
438 | py, |
|
438 | py, | |
439 | "Nodemap data buffer has an invalid memory representation" |
|
439 | "Nodemap data buffer has an invalid memory representation" | |
440 | .to_string(), |
|
440 | .to_string(), | |
441 | )); |
|
441 | )); | |
442 | }; |
|
442 | }; | |
443 |
|
443 | |||
444 | // Keep a reference to the mmap'ed buffer, otherwise we get a dangling |
|
444 | // Keep a reference to the mmap'ed buffer, otherwise we get a dangling | |
445 | // pointer. |
|
445 | // pointer. | |
446 | self.mmap(py).borrow_mut().replace(buf); |
|
446 | self.mmap(py).borrow_mut().replace(buf); | |
447 |
|
447 | |||
448 | let mut nt = NodeTree::load_bytes(Box::new(bytes), len); |
|
448 | let mut nt = NodeTree::load_bytes(Box::new(bytes), len); | |
449 |
|
449 | |||
450 | let data_tip = |
|
450 | let data_tip = | |
451 |
docket.getattr(py, "tip_rev")?.extract::< |
|
451 | docket.getattr(py, "tip_rev")?.extract::<i32>(py)?.into(); | |
452 | self.docket(py).borrow_mut().replace(docket.clone_ref(py)); |
|
452 | self.docket(py).borrow_mut().replace(docket.clone_ref(py)); | |
453 | let idx = self.cindex(py).borrow(); |
|
453 | let idx = self.cindex(py).borrow(); | |
|
454 | let data_tip = idx.check_revision(data_tip).ok_or_else(|| { | |||
|
455 | nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip)) | |||
|
456 | })?; | |||
454 | let current_tip = idx.len(); |
|
457 | let current_tip = idx.len(); | |
455 |
|
458 | |||
456 | for r in (data_tip + 1)..current_tip as Revision { |
|
459 | for r in (data_tip + 1)..current_tip as Revision { | |
457 | let rev = r as Revision; |
|
460 | let rev = r as Revision; | |
458 | // in this case node() won't ever return None |
|
461 | // in this case node() won't ever return None | |
459 | nt.insert(&*idx, idx.node(rev).unwrap(), rev) |
|
462 | nt.insert(&*idx, idx.node(rev).unwrap(), rev) | |
460 | .map_err(|e| nodemap_error(py, e))? |
|
463 | .map_err(|e| nodemap_error(py, e))? | |
461 | } |
|
464 | } | |
462 |
|
465 | |||
463 | *self.nt(py).borrow_mut() = Some(nt); |
|
466 | *self.nt(py).borrow_mut() = Some(nt); | |
464 |
|
467 | |||
465 | Ok(py.None()) |
|
468 | Ok(py.None()) | |
466 | } |
|
469 | } | |
467 | } |
|
470 | } | |
468 |
|
471 | |||
469 | fn revlog_error(py: Python) -> PyErr { |
|
472 | fn revlog_error(py: Python) -> PyErr { | |
470 | match py |
|
473 | match py | |
471 | .import("mercurial.error") |
|
474 | .import("mercurial.error") | |
472 | .and_then(|m| m.get(py, "RevlogError")) |
|
475 | .and_then(|m| m.get(py, "RevlogError")) | |
473 | { |
|
476 | { | |
474 | Err(e) => e, |
|
477 | Err(e) => e, | |
475 | Ok(cls) => PyErr::from_instance( |
|
478 | Ok(cls) => PyErr::from_instance( | |
476 | py, |
|
479 | py, | |
477 | cls.call(py, (py.None(),), None).ok().into_py_object(py), |
|
480 | cls.call(py, (py.None(),), None).ok().into_py_object(py), | |
478 | ), |
|
481 | ), | |
479 | } |
|
482 | } | |
480 | } |
|
483 | } | |
481 |
|
484 | |||
482 | fn rev_not_in_index(py: Python, rev: Revision) -> PyErr { |
|
485 | fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr { | |
483 | PyErr::new::<ValueError, _>( |
|
486 | PyErr::new::<ValueError, _>( | |
484 | py, |
|
487 | py, | |
485 | format!( |
|
488 | format!( | |
486 | "Inconsistency: Revision {} found in nodemap \ |
|
489 | "Inconsistency: Revision {} found in nodemap \ | |
487 | is not in revlog index", |
|
490 | is not in revlog index", | |
488 | rev |
|
491 | rev | |
489 | ), |
|
492 | ), | |
490 | ) |
|
493 | ) | |
491 | } |
|
494 | } | |
492 |
|
495 | |||
493 | /// Standard treatment of NodeMapError |
|
496 | /// Standard treatment of NodeMapError | |
494 | fn nodemap_error(py: Python, err: NodeMapError) -> PyErr { |
|
497 | fn nodemap_error(py: Python, err: NodeMapError) -> PyErr { | |
495 | match err { |
|
498 | match err { | |
496 | NodeMapError::MultipleResults => revlog_error(py), |
|
499 | NodeMapError::MultipleResults => revlog_error(py), | |
497 | NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r), |
|
500 | NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r), | |
498 | } |
|
501 | } | |
499 | } |
|
502 | } | |
500 |
|
503 | |||
501 | /// Create the module, with __package__ given from parent |
|
504 | /// Create the module, with __package__ given from parent | |
502 | pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> { |
|
505 | pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> { | |
503 | let dotted_name = &format!("{}.revlog", package); |
|
506 | let dotted_name = &format!("{}.revlog", package); | |
504 | let m = PyModule::new(py, dotted_name)?; |
|
507 | let m = PyModule::new(py, dotted_name)?; | |
505 | m.add(py, "__package__", package)?; |
|
508 | m.add(py, "__package__", package)?; | |
506 | m.add(py, "__doc__", "RevLog - Rust implementations")?; |
|
509 | m.add(py, "__doc__", "RevLog - Rust implementations")?; | |
507 |
|
510 | |||
508 | m.add_class::<MixedIndex>(py)?; |
|
511 | m.add_class::<MixedIndex>(py)?; | |
509 |
|
512 | |||
510 | let sys = PyModule::import(py, "sys")?; |
|
513 | let sys = PyModule::import(py, "sys")?; | |
511 | let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; |
|
514 | let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; | |
512 | sys_modules.set_item(py, dotted_name, &m)?; |
|
515 | sys_modules.set_item(py, dotted_name, &m)?; | |
513 |
|
516 | |||
514 | Ok(m) |
|
517 | Ok(m) | |
515 | } |
|
518 | } |
General Comments 0
You need to be logged in to leave comments.
Login now