Show More
@@ -1,115 +1,115 b'' | |||
|
// cat.rs
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use crate::repo::Repo; |
|
9 | 9 | use crate::revlog::Node; |
|
10 | 10 | use crate::revlog::RevlogError; |
|
11 | 11 | |
|
12 | 12 | use crate::utils::hg_path::HgPath; |
|
13 | 13 | |
|
14 | 14 | use crate::errors::HgError; |
|
15 | 15 | use crate::manifest::Manifest; |
|
16 | 16 | use crate::manifest::ManifestEntry; |
|
17 | 17 | use itertools::put_back; |
|
18 | 18 | use itertools::PutBack; |
|
19 | 19 | use std::cmp::Ordering; |
|
20 | 20 | |
|
21 | 21 | pub struct CatOutput<'a> { |
|
22 | 22 | /// Whether any file in the manifest matched the paths given as CLI |
|
23 | 23 | /// arguments |
|
24 | 24 | pub found_any: bool, |
|
25 | 25 | /// The contents of matching files, in manifest order |
|
26 | 26 | pub results: Vec<(&'a HgPath, Vec<u8>)>, |
|
27 | 27 | /// Which of the CLI arguments did not match any manifest file |
|
28 | 28 | pub missing: Vec<&'a HgPath>, |
|
29 | 29 | /// The node ID that the given revset was resolved to |
|
30 | 30 | pub node: Node, |
|
31 | 31 | } |
|
32 | 32 | |
|
33 | 33 | // Find an item in an iterator over a sorted collection. |
|
34 | 34 | fn find_item<'a>( |
|
35 | 35 | i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>, |
|
36 | 36 | needle: &HgPath, |
|
37 | 37 | ) -> Result<Option<Node>, HgError> { |
|
38 | 38 | loop { |
|
39 | 39 | match i.next() { |
|
40 | 40 | None => return Ok(None), |
|
41 | 41 | Some(result) => { |
|
42 | 42 | let entry = result?; |
|
43 | 43 | match needle.as_bytes().cmp(entry.path.as_bytes()) { |
|
44 | 44 | Ordering::Less => { |
|
45 | 45 | i.put_back(Ok(entry)); |
|
46 | 46 | return Ok(None); |
|
47 | 47 | } |
|
48 | 48 | Ordering::Greater => continue, |
|
49 | 49 | Ordering::Equal => return Ok(Some(entry.node_id()?)), |
|
50 | 50 | } |
|
51 | 51 | } |
|
52 | 52 | } |
|
53 | 53 | } |
|
54 | 54 | } |
|
55 | 55 | |
|
56 | 56 | // Tuple of (missing, found) paths in the manifest |
|
57 | 57 | type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>); |
|
58 | 58 | |
|
59 | 59 | fn find_files_in_manifest<'query>( |
|
60 | 60 | manifest: &Manifest, |
|
61 | 61 | query: impl Iterator<Item = &'query HgPath>, |
|
62 | 62 | ) -> Result<ManifestQueryResponse<'query>, HgError> { |
|
63 | 63 | let mut manifest = put_back(manifest.iter()); |
|
64 | 64 | let mut res = vec![]; |
|
65 | 65 | let mut missing = vec![]; |
|
66 | 66 | |
|
67 | 67 | for file in query { |
|
68 | 68 | match find_item(&mut manifest, file)? { |
|
69 | 69 | None => missing.push(file), |
|
70 | 70 | Some(item) => res.push((file, item)), |
|
71 | 71 | } |
|
72 | 72 | } |
|
73 | 73 | Ok((res, missing)) |
|
74 | 74 | } |
|
75 | 75 | |
|
76 | 76 | /// Output the given revision of files |
|
77 | 77 | /// |
|
78 | 78 | /// * `root`: Repository root |
|
79 | 79 | /// * `rev`: The revision to cat the files from. |
|
80 | 80 | /// * `files`: The files to output. |
|
81 | 81 | pub fn cat<'a>( |
|
82 | 82 | repo: &Repo, |
|
83 | 83 | revset: &str, |
|
84 | 84 | mut files: Vec<&'a HgPath>, |
|
85 | 85 | ) -> Result<CatOutput<'a>, RevlogError> { |
|
86 | 86 | let rev = crate::revset::resolve_single(revset, repo)?; |
|
87 | let manifest = repo.manifest_for_rev(rev)?; | |
|
87 | let manifest = repo.manifest_for_rev(rev.into())?; | |
|
88 | 88 | let node = *repo |
|
89 | 89 | .changelog()? |
|
90 | .node_from_rev(rev) | |
|
90 | .node_from_rev(rev.into()) | |
|
91 | 91 | .expect("should succeed when repo.manifest did"); |
|
92 | 92 | let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![]; |
|
93 | 93 | let mut found_any = false; |
|
94 | 94 | |
|
95 | 95 | files.sort_unstable(); |
|
96 | 96 | |
|
97 | 97 | let (found, missing) = |
|
98 | 98 | find_files_in_manifest(&manifest, files.into_iter())?; |
|
99 | 99 | |
|
100 | 100 | for (file_path, file_node) in found { |
|
101 | 101 | found_any = true; |
|
102 | 102 | let file_log = repo.filelog(file_path)?; |
|
103 | 103 | results.push(( |
|
104 | 104 | file_path, |
|
105 | 105 | file_log.data_for_node(file_node)?.into_file_data()?, |
|
106 | 106 | )); |
|
107 | 107 | } |
|
108 | 108 | |
|
109 | 109 | Ok(CatOutput { |
|
110 | 110 | found_any, |
|
111 | 111 | results, |
|
112 | 112 | missing, |
|
113 | 113 | node, |
|
114 | 114 | }) |
|
115 | 115 | } |
@@ -1,38 +1,38 b'' | |||
|
1 | 1 | // debugdata.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use crate::repo::Repo; |
|
9 | 9 | use crate::requirements; |
|
10 | 10 | use crate::revlog::{Revlog, RevlogError}; |
|
11 | 11 | |
|
/// Kind of data to debug: which store revlog a `debugdata` request targets.
#[derive(Debug, Copy, Clone)]
pub enum DebugDataKind {
    /// The changelog (`00changelog.i`).
    Changelog,
    /// The manifest log (`00manifest.i`).
    Manifest,
}
|
18 | 18 | |
|
19 | 19 | /// Dump the contents data of a revision. |
|
20 | 20 | pub fn debug_data( |
|
21 | 21 | repo: &Repo, |
|
22 | 22 | revset: &str, |
|
23 | 23 | kind: DebugDataKind, |
|
24 | 24 | ) -> Result<Vec<u8>, RevlogError> { |
|
25 | 25 | let index_file = match kind { |
|
26 | 26 | DebugDataKind::Changelog => "00changelog.i", |
|
27 | 27 | DebugDataKind::Manifest => "00manifest.i", |
|
28 | 28 | }; |
|
29 | 29 | let use_nodemap = repo |
|
30 | 30 | .requirements() |
|
31 | 31 | .contains(requirements::NODEMAP_REQUIREMENT); |
|
32 | 32 | let revlog = |
|
33 | 33 | Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?; |
|
34 | 34 | let rev = |
|
35 | 35 | crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?; |
|
36 | let data = revlog.get_rev_data(rev)?; | |
|
36 | let data = revlog.get_rev_data_for_checked_rev(rev)?; | |
|
37 | 37 | Ok(data.into_owned()) |
|
38 | 38 | } |
@@ -1,45 +1,45 b'' | |||
|
1 | 1 | // list_tracked_files.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use crate::errors::HgError; |
|
9 | 9 | use crate::matchers::Matcher; |
|
10 | 10 | use crate::repo::Repo; |
|
11 | 11 | use crate::revlog::manifest::Manifest; |
|
12 | 12 | use crate::revlog::RevlogError; |
|
13 | 13 | use crate::utils::filter_map_results; |
|
14 | 14 | use crate::utils::hg_path::HgPath; |
|
15 | 15 | |
|
16 | 16 | /// List files under Mercurial control at a given revision. |
|
17 | 17 | pub fn list_rev_tracked_files( |
|
18 | 18 | repo: &Repo, |
|
19 | 19 | revset: &str, |
|
20 | 20 | narrow_matcher: Box<dyn Matcher>, |
|
21 | 21 | ) -> Result<FilesForRev, RevlogError> { |
|
22 | 22 | let rev = crate::revset::resolve_single(revset, repo)?; |
|
23 | 23 | Ok(FilesForRev { |
|
24 | manifest: repo.manifest_for_rev(rev)?, | |
|
24 | manifest: repo.manifest_for_rev(rev.into())?, | |
|
25 | 25 | narrow_matcher, |
|
26 | 26 | }) |
|
27 | 27 | } |
|
28 | 28 | |
|
29 | 29 | pub struct FilesForRev { |
|
30 | 30 | manifest: Manifest, |
|
31 | 31 | narrow_matcher: Box<dyn Matcher>, |
|
32 | 32 | } |
|
33 | 33 | |
|
34 | 34 | impl FilesForRev { |
|
35 | 35 | pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> { |
|
36 | 36 | filter_map_results(self.manifest.iter(), |entry| { |
|
37 | 37 | let path = entry.path; |
|
38 | 38 | Ok(if self.narrow_matcher.matches(path) { |
|
39 | 39 | Some(path) |
|
40 | 40 | } else { |
|
41 | 41 | None |
|
42 | 42 | }) |
|
43 | 43 | }) |
|
44 | 44 | } |
|
45 | 45 | } |
@@ -1,782 +1,782 b'' | |||
|
1 | 1 | use crate::changelog::Changelog; |
|
2 | 2 | use crate::config::{Config, ConfigError, ConfigParseError}; |
|
3 | 3 | use crate::dirstate::DirstateParents; |
|
4 | 4 | use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode; |
|
5 | 5 | use crate::dirstate_tree::on_disk::Docket as DirstateDocket; |
|
6 | 6 | use crate::dirstate_tree::owning::OwningDirstateMap; |
|
7 | 7 | use crate::errors::HgResultExt; |
|
8 | 8 | use crate::errors::{HgError, IoResultExt}; |
|
9 | 9 | use crate::lock::{try_with_lock_no_wait, LockError}; |
|
10 | 10 | use crate::manifest::{Manifest, Manifestlog}; |
|
11 | 11 | use crate::revlog::filelog::Filelog; |
|
12 | 12 | use crate::revlog::RevlogError; |
|
13 | 13 | use crate::utils::debug::debug_wait_for_file_or_print; |
|
14 | 14 | use crate::utils::files::get_path_from_bytes; |
|
15 | 15 | use crate::utils::hg_path::HgPath; |
|
16 | 16 | use crate::utils::SliceExt; |
|
17 | 17 | use crate::vfs::{is_dir, is_file, Vfs}; |
|
18 | use crate::{requirements, NodePrefix}; | |
|
19 | use crate::{DirstateError, Revision}; | |
|
18 | use crate::DirstateError; | |
|
19 | use crate::{requirements, NodePrefix, UncheckedRevision}; | |
|
20 | 20 | use std::cell::{Ref, RefCell, RefMut}; |
|
21 | 21 | use std::collections::HashSet; |
|
22 | 22 | use std::io::Seek; |
|
23 | 23 | use std::io::SeekFrom; |
|
24 | 24 | use std::io::Write as IoWrite; |
|
25 | 25 | use std::path::{Path, PathBuf}; |
|
26 | 26 | |
|
/// How many times to retry the racy dirstate-v2 docket + data-file read
/// before giving up.
const V2_MAX_READ_ATTEMPTS: usize = 5;

/// `(dirstate inode, data file uuid, data size)`: what is needed to detect
/// that another process replaced the dirstate since we last read it.
type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
|
30 | 30 | |
|
/// A repository on disk
pub struct Repo {
    // Root of the working copy (the directory containing `.hg`).
    working_directory: PathBuf,
    // The `.hg` directory itself.
    dot_hg: PathBuf,
    // The store directory: `.hg/store`, or the share source's store when
    // this repo is shared (see `new_at_path`).
    store: PathBuf,
    // Contents of `.hg/requires` (extended with the store's requirements
    // when share-safe).
    requirements: HashSet<String>,
    // Combined global + repo configuration.
    config: Config,
    // Lazily-populated caches. NOTE(review): `LazyCell` here appears to be
    // a project-local cell type (`new`/`get_or_init`/`set` API), not
    // `std::cell::LazyCell` — confirm against its definition.
    dirstate_parents: LazyCell<DirstateParents>,
    dirstate_map: LazyCell<OwningDirstateMap>,
    changelog: LazyCell<Changelog>,
    manifestlog: LazyCell<Manifestlog>,
}
|
43 | 43 | |
|
// Errors from locating or opening a repository.
#[derive(Debug, derive_more::From)]
pub enum RepoError {
    /// No repository was found at (or above) the given path.
    NotFound {
        at: PathBuf,
    },
    /// A repo-level configuration file could not be parsed.
    #[from]
    ConfigParseError(ConfigParseError),
    /// Any other error.
    #[from]
    Other(HgError),
}
|
54 | 54 | |
|
55 | 55 | impl From<ConfigError> for RepoError { |
|
56 | 56 | fn from(error: ConfigError) -> Self { |
|
57 | 57 | match error { |
|
58 | 58 | ConfigError::Parse(error) => error.into(), |
|
59 | 59 | ConfigError::Other(error) => error.into(), |
|
60 | 60 | } |
|
61 | 61 | } |
|
62 | 62 | } |
|
63 | 63 | |
|
64 | 64 | impl Repo { |
|
65 | 65 | /// tries to find nearest repository root in current working directory or |
|
66 | 66 | /// its ancestors |
|
67 | 67 | pub fn find_repo_root() -> Result<PathBuf, RepoError> { |
|
68 | 68 | let current_directory = crate::utils::current_dir()?; |
|
69 | 69 | // ancestors() is inclusive: it first yields `current_directory` |
|
70 | 70 | // as-is. |
|
71 | 71 | for ancestor in current_directory.ancestors() { |
|
72 | 72 | if is_dir(ancestor.join(".hg"))? { |
|
73 | 73 | return Ok(ancestor.to_path_buf()); |
|
74 | 74 | } |
|
75 | 75 | } |
|
76 | 76 | Err(RepoError::NotFound { |
|
77 | 77 | at: current_directory, |
|
78 | 78 | }) |
|
79 | 79 | } |
|
80 | 80 | |
|
81 | 81 | /// Find a repository, either at the given path (which must contain a `.hg` |
|
82 | 82 | /// sub-directory) or by searching the current directory and its |
|
83 | 83 | /// ancestors. |
|
84 | 84 | /// |
|
85 | 85 | /// A method with two very different "modes" like this usually a code smell |
|
86 | 86 | /// to make two methods instead, but in this case an `Option` is what rhg |
|
87 | 87 | /// sub-commands get from Clap for the `-R` / `--repository` CLI argument. |
|
88 | 88 | /// Having two methods would just move that `if` to almost all callers. |
|
89 | 89 | pub fn find( |
|
90 | 90 | config: &Config, |
|
91 | 91 | explicit_path: Option<PathBuf>, |
|
92 | 92 | ) -> Result<Self, RepoError> { |
|
93 | 93 | if let Some(root) = explicit_path { |
|
94 | 94 | if is_dir(root.join(".hg"))? { |
|
95 | 95 | Self::new_at_path(root, config) |
|
96 | 96 | } else if is_file(&root)? { |
|
97 | 97 | Err(HgError::unsupported("bundle repository").into()) |
|
98 | 98 | } else { |
|
99 | 99 | Err(RepoError::NotFound { at: root }) |
|
100 | 100 | } |
|
101 | 101 | } else { |
|
102 | 102 | let root = Self::find_repo_root()?; |
|
103 | 103 | Self::new_at_path(root, config) |
|
104 | 104 | } |
|
105 | 105 | } |
|
106 | 106 | |
|
    /// To be called after checking that `.hg` is a sub-directory
    ///
    /// Loads requirements and configuration, resolving the store location
    /// through `.hg/sharedpath` for shared repositories.
    fn new_at_path(
        working_directory: PathBuf,
        config: &Config,
    ) -> Result<Self, RepoError> {
        let dot_hg = working_directory.join(".hg");

        let mut repo_config_files =
            vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];

        let hg_vfs = Vfs { base: &dot_hg };
        let mut reqs = requirements::load_if_exists(hg_vfs)?;
        let relative =
            reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
        let shared =
            reqs.contains(requirements::SHARED_REQUIREMENT) || relative;

        // From `mercurial/localrepo.py`:
        //
        // if .hg/requires contains the sharesafe requirement, it means
        // there exists a `.hg/store/requires` too and we should read it
        // NOTE: presence of SHARESAFE_REQUIREMENT imply that store
        // requirement is present. We never write SHARESAFE_REQUIREMENT for
        // a repo if store is not present, refer checkrequirementscompat()
        // for that
        //
        // However, if SHARESAFE_REQUIREMENT is not present, it means that
        // the repository was shared the old way. We check the share source
        // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
        // current repository needs to be reshared
        let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);

        let store_path;
        if !shared {
            store_path = dot_hg.join("store");
        } else {
            // Resolve the share source from `.hg/sharedpath` (possibly
            // relative to `.hg`).
            let bytes = hg_vfs.read("sharedpath")?;
            let mut shared_path =
                get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
                    .to_owned();
            if relative {
                shared_path = dot_hg.join(shared_path)
            }
            if !is_dir(&shared_path)? {
                return Err(HgError::corrupted(format!(
                    ".hg/sharedpath points to nonexistent directory {}",
                    shared_path.display()
                ))
                .into());
            }

            store_path = shared_path.join("store");

            let source_is_share_safe =
                requirements::load(Vfs { base: &shared_path })?
                    .contains(requirements::SHARESAFE_REQUIREMENT);

            // This repo and its share source must agree on share-safe.
            if share_safe != source_is_share_safe {
                return Err(
                    HgError::unsupported("share-safe mismatch").into()
                );
            }

            if share_safe {
                repo_config_files.insert(0, shared_path.join("hgrc"))
            }
        }
        if share_safe {
            reqs.extend(requirements::load(Vfs { base: &store_path })?);
        }

        // HGRCSKIPREPO disables reading repo-level config files.
        let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
            config.combine_with_repo(&repo_config_files)?
        } else {
            config.clone()
        };

        let repo = Self {
            requirements: reqs,
            working_directory,
            store: store_path,
            dot_hg,
            config: repo_config,
            dirstate_parents: LazyCell::new(),
            dirstate_map: LazyCell::new(),
            changelog: LazyCell::new(),
            manifestlog: LazyCell::new(),
        };

        requirements::check(&repo)?;

        Ok(repo)
    }
|
197 | 197 | |
|
198 | 198 | pub fn working_directory_path(&self) -> &Path { |
|
199 | 199 | &self.working_directory |
|
200 | 200 | } |
|
201 | 201 | |
|
202 | 202 | pub fn requirements(&self) -> &HashSet<String> { |
|
203 | 203 | &self.requirements |
|
204 | 204 | } |
|
205 | 205 | |
|
206 | 206 | pub fn config(&self) -> &Config { |
|
207 | 207 | &self.config |
|
208 | 208 | } |
|
209 | 209 | |
|
210 | 210 | /// For accessing repository files (in `.hg`), except for the store |
|
211 | 211 | /// (`.hg/store`). |
|
212 | 212 | pub fn hg_vfs(&self) -> Vfs<'_> { |
|
213 | 213 | Vfs { base: &self.dot_hg } |
|
214 | 214 | } |
|
215 | 215 | |
|
216 | 216 | /// For accessing repository store files (in `.hg/store`) |
|
217 | 217 | pub fn store_vfs(&self) -> Vfs<'_> { |
|
218 | 218 | Vfs { base: &self.store } |
|
219 | 219 | } |
|
220 | 220 | |
|
221 | 221 | /// For accessing the working copy |
|
222 | 222 | pub fn working_directory_vfs(&self) -> Vfs<'_> { |
|
223 | 223 | Vfs { |
|
224 | 224 | base: &self.working_directory, |
|
225 | 225 | } |
|
226 | 226 | } |
|
227 | 227 | |
|
228 | 228 | pub fn try_with_wlock_no_wait<R>( |
|
229 | 229 | &self, |
|
230 | 230 | f: impl FnOnce() -> R, |
|
231 | 231 | ) -> Result<R, LockError> { |
|
232 | 232 | try_with_lock_no_wait(self.hg_vfs(), "wlock", f) |
|
233 | 233 | } |
|
234 | 234 | |
|
235 | 235 | /// Whether this repo should use dirstate-v2. |
|
236 | 236 | /// The presence of `dirstate-v2` in the requirements does not mean that |
|
237 | 237 | /// the on-disk dirstate is necessarily in version 2. In most cases, |
|
238 | 238 | /// a dirstate-v2 file will indeed be found, but in rare cases (like the |
|
239 | 239 | /// upgrade mechanism being cut short), the on-disk version will be a |
|
240 | 240 | /// v1 file. |
|
241 | 241 | /// Semantically, having a requirement only means that a client cannot |
|
242 | 242 | /// properly understand or properly update the repo if it lacks the support |
|
243 | 243 | /// for the required feature, but not that that feature is actually used |
|
244 | 244 | /// in all occasions. |
|
245 | 245 | pub fn use_dirstate_v2(&self) -> bool { |
|
246 | 246 | self.requirements |
|
247 | 247 | .contains(requirements::DIRSTATE_V2_REQUIREMENT) |
|
248 | 248 | } |
|
249 | 249 | |
|
250 | 250 | pub fn has_sparse(&self) -> bool { |
|
251 | 251 | self.requirements.contains(requirements::SPARSE_REQUIREMENT) |
|
252 | 252 | } |
|
253 | 253 | |
|
254 | 254 | pub fn has_narrow(&self) -> bool { |
|
255 | 255 | self.requirements.contains(requirements::NARROW_REQUIREMENT) |
|
256 | 256 | } |
|
257 | 257 | |
|
258 | 258 | pub fn has_nodemap(&self) -> bool { |
|
259 | 259 | self.requirements |
|
260 | 260 | .contains(requirements::NODEMAP_REQUIREMENT) |
|
261 | 261 | } |
|
262 | 262 | |
|
263 | 263 | fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> { |
|
264 | 264 | Ok(self |
|
265 | 265 | .hg_vfs() |
|
266 | 266 | .read("dirstate") |
|
267 | 267 | .io_not_found_as_none()? |
|
268 | 268 | .unwrap_or_default()) |
|
269 | 269 | } |
|
270 | 270 | |
|
271 | 271 | fn dirstate_identity(&self) -> Result<Option<u64>, HgError> { |
|
272 | 272 | use std::os::unix::fs::MetadataExt; |
|
273 | 273 | Ok(self |
|
274 | 274 | .hg_vfs() |
|
275 | 275 | .symlink_metadata("dirstate") |
|
276 | 276 | .io_not_found_as_none()? |
|
277 | 277 | .map(|meta| meta.ino())) |
|
278 | 278 | } |
|
279 | 279 | |
|
280 | 280 | pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> { |
|
281 | 281 | Ok(*self |
|
282 | 282 | .dirstate_parents |
|
283 | 283 | .get_or_init(|| self.read_dirstate_parents())?) |
|
284 | 284 | } |
|
285 | 285 | |
|
    /// Parse the dirstate parents from disk, preferring the v2 docket and
    /// falling back to the v1 parser when the docket cannot be parsed.
    /// Also caches the result in `self.dirstate_parents`.
    fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
        let dirstate = self.dirstate_file_contents()?;
        let parents = if dirstate.is_empty() {
            // No dirstate file: null parents.
            DirstateParents::NULL
        } else if self.use_dirstate_v2() {
            let docket_res =
                crate::dirstate_tree::on_disk::read_docket(&dirstate);
            match docket_res {
                Ok(docket) => docket.parents(),
                Err(_) => {
                    // The requirement says v2, but the on-disk file may
                    // still be v1 (see the `use_dirstate_v2` doc comment).
                    log::info!(
                        "Parsing dirstate docket failed, \
                        falling back to dirstate-v1"
                    );
                    *crate::dirstate::parsers::parse_dirstate_parents(
                        &dirstate,
                    )?
                }
            }
        } else {
            *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
        };
        // Cache for subsequent `dirstate_parents()` calls.
        self.dirstate_parents.set(parents);
        Ok(parents)
    }
|
311 | 311 | |
|
    /// Returns the information read from the dirstate docket necessary to
    /// check if the data file has been updated/deleted by another process
    /// since we last read the dirstate.
    /// Namely, the inode, data file uuid and the data size.
    fn get_dirstate_data_file_integrity(
        &self,
    ) -> Result<DirstateMapIdentity, HgError> {
        assert!(
            self.use_dirstate_v2(),
            "accessing dirstate data file ID without dirstate-v2"
        );
        // Get the identity before the contents since we could have a race
        // between the two. Having an identity that is too old is fine, but
        // one that is younger than the content change is bad.
        let identity = self.dirstate_identity()?;
        let dirstate = self.dirstate_file_contents()?;
        if dirstate.is_empty() {
            // No dirstate file at all: null parents, no uuid, zero size.
            self.dirstate_parents.set(DirstateParents::NULL);
            Ok((identity, None, 0))
        } else {
            let docket_res =
                crate::dirstate_tree::on_disk::read_docket(&dirstate);
            match docket_res {
                Ok(docket) => {
                    // Cache the parents while the docket is at hand.
                    self.dirstate_parents.set(docket.parents());
                    Ok((
                        identity,
                        Some(docket.uuid.to_owned()),
                        docket.data_size(),
                    ))
                }
                Err(_) => {
                    // Requirement says v2, but the on-disk file is v1
                    // (e.g. an interrupted upgrade).
                    log::info!(
                        "Parsing dirstate docket failed, \
                        falling back to dirstate-v1"
                    );
                    let parents =
                        *crate::dirstate::parsers::parse_dirstate_parents(
                            &dirstate,
                        )?;
                    self.dirstate_parents.set(parents);
                    // v1 has no separate data file: no uuid, zero size.
                    Ok((identity, None, 0))
                }
            }
        }
    }
|
358 | 358 | |
|
    /// Build the dirstate map, retrying the racy v2 docket + data-file
    /// read a bounded number of times and falling back to v1 parsing when
    /// v2 reading fails for any other reason.
    fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
        if self.use_dirstate_v2() {
            // The v2 dirstate is split into a docket and a data file.
            // Since we don't always take the `wlock` to read it
            // (like in `hg status`), it is susceptible to races.
            // A simple retry method should be enough since full rewrites
            // only happen when too much garbage data is present and
            // this race is unlikely.
            let mut tries = 0;

            while tries < V2_MAX_READ_ATTEMPTS {
                tries += 1;
                match self.read_docket_and_data_file() {
                    Ok(m) => {
                        return Ok(m);
                    }
                    Err(e) => match e {
                        DirstateError::Common(HgError::RaceDetected(
                            context,
                        )) => {
                            log::info!(
                                "dirstate read race detected {} (retry {}/{})",
                                context,
                                tries,
                                V2_MAX_READ_ATTEMPTS,
                            );
                            continue;
                        }
                        _ => {
                            // Any non-race error: assume the on-disk file
                            // is actually v1 (e.g. an interrupted upgrade).
                            log::info!(
                                "Reading dirstate v2 failed, \
                                falling back to v1"
                            );
                            return self.new_dirstate_map_v1();
                        }
                    },
                }
            }
            // All retries exhausted: give up with a hard abort.
            let error = HgError::abort(
                format!("dirstate read race happened {tries} times in a row"),
                255,
                None,
            );
            Err(DirstateError::Common(error))
        } else {
            self.new_dirstate_map_v1()
        }
    }
|
407 | 407 | |
|
408 | 408 | fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> { |
|
409 | 409 | debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); |
|
410 | 410 | let identity = self.dirstate_identity()?; |
|
411 | 411 | let dirstate_file_contents = self.dirstate_file_contents()?; |
|
412 | 412 | if dirstate_file_contents.is_empty() { |
|
413 | 413 | self.dirstate_parents.set(DirstateParents::NULL); |
|
414 | 414 | Ok(OwningDirstateMap::new_empty(Vec::new())) |
|
415 | 415 | } else { |
|
416 | 416 | let (map, parents) = |
|
417 | 417 | OwningDirstateMap::new_v1(dirstate_file_contents, identity)?; |
|
418 | 418 | self.dirstate_parents.set(parents); |
|
419 | 419 | Ok(map) |
|
420 | 420 | } |
|
421 | 421 | } |
|
422 | 422 | |
|
    /// Read the dirstate-v2 docket and then its data file into a map.
    ///
    /// Returns `HgError::RaceDetected` (for the caller to retry) when the
    /// data file disappears between reading the docket and reading the
    /// data — see `new_dirstate_map`.
    fn read_docket_and_data_file(
        &self,
    ) -> Result<OwningDirstateMap, DirstateError> {
        debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
        let dirstate_file_contents = self.dirstate_file_contents()?;
        let identity = self.dirstate_identity()?;
        if dirstate_file_contents.is_empty() {
            self.dirstate_parents.set(DirstateParents::NULL);
            return Ok(OwningDirstateMap::new_empty(Vec::new()));
        }
        let docket = crate::dirstate_tree::on_disk::read_docket(
            &dirstate_file_contents,
        )?;
        debug_wait_for_file_or_print(
            self.config(),
            "dirstate.post-docket-read-file",
        );
        self.dirstate_parents.set(docket.parents());
        let uuid = docket.uuid.to_owned();
        let data_size = docket.data_size();

        let context = "between reading dirstate docket and data file";
        let race_error = HgError::RaceDetected(context.into());
        let metadata = docket.tree_metadata();

        let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
            // Don't mmap on NFS to prevent `SIGBUS` error on deletion
            let contents = self.hg_vfs().read(docket.data_filename());
            let contents = match contents {
                Ok(c) => c,
                Err(HgError::IoError { error, context }) => {
                    match error.raw_os_error().expect("real os error") {
                        // 2 = ENOENT, No such file or directory
                        // 116 = ESTALE, Stale NFS file handle
                        //
                        // TODO match on `error.kind()` when
                        // `ErrorKind::StaleNetworkFileHandle` is stable.
                        2 | 116 => {
                            // Race where the data file was deleted right
                            // after we read the docket, try again
                            return Err(race_error.into());
                        }
                        _ => {
                            return Err(
                                HgError::IoError { error, context }.into()
                            )
                        }
                    }
                }
                Err(e) => return Err(e.into()),
            };
            OwningDirstateMap::new_v2(
                contents, data_size, metadata, uuid, identity,
            )
        } else {
            match self
                .hg_vfs()
                .mmap_open(docket.data_filename())
                .io_not_found_as_none()
            {
                Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
                    data_mmap, data_size, metadata, uuid, identity,
                ),
                Ok(None) => {
                    // Race where the data file was deleted right after we
                    // read the docket, try again
                    return Err(race_error.into());
                }
                Err(e) => return Err(e.into()),
            }
        }?;

        // Devel knob controlling how dirstate-v2 data is written back.
        let write_mode_config = self
            .config()
            .get_str(b"devel", b"dirstate.v2.data_update_mode")
            .unwrap_or(Some("auto"))
            .unwrap_or("auto"); // don't bother for devel options
        let write_mode = match write_mode_config {
            "auto" => DirstateMapWriteMode::Auto,
            "force-new" => DirstateMapWriteMode::ForceNewDataFile,
            "force-append" => DirstateMapWriteMode::ForceAppend,
            _ => DirstateMapWriteMode::Auto,
        };

        map.with_dmap_mut(|m| m.set_write_mode(write_mode));

        Ok(map)
    }
|
511 | 511 | |
|
512 | 512 | pub fn dirstate_map( |
|
513 | 513 | &self, |
|
514 | 514 | ) -> Result<Ref<OwningDirstateMap>, DirstateError> { |
|
515 | 515 | self.dirstate_map.get_or_init(|| self.new_dirstate_map()) |
|
516 | 516 | } |
|
517 | 517 | |
|
518 | 518 | pub fn dirstate_map_mut( |
|
519 | 519 | &self, |
|
520 | 520 | ) -> Result<RefMut<OwningDirstateMap>, DirstateError> { |
|
521 | 521 | self.dirstate_map |
|
522 | 522 | .get_mut_or_init(|| self.new_dirstate_map()) |
|
523 | 523 | } |
|
524 | 524 | |
|
525 | 525 | fn new_changelog(&self) -> Result<Changelog, HgError> { |
|
526 | 526 | Changelog::open(&self.store_vfs(), self.has_nodemap()) |
|
527 | 527 | } |
|
528 | 528 | |
|
529 | 529 | pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> { |
|
530 | 530 | self.changelog.get_or_init(|| self.new_changelog()) |
|
531 | 531 | } |
|
532 | 532 | |
|
533 | 533 | pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> { |
|
534 | 534 | self.changelog.get_mut_or_init(|| self.new_changelog()) |
|
535 | 535 | } |
|
536 | 536 | |
|
537 | 537 | fn new_manifestlog(&self) -> Result<Manifestlog, HgError> { |
|
538 | 538 | Manifestlog::open(&self.store_vfs(), self.has_nodemap()) |
|
539 | 539 | } |
|
540 | 540 | |
|
541 | 541 | pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> { |
|
542 | 542 | self.manifestlog.get_or_init(|| self.new_manifestlog()) |
|
543 | 543 | } |
|
544 | 544 | |
|
545 | 545 | pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> { |
|
546 | 546 | self.manifestlog.get_mut_or_init(|| self.new_manifestlog()) |
|
547 | 547 | } |
|
548 | 548 | |
|
549 | 549 | /// Returns the manifest of the *changeset* with the given node ID |
|
550 | 550 | pub fn manifest_for_node( |
|
551 | 551 | &self, |
|
552 | 552 | node: impl Into<NodePrefix>, |
|
553 | 553 | ) -> Result<Manifest, RevlogError> { |
|
554 | 554 | self.manifestlog()?.data_for_node( |
|
555 | 555 | self.changelog()? |
|
556 | 556 | .data_for_node(node.into())? |
|
557 | 557 | .manifest_node()? |
|
558 | 558 | .into(), |
|
559 | 559 | ) |
|
560 | 560 | } |
|
561 | 561 | |
|
562 | 562 | /// Returns the manifest of the *changeset* with the given revision number |
|
563 | 563 | pub fn manifest_for_rev( |
|
564 | 564 | &self, |
|
565 | revision: Revision, | |
|
565 | revision: UncheckedRevision, | |
|
566 | 566 | ) -> Result<Manifest, RevlogError> { |
|
567 | 567 | self.manifestlog()?.data_for_node( |
|
568 | 568 | self.changelog()? |
|
569 | 569 | .data_for_rev(revision)? |
|
570 | 570 | .manifest_node()? |
|
571 | 571 | .into(), |
|
572 | 572 | ) |
|
573 | 573 | } |
|
574 | 574 | |
|
575 | 575 | pub fn has_subrepos(&self) -> Result<bool, DirstateError> { |
|
576 | 576 | if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? { |
|
577 | 577 | Ok(entry.tracked()) |
|
578 | 578 | } else { |
|
579 | 579 | Ok(false) |
|
580 | 580 | } |
|
581 | 581 | } |
|
582 | 582 | |
|
583 | 583 | pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> { |
|
584 | 584 | Filelog::open(self, path) |
|
585 | 585 | } |
|
586 | 586 | |
|
587 | 587 | /// Write to disk any updates that were made through `dirstate_map_mut`. |
|
588 | 588 | /// |
|
589 | 589 | /// The "wlock" must be held while calling this. |
|
590 | 590 | /// See for example `try_with_wlock_no_wait`. |
|
591 | 591 | /// |
|
592 | 592 | /// TODO: have a `WritableRepo` type only accessible while holding the |
|
593 | 593 | /// lock? |
|
594 | 594 | pub fn write_dirstate(&self) -> Result<(), DirstateError> { |
|
595 | 595 | let map = self.dirstate_map()?; |
|
596 | 596 | // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if |
|
597 | 597 | // it’s unset |
|
598 | 598 | let parents = self.dirstate_parents()?; |
|
599 | 599 | let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() { |
|
600 | 600 | let (identity, uuid, data_size) = |
|
601 | 601 | self.get_dirstate_data_file_integrity()?; |
|
602 | 602 | let identity_changed = identity != map.old_identity(); |
|
603 | 603 | let uuid_changed = uuid.as_deref() != map.old_uuid(); |
|
604 | 604 | let data_length_changed = data_size != map.old_data_size(); |
|
605 | 605 | |
|
606 | 606 | if identity_changed || uuid_changed || data_length_changed { |
|
607 | 607 | // If any of identity, uuid or length have changed since |
|
608 | 608 | // last disk read, don't write. |
|
609 | 609 | // This is fine because either we're in a command that doesn't |
|
610 | 610 | // write anything too important (like `hg status`), or we're in |
|
611 | 611 | // `hg add` and we're supposed to have taken the lock before |
|
612 | 612 | // reading anyway. |
|
613 | 613 | // |
|
614 | 614 | // TODO complain loudly if we've changed anything important |
|
615 | 615 | // without taking the lock. |
|
616 | 616 | // (see `hg help config.format.use-dirstate-tracked-hint`) |
|
617 | 617 | log::debug!( |
|
618 | 618 | "dirstate has changed since last read, not updating." |
|
619 | 619 | ); |
|
620 | 620 | return Ok(()); |
|
621 | 621 | } |
|
622 | 622 | |
|
623 | 623 | let uuid_opt = map.old_uuid(); |
|
624 | 624 | let write_mode = if uuid_opt.is_some() { |
|
625 | 625 | DirstateMapWriteMode::Auto |
|
626 | 626 | } else { |
|
627 | 627 | DirstateMapWriteMode::ForceNewDataFile |
|
628 | 628 | }; |
|
629 | 629 | let (data, tree_metadata, append, old_data_size) = |
|
630 | 630 | map.pack_v2(write_mode)?; |
|
631 | 631 | |
|
632 | 632 | // Reuse the uuid, or generate a new one, keeping the old for |
|
633 | 633 | // deletion. |
|
634 | 634 | let (uuid, old_uuid) = match uuid_opt { |
|
635 | 635 | Some(uuid) => { |
|
636 | 636 | let as_str = std::str::from_utf8(uuid) |
|
637 | 637 | .map_err(|_| { |
|
638 | 638 | HgError::corrupted( |
|
639 | 639 | "non-UTF-8 dirstate data file ID", |
|
640 | 640 | ) |
|
641 | 641 | })? |
|
642 | 642 | .to_owned(); |
|
643 | 643 | if append { |
|
644 | 644 | (as_str, None) |
|
645 | 645 | } else { |
|
646 | 646 | (DirstateDocket::new_uid(), Some(as_str)) |
|
647 | 647 | } |
|
648 | 648 | } |
|
649 | 649 | None => (DirstateDocket::new_uid(), None), |
|
650 | 650 | }; |
|
651 | 651 | |
|
652 | 652 | let data_filename = format!("dirstate.{}", uuid); |
|
653 | 653 | let data_filename = self.hg_vfs().join(data_filename); |
|
654 | 654 | let mut options = std::fs::OpenOptions::new(); |
|
655 | 655 | options.write(true); |
|
656 | 656 | |
|
657 | 657 | // Why are we not using the O_APPEND flag when appending? |
|
658 | 658 | // |
|
659 | 659 | // - O_APPEND makes it trickier to deal with garbage at the end of |
|
660 | 660 | // the file, left by a previous uncommitted transaction. By |
|
661 | 661 | // starting the write at [old_data_size] we make sure we erase |
|
662 | 662 | // all such garbage. |
|
663 | 663 | // |
|
664 | 664 | // - O_APPEND requires to special-case 0-byte writes, whereas we |
|
665 | 665 | // don't need that. |
|
666 | 666 | // |
|
667 | 667 | // - Some OSes have bugs in implementation O_APPEND: |
|
668 | 668 | // revlog.py talks about a Solaris bug, but we also saw some ZFS |
|
669 | 669 | // bug: https://github.com/openzfs/zfs/pull/3124, |
|
670 | 670 | // https://github.com/openzfs/zfs/issues/13370 |
|
671 | 671 | // |
|
672 | 672 | if !append { |
|
673 | 673 | log::trace!("creating a new dirstate data file"); |
|
674 | 674 | options.create_new(true); |
|
675 | 675 | } else { |
|
676 | 676 | log::trace!("appending to the dirstate data file"); |
|
677 | 677 | } |
|
678 | 678 | |
|
679 | 679 | let data_size = (|| { |
|
680 | 680 | // TODO: loop and try another random ID if !append and this |
|
681 | 681 | // returns `ErrorKind::AlreadyExists`? Collision chance of two |
|
682 | 682 | // random IDs is one in 2**32 |
|
683 | 683 | let mut file = options.open(&data_filename)?; |
|
684 | 684 | if append { |
|
685 | 685 | file.seek(SeekFrom::Start(old_data_size as u64))?; |
|
686 | 686 | } |
|
687 | 687 | file.write_all(&data)?; |
|
688 | 688 | file.flush()?; |
|
689 | 689 | file.seek(SeekFrom::Current(0)) |
|
690 | 690 | })() |
|
691 | 691 | .when_writing_file(&data_filename)?; |
|
692 | 692 | |
|
693 | 693 | let packed_dirstate = DirstateDocket::serialize( |
|
694 | 694 | parents, |
|
695 | 695 | tree_metadata, |
|
696 | 696 | data_size, |
|
697 | 697 | uuid.as_bytes(), |
|
698 | 698 | ) |
|
699 | 699 | .map_err(|_: std::num::TryFromIntError| { |
|
700 | 700 | HgError::corrupted("overflow in dirstate docket serialization") |
|
701 | 701 | })?; |
|
702 | 702 | |
|
703 | 703 | (packed_dirstate, old_uuid) |
|
704 | 704 | } else { |
|
705 | 705 | let identity = self.dirstate_identity()?; |
|
706 | 706 | if identity != map.old_identity() { |
|
707 | 707 | // If identity changed since last disk read, don't write. |
|
708 | 708 | // This is fine because either we're in a command that doesn't |
|
709 | 709 | // write anything too important (like `hg status`), or we're in |
|
710 | 710 | // `hg add` and we're supposed to have taken the lock before |
|
711 | 711 | // reading anyway. |
|
712 | 712 | // |
|
713 | 713 | // TODO complain loudly if we've changed anything important |
|
714 | 714 | // without taking the lock. |
|
715 | 715 | // (see `hg help config.format.use-dirstate-tracked-hint`) |
|
716 | 716 | log::debug!( |
|
717 | 717 | "dirstate has changed since last read, not updating." |
|
718 | 718 | ); |
|
719 | 719 | return Ok(()); |
|
720 | 720 | } |
|
721 | 721 | (map.pack_v1(parents)?, None) |
|
722 | 722 | }; |
|
723 | 723 | |
|
724 | 724 | let vfs = self.hg_vfs(); |
|
725 | 725 | vfs.atomic_write("dirstate", &packed_dirstate)?; |
|
726 | 726 | if let Some(uuid) = old_uuid_to_remove { |
|
727 | 727 | // Remove the old data file after the new docket pointing to the |
|
728 | 728 | // new data file was written. |
|
729 | 729 | vfs.remove_file(format!("dirstate.{}", uuid))?; |
|
730 | 730 | } |
|
731 | 731 | Ok(()) |
|
732 | 732 | } |
|
733 | 733 | } |
|
734 | 734 | |
|
735 | 735 | /// Lazily-initialized component of `Repo` with interior mutability |
|
736 | 736 | /// |
|
737 | 737 | /// This differs from `OnceCell` in that the value can still be "deinitialized" |
|
738 | 738 | /// later by setting its inner `Option` to `None`. It also takes the |
|
739 | 739 | /// initialization function as an argument when the value is requested, not |
|
740 | 740 | /// when the instance is created. |
|
741 | 741 | struct LazyCell<T> { |
|
742 | 742 | value: RefCell<Option<T>>, |
|
743 | 743 | } |
|
744 | 744 | |
|
745 | 745 | impl<T> LazyCell<T> { |
|
746 | 746 | fn new() -> Self { |
|
747 | 747 | Self { |
|
748 | 748 | value: RefCell::new(None), |
|
749 | 749 | } |
|
750 | 750 | } |
|
751 | 751 | |
|
752 | 752 | fn set(&self, value: T) { |
|
753 | 753 | *self.value.borrow_mut() = Some(value) |
|
754 | 754 | } |
|
755 | 755 | |
|
756 | 756 | fn get_or_init<E>( |
|
757 | 757 | &self, |
|
758 | 758 | init: impl Fn() -> Result<T, E>, |
|
759 | 759 | ) -> Result<Ref<T>, E> { |
|
760 | 760 | let mut borrowed = self.value.borrow(); |
|
761 | 761 | if borrowed.is_none() { |
|
762 | 762 | drop(borrowed); |
|
763 | 763 | // Only use `borrow_mut` if it is really needed to avoid panic in |
|
764 | 764 | // case there is another outstanding borrow but mutation is not |
|
765 | 765 | // needed. |
|
766 | 766 | *self.value.borrow_mut() = Some(init()?); |
|
767 | 767 | borrowed = self.value.borrow() |
|
768 | 768 | } |
|
769 | 769 | Ok(Ref::map(borrowed, |option| option.as_ref().unwrap())) |
|
770 | 770 | } |
|
771 | 771 | |
|
772 | 772 | fn get_mut_or_init<E>( |
|
773 | 773 | &self, |
|
774 | 774 | init: impl Fn() -> Result<T, E>, |
|
775 | 775 | ) -> Result<RefMut<T>, E> { |
|
776 | 776 | let mut borrowed = self.value.borrow_mut(); |
|
777 | 777 | if borrowed.is_none() { |
|
778 | 778 | *borrowed = Some(init()?); |
|
779 | 779 | } |
|
780 | 780 | Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap())) |
|
781 | 781 | } |
|
782 | 782 | } |
@@ -1,343 +1,353 b'' | |||
|
1 | 1 | use crate::errors::HgError; |
|
2 | 2 | use crate::revlog::Revision; |
|
3 | 3 | use crate::revlog::{Node, NodePrefix}; |
|
4 | 4 | use crate::revlog::{Revlog, RevlogEntry, RevlogError}; |
|
5 | 5 | use crate::utils::hg_path::HgPath; |
|
6 | 6 | use crate::vfs::Vfs; |
|
7 | use crate::UncheckedRevision; | |
|
7 | 8 | use itertools::Itertools; |
|
8 | 9 | use std::ascii::escape_default; |
|
9 | 10 | use std::borrow::Cow; |
|
10 | 11 | use std::fmt::{Debug, Formatter}; |
|
11 | 12 | |
|
12 | 13 | /// A specialized `Revlog` to work with changelog data format. |
|
13 | 14 | pub struct Changelog { |
|
14 | 15 | /// The generic `revlog` format. |
|
15 | 16 | pub(crate) revlog: Revlog, |
|
16 | 17 | } |
|
17 | 18 | |
|
18 | 19 | impl Changelog { |
|
19 | 20 | /// Open the `changelog` of a repository given by its root. |
|
20 | 21 | pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { |
|
21 | 22 | let revlog = |
|
22 | 23 | Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?; |
|
23 | 24 | Ok(Self { revlog }) |
|
24 | 25 | } |
|
25 | 26 | |
|
26 | 27 | /// Return the `ChangelogRevisionData` for the given node ID. |
|
27 | 28 | pub fn data_for_node( |
|
28 | 29 | &self, |
|
29 | 30 | node: NodePrefix, |
|
30 | 31 | ) -> Result<ChangelogRevisionData, RevlogError> { |
|
31 | 32 | let rev = self.revlog.rev_from_node(node)?; |
|
32 |
self. |
|
|
33 | self.entry_for_checked_rev(rev)?.data() | |
|
33 | 34 | } |
|
34 | 35 | |
|
35 | 36 | /// Return the [`ChangelogEntry`] for the given revision number. |
|
36 | 37 | pub fn entry_for_rev( |
|
37 | 38 | &self, |
|
39 | rev: UncheckedRevision, | |
|
40 | ) -> Result<ChangelogEntry, RevlogError> { | |
|
41 | let revlog_entry = self.revlog.get_entry(rev)?; | |
|
42 | Ok(ChangelogEntry { revlog_entry }) | |
|
43 | } | |
|
44 | ||
|
45 | /// Same as [`Self::entry_for_rev`] for checked revisions. | |
|
46 | fn entry_for_checked_rev( | |
|
47 | &self, | |
|
38 | 48 | rev: Revision, |
|
39 | 49 | ) -> Result<ChangelogEntry, RevlogError> { |
|
40 | let revlog_entry = self.revlog.get_entry(rev)?; | |
|
50 | let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; | |
|
41 | 51 | Ok(ChangelogEntry { revlog_entry }) |
|
42 | 52 | } |
|
43 | 53 | |
|
44 | 54 | /// Return the [`ChangelogRevisionData`] for the given revision number. |
|
45 | 55 | /// |
|
46 | 56 | /// This is a useful shortcut in case the caller does not need the |
|
47 | 57 | /// generic revlog information (parents, hashes etc). Otherwise |
|
48 | 58 | /// consider taking a [`ChangelogEntry`] with |
|
49 | 59 | /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. |
|
50 | 60 | pub fn data_for_rev( |
|
51 | 61 | &self, |
|
52 | rev: Revision, | |
|
62 | rev: UncheckedRevision, | |
|
53 | 63 | ) -> Result<ChangelogRevisionData, RevlogError> { |
|
54 | 64 | self.entry_for_rev(rev)?.data() |
|
55 | 65 | } |
|
56 | 66 | |
|
57 | pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> { | |
|
67 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { | |
|
58 | 68 | self.revlog.node_from_rev(rev) |
|
59 | 69 | } |
|
60 | 70 | |
|
61 | 71 | pub fn rev_from_node( |
|
62 | 72 | &self, |
|
63 | 73 | node: NodePrefix, |
|
64 | 74 | ) -> Result<Revision, RevlogError> { |
|
65 | 75 | self.revlog.rev_from_node(node) |
|
66 | 76 | } |
|
67 | 77 | } |
|
68 | 78 | |
|
69 | 79 | /// A specialized `RevlogEntry` for `changelog` data format |
|
70 | 80 | /// |
|
71 | 81 | /// This is a `RevlogEntry` with the added semantics that the associated |
|
72 | 82 | /// data should meet the requirements for `changelog`, materialized by |
|
73 | 83 | /// the fact that `data()` constructs a `ChangelogRevisionData`. |
|
74 | 84 | /// In case that promise would be broken, the `data` method returns an error. |
|
75 | 85 | #[derive(Clone)] |
|
76 | 86 | pub struct ChangelogEntry<'changelog> { |
|
77 | 87 | /// Same data, as a generic `RevlogEntry`. |
|
78 | 88 | pub(crate) revlog_entry: RevlogEntry<'changelog>, |
|
79 | 89 | } |
|
80 | 90 | |
|
81 | 91 | impl<'changelog> ChangelogEntry<'changelog> { |
|
82 | 92 | pub fn data<'a>( |
|
83 | 93 | &'a self, |
|
84 | 94 | ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { |
|
85 | 95 | let bytes = self.revlog_entry.data()?; |
|
86 | 96 | if bytes.is_empty() { |
|
87 | 97 | Ok(ChangelogRevisionData::null()) |
|
88 | 98 | } else { |
|
89 | 99 | Ok(ChangelogRevisionData::new(bytes).map_err(|err| { |
|
90 | 100 | RevlogError::Other(HgError::CorruptedRepository(format!( |
|
91 | 101 | "Invalid changelog data for revision {}: {:?}", |
|
92 | 102 | self.revlog_entry.revision(), |
|
93 | 103 | err |
|
94 | 104 | ))) |
|
95 | 105 | })?) |
|
96 | 106 | } |
|
97 | 107 | } |
|
98 | 108 | |
|
99 | 109 | /// Obtain a reference to the underlying `RevlogEntry`. |
|
100 | 110 | /// |
|
101 | 111 | /// This allows the caller to access the information that is common |
|
102 | 112 | /// to all revlog entries: revision number, node id, parent revisions etc. |
|
103 | 113 | pub fn as_revlog_entry(&self) -> &RevlogEntry { |
|
104 | 114 | &self.revlog_entry |
|
105 | 115 | } |
|
106 | 116 | |
|
107 | 117 | pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { |
|
108 | 118 | Ok(self |
|
109 | 119 | .revlog_entry |
|
110 | 120 | .p1_entry()? |
|
111 | 121 | .map(|revlog_entry| Self { revlog_entry })) |
|
112 | 122 | } |
|
113 | 123 | |
|
114 | 124 | pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { |
|
115 | 125 | Ok(self |
|
116 | 126 | .revlog_entry |
|
117 | 127 | .p2_entry()? |
|
118 | 128 | .map(|revlog_entry| Self { revlog_entry })) |
|
119 | 129 | } |
|
120 | 130 | } |
|
121 | 131 | |
|
122 | 132 | /// `Changelog` entry which knows how to interpret the `changelog` data bytes. |
|
123 | 133 | #[derive(PartialEq)] |
|
124 | 134 | pub struct ChangelogRevisionData<'changelog> { |
|
125 | 135 | /// The data bytes of the `changelog` entry. |
|
126 | 136 | bytes: Cow<'changelog, [u8]>, |
|
127 | 137 | /// The end offset for the hex manifest (not including the newline) |
|
128 | 138 | manifest_end: usize, |
|
129 | 139 | /// The end offset for the user+email (not including the newline) |
|
130 | 140 | user_end: usize, |
|
131 | 141 | /// The end offset for the timestamp+timezone+extras (not including the |
|
132 | 142 | /// newline) |
|
133 | 143 | timestamp_end: usize, |
|
134 | 144 | /// The end offset for the file list (not including the newline) |
|
135 | 145 | files_end: usize, |
|
136 | 146 | } |
|
137 | 147 | |
|
138 | 148 | impl<'changelog> ChangelogRevisionData<'changelog> { |
|
139 | 149 | fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> { |
|
140 | 150 | let mut line_iter = bytes.split(|b| b == &b'\n'); |
|
141 | 151 | let manifest_end = line_iter |
|
142 | 152 | .next() |
|
143 | 153 | .expect("Empty iterator from split()?") |
|
144 | 154 | .len(); |
|
145 | 155 | let user_slice = line_iter.next().ok_or_else(|| { |
|
146 | 156 | HgError::corrupted("Changeset data truncated after manifest line") |
|
147 | 157 | })?; |
|
148 | 158 | let user_end = manifest_end + 1 + user_slice.len(); |
|
149 | 159 | let timestamp_slice = line_iter.next().ok_or_else(|| { |
|
150 | 160 | HgError::corrupted("Changeset data truncated after user line") |
|
151 | 161 | })?; |
|
152 | 162 | let timestamp_end = user_end + 1 + timestamp_slice.len(); |
|
153 | 163 | let mut files_end = timestamp_end + 1; |
|
154 | 164 | loop { |
|
155 | 165 | let line = line_iter.next().ok_or_else(|| { |
|
156 | 166 | HgError::corrupted("Changeset data truncated in files list") |
|
157 | 167 | })?; |
|
158 | 168 | if line.is_empty() { |
|
159 | 169 | if files_end == bytes.len() { |
|
160 | 170 | // The list of files ended with a single newline (there |
|
161 | 171 | // should be two) |
|
162 | 172 | return Err(HgError::corrupted( |
|
163 | 173 | "Changeset data truncated after files list", |
|
164 | 174 | )); |
|
165 | 175 | } |
|
166 | 176 | files_end -= 1; |
|
167 | 177 | break; |
|
168 | 178 | } |
|
169 | 179 | files_end += line.len() + 1; |
|
170 | 180 | } |
|
171 | 181 | |
|
172 | 182 | Ok(Self { |
|
173 | 183 | bytes, |
|
174 | 184 | manifest_end, |
|
175 | 185 | user_end, |
|
176 | 186 | timestamp_end, |
|
177 | 187 | files_end, |
|
178 | 188 | }) |
|
179 | 189 | } |
|
180 | 190 | |
|
181 | 191 | fn null() -> Self { |
|
182 | 192 | Self::new(Cow::Borrowed( |
|
183 | 193 | b"0000000000000000000000000000000000000000\n\n0 0\n\n", |
|
184 | 194 | )) |
|
185 | 195 | .unwrap() |
|
186 | 196 | } |
|
187 | 197 | |
|
188 | 198 | /// Return an iterator over the lines of the entry. |
|
189 | 199 | pub fn lines(&self) -> impl Iterator<Item = &[u8]> { |
|
190 | 200 | self.bytes.split(|b| b == &b'\n') |
|
191 | 201 | } |
|
192 | 202 | |
|
193 | 203 | /// Return the node id of the `manifest` referenced by this `changelog` |
|
194 | 204 | /// entry. |
|
195 | 205 | pub fn manifest_node(&self) -> Result<Node, HgError> { |
|
196 | 206 | let manifest_node_hex = &self.bytes[..self.manifest_end]; |
|
197 | 207 | Node::from_hex_for_repo(manifest_node_hex) |
|
198 | 208 | } |
|
199 | 209 | |
|
200 | 210 | /// The full user string (usually a name followed by an email enclosed in |
|
201 | 211 | /// angle brackets) |
|
202 | 212 | pub fn user(&self) -> &[u8] { |
|
203 | 213 | &self.bytes[self.manifest_end + 1..self.user_end] |
|
204 | 214 | } |
|
205 | 215 | |
|
206 | 216 | /// The full timestamp line (timestamp in seconds, offset in seconds, and |
|
207 | 217 | /// possibly extras) |
|
208 | 218 | // TODO: We should expose this in a more useful way |
|
209 | 219 | pub fn timestamp_line(&self) -> &[u8] { |
|
210 | 220 | &self.bytes[self.user_end + 1..self.timestamp_end] |
|
211 | 221 | } |
|
212 | 222 | |
|
213 | 223 | /// The files changed in this revision. |
|
214 | 224 | pub fn files(&self) -> impl Iterator<Item = &HgPath> { |
|
215 | 225 | self.bytes[self.timestamp_end + 1..self.files_end] |
|
216 | 226 | .split(|b| b == &b'\n') |
|
217 | 227 | .map(HgPath::new) |
|
218 | 228 | } |
|
219 | 229 | |
|
220 | 230 | /// The change description. |
|
221 | 231 | pub fn description(&self) -> &[u8] { |
|
222 | 232 | &self.bytes[self.files_end + 2..] |
|
223 | 233 | } |
|
224 | 234 | } |
|
225 | 235 | |
|
226 | 236 | impl Debug for ChangelogRevisionData<'_> { |
|
227 | 237 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
|
228 | 238 | f.debug_struct("ChangelogRevisionData") |
|
229 | 239 | .field("bytes", &debug_bytes(&self.bytes)) |
|
230 | 240 | .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) |
|
231 | 241 | .field( |
|
232 | 242 | "user", |
|
233 | 243 | &debug_bytes( |
|
234 | 244 | &self.bytes[self.manifest_end + 1..self.user_end], |
|
235 | 245 | ), |
|
236 | 246 | ) |
|
237 | 247 | .field( |
|
238 | 248 | "timestamp", |
|
239 | 249 | &debug_bytes( |
|
240 | 250 | &self.bytes[self.user_end + 1..self.timestamp_end], |
|
241 | 251 | ), |
|
242 | 252 | ) |
|
243 | 253 | .field( |
|
244 | 254 | "files", |
|
245 | 255 | &debug_bytes( |
|
246 | 256 | &self.bytes[self.timestamp_end + 1..self.files_end], |
|
247 | 257 | ), |
|
248 | 258 | ) |
|
249 | 259 | .field( |
|
250 | 260 | "description", |
|
251 | 261 | &debug_bytes(&self.bytes[self.files_end + 2..]), |
|
252 | 262 | ) |
|
253 | 263 | .finish() |
|
254 | 264 | } |
|
255 | 265 | } |
|
256 | 266 | |
|
257 | 267 | fn debug_bytes(bytes: &[u8]) -> String { |
|
258 | 268 | String::from_utf8_lossy( |
|
259 | 269 | &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(), |
|
260 | 270 | ) |
|
261 | 271 | .to_string() |
|
262 | 272 | } |
|
263 | 273 | |
|
264 | 274 | #[cfg(test)] |
|
265 | 275 | mod tests { |
|
266 | 276 | use super::*; |
|
267 | 277 | use crate::vfs::Vfs; |
|
268 | 278 | use crate::NULL_REVISION; |
|
269 | 279 | use pretty_assertions::assert_eq; |
|
270 | 280 | |
|
271 | 281 | #[test] |
|
272 | 282 | fn test_create_changelogrevisiondata_invalid() { |
|
273 | 283 | // Completely empty |
|
274 | 284 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); |
|
275 | 285 | // No newline after manifest |
|
276 | 286 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); |
|
277 | 287 | // No newline after user |
|
278 | 288 | assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err()); |
|
279 | 289 | // No newline after timestamp |
|
280 | 290 | assert!( |
|
281 | 291 | ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err() |
|
282 | 292 | ); |
|
283 | 293 | // Missing newline after files |
|
284 | 294 | assert!(ChangelogRevisionData::new(Cow::Borrowed( |
|
285 | 295 | b"abcd\n\n0 0\nfile1\nfile2" |
|
286 | 296 | )) |
|
287 | 297 | .is_err(),); |
|
288 | 298 | // Only one newline after files |
|
289 | 299 | assert!(ChangelogRevisionData::new(Cow::Borrowed( |
|
290 | 300 | b"abcd\n\n0 0\nfile1\nfile2\n" |
|
291 | 301 | )) |
|
292 | 302 | .is_err(),); |
|
293 | 303 | } |
|
294 | 304 | |
|
295 | 305 | #[test] |
|
296 | 306 | fn test_create_changelogrevisiondata() { |
|
297 | 307 | let data = ChangelogRevisionData::new(Cow::Borrowed( |
|
298 | 308 | b"0123456789abcdef0123456789abcdef01234567 |
|
299 | 309 | Some One <someone@example.com> |
|
300 | 310 | 0 0 |
|
301 | 311 | file1 |
|
302 | 312 | file2 |
|
303 | 313 | |
|
304 | 314 | some |
|
305 | 315 | commit |
|
306 | 316 | message", |
|
307 | 317 | )) |
|
308 | 318 | .unwrap(); |
|
309 | 319 | assert_eq!( |
|
310 | 320 | data.manifest_node().unwrap(), |
|
311 | 321 | Node::from_hex("0123456789abcdef0123456789abcdef01234567") |
|
312 | 322 | .unwrap() |
|
313 | 323 | ); |
|
314 | 324 | assert_eq!(data.user(), b"Some One <someone@example.com>"); |
|
315 | 325 | assert_eq!(data.timestamp_line(), b"0 0"); |
|
316 | 326 | assert_eq!( |
|
317 | 327 | data.files().collect_vec(), |
|
318 | 328 | vec![HgPath::new("file1"), HgPath::new("file2")] |
|
319 | 329 | ); |
|
320 | 330 | assert_eq!(data.description(), b"some\ncommit\nmessage"); |
|
321 | 331 | } |
|
322 | 332 | |
|
323 | 333 | #[test] |
|
324 | 334 | fn test_data_from_rev_null() -> Result<(), RevlogError> { |
|
325 | 335 | // an empty revlog will be enough for this case |
|
326 | 336 | let temp = tempfile::tempdir().unwrap(); |
|
327 | 337 | let vfs = Vfs { base: temp.path() }; |
|
328 | 338 | std::fs::write(temp.path().join("foo.i"), b"").unwrap(); |
|
329 | 339 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); |
|
330 | 340 | |
|
331 | 341 | let changelog = Changelog { revlog }; |
|
332 | 342 | assert_eq!( |
|
333 | changelog.data_for_rev(NULL_REVISION)?, | |
|
343 | changelog.data_for_rev(NULL_REVISION.into())?, | |
|
334 | 344 | ChangelogRevisionData::null() |
|
335 | 345 | ); |
|
336 | 346 | // same with the intermediate entry object |
|
337 | 347 | assert_eq!( |
|
338 | changelog.entry_for_rev(NULL_REVISION)?.data()?, | |
|
348 | changelog.entry_for_rev(NULL_REVISION.into())?.data()?, | |
|
339 | 349 | ChangelogRevisionData::null() |
|
340 | 350 | ); |
|
341 | 351 | Ok(()) |
|
342 | 352 | } |
|
343 | 353 | } |
@@ -1,208 +1,231 b'' | |||
|
1 | 1 | use crate::errors::HgError; |
|
2 | use crate::exit_codes; | |
|
2 | 3 | use crate::repo::Repo; |
|
3 | 4 | use crate::revlog::path_encode::path_encode; |
|
4 | 5 | use crate::revlog::NodePrefix; |
|
5 | 6 | use crate::revlog::Revision; |
|
6 | 7 | use crate::revlog::RevlogEntry; |
|
7 | 8 | use crate::revlog::{Revlog, RevlogError}; |
|
8 | 9 | use crate::utils::files::get_path_from_bytes; |
|
9 | 10 | use crate::utils::hg_path::HgPath; |
|
10 | 11 | use crate::utils::SliceExt; |
|
12 | use crate::UncheckedRevision; | |
|
11 | 13 | use std::path::PathBuf; |
|
12 | 14 | |
|
13 | 15 | /// A specialized `Revlog` to work with file data logs. |
|
14 | 16 | pub struct Filelog { |
|
15 | 17 | /// The generic `revlog` format. |
|
16 | 18 | revlog: Revlog, |
|
17 | 19 | } |
|
18 | 20 | |
|
19 | 21 | impl Filelog { |
|
20 | 22 | pub fn open_vfs( |
|
21 | 23 | store_vfs: &crate::vfs::Vfs<'_>, |
|
22 | 24 | file_path: &HgPath, |
|
23 | 25 | ) -> Result<Self, HgError> { |
|
24 | 26 | let index_path = store_path(file_path, b".i"); |
|
25 | 27 | let data_path = store_path(file_path, b".d"); |
|
26 | 28 | let revlog = |
|
27 | 29 | Revlog::open(store_vfs, index_path, Some(&data_path), false)?; |
|
28 | 30 | Ok(Self { revlog }) |
|
29 | 31 | } |
|
30 | 32 | |
|
31 | 33 | pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> { |
|
32 | 34 | Self::open_vfs(&repo.store_vfs(), file_path) |
|
33 | 35 | } |
|
34 | 36 | |
|
35 | 37 | /// The given node ID is that of the file as found in a filelog, not of a |
|
36 | 38 | /// changeset. |
|
37 | 39 | pub fn data_for_node( |
|
38 | 40 | &self, |
|
39 | 41 | file_node: impl Into<NodePrefix>, |
|
40 | 42 | ) -> Result<FilelogRevisionData, RevlogError> { |
|
41 | 43 | let file_rev = self.revlog.rev_from_node(file_node.into())?; |
|
42 | self.data_for_rev(file_rev) | |
|
44 | self.data_for_rev(file_rev.into()) | |
|
43 | 45 | } |
|
44 | 46 | |
|
45 | 47 | /// The given revision is that of the file as found in a filelog, not of a |
|
46 | 48 | /// changeset. |
|
47 | 49 | pub fn data_for_rev( |
|
48 | 50 | &self, |
|
49 | file_rev: Revision, | |
|
51 | file_rev: UncheckedRevision, | |
|
50 | 52 | ) -> Result<FilelogRevisionData, RevlogError> { |
|
51 | 53 | let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned(); |
|
52 | 54 | Ok(FilelogRevisionData(data)) |
|
53 | 55 | } |
|
54 | 56 | |
|
55 | 57 | /// The given node ID is that of the file as found in a filelog, not of a |
|
56 | 58 | /// changeset. |
|
57 | 59 | pub fn entry_for_node( |
|
58 | 60 | &self, |
|
59 | 61 | file_node: impl Into<NodePrefix>, |
|
60 | 62 | ) -> Result<FilelogEntry, RevlogError> { |
|
61 | 63 | let file_rev = self.revlog.rev_from_node(file_node.into())?; |
|
62 | self.entry_for_rev(file_rev) | |
|
64 | self.entry_for_checked_rev(file_rev) | |
|
63 | 65 | } |
|
64 | 66 | |
|
65 | 67 | /// The given revision is that of the file as found in a filelog, not of a |
|
66 | 68 | /// changeset. |
|
67 | 69 | pub fn entry_for_rev( |
|
68 | 70 | &self, |
|
71 | file_rev: UncheckedRevision, | |
|
72 | ) -> Result<FilelogEntry, RevlogError> { | |
|
73 | Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) | |
|
74 | } | |
|
75 | ||
|
76 | fn entry_for_checked_rev( | |
|
77 | &self, | |
|
69 | 78 | file_rev: Revision, |
|
70 | 79 | ) -> Result<FilelogEntry, RevlogError> { |
|
71 | Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) | |
|
80 | Ok(FilelogEntry( | |
|
81 | self.revlog.get_entry_for_checked_rev(file_rev)?, | |
|
82 | )) | |
|
72 | 83 | } |
|
73 | 84 | } |
|
74 | 85 | |
|
75 | 86 | fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { |
|
76 | 87 | let encoded_bytes = |
|
77 | 88 | path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); |
|
78 | 89 | get_path_from_bytes(&encoded_bytes).into() |
|
79 | 90 | } |
|
80 | 91 | |
|
81 | 92 | pub struct FilelogEntry<'a>(RevlogEntry<'a>); |
|
82 | 93 | |
|
83 | 94 | impl FilelogEntry<'_> { |
|
84 | 95 | /// `self.data()` can be expensive, with decompression and delta |
|
85 | 96 | /// resolution. |
|
86 | 97 | /// |
|
87 | 98 | /// *Without* paying this cost, based on revlog index information |
|
88 | 99 | /// including `RevlogEntry::uncompressed_len`: |
|
89 | 100 | /// |
|
90 | 101 | /// * Returns `true` if the length that `self.data().file_data().len()` |
|
91 | 102 | /// would return is definitely **not equal** to `other_len`. |
|
92 | 103 | /// * Returns `false` if available information is inconclusive. |
|
93 | 104 | pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool { |
|
94 | 105 | // Relevant code that implement this behavior in Python code: |
|
95 | 106 | // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied, |
|
96 | 107 | // revlog.size, revlog.rawsize |
|
97 | 108 | |
|
98 | 109 | // Let’s call `file_data_len` what would be returned by |
|
99 | 110 | // `self.data().file_data().len()`. |
|
100 | 111 | |
|
101 | 112 | if self.0.is_censored() { |
|
102 | 113 | let file_data_len = 0; |
|
103 | 114 | return other_len != file_data_len; |
|
104 | 115 | } |
|
105 | 116 | |
|
106 | 117 | if self.0.has_length_affecting_flag_processor() { |
|
107 | 118 | // We can’t conclude anything about `file_data_len`. |
|
108 | 119 | return false; |
|
109 | 120 | } |
|
110 | 121 | |
|
111 | 122 | // Revlog revisions (usually) have metadata for the size of |
|
112 | 123 | // their data after decompression and delta resolution |
|
113 | 124 | // as would be returned by `Revlog::get_rev_data`. |
|
114 | 125 | // |
|
115 | 126 | // For filelogs this is the file’s contents preceded by an optional |
|
116 | 127 | // metadata block. |
|
117 | 128 | let uncompressed_len = if let Some(l) = self.0.uncompressed_len() { |
|
118 | 129 | l as u64 |
|
119 | 130 | } else { |
|
120 | 131 | // The field was set to -1, the actual uncompressed len is unknown. |
|
121 | 132 | // We need to decompress to say more. |
|
122 | 133 | return false; |
|
123 | 134 | }; |
|
124 | 135 | // `uncompressed_len = file_data_len + optional_metadata_len`, |
|
125 | 136 | // so `file_data_len <= uncompressed_len`. |
|
126 | 137 | if uncompressed_len < other_len { |
|
127 | 138 | // Transitively, `file_data_len < other_len`. |
|
128 | 139 | // So `other_len != file_data_len` definitely. |
|
129 | 140 | return true; |
|
130 | 141 | } |
|
131 | 142 | |
|
132 | 143 | if uncompressed_len == other_len + 4 { |
|
133 | 144 | // It’s possible that `file_data_len == other_len` with an empty |
|
134 | 145 | // metadata block (2 start marker bytes + 2 end marker bytes). |
|
135 | 146 | // This happens when there wouldn’t otherwise be metadata, but |
|
136 | 147 | // the first 2 bytes of file data happen to match a start marker |
|
137 | 148 | // and would be ambiguous. |
|
138 | 149 | return false; |
|
139 | 150 | } |
|
140 | 151 | |
|
141 | 152 | if !self.0.has_p1() { |
|
142 | 153 | // There may or may not be copy metadata, so we can’t deduce more |
|
143 | 154 | // about `file_data_len` without computing file data. |
|
144 | 155 | return false; |
|
145 | 156 | } |
|
146 | 157 | |
|
147 | 158 | // Filelog ancestry is not meaningful in the way changelog ancestry is. |
|
148 | 159 | // It only provides hints to delta generation. |
|
149 | 160 | // p1 and p2 are set to null when making a copy or rename since |
|
150 | 161 | // contents are likely unrelatedto what might have previously existed |
|
151 | 162 | // at the destination path. |
|
152 | 163 | // |
|
153 | 164 | // Conversely, since here p1 is non-null, there is no copy metadata. |
|
154 | 165 | // Note that this reasoning may be invalidated in the presence of |
|
155 | 166 | // merges made by some previous versions of Mercurial that |
|
156 | 167 | // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528> |
|
157 | 168 | // and `tests/test-issue6528.t`. |
|
158 | 169 | // |
|
159 | 170 | // Since copy metadata is currently the only kind of metadata |
|
160 | 171 | // kept in revlog data of filelogs, |
|
161 | 172 | // this `FilelogEntry` does not have such metadata: |
|
162 | 173 | let file_data_len = uncompressed_len; |
|
163 | 174 | |
|
164 | 175 | file_data_len != other_len |
|
165 | 176 | } |
|
166 | 177 | |
|
167 | 178 | pub fn data(&self) -> Result<FilelogRevisionData, HgError> { |
|
168 | Ok(FilelogRevisionData(self.0.data()?.into_owned())) | |
|
179 | let data = self.0.data(); | |
|
180 | match data { | |
|
181 | Ok(data) => Ok(FilelogRevisionData(data.into_owned())), | |
|
182 | // Errors other than `HgError` should not happen at this point | |
|
183 | Err(e) => match e { | |
|
184 | RevlogError::Other(hg_error) => Err(hg_error), | |
|
185 | revlog_error => Err(HgError::abort( | |
|
186 | revlog_error.to_string(), | |
|
187 | exit_codes::ABORT, | |
|
188 | None, | |
|
189 | )), | |
|
190 | }, | |
|
191 | } | |
|
169 | 192 | } |
|
170 | 193 | } |
|
171 | 194 | |
|
172 | 195 | /// The data for one revision in a filelog, uncompressed and delta-resolved. |
|
173 | 196 | pub struct FilelogRevisionData(Vec<u8>); |
|
174 | 197 | |
|
175 | 198 | impl FilelogRevisionData { |
|
176 | 199 | /// Split into metadata and data |
|
177 | 200 | pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> { |
|
178 | 201 | const DELIMITER: &[u8; 2] = &[b'\x01', b'\n']; |
|
179 | 202 | |
|
180 | 203 | if let Some(rest) = self.0.drop_prefix(DELIMITER) { |
|
181 | 204 | if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) { |
|
182 | 205 | Ok((Some(metadata), data)) |
|
183 | 206 | } else { |
|
184 | 207 | Err(HgError::corrupted( |
|
185 | 208 | "Missing metadata end delimiter in filelog entry", |
|
186 | 209 | )) |
|
187 | 210 | } |
|
188 | 211 | } else { |
|
189 | 212 | Ok((None, &self.0)) |
|
190 | 213 | } |
|
191 | 214 | } |
|
192 | 215 | |
|
193 | 216 | /// Returns the file contents at this revision, stripped of any metadata |
|
194 | 217 | pub fn file_data(&self) -> Result<&[u8], HgError> { |
|
195 | 218 | let (_metadata, data) = self.split()?; |
|
196 | 219 | Ok(data) |
|
197 | 220 | } |
|
198 | 221 | |
|
199 | 222 | /// Consume the entry, and convert it into data, discarding any metadata, |
|
200 | 223 | /// if present. |
|
201 | 224 | pub fn into_file_data(self) -> Result<Vec<u8>, HgError> { |
|
202 | 225 | if let (Some(_metadata), data) = self.split()? { |
|
203 | 226 | Ok(data.to_owned()) |
|
204 | 227 | } else { |
|
205 | 228 | Ok(self.0) |
|
206 | 229 | } |
|
207 | 230 | } |
|
208 | 231 | } |
@@ -1,615 +1,622 b'' | |||
|
1 | use std::fmt::Debug; | |
|
1 | 2 | use std::ops::Deref; |
|
2 | 3 | |
|
3 | 4 | use byteorder::{BigEndian, ByteOrder}; |
|
4 | 5 | |
|
5 | 6 | use crate::errors::HgError; |
|
6 | 7 | use crate::revlog::node::Node; |
|
7 | 8 | use crate::revlog::{Revision, NULL_REVISION}; |
|
9 | use crate::UncheckedRevision; | |
|
8 | 10 | |
|
9 | 11 | pub const INDEX_ENTRY_SIZE: usize = 64; |
|
10 | 12 | |
|
11 | 13 | pub struct IndexHeader { |
|
12 | 14 | header_bytes: [u8; 4], |
|
13 | 15 | } |
|
14 | 16 | |
|
15 | 17 | #[derive(Copy, Clone)] |
|
16 | 18 | pub struct IndexHeaderFlags { |
|
17 | 19 | flags: u16, |
|
18 | 20 | } |
|
19 | 21 | |
|
20 | 22 | /// Corresponds to the high bits of `_format_flags` in python |
|
21 | 23 | impl IndexHeaderFlags { |
|
22 | 24 | /// Corresponds to FLAG_INLINE_DATA in python |
|
23 | 25 | pub fn is_inline(self) -> bool { |
|
24 | 26 | self.flags & 1 != 0 |
|
25 | 27 | } |
|
26 | 28 | /// Corresponds to FLAG_GENERALDELTA in python |
|
27 | 29 | pub fn uses_generaldelta(self) -> bool { |
|
28 | 30 | self.flags & 2 != 0 |
|
29 | 31 | } |
|
30 | 32 | } |
|
31 | 33 | |
|
32 | 34 | /// Corresponds to the INDEX_HEADER structure, |
|
33 | 35 | /// which is parsed as a `header` variable in `_loadindex` in `revlog.py` |
|
34 | 36 | impl IndexHeader { |
|
35 | 37 | fn format_flags(&self) -> IndexHeaderFlags { |
|
36 | 38 | // No "unknown flags" check here, unlike in python. Maybe there should |
|
37 | 39 | // be. |
|
38 | 40 | IndexHeaderFlags { |
|
39 | 41 | flags: BigEndian::read_u16(&self.header_bytes[0..2]), |
|
40 | 42 | } |
|
41 | 43 | } |
|
42 | 44 | |
|
43 | 45 | /// The only revlog version currently supported by rhg. |
|
44 | 46 | const REVLOGV1: u16 = 1; |
|
45 | 47 | |
|
46 | 48 | /// Corresponds to `_format_version` in Python. |
|
47 | 49 | fn format_version(&self) -> u16 { |
|
48 | 50 | BigEndian::read_u16(&self.header_bytes[2..4]) |
|
49 | 51 | } |
|
50 | 52 | |
|
51 | 53 | const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader { |
|
52 | 54 | // We treat an empty file as a valid index with no entries. |
|
53 | 55 | // Here we make an arbitrary choice of what we assume the format of the |
|
54 | 56 | // index to be (V1, using generaldelta). |
|
55 | 57 | // This doesn't matter too much, since we're only doing read-only |
|
56 | 58 | // access. but the value corresponds to the `new_header` variable in |
|
57 | 59 | // `revlog.py`, `_loadindex` |
|
58 | 60 | header_bytes: [0, 3, 0, 1], |
|
59 | 61 | }; |
|
60 | 62 | |
|
61 | 63 | fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> { |
|
62 | 64 | if index_bytes.is_empty() { |
|
63 | 65 | return Ok(IndexHeader::EMPTY_INDEX_HEADER); |
|
64 | 66 | } |
|
65 | 67 | if index_bytes.len() < 4 { |
|
66 | 68 | return Err(HgError::corrupted( |
|
67 | 69 | "corrupted revlog: can't read the index format header", |
|
68 | 70 | )); |
|
69 | 71 | } |
|
70 | 72 | Ok(IndexHeader { |
|
71 | 73 | header_bytes: { |
|
72 | 74 | let bytes: [u8; 4] = |
|
73 | 75 | index_bytes[0..4].try_into().expect("impossible"); |
|
74 | 76 | bytes |
|
75 | 77 | }, |
|
76 | 78 | }) |
|
77 | 79 | } |
|
78 | 80 | } |
|
79 | 81 | |
|
80 | 82 | /// A Revlog index |
|
81 | 83 | pub struct Index { |
|
82 | 84 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
83 | 85 | /// Offsets of starts of index blocks. |
|
84 | 86 | /// Only needed when the index is interleaved with data. |
|
85 | 87 | offsets: Option<Vec<usize>>, |
|
86 | 88 | uses_generaldelta: bool, |
|
87 | 89 | } |
|
88 | 90 | |
|
91 | impl Debug for Index { | |
|
92 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
|
93 | f.debug_struct("Index") | |
|
94 | .field("offsets", &self.offsets) | |
|
95 | .field("uses_generaldelta", &self.uses_generaldelta) | |
|
96 | .finish() | |
|
97 | } | |
|
98 | } | |
|
99 | ||
|
89 | 100 | impl Index { |
|
90 | 101 | /// Create an index from bytes. |
|
91 | 102 | /// Calculate the start of each entry when is_inline is true. |
|
92 | 103 | pub fn new( |
|
93 | 104 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
94 | 105 | ) -> Result<Self, HgError> { |
|
95 | 106 | let header = IndexHeader::parse(bytes.as_ref())?; |
|
96 | 107 | |
|
97 | 108 | if header.format_version() != IndexHeader::REVLOGV1 { |
|
98 | 109 | // A proper new version should have had a repo/store |
|
99 | 110 | // requirement. |
|
100 | 111 | return Err(HgError::corrupted("unsupported revlog version")); |
|
101 | 112 | } |
|
102 | 113 | |
|
103 | 114 | // This is only correct because we know version is REVLOGV1. |
|
104 | 115 | // In v2 we always use generaldelta, while in v0 we never use |
|
105 | 116 | // generaldelta. Similar for [is_inline] (it's only used in v1). |
|
106 | 117 | let uses_generaldelta = header.format_flags().uses_generaldelta(); |
|
107 | 118 | |
|
108 | 119 | if header.format_flags().is_inline() { |
|
109 | 120 | let mut offset: usize = 0; |
|
110 | 121 | let mut offsets = Vec::new(); |
|
111 | 122 | |
|
112 | 123 | while offset + INDEX_ENTRY_SIZE <= bytes.len() { |
|
113 | 124 | offsets.push(offset); |
|
114 | 125 | let end = offset + INDEX_ENTRY_SIZE; |
|
115 | 126 | let entry = IndexEntry { |
|
116 | 127 | bytes: &bytes[offset..end], |
|
117 | 128 | offset_override: None, |
|
118 | 129 | }; |
|
119 | 130 | |
|
120 | 131 | offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize; |
|
121 | 132 | } |
|
122 | 133 | |
|
123 | 134 | if offset == bytes.len() { |
|
124 | 135 | Ok(Self { |
|
125 | 136 | bytes, |
|
126 | 137 | offsets: Some(offsets), |
|
127 | 138 | uses_generaldelta, |
|
128 | 139 | }) |
|
129 | 140 | } else { |
|
130 | 141 | Err(HgError::corrupted("unexpected inline revlog length")) |
|
131 | 142 | } |
|
132 | 143 | } else { |
|
133 | 144 | Ok(Self { |
|
134 | 145 | bytes, |
|
135 | 146 | offsets: None, |
|
136 | 147 | uses_generaldelta, |
|
137 | 148 | }) |
|
138 | 149 | } |
|
139 | 150 | } |
|
140 | 151 | |
|
141 | 152 | pub fn uses_generaldelta(&self) -> bool { |
|
142 | 153 | self.uses_generaldelta |
|
143 | 154 | } |
|
144 | 155 | |
|
145 | 156 | /// Value of the inline flag. |
|
146 | 157 | pub fn is_inline(&self) -> bool { |
|
147 | 158 | self.offsets.is_some() |
|
148 | 159 | } |
|
149 | 160 | |
|
150 | 161 | /// Return a slice of bytes if `revlog` is inline. Panic if not. |
|
151 | 162 | pub fn data(&self, start: usize, end: usize) -> &[u8] { |
|
152 | 163 | if !self.is_inline() { |
|
153 | 164 | panic!("tried to access data in the index of a revlog that is not inline"); |
|
154 | 165 | } |
|
155 | 166 | &self.bytes[start..end] |
|
156 | 167 | } |
|
157 | 168 | |
|
158 | 169 | /// Return number of entries of the revlog index. |
|
159 | 170 | pub fn len(&self) -> usize { |
|
160 | 171 | if let Some(offsets) = &self.offsets { |
|
161 | 172 | offsets.len() |
|
162 | 173 | } else { |
|
163 | 174 | self.bytes.len() / INDEX_ENTRY_SIZE |
|
164 | 175 | } |
|
165 | 176 | } |
|
166 | 177 | |
|
167 | 178 | /// Returns `true` if the `Index` has zero `entries`. |
|
168 | 179 | pub fn is_empty(&self) -> bool { |
|
169 | 180 | self.len() == 0 |
|
170 | 181 | } |
|
171 | 182 | |
|
172 | 183 | /// Return the index entry corresponding to the given revision if it |
|
173 | 184 | /// exists. |
|
174 | 185 | pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> { |
|
175 | 186 | if rev == NULL_REVISION { |
|
176 | 187 | return None; |
|
177 | 188 | } |
|
178 | if let Some(offsets) = &self.offsets { | |
|
189 | Some(if let Some(offsets) = &self.offsets { | |
|
179 | 190 | self.get_entry_inline(rev, offsets) |
|
180 | 191 | } else { |
|
181 | 192 | self.get_entry_separated(rev) |
|
182 | } | |
|
193 | }) | |
|
183 | 194 | } |
|
184 | 195 | |
|
185 | 196 | fn get_entry_inline( |
|
186 | 197 | &self, |
|
187 | 198 | rev: Revision, |
|
188 | 199 | offsets: &[usize], |
|
189 |
) -> |
|
|
190 |
let start = |
|
|
191 |
let end = start |
|
|
200 | ) -> IndexEntry { | |
|
201 | let start = offsets[rev as usize]; | |
|
202 | let end = start + INDEX_ENTRY_SIZE; | |
|
192 | 203 | let bytes = &self.bytes[start..end]; |
|
193 | 204 | |
|
194 | 205 | // See IndexEntry for an explanation of this override. |
|
195 | 206 | let offset_override = Some(end); |
|
196 | 207 | |
|
197 |
|
|
|
208 | IndexEntry { | |
|
198 | 209 | bytes, |
|
199 | 210 | offset_override, |
|
200 |
} |
|
|
211 | } | |
|
201 | 212 | } |
|
202 | 213 | |
|
203 |
fn get_entry_separated(&self, rev: Revision) -> |
|
|
204 | let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE; | |
|
205 | if rev as usize >= max_rev { | |
|
206 | return None; | |
|
207 | } | |
|
214 | fn get_entry_separated(&self, rev: Revision) -> IndexEntry { | |
|
208 | 215 | let start = rev as usize * INDEX_ENTRY_SIZE; |
|
209 | 216 | let end = start + INDEX_ENTRY_SIZE; |
|
210 | 217 | let bytes = &self.bytes[start..end]; |
|
211 | 218 | |
|
212 | 219 | // Override the offset of the first revision as its bytes are used |
|
213 | 220 | // for the index's metadata (saving space because it is always 0) |
|
214 | 221 | let offset_override = if rev == 0 { Some(0) } else { None }; |
|
215 | 222 | |
|
216 |
|
|
|
223 | IndexEntry { | |
|
217 | 224 | bytes, |
|
218 | 225 | offset_override, |
|
219 |
} |
|
|
226 | } | |
|
220 | 227 | } |
|
221 | 228 | } |
|
222 | 229 | |
|
223 | 230 | impl super::RevlogIndex for Index { |
|
224 | 231 | fn len(&self) -> usize { |
|
225 | 232 | self.len() |
|
226 | 233 | } |
|
227 | 234 | |
|
228 | 235 | fn node(&self, rev: Revision) -> Option<&Node> { |
|
229 | 236 | self.get_entry(rev).map(|entry| entry.hash()) |
|
230 | 237 | } |
|
231 | 238 | } |
|
232 | 239 | |
|
233 | 240 | #[derive(Debug)] |
|
234 | 241 | pub struct IndexEntry<'a> { |
|
235 | 242 | bytes: &'a [u8], |
|
236 | 243 | /// Allows to override the offset value of the entry. |
|
237 | 244 | /// |
|
238 | 245 | /// For interleaved index and data, the offset stored in the index |
|
239 | 246 | /// corresponds to the separated data offset. |
|
240 | 247 | /// It has to be overridden with the actual offset in the interleaved |
|
241 | 248 | /// index which is just after the index block. |
|
242 | 249 | /// |
|
243 | 250 | /// For separated index and data, the offset stored in the first index |
|
244 | 251 | /// entry is mixed with the index headers. |
|
245 | 252 | /// It has to be overridden with 0. |
|
246 | 253 | offset_override: Option<usize>, |
|
247 | 254 | } |
|
248 | 255 | |
|
249 | 256 | impl<'a> IndexEntry<'a> { |
|
250 | 257 | /// Return the offset of the data. |
|
251 | 258 | pub fn offset(&self) -> usize { |
|
252 | 259 | if let Some(offset_override) = self.offset_override { |
|
253 | 260 | offset_override |
|
254 | 261 | } else { |
|
255 | 262 | let mut bytes = [0; 8]; |
|
256 | 263 | bytes[2..8].copy_from_slice(&self.bytes[0..=5]); |
|
257 | 264 | BigEndian::read_u64(&bytes[..]) as usize |
|
258 | 265 | } |
|
259 | 266 | } |
|
260 | 267 | |
|
261 | 268 | pub fn flags(&self) -> u16 { |
|
262 | 269 | BigEndian::read_u16(&self.bytes[6..=7]) |
|
263 | 270 | } |
|
264 | 271 | |
|
265 | 272 | /// Return the compressed length of the data. |
|
266 | 273 | pub fn compressed_len(&self) -> u32 { |
|
267 | 274 | BigEndian::read_u32(&self.bytes[8..=11]) |
|
268 | 275 | } |
|
269 | 276 | |
|
270 | 277 | /// Return the uncompressed length of the data. |
|
271 | 278 | pub fn uncompressed_len(&self) -> i32 { |
|
272 | 279 | BigEndian::read_i32(&self.bytes[12..=15]) |
|
273 | 280 | } |
|
274 | 281 | |
|
275 | 282 | /// Return the revision upon which the data has been derived. |
|
276 | pub fn base_revision_or_base_of_delta_chain(&self) -> Revision { | |
|
283 | pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision { | |
|
277 | 284 | // TODO Maybe return an Option when base_revision == rev? |
|
278 | 285 | // Requires to add rev to IndexEntry |
|
279 | 286 | |
|
280 | BigEndian::read_i32(&self.bytes[16..]) | |
|
287 | BigEndian::read_i32(&self.bytes[16..]).into() | |
|
281 | 288 | } |
|
282 | 289 | |
|
283 | pub fn link_revision(&self) -> Revision { | |
|
284 | BigEndian::read_i32(&self.bytes[20..]) | |
|
290 | pub fn link_revision(&self) -> UncheckedRevision { | |
|
291 | BigEndian::read_i32(&self.bytes[20..]).into() | |
|
285 | 292 | } |
|
286 | 293 | |
|
287 | pub fn p1(&self) -> Revision { | |
|
288 | BigEndian::read_i32(&self.bytes[24..]) | |
|
294 | pub fn p1(&self) -> UncheckedRevision { | |
|
295 | BigEndian::read_i32(&self.bytes[24..]).into() | |
|
289 | 296 | } |
|
290 | 297 | |
|
291 | pub fn p2(&self) -> Revision { | |
|
292 | BigEndian::read_i32(&self.bytes[28..]) | |
|
298 | pub fn p2(&self) -> UncheckedRevision { | |
|
299 | BigEndian::read_i32(&self.bytes[28..]).into() | |
|
293 | 300 | } |
|
294 | 301 | |
|
295 | 302 | /// Return the hash of revision's full text. |
|
296 | 303 | /// |
|
297 | 304 | /// Currently, SHA-1 is used and only the first 20 bytes of this field |
|
298 | 305 | /// are used. |
|
299 | 306 | pub fn hash(&self) -> &'a Node { |
|
300 | 307 | (&self.bytes[32..52]).try_into().unwrap() |
|
301 | 308 | } |
|
302 | 309 | } |
|
303 | 310 | |
|
304 | 311 | #[cfg(test)] |
|
305 | 312 | mod tests { |
|
306 | 313 | use super::*; |
|
307 | 314 | use crate::node::NULL_NODE; |
|
308 | 315 | |
|
309 | 316 | #[cfg(test)] |
|
310 | 317 | #[derive(Debug, Copy, Clone)] |
|
311 | 318 | pub struct IndexEntryBuilder { |
|
312 | 319 | is_first: bool, |
|
313 | 320 | is_inline: bool, |
|
314 | 321 | is_general_delta: bool, |
|
315 | 322 | version: u16, |
|
316 | 323 | offset: usize, |
|
317 | 324 | compressed_len: usize, |
|
318 | 325 | uncompressed_len: usize, |
|
319 | 326 | base_revision_or_base_of_delta_chain: Revision, |
|
320 | 327 | link_revision: Revision, |
|
321 | 328 | p1: Revision, |
|
322 | 329 | p2: Revision, |
|
323 | 330 | node: Node, |
|
324 | 331 | } |
|
325 | 332 | |
|
326 | 333 | #[cfg(test)] |
|
327 | 334 | impl IndexEntryBuilder { |
|
328 | 335 | #[allow(clippy::new_without_default)] |
|
329 | 336 | pub fn new() -> Self { |
|
330 | 337 | Self { |
|
331 | 338 | is_first: false, |
|
332 | 339 | is_inline: false, |
|
333 | 340 | is_general_delta: true, |
|
334 | 341 | version: 1, |
|
335 | 342 | offset: 0, |
|
336 | 343 | compressed_len: 0, |
|
337 | 344 | uncompressed_len: 0, |
|
338 | 345 | base_revision_or_base_of_delta_chain: 0, |
|
339 | 346 | link_revision: 0, |
|
340 | 347 | p1: NULL_REVISION, |
|
341 | 348 | p2: NULL_REVISION, |
|
342 | 349 | node: NULL_NODE, |
|
343 | 350 | } |
|
344 | 351 | } |
|
345 | 352 | |
|
346 | 353 | pub fn is_first(&mut self, value: bool) -> &mut Self { |
|
347 | 354 | self.is_first = value; |
|
348 | 355 | self |
|
349 | 356 | } |
|
350 | 357 | |
|
351 | 358 | pub fn with_inline(&mut self, value: bool) -> &mut Self { |
|
352 | 359 | self.is_inline = value; |
|
353 | 360 | self |
|
354 | 361 | } |
|
355 | 362 | |
|
356 | 363 | pub fn with_general_delta(&mut self, value: bool) -> &mut Self { |
|
357 | 364 | self.is_general_delta = value; |
|
358 | 365 | self |
|
359 | 366 | } |
|
360 | 367 | |
|
361 | 368 | pub fn with_version(&mut self, value: u16) -> &mut Self { |
|
362 | 369 | self.version = value; |
|
363 | 370 | self |
|
364 | 371 | } |
|
365 | 372 | |
|
366 | 373 | pub fn with_offset(&mut self, value: usize) -> &mut Self { |
|
367 | 374 | self.offset = value; |
|
368 | 375 | self |
|
369 | 376 | } |
|
370 | 377 | |
|
371 | 378 | pub fn with_compressed_len(&mut self, value: usize) -> &mut Self { |
|
372 | 379 | self.compressed_len = value; |
|
373 | 380 | self |
|
374 | 381 | } |
|
375 | 382 | |
|
376 | 383 | pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self { |
|
377 | 384 | self.uncompressed_len = value; |
|
378 | 385 | self |
|
379 | 386 | } |
|
380 | 387 | |
|
381 | 388 | pub fn with_base_revision_or_base_of_delta_chain( |
|
382 | 389 | &mut self, |
|
383 | 390 | value: Revision, |
|
384 | 391 | ) -> &mut Self { |
|
385 | 392 | self.base_revision_or_base_of_delta_chain = value; |
|
386 | 393 | self |
|
387 | 394 | } |
|
388 | 395 | |
|
389 | 396 | pub fn with_link_revision(&mut self, value: Revision) -> &mut Self { |
|
390 | 397 | self.link_revision = value; |
|
391 | 398 | self |
|
392 | 399 | } |
|
393 | 400 | |
|
394 | 401 | pub fn with_p1(&mut self, value: Revision) -> &mut Self { |
|
395 | 402 | self.p1 = value; |
|
396 | 403 | self |
|
397 | 404 | } |
|
398 | 405 | |
|
399 | 406 | pub fn with_p2(&mut self, value: Revision) -> &mut Self { |
|
400 | 407 | self.p2 = value; |
|
401 | 408 | self |
|
402 | 409 | } |
|
403 | 410 | |
|
404 | 411 | pub fn with_node(&mut self, value: Node) -> &mut Self { |
|
405 | 412 | self.node = value; |
|
406 | 413 | self |
|
407 | 414 | } |
|
408 | 415 | |
|
409 | 416 | pub fn build(&self) -> Vec<u8> { |
|
410 | 417 | let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); |
|
411 | 418 | if self.is_first { |
|
412 | 419 | bytes.extend(&match (self.is_general_delta, self.is_inline) { |
|
413 | 420 | (false, false) => [0u8, 0], |
|
414 | 421 | (false, true) => [0u8, 1], |
|
415 | 422 | (true, false) => [0u8, 2], |
|
416 | 423 | (true, true) => [0u8, 3], |
|
417 | 424 | }); |
|
418 | 425 | bytes.extend(&self.version.to_be_bytes()); |
|
419 | 426 | // Remaining offset bytes. |
|
420 | 427 | bytes.extend(&[0u8; 2]); |
|
421 | 428 | } else { |
|
422 | 429 | // Offset stored on 48 bits (6 bytes) |
|
423 | 430 | bytes.extend(&(self.offset as u64).to_be_bytes()[2..]); |
|
424 | 431 | } |
|
425 | 432 | bytes.extend(&[0u8; 2]); // Revision flags. |
|
426 | 433 | bytes.extend(&(self.compressed_len as u32).to_be_bytes()); |
|
427 | 434 | bytes.extend(&(self.uncompressed_len as u32).to_be_bytes()); |
|
428 | 435 | bytes.extend( |
|
429 | 436 | &self.base_revision_or_base_of_delta_chain.to_be_bytes(), |
|
430 | 437 | ); |
|
431 | 438 | bytes.extend(&self.link_revision.to_be_bytes()); |
|
432 | 439 | bytes.extend(&self.p1.to_be_bytes()); |
|
433 | 440 | bytes.extend(&self.p2.to_be_bytes()); |
|
434 | 441 | bytes.extend(self.node.as_bytes()); |
|
435 | 442 | bytes.extend(vec![0u8; 12]); |
|
436 | 443 | bytes |
|
437 | 444 | } |
|
438 | 445 | } |
|
439 | 446 | |
|
440 | 447 | pub fn is_inline(index_bytes: &[u8]) -> bool { |
|
441 | 448 | IndexHeader::parse(index_bytes) |
|
442 | 449 | .expect("too short") |
|
443 | 450 | .format_flags() |
|
444 | 451 | .is_inline() |
|
445 | 452 | } |
|
446 | 453 | |
|
447 | 454 | pub fn uses_generaldelta(index_bytes: &[u8]) -> bool { |
|
448 | 455 | IndexHeader::parse(index_bytes) |
|
449 | 456 | .expect("too short") |
|
450 | 457 | .format_flags() |
|
451 | 458 | .uses_generaldelta() |
|
452 | 459 | } |
|
453 | 460 | |
|
454 | 461 | pub fn get_version(index_bytes: &[u8]) -> u16 { |
|
455 | 462 | IndexHeader::parse(index_bytes) |
|
456 | 463 | .expect("too short") |
|
457 | 464 | .format_version() |
|
458 | 465 | } |
|
459 | 466 | |
|
460 | 467 | #[test] |
|
461 | 468 | fn flags_when_no_inline_flag_test() { |
|
462 | 469 | let bytes = IndexEntryBuilder::new() |
|
463 | 470 | .is_first(true) |
|
464 | 471 | .with_general_delta(false) |
|
465 | 472 | .with_inline(false) |
|
466 | 473 | .build(); |
|
467 | 474 | |
|
468 | 475 | assert!(!is_inline(&bytes)); |
|
469 | 476 | assert!(!uses_generaldelta(&bytes)); |
|
470 | 477 | } |
|
471 | 478 | |
|
472 | 479 | #[test] |
|
473 | 480 | fn flags_when_inline_flag_test() { |
|
474 | 481 | let bytes = IndexEntryBuilder::new() |
|
475 | 482 | .is_first(true) |
|
476 | 483 | .with_general_delta(false) |
|
477 | 484 | .with_inline(true) |
|
478 | 485 | .build(); |
|
479 | 486 | |
|
480 | 487 | assert!(is_inline(&bytes)); |
|
481 | 488 | assert!(!uses_generaldelta(&bytes)); |
|
482 | 489 | } |
|
483 | 490 | |
|
484 | 491 | #[test] |
|
485 | 492 | fn flags_when_inline_and_generaldelta_flags_test() { |
|
486 | 493 | let bytes = IndexEntryBuilder::new() |
|
487 | 494 | .is_first(true) |
|
488 | 495 | .with_general_delta(true) |
|
489 | 496 | .with_inline(true) |
|
490 | 497 | .build(); |
|
491 | 498 | |
|
492 | 499 | assert!(is_inline(&bytes)); |
|
493 | 500 | assert!(uses_generaldelta(&bytes)); |
|
494 | 501 | } |
|
495 | 502 | |
|
496 | 503 | #[test] |
|
497 | 504 | fn test_offset() { |
|
498 | 505 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); |
|
499 | 506 | let entry = IndexEntry { |
|
500 | 507 | bytes: &bytes, |
|
501 | 508 | offset_override: None, |
|
502 | 509 | }; |
|
503 | 510 | |
|
504 | 511 | assert_eq!(entry.offset(), 1) |
|
505 | 512 | } |
|
506 | 513 | |
|
507 | 514 | #[test] |
|
508 | 515 | fn test_with_overridden_offset() { |
|
509 | 516 | let bytes = IndexEntryBuilder::new().with_offset(1).build(); |
|
510 | 517 | let entry = IndexEntry { |
|
511 | 518 | bytes: &bytes, |
|
512 | 519 | offset_override: Some(2), |
|
513 | 520 | }; |
|
514 | 521 | |
|
515 | 522 | assert_eq!(entry.offset(), 2) |
|
516 | 523 | } |
|
517 | 524 | |
|
518 | 525 | #[test] |
|
519 | 526 | fn test_compressed_len() { |
|
520 | 527 | let bytes = IndexEntryBuilder::new().with_compressed_len(1).build(); |
|
521 | 528 | let entry = IndexEntry { |
|
522 | 529 | bytes: &bytes, |
|
523 | 530 | offset_override: None, |
|
524 | 531 | }; |
|
525 | 532 | |
|
526 | 533 | assert_eq!(entry.compressed_len(), 1) |
|
527 | 534 | } |
|
528 | 535 | |
|
529 | 536 | #[test] |
|
530 | 537 | fn test_uncompressed_len() { |
|
531 | 538 | let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build(); |
|
532 | 539 | let entry = IndexEntry { |
|
533 | 540 | bytes: &bytes, |
|
534 | 541 | offset_override: None, |
|
535 | 542 | }; |
|
536 | 543 | |
|
537 | 544 | assert_eq!(entry.uncompressed_len(), 1) |
|
538 | 545 | } |
|
539 | 546 | |
|
540 | 547 | #[test] |
|
541 | 548 | fn test_base_revision_or_base_of_delta_chain() { |
|
542 | 549 | let bytes = IndexEntryBuilder::new() |
|
543 | 550 | .with_base_revision_or_base_of_delta_chain(1) |
|
544 | 551 | .build(); |
|
545 | 552 | let entry = IndexEntry { |
|
546 | 553 | bytes: &bytes, |
|
547 | 554 | offset_override: None, |
|
548 | 555 | }; |
|
549 | 556 | |
|
550 | assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1) | |
|
557 | assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into()) | |
|
551 | 558 | } |
|
552 | 559 | |
|
553 | 560 | #[test] |
|
554 | 561 | fn link_revision_test() { |
|
555 | 562 | let bytes = IndexEntryBuilder::new().with_link_revision(123).build(); |
|
556 | 563 | |
|
557 | 564 | let entry = IndexEntry { |
|
558 | 565 | bytes: &bytes, |
|
559 | 566 | offset_override: None, |
|
560 | 567 | }; |
|
561 | 568 | |
|
562 | assert_eq!(entry.link_revision(), 123); | |
|
569 | assert_eq!(entry.link_revision(), 123.into()); | |
|
563 | 570 | } |
|
564 | 571 | |
|
565 | 572 | #[test] |
|
566 | 573 | fn p1_test() { |
|
567 | 574 | let bytes = IndexEntryBuilder::new().with_p1(123).build(); |
|
568 | 575 | |
|
569 | 576 | let entry = IndexEntry { |
|
570 | 577 | bytes: &bytes, |
|
571 | 578 | offset_override: None, |
|
572 | 579 | }; |
|
573 | 580 | |
|
574 | assert_eq!(entry.p1(), 123); | |
|
581 | assert_eq!(entry.p1(), 123.into()); | |
|
575 | 582 | } |
|
576 | 583 | |
|
577 | 584 | #[test] |
|
578 | 585 | fn p2_test() { |
|
579 | 586 | let bytes = IndexEntryBuilder::new().with_p2(123).build(); |
|
580 | 587 | |
|
581 | 588 | let entry = IndexEntry { |
|
582 | 589 | bytes: &bytes, |
|
583 | 590 | offset_override: None, |
|
584 | 591 | }; |
|
585 | 592 | |
|
586 | assert_eq!(entry.p2(), 123); | |
|
593 | assert_eq!(entry.p2(), 123.into()); | |
|
587 | 594 | } |
|
588 | 595 | |
|
589 | 596 | #[test] |
|
590 | 597 | fn node_test() { |
|
591 | 598 | let node = Node::from_hex("0123456789012345678901234567890123456789") |
|
592 | 599 | .unwrap(); |
|
593 | 600 | let bytes = IndexEntryBuilder::new().with_node(node).build(); |
|
594 | 601 | |
|
595 | 602 | let entry = IndexEntry { |
|
596 | 603 | bytes: &bytes, |
|
597 | 604 | offset_override: None, |
|
598 | 605 | }; |
|
599 | 606 | |
|
600 | 607 | assert_eq!(*entry.hash(), node); |
|
601 | 608 | } |
|
602 | 609 | |
|
603 | 610 | #[test] |
|
604 | 611 | fn version_test() { |
|
605 | 612 | let bytes = IndexEntryBuilder::new() |
|
606 | 613 | .is_first(true) |
|
607 | 614 | .with_version(2) |
|
608 | 615 | .build(); |
|
609 | 616 | |
|
610 | 617 | assert_eq!(get_version(&bytes), 2) |
|
611 | 618 | } |
|
612 | 619 | } |
|
613 | 620 | |
|
614 | 621 | #[cfg(test)] |
|
615 | 622 | pub use tests::IndexEntryBuilder; |
@@ -1,194 +1,203 b'' | |||
|
1 | 1 | use crate::errors::HgError; |
|
2 | use crate::revlog::Revision; | |
|
3 | 2 | use crate::revlog::{Node, NodePrefix}; |
|
4 | 3 | use crate::revlog::{Revlog, RevlogError}; |
|
5 | 4 | use crate::utils::hg_path::HgPath; |
|
6 | 5 | use crate::utils::SliceExt; |
|
7 | 6 | use crate::vfs::Vfs; |
|
7 | use crate::{Revision, UncheckedRevision}; | |
|
8 | 8 | |
|
9 | 9 | /// A specialized `Revlog` to work with `manifest` data format. |
|
10 | 10 | pub struct Manifestlog { |
|
11 | 11 | /// The generic `revlog` format. |
|
12 | 12 | revlog: Revlog, |
|
13 | 13 | } |
|
14 | 14 | |
|
15 | 15 | impl Manifestlog { |
|
16 | 16 | /// Open the `manifest` of a repository given by its root. |
|
17 | 17 | pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { |
|
18 | 18 | let revlog = |
|
19 | 19 | Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?; |
|
20 | 20 | Ok(Self { revlog }) |
|
21 | 21 | } |
|
22 | 22 | |
|
23 | 23 | /// Return the `Manifest` for the given node ID. |
|
24 | 24 | /// |
|
25 | 25 | /// Note: this is a node ID in the manifestlog, typically found through |
|
26 | 26 | /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any |
|
27 | 27 | /// changeset. |
|
28 | 28 | /// |
|
29 | 29 | /// See also `Repo::manifest_for_node` |
|
30 | 30 | pub fn data_for_node( |
|
31 | 31 | &self, |
|
32 | 32 | node: NodePrefix, |
|
33 | 33 | ) -> Result<Manifest, RevlogError> { |
|
34 | 34 | let rev = self.revlog.rev_from_node(node)?; |
|
35 | self.data_for_rev(rev) | |
|
35 | self.data_for_checked_rev(rev) | |
|
36 | 36 | } |
|
37 | 37 | |
|
38 | 38 | /// Return the `Manifest` of a given revision number. |
|
39 | 39 | /// |
|
40 | 40 | /// Note: this is a revision number in the manifestlog, *not* of any |
|
41 | 41 | /// changeset. |
|
42 | 42 | /// |
|
43 | 43 | /// See also `Repo::manifest_for_rev` |
|
44 | 44 | pub fn data_for_rev( |
|
45 | 45 | &self, |
|
46 | rev: UncheckedRevision, | |
|
47 | ) -> Result<Manifest, RevlogError> { | |
|
48 | let bytes = self.revlog.get_rev_data(rev)?.into_owned(); | |
|
49 | Ok(Manifest { bytes }) | |
|
50 | } | |
|
51 | ||
|
52 | pub fn data_for_checked_rev( | |
|
53 | &self, | |
|
46 | 54 | rev: Revision, |
|
47 | 55 | ) -> Result<Manifest, RevlogError> { |
|
48 | let bytes = self.revlog.get_rev_data(rev)?.into_owned(); | |
|
56 | let bytes = | |
|
57 | self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned(); | |
|
49 | 58 | Ok(Manifest { bytes }) |
|
50 | 59 | } |
|
51 | 60 | } |
|
52 | 61 | |
|
53 | 62 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
|
54 | 63 | #[derive(Debug)] |
|
55 | 64 | pub struct Manifest { |
|
56 | 65 | /// Format for a manifest: flat sequence of variable-size entries, |
|
57 | 66 | /// sorted by path, each as: |
|
58 | 67 | /// |
|
59 | 68 | /// ```text |
|
60 | 69 | /// <path> \0 <hex_node_id> <flags> \n |
|
61 | 70 | /// ``` |
|
62 | 71 | /// |
|
63 | 72 | /// The last entry is also terminated by a newline character. |
|
64 | 73 | /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`. |
|
65 | 74 | bytes: Vec<u8>, |
|
66 | 75 | } |
|
67 | 76 | |
|
68 | 77 | impl Manifest { |
|
69 | 78 | pub fn iter( |
|
70 | 79 | &self, |
|
71 | 80 | ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> { |
|
72 | 81 | self.bytes |
|
73 | 82 | .split(|b| b == &b'\n') |
|
74 | 83 | .filter(|line| !line.is_empty()) |
|
75 | 84 | .map(ManifestEntry::from_raw) |
|
76 | 85 | } |
|
77 | 86 | |
|
78 | 87 | /// If the given path is in this manifest, return its filelog node ID |
|
79 | 88 | pub fn find_by_path( |
|
80 | 89 | &self, |
|
81 | 90 | path: &HgPath, |
|
82 | 91 | ) -> Result<Option<ManifestEntry>, HgError> { |
|
83 | 92 | use std::cmp::Ordering::*; |
|
84 | 93 | let path = path.as_bytes(); |
|
85 | 94 | // Both boundaries of this `&[u8]` slice are always at the boundary of |
|
86 | 95 | // an entry |
|
87 | 96 | let mut bytes = &*self.bytes; |
|
88 | 97 | |
|
89 | 98 | // Binary search algorithm derived from `[T]::binary_search_by` |
|
90 | 99 | // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221> |
|
91 | 100 | // except we don’t have a slice of entries. Instead we jump to the |
|
92 | 101 | // middle of the byte slice and look around for entry delimiters |
|
93 | 102 | // (newlines). |
|
94 | 103 | while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? { |
|
95 | 104 | let (entry_path, rest) = |
|
96 | 105 | ManifestEntry::split_path(&bytes[entry_range.clone()])?; |
|
97 | 106 | let cmp = entry_path.cmp(path); |
|
98 | 107 | if cmp == Less { |
|
99 | 108 | let after_newline = entry_range.end + 1; |
|
100 | 109 | bytes = &bytes[after_newline..]; |
|
101 | 110 | } else if cmp == Greater { |
|
102 | 111 | bytes = &bytes[..entry_range.start]; |
|
103 | 112 | } else { |
|
104 | 113 | return Ok(Some(ManifestEntry::from_path_and_rest( |
|
105 | 114 | entry_path, rest, |
|
106 | 115 | ))); |
|
107 | 116 | } |
|
108 | 117 | } |
|
109 | 118 | Ok(None) |
|
110 | 119 | } |
|
111 | 120 | |
|
112 | 121 | /// If there is at least one, return the byte range of an entry *excluding* |
|
113 | 122 | /// the final newline. |
|
114 | 123 | fn find_entry_near_middle_of( |
|
115 | 124 | bytes: &[u8], |
|
116 | 125 | ) -> Result<Option<std::ops::Range<usize>>, HgError> { |
|
117 | 126 | let len = bytes.len(); |
|
118 | 127 | if len > 0 { |
|
119 | 128 | let middle = bytes.len() / 2; |
|
120 | 129 | // Integer division rounds down, so `middle < len`. |
|
121 | 130 | let (before, after) = bytes.split_at(middle); |
|
122 | 131 | let is_newline = |&byte: &u8| byte == b'\n'; |
|
123 | 132 | let entry_start = match before.iter().rposition(is_newline) { |
|
124 | 133 | Some(i) => i + 1, |
|
125 | 134 | None => 0, // We choose the first entry in `bytes` |
|
126 | 135 | }; |
|
127 | 136 | let entry_end = match after.iter().position(is_newline) { |
|
128 | 137 | Some(i) => { |
|
129 | 138 | // No `+ 1` here to exclude this newline from the range |
|
130 | 139 | middle + i |
|
131 | 140 | } |
|
132 | 141 | None => { |
|
133 | 142 | // In a well-formed manifest: |
|
134 | 143 | // |
|
135 | 144 | // * Since `len > 0`, `bytes` contains at least one entry |
|
136 | 145 | // * Every entry ends with a newline |
|
137 | 146 | // * Since `middle < len`, `after` contains at least the |
|
138 | 147 | // newline at the end of the last entry of `bytes`. |
|
139 | 148 | // |
|
140 | 149 | // We didn’t find a newline, so this manifest is not |
|
141 | 150 | // well-formed. |
|
142 | 151 | return Err(HgError::corrupted( |
|
143 | 152 | "manifest entry without \\n delimiter", |
|
144 | 153 | )); |
|
145 | 154 | } |
|
146 | 155 | }; |
|
147 | 156 | Ok(Some(entry_start..entry_end)) |
|
148 | 157 | } else { |
|
149 | 158 | // len == 0 |
|
150 | 159 | Ok(None) |
|
151 | 160 | } |
|
152 | 161 | } |
|
153 | 162 | } |
|
154 | 163 | |
|
155 | 164 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. |
|
156 | 165 | #[derive(Debug)] |
|
157 | 166 | pub struct ManifestEntry<'manifest> { |
|
158 | 167 | pub path: &'manifest HgPath, |
|
159 | 168 | pub hex_node_id: &'manifest [u8], |
|
160 | 169 | |
|
161 | 170 | /// `Some` values are b'x', b'l', or 't' |
|
162 | 171 | pub flags: Option<u8>, |
|
163 | 172 | } |
|
164 | 173 | |
|
165 | 174 | impl<'a> ManifestEntry<'a> { |
|
166 | 175 | fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> { |
|
167 | 176 | bytes.split_2(b'\0').ok_or_else(|| { |
|
168 | 177 | HgError::corrupted("manifest entry without \\0 delimiter") |
|
169 | 178 | }) |
|
170 | 179 | } |
|
171 | 180 | |
|
172 | 181 | fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self { |
|
173 | 182 | let (hex_node_id, flags) = match rest.split_last() { |
|
174 | 183 | Some((&b'x', rest)) => (rest, Some(b'x')), |
|
175 | 184 | Some((&b'l', rest)) => (rest, Some(b'l')), |
|
176 | 185 | Some((&b't', rest)) => (rest, Some(b't')), |
|
177 | 186 | _ => (rest, None), |
|
178 | 187 | }; |
|
179 | 188 | Self { |
|
180 | 189 | path: HgPath::new(path), |
|
181 | 190 | hex_node_id, |
|
182 | 191 | flags, |
|
183 | 192 | } |
|
184 | 193 | } |
|
185 | 194 | |
|
186 | 195 | fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> { |
|
187 | 196 | let (path, rest) = Self::split_path(bytes)?; |
|
188 | 197 | Ok(Self::from_path_and_rest(path, rest)) |
|
189 | 198 | } |
|
190 | 199 | |
|
191 | 200 | pub fn node_id(&self) -> Result<Node, HgError> { |
|
192 | 201 | Node::from_hex_for_repo(self.hex_node_id) |
|
193 | 202 | } |
|
194 | 203 | } |
@@ -1,849 +1,904 b'' | |||
|
1 | 1 | // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // and Mercurial contributors |
|
3 | 3 | // |
|
4 | 4 | // This software may be used and distributed according to the terms of the |
|
5 | 5 | // GNU General Public License version 2 or any later version. |
|
6 | 6 | //! Mercurial concepts for handling revision history |
|
7 | 7 | |
|
8 | 8 | pub mod node; |
|
9 | 9 | pub mod nodemap; |
|
10 | 10 | mod nodemap_docket; |
|
11 | 11 | pub mod path_encode; |
|
12 | 12 | pub use node::{FromHexError, Node, NodePrefix}; |
|
13 | 13 | pub mod changelog; |
|
14 | 14 | pub mod filelog; |
|
15 | 15 | pub mod index; |
|
16 | 16 | pub mod manifest; |
|
17 | 17 | pub mod patch; |
|
18 | 18 | |
|
19 | 19 | use std::borrow::Cow; |
|
20 | 20 | use std::io::Read; |
|
21 | 21 | use std::ops::Deref; |
|
22 | 22 | use std::path::Path; |
|
23 | 23 | |
|
24 | 24 | use flate2::read::ZlibDecoder; |
|
25 | 25 | use sha1::{Digest, Sha1}; |
|
26 | 26 | use std::cell::RefCell; |
|
27 | 27 | use zstd; |
|
28 | 28 | |
|
29 | 29 | use self::node::{NODE_BYTES_LENGTH, NULL_NODE}; |
|
30 | 30 | use self::nodemap_docket::NodeMapDocket; |
|
31 | 31 | use super::index::Index; |
|
32 | 32 | use super::nodemap::{NodeMap, NodeMapError}; |
|
33 | 33 | use crate::errors::HgError; |
|
34 | 34 | use crate::vfs::Vfs; |
|
35 | 35 | |
|
36 | 36 | /// Mercurial revision numbers |
|
37 | 37 | /// |
|
38 | 38 | /// As noted in revlog.c, revision numbers are actually encoded in |
|
39 | 39 | /// 4 bytes, and are liberally converted to ints, whence the i32 |
|
40 | 40 | pub type Revision = i32; |
|
41 | 41 | |
|
42 | 42 | /// Unchecked Mercurial revision numbers. |
|
43 | 43 | /// |
|
44 | 44 | /// Values of this type have no guarantee of being a valid revision number |
|
45 | 45 | /// in any context. Use method `check_revision` to get a valid revision within |
|
46 | 46 | /// the appropriate index object. |
|
47 | 47 | /// |
|
48 | 48 | /// As noted in revlog.c, revision numbers are actually encoded in |
|
49 | 49 | /// 4 bytes, and are liberally converted to ints, whence the i32 |
|
50 | pub type UncheckedRevision = i32; | |
|
50 | #[derive( | |
|
51 | Debug, | |
|
52 | derive_more::Display, | |
|
53 | Clone, | |
|
54 | Copy, | |
|
55 | Hash, | |
|
56 | PartialEq, | |
|
57 | Eq, | |
|
58 | PartialOrd, | |
|
59 | Ord, | |
|
60 | )] | |
|
61 | pub struct UncheckedRevision(i32); | |
|
62 | ||
|
63 | impl From<Revision> for UncheckedRevision { | |
|
64 | fn from(value: Revision) -> Self { | |
|
65 | Self(value) | |
|
66 | } | |
|
67 | } | |
|
51 | 68 | |
|
52 | 69 | /// Marker expressing the absence of a parent |
|
53 | 70 | /// |
|
54 | 71 | /// Independently of the actual representation, `NULL_REVISION` is guaranteed |
|
55 | 72 | /// to be smaller than all existing revisions. |
|
56 | 73 | pub const NULL_REVISION: Revision = -1; |
|
57 | 74 | |
|
58 | 75 | /// Same as `mercurial.node.wdirrev` |
|
59 | 76 | /// |
|
60 | 77 | /// This is also equal to `i32::max_value()`, but it's better to spell |
|
61 | 78 | /// it out explicitely, same as in `mercurial.node` |
|
62 | 79 | #[allow(clippy::unreadable_literal)] |
|
63 |
pub const WORKING_DIRECTORY_REVISION: Revision = |
|
|
80 | pub const WORKING_DIRECTORY_REVISION: UncheckedRevision = | |
|
81 | UncheckedRevision(0x7fffffff); | |
|
64 | 82 | |
|
65 | 83 | pub const WORKING_DIRECTORY_HEX: &str = |
|
66 | 84 | "ffffffffffffffffffffffffffffffffffffffff"; |
|
67 | 85 | |
|
68 | 86 | /// The simplest expression of what we need of Mercurial DAGs. |
|
69 | 87 | pub trait Graph { |
|
70 | 88 | /// Return the two parents of the given `Revision`. |
|
71 | 89 | /// |
|
72 | 90 | /// Each of the parents can be independently `NULL_REVISION` |
|
73 | 91 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>; |
|
74 | 92 | } |
|
75 | 93 | |
|
76 | 94 | #[derive(Clone, Debug, PartialEq)] |
|
77 | 95 | pub enum GraphError { |
|
78 | 96 | ParentOutOfRange(Revision), |
|
79 | 97 | } |
|
80 | 98 | |
|
81 | 99 | /// The Mercurial Revlog Index |
|
82 | 100 | /// |
|
83 | 101 | /// This is currently limited to the minimal interface that is needed for |
|
84 | 102 | /// the [`nodemap`](nodemap/index.html) module |
|
85 | 103 | pub trait RevlogIndex { |
|
86 | 104 | /// Total number of Revisions referenced in this index |
|
87 | 105 | fn len(&self) -> usize; |
|
88 | 106 | |
|
89 | 107 | fn is_empty(&self) -> bool { |
|
90 | 108 | self.len() == 0 |
|
91 | 109 | } |
|
92 | 110 | |
|
93 |
/// Return a reference to the Node or `None` |
|
|
94 | /// | |
|
95 | /// `NULL_REVISION` is not considered to be out of bounds. | |
|
111 | /// Return a reference to the Node or `None` for `NULL_REVISION` | |
|
96 | 112 | fn node(&self, rev: Revision) -> Option<&Node>; |
|
97 | 113 | |
|
98 | 114 | /// Return a [`Revision`] if `rev` is a valid revision number for this |
|
99 | 115 | /// index |
|
100 | 116 | fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { |
|
117 | let rev = rev.0; | |
|
118 | ||
|
101 | 119 | if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) { |
|
102 | 120 | Some(rev) |
|
103 | 121 | } else { |
|
104 | 122 | None |
|
105 | 123 | } |
|
106 | 124 | } |
|
107 | 125 | } |
|
108 | 126 | |
|
109 | 127 | const REVISION_FLAG_CENSORED: u16 = 1 << 15; |
|
110 | 128 | const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14; |
|
111 | 129 | const REVISION_FLAG_EXTSTORED: u16 = 1 << 13; |
|
112 | 130 | const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12; |
|
113 | 131 | |
|
114 | 132 | // Keep this in sync with REVIDX_KNOWN_FLAGS in |
|
115 | 133 | // mercurial/revlogutils/flagutil.py |
|
116 | 134 | const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED |
|
117 | 135 | | REVISION_FLAG_ELLIPSIS |
|
118 | 136 | | REVISION_FLAG_EXTSTORED |
|
119 | 137 | | REVISION_FLAG_HASCOPIESINFO; |
|
120 | 138 | |
|
121 | 139 | const NULL_REVLOG_ENTRY_FLAGS: u16 = 0; |
|
122 | 140 | |
|
123 | #[derive(Debug, derive_more::From)] | |
|
141 | #[derive(Debug, derive_more::From, derive_more::Display)] | |
|
124 | 142 | pub enum RevlogError { |
|
125 | 143 | InvalidRevision, |
|
126 | 144 | /// Working directory is not supported |
|
127 | 145 | WDirUnsupported, |
|
128 | 146 | /// Found more than one entry whose ID match the requested prefix |
|
129 | 147 | AmbiguousPrefix, |
|
130 | 148 | #[from] |
|
131 | 149 | Other(HgError), |
|
132 | 150 | } |
|
133 | 151 | |
|
134 | 152 | impl From<NodeMapError> for RevlogError { |
|
135 | 153 | fn from(error: NodeMapError) -> Self { |
|
136 | 154 | match error { |
|
137 | 155 | NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix, |
|
138 | 156 | NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted( |
|
139 | 157 | format!("nodemap point to revision {} not in index", rev), |
|
140 | 158 | ), |
|
141 | 159 | } |
|
142 | 160 | } |
|
143 | 161 | } |
|
144 | 162 | |
|
145 | 163 | fn corrupted<S: AsRef<str>>(context: S) -> HgError { |
|
146 | 164 | HgError::corrupted(format!("corrupted revlog, {}", context.as_ref())) |
|
147 | 165 | } |
|
148 | 166 | |
|
149 | 167 | impl RevlogError { |
|
150 | 168 | fn corrupted<S: AsRef<str>>(context: S) -> Self { |
|
151 | 169 | RevlogError::Other(corrupted(context)) |
|
152 | 170 | } |
|
153 | 171 | } |
|
154 | 172 | |
|
155 | 173 | /// Read only implementation of revlog. |
|
156 | 174 | pub struct Revlog { |
|
157 | 175 | /// When index and data are not interleaved: bytes of the revlog index. |
|
158 | 176 | /// When index and data are interleaved: bytes of the revlog index and |
|
159 | 177 | /// data. |
|
160 | 178 | index: Index, |
|
161 | 179 | /// When index and data are not interleaved: bytes of the revlog data |
|
162 | 180 | data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, |
|
163 | 181 | /// When present on disk: the persistent nodemap for this revlog |
|
164 | 182 | nodemap: Option<nodemap::NodeTree>, |
|
165 | 183 | } |
|
166 | 184 | |
|
167 | 185 | impl Revlog { |
|
168 | 186 | /// Open a revlog index file. |
|
169 | 187 | /// |
|
170 | 188 | /// It will also open the associated data file if index and data are not |
|
171 | 189 | /// interleaved. |
|
172 | 190 | pub fn open( |
|
173 | 191 | store_vfs: &Vfs, |
|
174 | 192 | index_path: impl AsRef<Path>, |
|
175 | 193 | data_path: Option<&Path>, |
|
176 | 194 | use_nodemap: bool, |
|
177 | 195 | ) -> Result<Self, HgError> { |
|
178 | 196 | let index_path = index_path.as_ref(); |
|
179 | 197 | let index = { |
|
180 | 198 | match store_vfs.mmap_open_opt(&index_path)? { |
|
181 | 199 | None => Index::new(Box::new(vec![])), |
|
182 | 200 | Some(index_mmap) => { |
|
183 | 201 | let index = Index::new(Box::new(index_mmap))?; |
|
184 | 202 | Ok(index) |
|
185 | 203 | } |
|
186 | 204 | } |
|
187 | 205 | }?; |
|
188 | 206 | |
|
189 | 207 | let default_data_path = index_path.with_extension("d"); |
|
190 | 208 | |
|
191 | 209 | // type annotation required |
|
192 | 210 | // won't recognize Mmap as Deref<Target = [u8]> |
|
193 | 211 | let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = |
|
194 | 212 | if index.is_inline() { |
|
195 | 213 | None |
|
196 | 214 | } else { |
|
197 | 215 | let data_path = data_path.unwrap_or(&default_data_path); |
|
198 | 216 | let data_mmap = store_vfs.mmap_open(data_path)?; |
|
199 | 217 | Some(Box::new(data_mmap)) |
|
200 | 218 | }; |
|
201 | 219 | |
|
202 | 220 | let nodemap = if index.is_inline() || !use_nodemap { |
|
203 | 221 | None |
|
204 | 222 | } else { |
|
205 | 223 | NodeMapDocket::read_from_file(store_vfs, index_path)?.map( |
|
206 | 224 | |(docket, data)| { |
|
207 | 225 | nodemap::NodeTree::load_bytes( |
|
208 | 226 | Box::new(data), |
|
209 | 227 | docket.data_length, |
|
210 | 228 | ) |
|
211 | 229 | }, |
|
212 | 230 | ) |
|
213 | 231 | }; |
|
214 | 232 | |
|
215 | 233 | Ok(Revlog { |
|
216 | 234 | index, |
|
217 | 235 | data_bytes, |
|
218 | 236 | nodemap, |
|
219 | 237 | }) |
|
220 | 238 | } |
|
221 | 239 | |
|
222 | 240 | /// Return number of entries of the `Revlog`. |
|
223 | 241 | pub fn len(&self) -> usize { |
|
224 | 242 | self.index.len() |
|
225 | 243 | } |
|
226 | 244 | |
|
227 | 245 | /// Returns `true` if the `Revlog` has zero `entries`. |
|
228 | 246 | pub fn is_empty(&self) -> bool { |
|
229 | 247 | self.index.is_empty() |
|
230 | 248 | } |
|
231 | 249 | |
|
232 | 250 | /// Returns the node ID for the given revision number, if it exists in this |
|
233 | 251 | /// revlog |
|
234 | pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> { | |
|
235 | if rev == NULL_REVISION { | |
|
252 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { | |
|
253 | if rev == NULL_REVISION.into() { | |
|
236 | 254 | return Some(&NULL_NODE); |
|
237 | 255 | } |
|
256 | let rev = self.index.check_revision(rev)?; | |
|
238 | 257 | Some(self.index.get_entry(rev)?.hash()) |
|
239 | 258 | } |
|
240 | 259 | |
|
241 | 260 | /// Return the revision number for the given node ID, if it exists in this |
|
242 | 261 | /// revlog |
|
243 | 262 | pub fn rev_from_node( |
|
244 | 263 | &self, |
|
245 | 264 | node: NodePrefix, |
|
246 | 265 | ) -> Result<Revision, RevlogError> { |
|
247 | 266 | let looked_up = if let Some(nodemap) = &self.nodemap { |
|
248 | 267 | nodemap |
|
249 | 268 | .find_bin(&self.index, node)? |
|
250 | 269 | .ok_or(RevlogError::InvalidRevision) |
|
251 | 270 | } else { |
|
252 | 271 | self.rev_from_node_no_persistent_nodemap(node) |
|
253 | 272 | }; |
|
254 | 273 | |
|
255 | 274 | if node.is_prefix_of(&NULL_NODE) { |
|
256 | 275 | return match looked_up { |
|
257 | 276 | Ok(_) => Err(RevlogError::AmbiguousPrefix), |
|
258 | 277 | Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION), |
|
259 | 278 | res => res, |
|
260 | 279 | }; |
|
261 | 280 | }; |
|
262 | 281 | |
|
263 | 282 | looked_up |
|
264 | 283 | } |
|
265 | 284 | |
|
266 | 285 | /// Same as `rev_from_node`, without using a persistent nodemap |
|
267 | 286 | /// |
|
268 | 287 | /// This is used as fallback when a persistent nodemap is not present. |
|
269 | 288 | /// This happens when the persistent-nodemap experimental feature is not |
|
270 | 289 | /// enabled, or for small revlogs. |
|
271 | 290 | fn rev_from_node_no_persistent_nodemap( |
|
272 | 291 | &self, |
|
273 | 292 | node: NodePrefix, |
|
274 | 293 | ) -> Result<Revision, RevlogError> { |
|
275 | 294 | // Linear scan of the revlog |
|
276 | 295 | // TODO: consider building a non-persistent nodemap in memory to |
|
277 | 296 | // optimize these cases. |
|
278 | 297 | let mut found_by_prefix = None; |
|
279 | 298 | for rev in (0..self.len() as Revision).rev() { |
|
280 | 299 | let index_entry = self.index.get_entry(rev).ok_or_else(|| { |
|
281 | 300 | HgError::corrupted( |
|
282 | 301 | "revlog references a revision not in the index", |
|
283 | 302 | ) |
|
284 | 303 | })?; |
|
285 | 304 | if node == *index_entry.hash() { |
|
286 | 305 | return Ok(rev); |
|
287 | 306 | } |
|
288 | 307 | if node.is_prefix_of(index_entry.hash()) { |
|
289 | 308 | if found_by_prefix.is_some() { |
|
290 | 309 | return Err(RevlogError::AmbiguousPrefix); |
|
291 | 310 | } |
|
292 | 311 | found_by_prefix = Some(rev) |
|
293 | 312 | } |
|
294 | 313 | } |
|
295 | 314 | found_by_prefix.ok_or(RevlogError::InvalidRevision) |
|
296 | 315 | } |
|
297 | 316 | |
|
298 | 317 | /// Returns whether the given revision exists in this revlog. |
|
299 | pub fn has_rev(&self, rev: Revision) -> bool { | |
|
300 |
self.index. |
|
|
318 | pub fn has_rev(&self, rev: UncheckedRevision) -> bool { | |
|
319 | self.index.check_revision(rev).is_some() | |
|
301 | 320 | } |
|
302 | 321 | |
|
303 | 322 | /// Return the full data associated to a revision. |
|
304 | 323 | /// |
|
305 | 324 | /// All entries required to build the final data out of deltas will be |
|
306 | 325 | /// retrieved as needed, and the deltas will be applied to the inital |
|
307 | 326 | /// snapshot to rebuild the final data. |
|
308 | 327 | pub fn get_rev_data( |
|
309 | 328 | &self, |
|
329 | rev: UncheckedRevision, | |
|
330 | ) -> Result<Cow<[u8]>, RevlogError> { | |
|
331 | if rev == NULL_REVISION.into() { | |
|
332 | return Ok(Cow::Borrowed(&[])); | |
|
333 | }; | |
|
334 | self.get_entry(rev)?.data() | |
|
335 | } | |
|
336 | ||
|
337 | /// [`Self::get_rev_data`] for checked revisions. | |
|
338 | pub fn get_rev_data_for_checked_rev( | |
|
339 | &self, | |
|
310 | 340 | rev: Revision, |
|
311 | 341 | ) -> Result<Cow<[u8]>, RevlogError> { |
|
312 | 342 | if rev == NULL_REVISION { |
|
313 | 343 | return Ok(Cow::Borrowed(&[])); |
|
314 | 344 | }; |
|
315 |
|
|
|
345 | self.get_entry_for_checked_rev(rev)?.data() | |
|
316 | 346 | } |
|
317 | 347 | |
|
318 | 348 | /// Check the hash of some given data against the recorded hash. |
|
319 | 349 | pub fn check_hash( |
|
320 | 350 | &self, |
|
321 | 351 | p1: Revision, |
|
322 | 352 | p2: Revision, |
|
323 | 353 | expected: &[u8], |
|
324 | 354 | data: &[u8], |
|
325 | 355 | ) -> bool { |
|
326 | 356 | let e1 = self.index.get_entry(p1); |
|
327 | 357 | let h1 = match e1 { |
|
328 | 358 | Some(ref entry) => entry.hash(), |
|
329 | 359 | None => &NULL_NODE, |
|
330 | 360 | }; |
|
331 | 361 | let e2 = self.index.get_entry(p2); |
|
332 | 362 | let h2 = match e2 { |
|
333 | 363 | Some(ref entry) => entry.hash(), |
|
334 | 364 | None => &NULL_NODE, |
|
335 | 365 | }; |
|
336 | 366 | |
|
337 | 367 | hash(data, h1.as_bytes(), h2.as_bytes()) == expected |
|
338 | 368 | } |
|
339 | 369 | |
|
340 | 370 | /// Build the full data of a revision out its snapshot |
|
341 | 371 | /// and its deltas. |
|
342 | 372 | fn build_data_from_deltas( |
|
343 | 373 | snapshot: RevlogEntry, |
|
344 | 374 | deltas: &[RevlogEntry], |
|
345 | 375 | ) -> Result<Vec<u8>, HgError> { |
|
346 | 376 | let snapshot = snapshot.data_chunk()?; |
|
347 | 377 | let deltas = deltas |
|
348 | 378 | .iter() |
|
349 | 379 | .rev() |
|
350 | 380 | .map(RevlogEntry::data_chunk) |
|
351 | 381 | .collect::<Result<Vec<_>, _>>()?; |
|
352 | 382 | let patches: Vec<_> = |
|
353 | 383 | deltas.iter().map(|d| patch::PatchList::new(d)).collect(); |
|
354 | 384 | let patch = patch::fold_patch_lists(&patches); |
|
355 | 385 | Ok(patch.apply(&snapshot)) |
|
356 | 386 | } |
|
357 | 387 | |
|
358 | 388 | /// Return the revlog data. |
|
359 | 389 | fn data(&self) -> &[u8] { |
|
360 | 390 | match &self.data_bytes { |
|
361 | 391 | Some(data_bytes) => data_bytes, |
|
362 | 392 | None => panic!( |
|
363 | 393 | "forgot to load the data or trying to access inline data" |
|
364 | 394 | ), |
|
365 | 395 | } |
|
366 | 396 | } |
|
367 | 397 | |
|
368 | 398 | pub fn make_null_entry(&self) -> RevlogEntry { |
|
369 | 399 | RevlogEntry { |
|
370 | 400 | revlog: self, |
|
371 | 401 | rev: NULL_REVISION, |
|
372 | 402 | bytes: b"", |
|
373 | 403 | compressed_len: 0, |
|
374 | 404 | uncompressed_len: 0, |
|
375 | 405 | base_rev_or_base_of_delta_chain: None, |
|
376 | 406 | p1: NULL_REVISION, |
|
377 | 407 | p2: NULL_REVISION, |
|
378 | 408 | flags: NULL_REVLOG_ENTRY_FLAGS, |
|
379 | 409 | hash: NULL_NODE, |
|
380 | 410 | } |
|
381 | 411 | } |
|
382 | 412 | |
|
383 | /// Get an entry of the revlog. | |
|
384 | pub fn get_entry( | |
|
413 | fn get_entry_for_checked_rev( | |
|
385 | 414 | &self, |
|
386 | 415 | rev: Revision, |
|
387 | 416 | ) -> Result<RevlogEntry, RevlogError> { |
|
388 | 417 | if rev == NULL_REVISION { |
|
389 | 418 | return Ok(self.make_null_entry()); |
|
390 | 419 | } |
|
391 | 420 | let index_entry = self |
|
392 | 421 | .index |
|
393 | 422 | .get_entry(rev) |
|
394 | 423 | .ok_or(RevlogError::InvalidRevision)?; |
|
395 | 424 | let start = index_entry.offset(); |
|
396 | 425 | let end = start + index_entry.compressed_len() as usize; |
|
397 | 426 | let data = if self.index.is_inline() { |
|
398 | 427 | self.index.data(start, end) |
|
399 | 428 | } else { |
|
400 | 429 | &self.data()[start..end] |
|
401 | 430 | }; |
|
431 | let base_rev = self | |
|
432 | .index | |
|
433 | .check_revision(index_entry.base_revision_or_base_of_delta_chain()) | |
|
434 | .ok_or_else(|| { | |
|
435 | RevlogError::corrupted(format!( | |
|
436 | "base revision for rev {} is invalid", | |
|
437 | rev | |
|
438 | )) | |
|
439 | })?; | |
|
440 | let p1 = | |
|
441 | self.index.check_revision(index_entry.p1()).ok_or_else(|| { | |
|
442 | RevlogError::corrupted(format!( | |
|
443 | "p1 for rev {} is invalid", | |
|
444 | rev | |
|
445 | )) | |
|
446 | })?; | |
|
447 | let p2 = | |
|
448 | self.index.check_revision(index_entry.p2()).ok_or_else(|| { | |
|
449 | RevlogError::corrupted(format!( | |
|
450 | "p2 for rev {} is invalid", | |
|
451 | rev | |
|
452 | )) | |
|
453 | })?; | |
|
402 | 454 | let entry = RevlogEntry { |
|
403 | 455 | revlog: self, |
|
404 | 456 | rev, |
|
405 | 457 | bytes: data, |
|
406 | 458 | compressed_len: index_entry.compressed_len(), |
|
407 | 459 | uncompressed_len: index_entry.uncompressed_len(), |
|
408 |
base_rev_or_base_of_delta_chain: if |
|
|
409 | .base_revision_or_base_of_delta_chain() | |
|
410 | == rev | |
|
411 | { | |
|
460 | base_rev_or_base_of_delta_chain: if base_rev == rev { | |
|
412 | 461 | None |
|
413 | 462 | } else { |
|
414 | Some(index_entry.base_revision_or_base_of_delta_chain()) | |
|
463 | Some(base_rev) | |
|
415 | 464 | }, |
|
416 |
p1 |
|
|
417 |
p2 |
|
|
465 | p1, | |
|
466 | p2, | |
|
418 | 467 | flags: index_entry.flags(), |
|
419 | 468 | hash: *index_entry.hash(), |
|
420 | 469 | }; |
|
421 | 470 | Ok(entry) |
|
422 | 471 | } |
|
423 | 472 | |
|
424 | /// when resolving internal references within revlog, any errors | |
|
425 | /// should be reported as corruption, instead of e.g. "invalid revision" | |
|
426 | fn get_entry_internal( | |
|
473 | /// Get an entry of the revlog. | |
|
474 | pub fn get_entry( | |
|
427 | 475 | &self, |
|
428 | rev: Revision, | |
|
429 |
) -> Result<RevlogEntry, |
|
|
430 | self.get_entry(rev) | |
|
431 | .map_err(|_| corrupted(format!("revision {} out of range", rev))) | |
|
476 | rev: UncheckedRevision, | |
|
477 | ) -> Result<RevlogEntry, RevlogError> { | |
|
478 | if rev == NULL_REVISION.into() { | |
|
479 | return Ok(self.make_null_entry()); | |
|
480 | } | |
|
481 | let rev = self.index.check_revision(rev).ok_or_else(|| { | |
|
482 | RevlogError::corrupted(format!("rev {} is invalid", rev)) | |
|
483 | })?; | |
|
484 | self.get_entry_for_checked_rev(rev) | |
|
432 | 485 | } |
|
433 | 486 | } |
|
434 | 487 | |
|
435 | 488 | /// The revlog entry's bytes and the necessary informations to extract |
|
436 | 489 | /// the entry's data. |
|
437 | 490 | #[derive(Clone)] |
|
438 | 491 | pub struct RevlogEntry<'revlog> { |
|
439 | 492 | revlog: &'revlog Revlog, |
|
440 | 493 | rev: Revision, |
|
441 | 494 | bytes: &'revlog [u8], |
|
442 | 495 | compressed_len: u32, |
|
443 | 496 | uncompressed_len: i32, |
|
444 | 497 | base_rev_or_base_of_delta_chain: Option<Revision>, |
|
445 | 498 | p1: Revision, |
|
446 | 499 | p2: Revision, |
|
447 | 500 | flags: u16, |
|
448 | 501 | hash: Node, |
|
449 | 502 | } |
|
450 | 503 | |
|
451 | 504 | thread_local! { |
|
452 | 505 | // seems fine to [unwrap] here: this can only fail due to memory allocation |
|
453 | 506 | // failing, and it's normal for that to cause panic. |
|
454 | 507 | static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> = |
|
455 | 508 | RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap()); |
|
456 | 509 | } |
|
457 | 510 | |
|
458 | 511 | fn zstd_decompress_to_buffer( |
|
459 | 512 | bytes: &[u8], |
|
460 | 513 | buf: &mut Vec<u8>, |
|
461 | 514 | ) -> Result<usize, std::io::Error> { |
|
462 | 515 | ZSTD_DECODER |
|
463 | 516 | .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf)) |
|
464 | 517 | } |
|
465 | 518 | |
|
466 | 519 | impl<'revlog> RevlogEntry<'revlog> { |
|
467 | 520 | pub fn revision(&self) -> Revision { |
|
468 | 521 | self.rev |
|
469 | 522 | } |
|
470 | 523 | |
|
471 | 524 | pub fn node(&self) -> &Node { |
|
472 | 525 | &self.hash |
|
473 | 526 | } |
|
474 | 527 | |
|
475 | 528 | pub fn uncompressed_len(&self) -> Option<u32> { |
|
476 | 529 | u32::try_from(self.uncompressed_len).ok() |
|
477 | 530 | } |
|
478 | 531 | |
|
479 | 532 | pub fn has_p1(&self) -> bool { |
|
480 | 533 | self.p1 != NULL_REVISION |
|
481 | 534 | } |
|
482 | 535 | |
|
483 | 536 | pub fn p1_entry( |
|
484 | 537 | &self, |
|
485 | 538 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { |
|
486 | 539 | if self.p1 == NULL_REVISION { |
|
487 | 540 | Ok(None) |
|
488 | 541 | } else { |
|
489 | Ok(Some(self.revlog.get_entry(self.p1)?)) | |
|
542 | Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?)) | |
|
490 | 543 | } |
|
491 | 544 | } |
|
492 | 545 | |
|
493 | 546 | pub fn p2_entry( |
|
494 | 547 | &self, |
|
495 | 548 | ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> { |
|
496 | 549 | if self.p2 == NULL_REVISION { |
|
497 | 550 | Ok(None) |
|
498 | 551 | } else { |
|
499 | Ok(Some(self.revlog.get_entry(self.p2)?)) | |
|
552 | Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?)) | |
|
500 | 553 | } |
|
501 | 554 | } |
|
502 | 555 | |
|
503 | 556 | pub fn p1(&self) -> Option<Revision> { |
|
504 | 557 | if self.p1 == NULL_REVISION { |
|
505 | 558 | None |
|
506 | 559 | } else { |
|
507 | 560 | Some(self.p1) |
|
508 | 561 | } |
|
509 | 562 | } |
|
510 | 563 | |
|
511 | 564 | pub fn p2(&self) -> Option<Revision> { |
|
512 | 565 | if self.p2 == NULL_REVISION { |
|
513 | 566 | None |
|
514 | 567 | } else { |
|
515 | 568 | Some(self.p2) |
|
516 | 569 | } |
|
517 | 570 | } |
|
518 | 571 | |
|
519 | 572 | pub fn is_censored(&self) -> bool { |
|
520 | 573 | (self.flags & REVISION_FLAG_CENSORED) != 0 |
|
521 | 574 | } |
|
522 | 575 | |
|
523 | 576 | pub fn has_length_affecting_flag_processor(&self) -> bool { |
|
524 | 577 | // Relevant Python code: revlog.size() |
|
525 | 578 | // note: ELLIPSIS is known to not change the content |
|
526 | 579 | (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0 |
|
527 | 580 | } |
|
528 | 581 | |
|
529 | 582 | /// The data for this entry, after resolving deltas if any. |
|
530 |
pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, |
|
|
583 | pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
|
531 | 584 | let mut entry = self.clone(); |
|
532 | 585 | let mut delta_chain = vec![]; |
|
533 | 586 | |
|
534 | 587 | // The meaning of `base_rev_or_base_of_delta_chain` depends on |
|
535 | 588 | // generaldelta. See the doc on `ENTRY_DELTA_BASE` in |
|
536 | 589 | // `mercurial/revlogutils/constants.py` and the code in |
|
537 | 590 | // [_chaininfo] and in [index_deltachain]. |
|
538 | 591 | let uses_generaldelta = self.revlog.index.uses_generaldelta(); |
|
539 | 592 | while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain { |
|
540 | 593 | entry = if uses_generaldelta { |
|
541 | 594 | delta_chain.push(entry); |
|
542 |
self.revlog.get_entry_ |
|
|
595 | self.revlog.get_entry_for_checked_rev(base_rev)? | |
|
543 | 596 | } else { |
|
544 | let base_rev = entry.rev - 1; | |
|
597 | let base_rev = UncheckedRevision(entry.rev - 1); | |
|
545 | 598 | delta_chain.push(entry); |
|
546 |
self.revlog.get_entry |
|
|
599 | self.revlog.get_entry(base_rev)? | |
|
547 | 600 | }; |
|
548 | 601 | } |
|
549 | 602 | |
|
550 | 603 | let data = if delta_chain.is_empty() { |
|
551 | 604 | entry.data_chunk()? |
|
552 | 605 | } else { |
|
553 | 606 | Revlog::build_data_from_deltas(entry, &delta_chain)?.into() |
|
554 | 607 | }; |
|
555 | 608 | |
|
556 | 609 | Ok(data) |
|
557 | 610 | } |
|
558 | 611 | |
|
559 | 612 | fn check_data( |
|
560 | 613 | &self, |
|
561 | 614 | data: Cow<'revlog, [u8]>, |
|
562 |
) -> Result<Cow<'revlog, [u8]>, |
|
|
615 | ) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
|
563 | 616 | if self.revlog.check_hash( |
|
564 | 617 | self.p1, |
|
565 | 618 | self.p2, |
|
566 | 619 | self.hash.as_bytes(), |
|
567 | 620 | &data, |
|
568 | 621 | ) { |
|
569 | 622 | Ok(data) |
|
570 | 623 | } else { |
|
571 | 624 | if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 { |
|
572 | 625 | return Err(HgError::unsupported( |
|
573 | 626 | "ellipsis revisions are not supported by rhg", |
|
574 |
) |
|
|
627 | ) | |
|
628 | .into()); | |
|
575 | 629 | } |
|
576 | 630 | Err(corrupted(format!( |
|
577 | 631 | "hash check failed for revision {}", |
|
578 | 632 | self.rev |
|
579 |
)) |
|
|
633 | )) | |
|
634 | .into()) | |
|
580 | 635 | } |
|
581 | 636 | } |
|
582 | 637 | |
|
583 |
pub fn data(&self) -> Result<Cow<'revlog, [u8]>, |
|
|
638 | pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { | |
|
584 | 639 | let data = self.rawdata()?; |
|
585 | 640 | if self.rev == NULL_REVISION { |
|
586 | 641 | return Ok(data); |
|
587 | 642 | } |
|
588 | 643 | if self.is_censored() { |
|
589 | return Err(HgError::CensoredNodeError); | |
|
644 | return Err(HgError::CensoredNodeError.into()); | |
|
590 | 645 | } |
|
591 | 646 | self.check_data(data) |
|
592 | 647 | } |
|
593 | 648 | |
|
594 | 649 | /// Extract the data contained in the entry. |
|
595 | 650 | /// This may be a delta. (See `is_delta`.) |
|
596 | 651 | fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> { |
|
597 | 652 | if self.bytes.is_empty() { |
|
598 | 653 | return Ok(Cow::Borrowed(&[])); |
|
599 | 654 | } |
|
600 | 655 | match self.bytes[0] { |
|
601 | 656 | // Revision data is the entirety of the entry, including this |
|
602 | 657 | // header. |
|
603 | 658 | b'\0' => Ok(Cow::Borrowed(self.bytes)), |
|
604 | 659 | // Raw revision data follows. |
|
605 | 660 | b'u' => Ok(Cow::Borrowed(&self.bytes[1..])), |
|
606 | 661 | // zlib (RFC 1950) data. |
|
607 | 662 | b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)), |
|
608 | 663 | // zstd data. |
|
609 | 664 | b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)), |
|
610 | 665 | // A proper new format should have had a repo/store requirement. |
|
611 | 666 | format_type => Err(corrupted(format!( |
|
612 | 667 | "unknown compression header '{}'", |
|
613 | 668 | format_type |
|
614 | 669 | ))), |
|
615 | 670 | } |
|
616 | 671 | } |
|
617 | 672 | |
|
618 | 673 | fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> { |
|
619 | 674 | let mut decoder = ZlibDecoder::new(self.bytes); |
|
620 | 675 | if self.is_delta() { |
|
621 | 676 | let mut buf = Vec::with_capacity(self.compressed_len as usize); |
|
622 | 677 | decoder |
|
623 | 678 | .read_to_end(&mut buf) |
|
624 | 679 | .map_err(|e| corrupted(e.to_string()))?; |
|
625 | 680 | Ok(buf) |
|
626 | 681 | } else { |
|
627 | 682 | let cap = self.uncompressed_len.max(0) as usize; |
|
628 | 683 | let mut buf = vec![0; cap]; |
|
629 | 684 | decoder |
|
630 | 685 | .read_exact(&mut buf) |
|
631 | 686 | .map_err(|e| corrupted(e.to_string()))?; |
|
632 | 687 | Ok(buf) |
|
633 | 688 | } |
|
634 | 689 | } |
|
635 | 690 | |
|
636 | 691 | fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> { |
|
637 | 692 | let cap = self.uncompressed_len.max(0) as usize; |
|
638 | 693 | if self.is_delta() { |
|
639 | 694 | // [cap] is usually an over-estimate of the space needed because |
|
640 | 695 | // it's the length of delta-decoded data, but we're interested |
|
641 | 696 | // in the size of the delta. |
|
642 | 697 | // This means we have to [shrink_to_fit] to avoid holding on |
|
643 | 698 | // to a large chunk of memory, but it also means we must have a |
|
644 | 699 | // fallback branch, for the case when the delta is longer than |
|
645 | 700 | // the original data (surprisingly, this does happen in practice) |
|
646 | 701 | let mut buf = Vec::with_capacity(cap); |
|
647 | 702 | match zstd_decompress_to_buffer(self.bytes, &mut buf) { |
|
648 | 703 | Ok(_) => buf.shrink_to_fit(), |
|
649 | 704 | Err(_) => { |
|
650 | 705 | buf.clear(); |
|
651 | 706 | zstd::stream::copy_decode(self.bytes, &mut buf) |
|
652 | 707 | .map_err(|e| corrupted(e.to_string()))?; |
|
653 | 708 | } |
|
654 | 709 | }; |
|
655 | 710 | Ok(buf) |
|
656 | 711 | } else { |
|
657 | 712 | let mut buf = Vec::with_capacity(cap); |
|
658 | 713 | let len = zstd_decompress_to_buffer(self.bytes, &mut buf) |
|
659 | 714 | .map_err(|e| corrupted(e.to_string()))?; |
|
660 | 715 | if len != self.uncompressed_len as usize { |
|
661 | 716 | Err(corrupted("uncompressed length does not match")) |
|
662 | 717 | } else { |
|
663 | 718 | Ok(buf) |
|
664 | 719 | } |
|
665 | 720 | } |
|
666 | 721 | } |
|
667 | 722 | |
|
668 | 723 | /// Tell if the entry is a snapshot or a delta |
|
669 | 724 | /// (influences on decompression). |
|
670 | 725 | fn is_delta(&self) -> bool { |
|
671 | 726 | self.base_rev_or_base_of_delta_chain.is_some() |
|
672 | 727 | } |
|
673 | 728 | } |
|
674 | 729 | |
|
675 | 730 | /// Calculate the hash of a revision given its data and its parents. |
|
676 | 731 | fn hash( |
|
677 | 732 | data: &[u8], |
|
678 | 733 | p1_hash: &[u8], |
|
679 | 734 | p2_hash: &[u8], |
|
680 | 735 | ) -> [u8; NODE_BYTES_LENGTH] { |
|
681 | 736 | let mut hasher = Sha1::new(); |
|
682 | 737 | let (a, b) = (p1_hash, p2_hash); |
|
683 | 738 | if a > b { |
|
684 | 739 | hasher.update(b); |
|
685 | 740 | hasher.update(a); |
|
686 | 741 | } else { |
|
687 | 742 | hasher.update(a); |
|
688 | 743 | hasher.update(b); |
|
689 | 744 | } |
|
690 | 745 | hasher.update(data); |
|
691 | 746 | *hasher.finalize().as_ref() |
|
692 | 747 | } |
|
693 | 748 | |
|
694 | 749 | #[cfg(test)] |
|
695 | 750 | mod tests { |
|
696 | 751 | use super::*; |
|
697 | 752 | use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE}; |
|
698 | 753 | use itertools::Itertools; |
|
699 | 754 | |
|
700 | 755 | #[test] |
|
701 | 756 | fn test_empty() { |
|
702 | 757 | let temp = tempfile::tempdir().unwrap(); |
|
703 | 758 | let vfs = Vfs { base: temp.path() }; |
|
704 | 759 | std::fs::write(temp.path().join("foo.i"), b"").unwrap(); |
|
705 | 760 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); |
|
706 | 761 | assert!(revlog.is_empty()); |
|
707 | 762 | assert_eq!(revlog.len(), 0); |
|
708 | assert!(revlog.get_entry(0).is_err()); | |
|
709 | assert!(!revlog.has_rev(0)); | |
|
763 | assert!(revlog.get_entry(0.into()).is_err()); | |
|
764 | assert!(!revlog.has_rev(0.into())); | |
|
710 | 765 | assert_eq!( |
|
711 | 766 | revlog.rev_from_node(NULL_NODE.into()).unwrap(), |
|
712 | 767 | NULL_REVISION |
|
713 | 768 | ); |
|
714 | let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap(); | |
|
769 | let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap(); | |
|
715 | 770 | assert_eq!(null_entry.revision(), NULL_REVISION); |
|
716 | 771 | assert!(null_entry.data().unwrap().is_empty()); |
|
717 | 772 | } |
|
718 | 773 | |
|
719 | 774 | #[test] |
|
720 | 775 | fn test_inline() { |
|
721 | 776 | let temp = tempfile::tempdir().unwrap(); |
|
722 | 777 | let vfs = Vfs { base: temp.path() }; |
|
723 | 778 | let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd") |
|
724 | 779 | .unwrap(); |
|
725 | 780 | let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12") |
|
726 | 781 | .unwrap(); |
|
727 | 782 | let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582") |
|
728 | 783 | .unwrap(); |
|
729 | 784 | let entry0_bytes = IndexEntryBuilder::new() |
|
730 | 785 | .is_first(true) |
|
731 | 786 | .with_version(1) |
|
732 | 787 | .with_inline(true) |
|
733 | 788 | .with_offset(INDEX_ENTRY_SIZE) |
|
734 | 789 | .with_node(node0) |
|
735 | 790 | .build(); |
|
736 | 791 | let entry1_bytes = IndexEntryBuilder::new() |
|
737 | 792 | .with_offset(INDEX_ENTRY_SIZE) |
|
738 | 793 | .with_node(node1) |
|
739 | 794 | .build(); |
|
740 | 795 | let entry2_bytes = IndexEntryBuilder::new() |
|
741 | 796 | .with_offset(INDEX_ENTRY_SIZE) |
|
742 | 797 | .with_p1(0) |
|
743 | 798 | .with_p2(1) |
|
744 | 799 | .with_node(node2) |
|
745 | 800 | .build(); |
|
746 | 801 | let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes] |
|
747 | 802 | .into_iter() |
|
748 | 803 | .flatten() |
|
749 | 804 | .collect_vec(); |
|
750 | 805 | std::fs::write(temp.path().join("foo.i"), contents).unwrap(); |
|
751 | 806 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); |
|
752 | 807 | |
|
753 | let entry0 = revlog.get_entry(0).ok().unwrap(); | |
|
808 | let entry0 = revlog.get_entry(0.into()).ok().unwrap(); | |
|
754 | 809 | assert_eq!(entry0.revision(), 0); |
|
755 | 810 | assert_eq!(*entry0.node(), node0); |
|
756 | 811 | assert!(!entry0.has_p1()); |
|
757 | 812 | assert_eq!(entry0.p1(), None); |
|
758 | 813 | assert_eq!(entry0.p2(), None); |
|
759 | 814 | let p1_entry = entry0.p1_entry().unwrap(); |
|
760 | 815 | assert!(p1_entry.is_none()); |
|
761 | 816 | let p2_entry = entry0.p2_entry().unwrap(); |
|
762 | 817 | assert!(p2_entry.is_none()); |
|
763 | 818 | |
|
764 | let entry1 = revlog.get_entry(1).ok().unwrap(); | |
|
819 | let entry1 = revlog.get_entry(1.into()).ok().unwrap(); | |
|
765 | 820 | assert_eq!(entry1.revision(), 1); |
|
766 | 821 | assert_eq!(*entry1.node(), node1); |
|
767 | 822 | assert!(!entry1.has_p1()); |
|
768 | 823 | assert_eq!(entry1.p1(), None); |
|
769 | 824 | assert_eq!(entry1.p2(), None); |
|
770 | 825 | let p1_entry = entry1.p1_entry().unwrap(); |
|
771 | 826 | assert!(p1_entry.is_none()); |
|
772 | 827 | let p2_entry = entry1.p2_entry().unwrap(); |
|
773 | 828 | assert!(p2_entry.is_none()); |
|
774 | 829 | |
|
775 | let entry2 = revlog.get_entry(2).ok().unwrap(); | |
|
830 | let entry2 = revlog.get_entry(2.into()).ok().unwrap(); | |
|
776 | 831 | assert_eq!(entry2.revision(), 2); |
|
777 | 832 | assert_eq!(*entry2.node(), node2); |
|
778 | 833 | assert!(entry2.has_p1()); |
|
779 | 834 | assert_eq!(entry2.p1(), Some(0)); |
|
780 | 835 | assert_eq!(entry2.p2(), Some(1)); |
|
781 | 836 | let p1_entry = entry2.p1_entry().unwrap(); |
|
782 | 837 | assert!(p1_entry.is_some()); |
|
783 | 838 | assert_eq!(p1_entry.unwrap().revision(), 0); |
|
784 | 839 | let p2_entry = entry2.p2_entry().unwrap(); |
|
785 | 840 | assert!(p2_entry.is_some()); |
|
786 | 841 | assert_eq!(p2_entry.unwrap().revision(), 1); |
|
787 | 842 | } |
|
788 | 843 | |
|
789 | 844 | #[test] |
|
790 | 845 | fn test_nodemap() { |
|
791 | 846 | let temp = tempfile::tempdir().unwrap(); |
|
792 | 847 | let vfs = Vfs { base: temp.path() }; |
|
793 | 848 | |
|
794 | 849 | // building a revlog with a forced Node starting with zeros |
|
795 | 850 | // This is a corruption, but it does not preclude using the nodemap |
|
796 | 851 | // if we don't try and access the data |
|
797 | 852 | let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd") |
|
798 | 853 | .unwrap(); |
|
799 | 854 | let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12") |
|
800 | 855 | .unwrap(); |
|
801 | 856 | let entry0_bytes = IndexEntryBuilder::new() |
|
802 | 857 | .is_first(true) |
|
803 | 858 | .with_version(1) |
|
804 | 859 | .with_inline(true) |
|
805 | 860 | .with_offset(INDEX_ENTRY_SIZE) |
|
806 | 861 | .with_node(node0) |
|
807 | 862 | .build(); |
|
808 | 863 | let entry1_bytes = IndexEntryBuilder::new() |
|
809 | 864 | .with_offset(INDEX_ENTRY_SIZE) |
|
810 | 865 | .with_node(node1) |
|
811 | 866 | .build(); |
|
812 | 867 | let contents = vec![entry0_bytes, entry1_bytes] |
|
813 | 868 | .into_iter() |
|
814 | 869 | .flatten() |
|
815 | 870 | .collect_vec(); |
|
816 | 871 | std::fs::write(temp.path().join("foo.i"), contents).unwrap(); |
|
817 | 872 | let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); |
|
818 | 873 | |
|
819 | 874 | // accessing the data shows the corruption |
|
820 | revlog.get_entry(0).unwrap().data().unwrap_err(); | |
|
875 | revlog.get_entry(0.into()).unwrap().data().unwrap_err(); | |
|
821 | 876 | |
|
822 | 877 | assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1); |
|
823 | 878 | assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0); |
|
824 | 879 | assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1); |
|
825 | 880 | assert_eq!( |
|
826 | 881 | revlog |
|
827 | 882 | .rev_from_node(NodePrefix::from_hex("000").unwrap()) |
|
828 | 883 | .unwrap(), |
|
829 | 884 | -1 |
|
830 | 885 | ); |
|
831 | 886 | assert_eq!( |
|
832 | 887 | revlog |
|
833 | 888 | .rev_from_node(NodePrefix::from_hex("b00").unwrap()) |
|
834 | 889 | .unwrap(), |
|
835 | 890 | 1 |
|
836 | 891 | ); |
|
837 | 892 | // RevlogError does not implement PartialEq |
|
838 | 893 | // (ultimately because io::Error does not) |
|
839 | 894 | match revlog |
|
840 | 895 | .rev_from_node(NodePrefix::from_hex("00").unwrap()) |
|
841 | 896 | .expect_err("Expected to give AmbiguousPrefix error") |
|
842 | 897 | { |
|
843 | 898 | RevlogError::AmbiguousPrefix => (), |
|
844 | 899 | e => { |
|
845 | 900 | panic!("Got another error than AmbiguousPrefix: {:?}", e); |
|
846 | 901 | } |
|
847 | 902 | }; |
|
848 | 903 | } |
|
849 | 904 | } |
@@ -1,1067 +1,1082 b'' | |||
|
1 | 1 | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // and Mercurial contributors |
|
3 | 3 | // |
|
4 | 4 | // This software may be used and distributed according to the terms of the |
|
5 | 5 | // GNU General Public License version 2 or any later version. |
|
6 | 6 | //! Indexing facilities for fast retrieval of `Revision` from `Node` |
|
7 | 7 | //! |
|
8 | 8 | //! This provides a variation on the 16-ary radix tree that is |
|
9 | 9 | //! provided as "nodetree" in revlog.c, ready for append-only persistence |
|
10 | 10 | //! on disk. |
|
11 | 11 | //! |
|
12 | 12 | //! Following existing implicit conventions, the "nodemap" terminology |
|
13 | 13 | //! is used in a more abstract context. |
|
14 | 14 | |
|
15 | use crate::UncheckedRevision; | |
|
16 | ||
|
15 | 17 | use super::{ |
|
16 | 18 | node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION, |
|
17 | 19 | }; |
|
18 | 20 | |
|
19 | 21 | use bytes_cast::{unaligned, BytesCast}; |
|
20 | 22 | use std::cmp::max; |
|
21 | 23 | use std::fmt; |
|
22 | 24 | use std::mem::{self, align_of, size_of}; |
|
23 | 25 | use std::ops::Deref; |
|
24 | 26 | use std::ops::Index; |
|
25 | 27 | |
|
26 | 28 | #[derive(Debug, PartialEq)] |
|
27 | 29 | pub enum NodeMapError { |
|
28 | 30 | /// A `NodePrefix` matches several [`Revision`]s. |
|
29 | 31 | /// |
|
30 | 32 | /// This can be returned by methods meant for (at most) one match. |
|
31 | 33 | MultipleResults, |
|
32 | 34 | /// A `Revision` stored in the nodemap could not be found in the index |
|
33 | RevisionNotInIndex(Revision), | |
|
35 | RevisionNotInIndex(UncheckedRevision), | |
|
34 | 36 | } |
|
35 | 37 | |
|
36 | 38 | /// Mapping system from Mercurial nodes to revision numbers. |
|
37 | 39 | /// |
|
38 | 40 | /// ## `RevlogIndex` and `NodeMap` |
|
39 | 41 | /// |
|
40 | 42 | /// One way to think about their relationship is that |
|
41 | 43 | /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`] |
|
42 | 44 | /// information carried by a [`RevlogIndex`]. |
|
43 | 45 | /// |
|
44 | 46 | /// Many of the methods in this trait take a `RevlogIndex` argument |
|
45 | 47 | /// which is used for validation of their results. This index must naturally |
|
46 | 48 | /// be the one the `NodeMap` is about, and it must be consistent. |
|
47 | 49 | /// |
|
48 | 50 | /// Notably, the `NodeMap` must not store |
|
49 | 51 | /// information about more `Revision` values than there are in the index. |
|
50 | 52 | /// In these methods, an encountered `Revision` is not in the index, a |
|
51 | 53 | /// [RevisionNotInIndex](NodeMapError) error is returned. |
|
52 | 54 | /// |
|
53 | 55 | /// In insert operations, the rule is thus that the `NodeMap` must always |
|
54 | 56 | /// be updated after the `RevlogIndex` it is about. |
|
55 | 57 | pub trait NodeMap { |
|
56 | 58 | /// Find the unique `Revision` having the given `Node` |
|
57 | 59 | /// |
|
58 | 60 | /// If no Revision matches the given `Node`, `Ok(None)` is returned. |
|
59 | 61 | fn find_node( |
|
60 | 62 | &self, |
|
61 | 63 | index: &impl RevlogIndex, |
|
62 | 64 | node: &Node, |
|
63 | 65 | ) -> Result<Option<Revision>, NodeMapError> { |
|
64 | 66 | self.find_bin(index, node.into()) |
|
65 | 67 | } |
|
66 | 68 | |
|
67 | 69 | /// Find the unique Revision whose `Node` starts with a given binary prefix |
|
68 | 70 | /// |
|
69 | 71 | /// If no Revision matches the given prefix, `Ok(None)` is returned. |
|
70 | 72 | /// |
|
71 | 73 | /// If several Revisions match the given prefix, a |
|
72 | 74 | /// [MultipleResults](NodeMapError) error is returned. |
|
73 | 75 | fn find_bin( |
|
74 | 76 | &self, |
|
75 | 77 | idx: &impl RevlogIndex, |
|
76 | 78 | prefix: NodePrefix, |
|
77 | 79 | ) -> Result<Option<Revision>, NodeMapError>; |
|
78 | 80 | |
|
79 | 81 | /// Give the size of the shortest node prefix that determines |
|
80 | 82 | /// the revision uniquely. |
|
81 | 83 | /// |
|
82 | 84 | /// From a binary node prefix, if it is matched in the node map, this |
|
83 | 85 | /// returns the number of hexadecimal digits that would had sufficed |
|
84 | 86 | /// to find the revision uniquely. |
|
85 | 87 | /// |
|
86 | 88 | /// Returns `None` if no [`Revision`] could be found for the prefix. |
|
87 | 89 | /// |
|
88 | 90 | /// If several Revisions match the given prefix, a |
|
89 | 91 | /// [MultipleResults](NodeMapError) error is returned. |
|
90 | 92 | fn unique_prefix_len_bin( |
|
91 | 93 | &self, |
|
92 | 94 | idx: &impl RevlogIndex, |
|
93 | 95 | node_prefix: NodePrefix, |
|
94 | 96 | ) -> Result<Option<usize>, NodeMapError>; |
|
95 | 97 | |
|
96 | 98 | /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with |
|
97 | 99 | /// a full [`Node`] as input |
|
98 | 100 | fn unique_prefix_len_node( |
|
99 | 101 | &self, |
|
100 | 102 | idx: &impl RevlogIndex, |
|
101 | 103 | node: &Node, |
|
102 | 104 | ) -> Result<Option<usize>, NodeMapError> { |
|
103 | 105 | self.unique_prefix_len_bin(idx, node.into()) |
|
104 | 106 | } |
|
105 | 107 | } |
|
106 | 108 | |
|
107 | 109 | pub trait MutableNodeMap: NodeMap { |
|
108 | 110 | fn insert<I: RevlogIndex>( |
|
109 | 111 | &mut self, |
|
110 | 112 | index: &I, |
|
111 | 113 | node: &Node, |
|
112 | 114 | rev: Revision, |
|
113 | 115 | ) -> Result<(), NodeMapError>; |
|
114 | 116 | } |
|
115 | 117 | |
|
116 | 118 | /// Low level NodeTree [`Block`] elements |
|
117 | 119 | /// |
|
118 | 120 | /// These are exactly as for instance on persistent storage. |
|
119 | 121 | type RawElement = unaligned::I32Be; |
|
120 | 122 | |
|
121 | 123 | /// High level representation of values in NodeTree |
|
122 | 124 | /// [`Blocks`](struct.Block.html) |
|
123 | 125 | /// |
|
124 | 126 | /// This is the high level representation that most algorithms should |
|
125 | 127 | /// use. |
|
126 | 128 | #[derive(Clone, Debug, Eq, PartialEq)] |
|
127 | 129 | enum Element { |
|
128 | Rev(Revision), | |
|
130 | // This is not a Mercurial revision. It's a `i32` because this is the | |
|
131 | // right type for this structure. | |
|
132 | Rev(i32), | |
|
129 | 133 | Block(usize), |
|
130 | 134 | None, |
|
131 | 135 | } |
|
132 | 136 | |
|
133 | 137 | impl From<RawElement> for Element { |
|
134 | 138 | /// Conversion from low level representation, after endianness conversion. |
|
135 | 139 | /// |
|
136 | 140 | /// See [`Block`](struct.Block.html) for explanation about the encoding. |
|
137 | 141 | fn from(raw: RawElement) -> Element { |
|
138 | 142 | let int = raw.get(); |
|
139 | 143 | if int >= 0 { |
|
140 | 144 | Element::Block(int as usize) |
|
141 | 145 | } else if int == -1 { |
|
142 | 146 | Element::None |
|
143 | 147 | } else { |
|
144 | 148 | Element::Rev(-int - 2) |
|
145 | 149 | } |
|
146 | 150 | } |
|
147 | 151 | } |
|
148 | 152 | |
|
149 | 153 | impl From<Element> for RawElement { |
|
150 | 154 | fn from(element: Element) -> RawElement { |
|
151 | 155 | RawElement::from(match element { |
|
152 | 156 | Element::None => 0, |
|
153 | 157 | Element::Block(i) => i as i32, |
|
154 | 158 | Element::Rev(rev) => -rev - 2, |
|
155 | 159 | }) |
|
156 | 160 | } |
|
157 | 161 | } |
|
158 | 162 | |
|
159 | 163 | const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble |
|
160 | 164 | |
|
161 | 165 | /// A logical block of the [`NodeTree`], packed with a fixed size. |
|
162 | 166 | /// |
|
163 | 167 | /// These are always used in container types implementing `Index<Block>`, |
|
164 | 168 | /// such as `&Block` |
|
165 | 169 | /// |
|
166 | 170 | /// As an array of integers, its ith element encodes that the |
|
167 | 171 | /// ith potential edge from the block, representing the ith hexadecimal digit |
|
168 | 172 | /// (nybble) `i` is either: |
|
169 | 173 | /// |
|
170 | 174 | /// - absent (value -1) |
|
171 | 175 | /// - another `Block` in the same indexable container (value ≥ 0) |
|
172 | 176 | /// - a [`Revision`] leaf (value ≤ -2) |
|
173 | 177 | /// |
|
174 | 178 | /// Endianness has to be fixed for consistency on shared storage across |
|
175 | 179 | /// different architectures. |
|
176 | 180 | /// |
|
177 | 181 | /// A key difference with the C `nodetree` is that we need to be |
|
178 | 182 | /// able to represent the [`Block`] at index 0, hence -1 is the empty marker |
|
179 | 183 | /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1. |
|
180 | 184 | /// |
|
181 | 185 | /// Another related difference is that `NULL_REVISION` (-1) is not |
|
182 | 186 | /// represented at all, because we want an immutable empty nodetree |
|
183 | 187 | /// to be valid. |
|
184 | 188 | #[derive(Copy, Clone, BytesCast, PartialEq)] |
|
185 | 189 | #[repr(transparent)] |
|
186 | 190 | pub struct Block([RawElement; ELEMENTS_PER_BLOCK]); |
|
187 | 191 | |
|
188 | 192 | impl Block { |
|
189 | 193 | fn new() -> Self { |
|
190 | 194 | let absent_node = RawElement::from(-1); |
|
191 | 195 | Block([absent_node; ELEMENTS_PER_BLOCK]) |
|
192 | 196 | } |
|
193 | 197 | |
|
194 | 198 | fn get(&self, nybble: u8) -> Element { |
|
195 | 199 | self.0[nybble as usize].into() |
|
196 | 200 | } |
|
197 | 201 | |
|
198 | 202 | fn set(&mut self, nybble: u8, element: Element) { |
|
199 | 203 | self.0[nybble as usize] = element.into() |
|
200 | 204 | } |
|
201 | 205 | } |
|
202 | 206 | |
|
203 | 207 | impl fmt::Debug for Block { |
|
204 | 208 | /// sparse representation for testing and debugging purposes |
|
205 | 209 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
206 | 210 | f.debug_map() |
|
207 | 211 | .entries((0..16).filter_map(|i| match self.get(i) { |
|
208 | 212 | Element::None => None, |
|
209 | 213 | element => Some((i, element)), |
|
210 | 214 | })) |
|
211 | 215 | .finish() |
|
212 | 216 | } |
|
213 | 217 | } |
|
214 | 218 | |
|
215 | 219 | /// A mutable 16-radix tree with the root block logically at the end |
|
216 | 220 | /// |
|
217 | 221 | /// Because of the append only nature of our node trees, we need to |
|
218 | 222 | /// keep the original untouched and store new blocks separately. |
|
219 | 223 | /// |
|
220 | 224 | /// The mutable root [`Block`] is kept apart so that we don't have to rebump |
|
221 | 225 | /// it on each insertion. |
|
222 | 226 | pub struct NodeTree { |
|
223 | 227 | readonly: Box<dyn Deref<Target = [Block]> + Send>, |
|
224 | 228 | growable: Vec<Block>, |
|
225 | 229 | root: Block, |
|
226 | 230 | masked_inner_blocks: usize, |
|
227 | 231 | } |
|
228 | 232 | |
|
229 | 233 | impl Index<usize> for NodeTree { |
|
230 | 234 | type Output = Block; |
|
231 | 235 | |
|
232 | 236 | fn index(&self, i: usize) -> &Block { |
|
233 | 237 | let ro_len = self.readonly.len(); |
|
234 | 238 | if i < ro_len { |
|
235 | 239 | &self.readonly[i] |
|
236 | 240 | } else if i == ro_len + self.growable.len() { |
|
237 | 241 | &self.root |
|
238 | 242 | } else { |
|
239 | 243 | &self.growable[i - ro_len] |
|
240 | 244 | } |
|
241 | 245 | } |
|
242 | 246 | } |
|
243 | 247 | |
|
244 | 248 | /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`. |
|
245 | 249 | fn has_prefix_or_none( |
|
246 | 250 | idx: &impl RevlogIndex, |
|
247 | 251 | prefix: NodePrefix, |
|
248 | rev: Revision, | |
|
252 | rev: UncheckedRevision, | |
|
249 | 253 | ) -> Result<Option<Revision>, NodeMapError> { |
|
250 | idx.node(rev) | |
|
251 | .ok_or(NodeMapError::RevisionNotInIndex(rev)) | |
|
252 | .map(|node| { | |
|
253 | if prefix.is_prefix_of(node) { | |
|
254 | Some(rev) | |
|
255 | } else { | |
|
256 |
|
|
|
257 | } | |
|
258 | }) | |
|
254 | match idx.check_revision(rev) { | |
|
255 | Some(checked) => idx | |
|
256 | .node(checked) | |
|
257 | .ok_or(NodeMapError::RevisionNotInIndex(rev)) | |
|
258 | .map(|node| { | |
|
259 | if prefix.is_prefix_of(node) { | |
|
260 | Some(checked) | |
|
261 | } else { | |
|
262 | None | |
|
263 | } | |
|
264 | }), | |
|
265 | None => Err(NodeMapError::RevisionNotInIndex(rev)), | |
|
266 | } | |
|
259 | 267 | } |
|
260 | 268 | |
|
261 | 269 | /// validate that the candidate's node starts indeed with given prefix, |
|
262 | 270 | /// and treat ambiguities related to [`NULL_REVISION`]. |
|
263 | 271 | /// |
|
264 | 272 | /// From the data in the NodeTree, one can only conclude that some |
|
265 | 273 | /// revision is the only one for a *subprefix* of the one being looked up. |
|
266 | 274 | fn validate_candidate( |
|
267 | 275 | idx: &impl RevlogIndex, |
|
268 | 276 | prefix: NodePrefix, |
|
269 | candidate: (Option<Revision>, usize), | |
|
277 | candidate: (Option<UncheckedRevision>, usize), | |
|
270 | 278 | ) -> Result<(Option<Revision>, usize), NodeMapError> { |
|
271 | 279 | let (rev, steps) = candidate; |
|
272 | 280 | if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) { |
|
273 | 281 | rev.map_or(Ok((None, steps)), |r| { |
|
274 | 282 | has_prefix_or_none(idx, prefix, r) |
|
275 | 283 | .map(|opt| (opt, max(steps, nz_nybble + 1))) |
|
276 | 284 | }) |
|
277 | 285 | } else { |
|
278 | 286 | // the prefix is only made of zeros; NULL_REVISION always matches it |
|
279 | 287 | // and any other *valid* result is an ambiguity |
|
280 | 288 | match rev { |
|
281 | 289 | None => Ok((Some(NULL_REVISION), steps + 1)), |
|
282 | 290 | Some(r) => match has_prefix_or_none(idx, prefix, r)? { |
|
283 | 291 | None => Ok((Some(NULL_REVISION), steps + 1)), |
|
284 | 292 | _ => Err(NodeMapError::MultipleResults), |
|
285 | 293 | }, |
|
286 | 294 | } |
|
287 | 295 | } |
|
288 | 296 | } |
|
289 | 297 | |
|
290 | 298 | impl NodeTree { |
|
291 | 299 | /// Initiate a NodeTree from an immutable slice-like of `Block` |
|
292 | 300 | /// |
|
293 | 301 | /// We keep `readonly` and clone its root block if it isn't empty. |
|
294 | 302 | fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self { |
|
295 | 303 | let root = readonly.last().cloned().unwrap_or_else(Block::new); |
|
296 | 304 | NodeTree { |
|
297 | 305 | readonly, |
|
298 | 306 | growable: Vec::new(), |
|
299 | 307 | root, |
|
300 | 308 | masked_inner_blocks: 0, |
|
301 | 309 | } |
|
302 | 310 | } |
|
303 | 311 | |
|
304 | 312 | /// Create from an opaque bunch of bytes |
|
305 | 313 | /// |
|
306 | 314 | /// The created [`NodeTreeBytes`] from `bytes`, |
|
307 | 315 | /// of which exactly `amount` bytes are used. |
|
308 | 316 | /// |
|
309 | 317 | /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects. |
|
310 | 318 | /// - `amount` is expressed in bytes, and is not automatically derived from |
|
311 | 319 | /// `bytes`, so that a caller that manages them atomically can perform |
|
312 | 320 | /// temporary disk serializations and still rollback easily if needed. |
|
313 | 321 | /// First use-case for this would be to support Mercurial shell hooks. |
|
314 | 322 | /// |
|
315 | 323 | /// panics if `buffer` is smaller than `amount` |
|
316 | 324 | pub fn load_bytes( |
|
317 | 325 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
318 | 326 | amount: usize, |
|
319 | 327 | ) -> Self { |
|
320 | 328 | NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount))) |
|
321 | 329 | } |
|
322 | 330 | |
|
323 | 331 | /// Retrieve added [`Block`]s and the original immutable data |
|
324 | 332 | pub fn into_readonly_and_added( |
|
325 | 333 | self, |
|
326 | 334 | ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) { |
|
327 | 335 | let mut vec = self.growable; |
|
328 | 336 | let readonly = self.readonly; |
|
329 | 337 | if readonly.last() != Some(&self.root) { |
|
330 | 338 | vec.push(self.root); |
|
331 | 339 | } |
|
332 | 340 | (readonly, vec) |
|
333 | 341 | } |
|
334 | 342 | |
|
335 | 343 | /// Retrieve added [`Block]s as bytes, ready to be written to persistent |
|
336 | 344 | /// storage |
|
337 | 345 | pub fn into_readonly_and_added_bytes( |
|
338 | 346 | self, |
|
339 | 347 | ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) { |
|
340 | 348 | let (readonly, vec) = self.into_readonly_and_added(); |
|
341 | 349 | // Prevent running `v`'s destructor so we are in complete control |
|
342 | 350 | // of the allocation. |
|
343 | 351 | let vec = mem::ManuallyDrop::new(vec); |
|
344 | 352 | |
|
345 | 353 | // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous |
|
346 | 354 | // bytes, so this is perfectly safe. |
|
347 | 355 | let bytes = unsafe { |
|
348 | 356 | // Check for compatible allocation layout. |
|
349 | 357 | // (Optimized away by constant-folding + dead code elimination.) |
|
350 | 358 | assert_eq!(size_of::<Block>(), 64); |
|
351 | 359 | assert_eq!(align_of::<Block>(), 1); |
|
352 | 360 | |
|
353 | 361 | // /!\ Any use of `vec` after this is use-after-free. |
|
354 | 362 | // TODO: use `into_raw_parts` once stabilized |
|
355 | 363 | Vec::from_raw_parts( |
|
356 | 364 | vec.as_ptr() as *mut u8, |
|
357 | 365 | vec.len() * size_of::<Block>(), |
|
358 | 366 | vec.capacity() * size_of::<Block>(), |
|
359 | 367 | ) |
|
360 | 368 | }; |
|
361 | 369 | (readonly, bytes) |
|
362 | 370 | } |
|
363 | 371 | |
|
364 | 372 | /// Total number of blocks |
|
365 | 373 | fn len(&self) -> usize { |
|
366 | 374 | self.readonly.len() + self.growable.len() + 1 |
|
367 | 375 | } |
|
368 | 376 | |
|
369 | 377 | /// Implemented for completeness |
|
370 | 378 | /// |
|
371 | 379 | /// A `NodeTree` always has at least the mutable root block. |
|
372 | 380 | #[allow(dead_code)] |
|
373 | 381 | fn is_empty(&self) -> bool { |
|
374 | 382 | false |
|
375 | 383 | } |
|
376 | 384 | |
|
377 | 385 | /// Main working method for `NodeTree` searches |
|
378 | 386 | /// |
|
379 | 387 | /// The first returned value is the result of analysing `NodeTree` data |
|
380 | 388 | /// *alone*: whereas `None` guarantees that the given prefix is absent |
|
381 | 389 | /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with |
|
382 | 390 | /// `Some(rev)`, it is to be understood that `rev` is the unique |
|
383 | 391 | /// [`Revision`] that could match the prefix. Actually, all that can |
|
384 | 392 | /// be inferred from |
|
385 | 393 | /// the `NodeTree` data is that `rev` is the revision with the longest |
|
386 | 394 | /// common node prefix with the given prefix. |
|
395 | /// We return an [`UncheckedRevision`] because we have no guarantee that | |
|
396 | /// the revision we found is valid for the index. | |
|
387 | 397 | /// |
|
388 | 398 | /// The second returned value is the size of the smallest subprefix |
|
389 | 399 | /// of `prefix` that would give the same result, i.e. not the |
|
390 | 400 | /// [MultipleResults](NodeMapError) error variant (again, using only the |
|
391 | 401 | /// data of the [`NodeTree`]). |
|
392 | 402 | fn lookup( |
|
393 | 403 | &self, |
|
394 | 404 | prefix: NodePrefix, |
|
395 | ) -> Result<(Option<Revision>, usize), NodeMapError> { | |
|
405 | ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> { | |
|
396 | 406 | for (i, visit_item) in self.visit(prefix).enumerate() { |
|
397 | 407 | if let Some(opt) = visit_item.final_revision() { |
|
398 | 408 | return Ok((opt, i + 1)); |
|
399 | 409 | } |
|
400 | 410 | } |
|
401 | 411 | Err(NodeMapError::MultipleResults) |
|
402 | 412 | } |
|
403 | 413 | |
|
404 | 414 | fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor { |
|
405 | 415 | NodeTreeVisitor { |
|
406 | 416 | nt: self, |
|
407 | 417 | prefix, |
|
408 | 418 | visit: self.len() - 1, |
|
409 | 419 | nybble_idx: 0, |
|
410 | 420 | done: false, |
|
411 | 421 | } |
|
412 | 422 | } |
|
413 | 423 | /// Return a mutable reference for `Block` at index `idx`. |
|
414 | 424 | /// |
|
415 | 425 | /// If `idx` lies in the immutable area, then the reference is to |
|
416 | 426 | /// a newly appended copy. |
|
417 | 427 | /// |
|
418 | 428 | /// Returns (new_idx, glen, mut_ref) where |
|
419 | 429 | /// |
|
420 | 430 | /// - `new_idx` is the index of the mutable `Block` |
|
421 | 431 | /// - `mut_ref` is a mutable reference to the mutable Block. |
|
422 | 432 | /// - `glen` is the new length of `self.growable` |
|
423 | 433 | /// |
|
424 | 434 | /// Note: the caller wouldn't be allowed to query `self.growable.len()` |
|
425 | 435 | /// itself because of the mutable borrow taken with the returned `Block` |
|
426 | 436 | fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) { |
|
427 | 437 | let ro_blocks = &self.readonly; |
|
428 | 438 | let ro_len = ro_blocks.len(); |
|
429 | 439 | let glen = self.growable.len(); |
|
430 | 440 | if idx < ro_len { |
|
431 | 441 | self.masked_inner_blocks += 1; |
|
432 | 442 | self.growable.push(ro_blocks[idx]); |
|
433 | 443 | (glen + ro_len, &mut self.growable[glen], glen + 1) |
|
434 | 444 | } else if glen + ro_len == idx { |
|
435 | 445 | (idx, &mut self.root, glen) |
|
436 | 446 | } else { |
|
437 | 447 | (idx, &mut self.growable[idx - ro_len], glen) |
|
438 | 448 | } |
|
439 | 449 | } |
|
440 | 450 | |
|
441 | 451 | /// Main insertion method |
|
442 | 452 | /// |
|
443 | 453 | /// This will dive in the node tree to find the deepest `Block` for |
|
444 | 454 | /// `node`, split it as much as needed and record `node` in there. |
|
445 | 455 | /// The method then backtracks, updating references in all the visited |
|
446 | 456 | /// blocks from the root. |
|
447 | 457 | /// |
|
448 | 458 | /// All the mutated `Block` are copied first to the growable part if |
|
449 | 459 | /// needed. That happens for those in the immutable part except the root. |
|
450 | 460 | pub fn insert<I: RevlogIndex>( |
|
451 | 461 | &mut self, |
|
452 | 462 | index: &I, |
|
453 | 463 | node: &Node, |
|
454 | 464 | rev: Revision, |
|
455 | 465 | ) -> Result<(), NodeMapError> { |
|
456 | 466 | let ro_len = &self.readonly.len(); |
|
457 | 467 | |
|
458 | 468 | let mut visit_steps: Vec<_> = self.visit(node.into()).collect(); |
|
459 | 469 | let read_nybbles = visit_steps.len(); |
|
460 | 470 | // visit_steps cannot be empty, since we always visit the root block |
|
461 | 471 | let deepest = visit_steps.pop().unwrap(); |
|
462 | 472 | |
|
463 | 473 | let (mut block_idx, mut block, mut glen) = |
|
464 | 474 | self.mutable_block(deepest.block_idx); |
|
465 | 475 | |
|
466 | 476 | if let Element::Rev(old_rev) = deepest.element { |
|
467 | let old_node = index | |
|
468 | .node(old_rev) | |
|
469 | .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?; | |
|
477 | let old_node = index.node(old_rev).ok_or_else(|| { | |
|
478 | NodeMapError::RevisionNotInIndex(old_rev.into()) | |
|
479 | })?; | |
|
470 | 480 | if old_node == node { |
|
471 | 481 | return Ok(()); // avoid creating lots of useless blocks |
|
472 | 482 | } |
|
473 | 483 | |
|
474 | 484 | // Looping over the tail of nybbles in both nodes, creating |
|
475 | 485 | // new blocks until we find the difference |
|
476 | 486 | let mut new_block_idx = ro_len + glen; |
|
477 | 487 | let mut nybble = deepest.nybble; |
|
478 | 488 | for nybble_pos in read_nybbles..node.nybbles_len() { |
|
479 | 489 | block.set(nybble, Element::Block(new_block_idx)); |
|
480 | 490 | |
|
481 | 491 | let new_nybble = node.get_nybble(nybble_pos); |
|
482 | 492 | let old_nybble = old_node.get_nybble(nybble_pos); |
|
483 | 493 | |
|
484 | 494 | if old_nybble == new_nybble { |
|
485 | 495 | self.growable.push(Block::new()); |
|
486 | 496 | block = &mut self.growable[glen]; |
|
487 | 497 | glen += 1; |
|
488 | 498 | new_block_idx += 1; |
|
489 | 499 | nybble = new_nybble; |
|
490 | 500 | } else { |
|
491 | 501 | let mut new_block = Block::new(); |
|
492 | 502 | new_block.set(old_nybble, Element::Rev(old_rev)); |
|
493 | 503 | new_block.set(new_nybble, Element::Rev(rev)); |
|
494 | 504 | self.growable.push(new_block); |
|
495 | 505 | break; |
|
496 | 506 | } |
|
497 | 507 | } |
|
498 | 508 | } else { |
|
499 | 509 | // Free slot in the deepest block: no splitting has to be done |
|
500 | 510 | block.set(deepest.nybble, Element::Rev(rev)); |
|
501 | 511 | } |
|
502 | 512 | |
|
503 | 513 | // Backtrack over visit steps to update references |
|
504 | 514 | while let Some(visited) = visit_steps.pop() { |
|
505 | 515 | let to_write = Element::Block(block_idx); |
|
506 | 516 | if visit_steps.is_empty() { |
|
507 | 517 | self.root.set(visited.nybble, to_write); |
|
508 | 518 | break; |
|
509 | 519 | } |
|
510 | 520 | let (new_idx, block, _) = self.mutable_block(visited.block_idx); |
|
511 | 521 | if block.get(visited.nybble) == to_write { |
|
512 | 522 | break; |
|
513 | 523 | } |
|
514 | 524 | block.set(visited.nybble, to_write); |
|
515 | 525 | block_idx = new_idx; |
|
516 | 526 | } |
|
517 | 527 | Ok(()) |
|
518 | 528 | } |
|
519 | 529 | |
|
520 | 530 | /// Make the whole `NodeTree` logically empty, without touching the |
|
521 | 531 | /// immutable part. |
|
522 | 532 | pub fn invalidate_all(&mut self) { |
|
523 | 533 | self.root = Block::new(); |
|
524 | 534 | self.growable = Vec::new(); |
|
525 | 535 | self.masked_inner_blocks = self.readonly.len(); |
|
526 | 536 | } |
|
527 | 537 | |
|
528 | 538 | /// Return the number of blocks in the readonly part that are currently |
|
529 | 539 | /// masked in the mutable part. |
|
530 | 540 | /// |
|
531 | 541 | /// The `NodeTree` structure has no efficient way to know how many blocks |
|
532 | 542 | /// are already unreachable in the readonly part. |
|
533 | 543 | /// |
|
534 | 544 | /// After a call to `invalidate_all()`, the returned number can be actually |
|
535 | 545 | /// bigger than the whole readonly part, a conventional way to mean that |
|
536 | 546 | /// all the readonly blocks have been masked. This is what is really |
|
537 | 547 | /// useful to the caller and does not require to know how many were |
|
538 | 548 | /// actually unreachable to begin with. |
|
539 | 549 | pub fn masked_readonly_blocks(&self) -> usize { |
|
540 | 550 | if let Some(readonly_root) = self.readonly.last() { |
|
541 | 551 | if readonly_root == &self.root { |
|
542 | 552 | return 0; |
|
543 | 553 | } |
|
544 | 554 | } else { |
|
545 | 555 | return 0; |
|
546 | 556 | } |
|
547 | 557 | self.masked_inner_blocks + 1 |
|
548 | 558 | } |
|
549 | 559 | } |
|
550 | 560 | |
|
551 | 561 | pub struct NodeTreeBytes { |
|
552 | 562 | buffer: Box<dyn Deref<Target = [u8]> + Send>, |
|
553 | 563 | len_in_blocks: usize, |
|
554 | 564 | } |
|
555 | 565 | |
|
556 | 566 | impl NodeTreeBytes { |
|
557 | 567 | fn new( |
|
558 | 568 | buffer: Box<dyn Deref<Target = [u8]> + Send>, |
|
559 | 569 | amount: usize, |
|
560 | 570 | ) -> Self { |
|
561 | 571 | assert!(buffer.len() >= amount); |
|
562 | 572 | let len_in_blocks = amount / size_of::<Block>(); |
|
563 | 573 | NodeTreeBytes { |
|
564 | 574 | buffer, |
|
565 | 575 | len_in_blocks, |
|
566 | 576 | } |
|
567 | 577 | } |
|
568 | 578 | } |
|
569 | 579 | |
|
570 | 580 | impl Deref for NodeTreeBytes { |
|
571 | 581 | type Target = [Block]; |
|
572 | 582 | |
|
573 | 583 | fn deref(&self) -> &[Block] { |
|
574 | 584 | Block::slice_from_bytes(&self.buffer, self.len_in_blocks) |
|
575 | 585 | // `NodeTreeBytes::new` already asserted that `self.buffer` is |
|
576 | 586 | // large enough. |
|
577 | 587 | .unwrap() |
|
578 | 588 | .0 |
|
579 | 589 | } |
|
580 | 590 | } |
|
581 | 591 | |
|
582 | 592 | struct NodeTreeVisitor<'n> { |
|
583 | 593 | nt: &'n NodeTree, |
|
584 | 594 | prefix: NodePrefix, |
|
585 | 595 | visit: usize, |
|
586 | 596 | nybble_idx: usize, |
|
587 | 597 | done: bool, |
|
588 | 598 | } |
|
589 | 599 | |
|
590 | 600 | #[derive(Debug, PartialEq, Clone)] |
|
591 | 601 | struct NodeTreeVisitItem { |
|
592 | 602 | block_idx: usize, |
|
593 | 603 | nybble: u8, |
|
594 | 604 | element: Element, |
|
595 | 605 | } |
|
596 | 606 | |
|
597 | 607 | impl<'n> Iterator for NodeTreeVisitor<'n> { |
|
598 | 608 | type Item = NodeTreeVisitItem; |
|
599 | 609 | |
|
600 | 610 | fn next(&mut self) -> Option<Self::Item> { |
|
601 | 611 | if self.done || self.nybble_idx >= self.prefix.nybbles_len() { |
|
602 | 612 | return None; |
|
603 | 613 | } |
|
604 | 614 | |
|
605 | 615 | let nybble = self.prefix.get_nybble(self.nybble_idx); |
|
606 | 616 | self.nybble_idx += 1; |
|
607 | 617 | |
|
608 | 618 | let visit = self.visit; |
|
609 | 619 | let element = self.nt[visit].get(nybble); |
|
610 | 620 | if let Element::Block(idx) = element { |
|
611 | 621 | self.visit = idx; |
|
612 | 622 | } else { |
|
613 | 623 | self.done = true; |
|
614 | 624 | } |
|
615 | 625 | |
|
616 | 626 | Some(NodeTreeVisitItem { |
|
617 | 627 | block_idx: visit, |
|
618 | 628 | nybble, |
|
619 | 629 | element, |
|
620 | 630 | }) |
|
621 | 631 | } |
|
622 | 632 | } |
|
623 | 633 | |
|
624 | 634 | impl NodeTreeVisitItem { |
|
625 | 635 | // Return `Some(opt)` if this item is final, with `opt` being the |
|
626 | // `Revision` that it may represent. | |
|
636 | // `UncheckedRevision` that it may represent. | |
|
627 | 637 | // |
|
628 | 638 | // If the item is not terminal, return `None` |
|
629 | fn final_revision(&self) -> Option<Option<Revision>> { | |
|
639 | fn final_revision(&self) -> Option<Option<UncheckedRevision>> { | |
|
630 | 640 | match self.element { |
|
631 | 641 | Element::Block(_) => None, |
|
632 | Element::Rev(r) => Some(Some(r)), | |
|
642 | Element::Rev(r) => Some(Some(r.into())), | |
|
633 | 643 | Element::None => Some(None), |
|
634 | 644 | } |
|
635 | 645 | } |
|
636 | 646 | } |
|
637 | 647 | |
|
638 | 648 | impl From<Vec<Block>> for NodeTree { |
|
639 | 649 | fn from(vec: Vec<Block>) -> Self { |
|
640 | 650 | Self::new(Box::new(vec)) |
|
641 | 651 | } |
|
642 | 652 | } |
|
643 | 653 | |
|
644 | 654 | impl fmt::Debug for NodeTree { |
|
645 | 655 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
646 | 656 | let readonly: &[Block] = &*self.readonly; |
|
647 | 657 | write!( |
|
648 | 658 | f, |
|
649 | 659 | "readonly: {:?}, growable: {:?}, root: {:?}", |
|
650 | 660 | readonly, self.growable, self.root |
|
651 | 661 | ) |
|
652 | 662 | } |
|
653 | 663 | } |
|
654 | 664 | |
|
655 | 665 | impl Default for NodeTree { |
|
656 | 666 | /// Create a fully mutable empty NodeTree |
|
657 | 667 | fn default() -> Self { |
|
658 | 668 | NodeTree::new(Box::new(Vec::new())) |
|
659 | 669 | } |
|
660 | 670 | } |
|
661 | 671 | |
|
662 | 672 | impl NodeMap for NodeTree { |
|
663 | 673 | fn find_bin<'a>( |
|
664 | 674 | &self, |
|
665 | 675 | idx: &impl RevlogIndex, |
|
666 | 676 | prefix: NodePrefix, |
|
667 | 677 | ) -> Result<Option<Revision>, NodeMapError> { |
|
668 | 678 | validate_candidate(idx, prefix, self.lookup(prefix)?) |
|
669 | 679 | .map(|(opt, _shortest)| opt) |
|
670 | 680 | } |
|
671 | 681 | |
|
672 | 682 | fn unique_prefix_len_bin<'a>( |
|
673 | 683 | &self, |
|
674 | 684 | idx: &impl RevlogIndex, |
|
675 | 685 | prefix: NodePrefix, |
|
676 | 686 | ) -> Result<Option<usize>, NodeMapError> { |
|
677 | 687 | validate_candidate(idx, prefix, self.lookup(prefix)?) |
|
678 | 688 | .map(|(opt, shortest)| opt.map(|_rev| shortest)) |
|
679 | 689 | } |
|
680 | 690 | } |
|
681 | 691 | |
|
682 | 692 | #[cfg(test)] |
|
683 | 693 | mod tests { |
|
684 | 694 | use super::NodeMapError::*; |
|
685 | 695 | use super::*; |
|
686 | 696 | use crate::revlog::node::{hex_pad_right, Node}; |
|
687 | 697 | use std::collections::HashMap; |
|
688 | 698 | |
|
689 | 699 | /// Creates a `Block` using a syntax close to the `Debug` output |
|
690 | 700 | macro_rules! block { |
|
691 | 701 | {$($nybble:tt : $variant:ident($val:tt)),*} => ( |
|
692 | 702 | { |
|
693 | 703 | let mut block = Block::new(); |
|
694 | 704 | $(block.set($nybble, Element::$variant($val)));*; |
|
695 | 705 | block |
|
696 | 706 | } |
|
697 | 707 | ) |
|
698 | 708 | } |
|
699 | 709 | |
|
700 | 710 | #[test] |
|
701 | 711 | fn test_block_debug() { |
|
702 | 712 | let mut block = Block::new(); |
|
703 | 713 | block.set(1, Element::Rev(3)); |
|
704 | 714 | block.set(10, Element::Block(0)); |
|
705 | 715 | assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}"); |
|
706 | 716 | } |
|
707 | 717 | |
|
708 | 718 | #[test] |
|
709 | 719 | fn test_block_macro() { |
|
710 | 720 | let block = block! {5: Block(2)}; |
|
711 | 721 | assert_eq!(format!("{:?}", block), "{5: Block(2)}"); |
|
712 | 722 | |
|
713 | 723 | let block = block! {13: Rev(15), 5: Block(2)}; |
|
714 | 724 | assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}"); |
|
715 | 725 | } |
|
716 | 726 | |
|
717 | 727 | #[test] |
|
718 | 728 | fn test_raw_block() { |
|
719 | 729 | let mut raw = [255u8; 64]; |
|
720 | 730 | |
|
721 | 731 | let mut counter = 0; |
|
722 | 732 | for val in [0_i32, 15, -2, -1, -3].iter() { |
|
723 | 733 | for byte in val.to_be_bytes().iter() { |
|
724 | 734 | raw[counter] = *byte; |
|
725 | 735 | counter += 1; |
|
726 | 736 | } |
|
727 | 737 | } |
|
728 | 738 | let (block, _) = Block::from_bytes(&raw).unwrap(); |
|
729 | 739 | assert_eq!(block.get(0), Element::Block(0)); |
|
730 | 740 | assert_eq!(block.get(1), Element::Block(15)); |
|
731 | 741 | assert_eq!(block.get(3), Element::None); |
|
732 | 742 | assert_eq!(block.get(2), Element::Rev(0)); |
|
733 | 743 | assert_eq!(block.get(4), Element::Rev(1)); |
|
734 | 744 | } |
|
735 | 745 | |
|
736 | type TestIndex = HashMap<Revision, Node>; | |
|
746 | type TestIndex = HashMap<UncheckedRevision, Node>; | |
|
737 | 747 | |
|
738 | 748 | impl RevlogIndex for TestIndex { |
|
739 | 749 | fn node(&self, rev: Revision) -> Option<&Node> { |
|
740 | self.get(&rev) | |
|
750 | self.get(&rev.into()) | |
|
741 | 751 | } |
|
742 | 752 | |
|
743 | 753 | fn len(&self) -> usize { |
|
744 | 754 | self.len() |
|
745 | 755 | } |
|
756 | ||
|
757 | fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { | |
|
758 | self.get(&rev).map(|_| rev.0) | |
|
759 | } | |
|
746 | 760 | } |
|
747 | 761 | |
|
748 | 762 | /// Pad hexadecimal Node prefix with zeros on the right |
|
749 | 763 | /// |
|
750 | 764 | /// This avoids having to repeatedly write very long hexadecimal |
|
751 | 765 | /// strings for test data, and brings actual hash size independency. |
|
752 | 766 | #[cfg(test)] |
|
753 | 767 | fn pad_node(hex: &str) -> Node { |
|
754 | 768 | Node::from_hex(&hex_pad_right(hex)).unwrap() |
|
755 | 769 | } |
|
756 | 770 | |
|
757 | 771 | /// Pad hexadecimal Node prefix with zeros on the right, then insert |
|
758 | 772 | fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) { |
|
759 | idx.insert(rev, pad_node(hex)); | |
|
773 | idx.insert(rev.into(), pad_node(hex)); | |
|
760 | 774 | } |
|
761 | 775 | |
|
762 | 776 | fn sample_nodetree() -> NodeTree { |
|
763 | 777 | NodeTree::from(vec![ |
|
764 | 778 | block![0: Rev(9)], |
|
765 | 779 | block![0: Rev(0), 1: Rev(9)], |
|
766 | 780 | block![0: Block(1), 1:Rev(1)], |
|
767 | 781 | ]) |
|
768 | 782 | } |
|
769 | 783 | |
|
770 | 784 | fn hex(s: &str) -> NodePrefix { |
|
771 | 785 | NodePrefix::from_hex(s).unwrap() |
|
772 | 786 | } |
|
773 | 787 | |
|
774 | 788 | #[test] |
|
775 | 789 | fn test_nt_debug() { |
|
776 | 790 | let nt = sample_nodetree(); |
|
777 | 791 | assert_eq!( |
|
778 | 792 | format!("{:?}", nt), |
|
779 | 793 | "readonly: \ |
|
780 | 794 | [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \ |
|
781 | 795 | growable: [], \ |
|
782 | 796 | root: {0: Block(1), 1: Rev(1)}", |
|
783 | 797 | ); |
|
784 | 798 | } |
|
785 | 799 | |
|
786 | 800 | #[test] |
|
787 | 801 | fn test_immutable_find_simplest() -> Result<(), NodeMapError> { |
|
788 | 802 | let mut idx: TestIndex = HashMap::new(); |
|
789 | 803 | pad_insert(&mut idx, 1, "1234deadcafe"); |
|
790 | 804 | |
|
791 | 805 | let nt = NodeTree::from(vec![block! {1: Rev(1)}]); |
|
792 | 806 | assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1)); |
|
793 | 807 | assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1)); |
|
794 | 808 | assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1)); |
|
795 | 809 | assert_eq!(nt.find_bin(&idx, hex("1a"))?, None); |
|
796 | 810 | assert_eq!(nt.find_bin(&idx, hex("ab"))?, None); |
|
797 | 811 | |
|
798 | 812 | // and with full binary Nodes |
|
799 | assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1)); | |
|
813 | assert_eq!(nt.find_node(&idx, idx.get(&1.into()).unwrap())?, Some(1)); | |
|
800 | 814 | let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap(); |
|
801 | 815 | assert_eq!(nt.find_node(&idx, &unknown)?, None); |
|
802 | 816 | Ok(()) |
|
803 | 817 | } |
|
804 | 818 | |
|
805 | 819 | #[test] |
|
806 | 820 | fn test_immutable_find_one_jump() { |
|
807 | 821 | let mut idx = TestIndex::new(); |
|
808 | 822 | pad_insert(&mut idx, 9, "012"); |
|
809 | 823 | pad_insert(&mut idx, 0, "00a"); |
|
810 | 824 | |
|
811 | 825 | let nt = sample_nodetree(); |
|
812 | 826 | |
|
813 | 827 | assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults)); |
|
814 | 828 | assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9))); |
|
815 | 829 | assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults)); |
|
816 | 830 | assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0))); |
|
817 | 831 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3))); |
|
818 | 832 | assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION))); |
|
819 | 833 | } |
|
820 | 834 | |
|
821 | 835 | #[test] |
|
822 | 836 | fn test_mutated_find() -> Result<(), NodeMapError> { |
|
823 | 837 | let mut idx = TestIndex::new(); |
|
824 | 838 | pad_insert(&mut idx, 9, "012"); |
|
825 | 839 | pad_insert(&mut idx, 0, "00a"); |
|
826 | 840 | pad_insert(&mut idx, 2, "cafe"); |
|
827 | 841 | pad_insert(&mut idx, 3, "15"); |
|
828 | 842 | pad_insert(&mut idx, 1, "10"); |
|
829 | 843 | |
|
830 | 844 | let nt = NodeTree { |
|
831 | 845 | readonly: sample_nodetree().readonly, |
|
832 | 846 | growable: vec![block![0: Rev(1), 5: Rev(3)]], |
|
833 | 847 | root: block![0: Block(1), 1:Block(3), 12: Rev(2)], |
|
834 | 848 | masked_inner_blocks: 1, |
|
835 | 849 | }; |
|
836 | 850 | assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1)); |
|
837 | 851 | assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2)); |
|
838 | 852 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1)); |
|
839 | 853 | assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults)); |
|
840 | 854 | assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION)); |
|
841 | 855 | assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3)); |
|
842 | 856 | assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9)); |
|
843 | 857 | assert_eq!(nt.masked_readonly_blocks(), 2); |
|
844 | 858 | Ok(()) |
|
845 | 859 | } |
|
846 | 860 | |
|
847 | 861 | struct TestNtIndex { |
|
848 | 862 | index: TestIndex, |
|
849 | 863 | nt: NodeTree, |
|
850 | 864 | } |
|
851 | 865 | |
|
852 | 866 | impl TestNtIndex { |
|
853 | 867 | fn new() -> Self { |
|
854 | 868 | TestNtIndex { |
|
855 | 869 | index: HashMap::new(), |
|
856 | 870 | nt: NodeTree::default(), |
|
857 | 871 | } |
|
858 | 872 | } |
|
859 | 873 | |
|
860 | fn insert( | |
|
861 | &mut self, | |
|
862 | rev: Revision, | |
|
863 | hex: &str, | |
|
864 | ) -> Result<(), NodeMapError> { | |
|
874 | fn insert(&mut self, rev: i32, hex: &str) -> Result<(), NodeMapError> { | |
|
865 | 875 | let node = pad_node(hex); |
|
876 | let rev: UncheckedRevision = rev.into(); | |
|
866 | 877 | self.index.insert(rev, node); |
|
867 |
self.nt.insert( |
|
|
878 | self.nt.insert( | |
|
879 | &self.index, | |
|
880 | &node, | |
|
881 | self.index.check_revision(rev).unwrap(), | |
|
882 | )?; | |
|
868 | 883 | Ok(()) |
|
869 | 884 | } |
|
870 | 885 | |
|
871 | 886 | fn find_hex( |
|
872 | 887 | &self, |
|
873 | 888 | prefix: &str, |
|
874 | 889 | ) -> Result<Option<Revision>, NodeMapError> { |
|
875 | 890 | self.nt.find_bin(&self.index, hex(prefix)) |
|
876 | 891 | } |
|
877 | 892 | |
|
878 | 893 | fn unique_prefix_len_hex( |
|
879 | 894 | &self, |
|
880 | 895 | prefix: &str, |
|
881 | 896 | ) -> Result<Option<usize>, NodeMapError> { |
|
882 | 897 | self.nt.unique_prefix_len_bin(&self.index, hex(prefix)) |
|
883 | 898 | } |
|
884 | 899 | |
|
885 | 900 | /// Drain `added` and restart a new one |
|
886 | 901 | fn commit(self) -> Self { |
|
887 | 902 | let mut as_vec: Vec<Block> = |
|
888 | 903 | self.nt.readonly.iter().copied().collect(); |
|
889 | 904 | as_vec.extend(self.nt.growable); |
|
890 | 905 | as_vec.push(self.nt.root); |
|
891 | 906 | |
|
892 | 907 | Self { |
|
893 | 908 | index: self.index, |
|
894 | 909 | nt: NodeTree::from(as_vec), |
|
895 | 910 | } |
|
896 | 911 | } |
|
897 | 912 | } |
|
898 | 913 | |
|
899 | 914 | #[test] |
|
900 | 915 | fn test_insert_full_mutable() -> Result<(), NodeMapError> { |
|
901 | 916 | let mut idx = TestNtIndex::new(); |
|
902 | 917 | idx.insert(0, "1234")?; |
|
903 | 918 | assert_eq!(idx.find_hex("1")?, Some(0)); |
|
904 | 919 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
905 | 920 | |
|
906 | 921 | // let's trigger a simple split |
|
907 | 922 | idx.insert(1, "1a34")?; |
|
908 | 923 | assert_eq!(idx.nt.growable.len(), 1); |
|
909 | 924 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
910 | 925 | assert_eq!(idx.find_hex("1a")?, Some(1)); |
|
911 | 926 | |
|
912 | 927 | // reinserting is a no_op |
|
913 | 928 | idx.insert(1, "1a34")?; |
|
914 | 929 | assert_eq!(idx.nt.growable.len(), 1); |
|
915 | 930 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
916 | 931 | assert_eq!(idx.find_hex("1a")?, Some(1)); |
|
917 | 932 | |
|
918 | 933 | idx.insert(2, "1a01")?; |
|
919 | 934 | assert_eq!(idx.nt.growable.len(), 2); |
|
920 | 935 | assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults)); |
|
921 | 936 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
922 | 937 | assert_eq!(idx.find_hex("1a3")?, Some(1)); |
|
923 | 938 | assert_eq!(idx.find_hex("1a0")?, Some(2)); |
|
924 | 939 | assert_eq!(idx.find_hex("1a12")?, None); |
|
925 | 940 | |
|
926 | 941 | // now let's make it split and create more than one additional block |
|
927 | 942 | idx.insert(3, "1a345")?; |
|
928 | 943 | assert_eq!(idx.nt.growable.len(), 4); |
|
929 | 944 | assert_eq!(idx.find_hex("1a340")?, Some(1)); |
|
930 | 945 | assert_eq!(idx.find_hex("1a345")?, Some(3)); |
|
931 | 946 | assert_eq!(idx.find_hex("1a341")?, None); |
|
932 | 947 | |
|
933 | 948 | // there's no readonly block to mask |
|
934 | 949 | assert_eq!(idx.nt.masked_readonly_blocks(), 0); |
|
935 | 950 | Ok(()) |
|
936 | 951 | } |
|
937 | 952 | |
|
938 | 953 | #[test] |
|
939 | 954 | fn test_unique_prefix_len_zero_prefix() { |
|
940 | 955 | let mut idx = TestNtIndex::new(); |
|
941 | 956 | idx.insert(0, "00000abcd").unwrap(); |
|
942 | 957 | |
|
943 | 958 | assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults)); |
|
944 | 959 | // in the nodetree proper, this will be found at the first nybble |
|
945 | 960 | // yet the correct answer for unique_prefix_len is not 1, nor 1+1, |
|
946 | 961 | // but the first difference with `NULL_NODE` |
|
947 | 962 | assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); |
|
948 | 963 | assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); |
|
949 | 964 | |
|
950 | 965 | // same with odd result |
|
951 | 966 | idx.insert(1, "00123").unwrap(); |
|
952 | 967 | assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3))); |
|
953 | 968 | assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3))); |
|
954 | 969 | |
|
955 | 970 | // these are unchanged of course |
|
956 | 971 | assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); |
|
957 | 972 | assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); |
|
958 | 973 | } |
|
959 | 974 | |
|
960 | 975 | #[test] |
|
961 | 976 | fn test_insert_extreme_splitting() -> Result<(), NodeMapError> { |
|
962 | 977 | // check that the splitting loop is long enough |
|
963 | 978 | let mut nt_idx = TestNtIndex::new(); |
|
964 | 979 | let nt = &mut nt_idx.nt; |
|
965 | 980 | let idx = &mut nt_idx.index; |
|
966 | 981 | |
|
967 | 982 | let node0_hex = hex_pad_right("444444"); |
|
968 | 983 | let mut node1_hex = hex_pad_right("444444"); |
|
969 | 984 | node1_hex.pop(); |
|
970 | 985 | node1_hex.push('5'); |
|
971 | 986 | let node0 = Node::from_hex(&node0_hex).unwrap(); |
|
972 | 987 | let node1 = Node::from_hex(&node1_hex).unwrap(); |
|
973 | 988 | |
|
974 | idx.insert(0, node0); | |
|
989 | idx.insert(0.into(), node0); | |
|
975 | 990 | nt.insert(idx, &node0, 0)?; |
|
976 | idx.insert(1, node1); | |
|
991 | idx.insert(1.into(), node1); | |
|
977 | 992 | nt.insert(idx, &node1, 1)?; |
|
978 | 993 | |
|
979 | 994 | assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0)); |
|
980 | 995 | assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1)); |
|
981 | 996 | Ok(()) |
|
982 | 997 | } |
|
983 | 998 | |
|
984 | 999 | #[test] |
|
985 | 1000 | fn test_insert_partly_immutable() -> Result<(), NodeMapError> { |
|
986 | 1001 | let mut idx = TestNtIndex::new(); |
|
987 | 1002 | idx.insert(0, "1234")?; |
|
988 | 1003 | idx.insert(1, "1235")?; |
|
989 | 1004 | idx.insert(2, "131")?; |
|
990 | 1005 | idx.insert(3, "cafe")?; |
|
991 | 1006 | let mut idx = idx.commit(); |
|
992 | 1007 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
993 | 1008 | assert_eq!(idx.find_hex("1235")?, Some(1)); |
|
994 | 1009 | assert_eq!(idx.find_hex("131")?, Some(2)); |
|
995 | 1010 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
996 | 1011 | // we did not add anything since init from readonly |
|
997 | 1012 | assert_eq!(idx.nt.masked_readonly_blocks(), 0); |
|
998 | 1013 | |
|
999 | 1014 | idx.insert(4, "123A")?; |
|
1000 | 1015 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
1001 | 1016 | assert_eq!(idx.find_hex("1235")?, Some(1)); |
|
1002 | 1017 | assert_eq!(idx.find_hex("131")?, Some(2)); |
|
1003 | 1018 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
1004 | 1019 | assert_eq!(idx.find_hex("123A")?, Some(4)); |
|
1005 | 1020 | // we masked blocks for all prefixes of "123", including the root |
|
1006 | 1021 | assert_eq!(idx.nt.masked_readonly_blocks(), 4); |
|
1007 | 1022 | |
|
1008 | 1023 | eprintln!("{:?}", idx.nt); |
|
1009 | 1024 | idx.insert(5, "c0")?; |
|
1010 | 1025 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
1011 | 1026 | assert_eq!(idx.find_hex("c0")?, Some(5)); |
|
1012 | 1027 | assert_eq!(idx.find_hex("c1")?, None); |
|
1013 | 1028 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
1014 | 1029 | // inserting "c0" is just splitting the 'c' slot of the mutable root, |
|
1015 | 1030 | // it doesn't mask anything |
|
1016 | 1031 | assert_eq!(idx.nt.masked_readonly_blocks(), 4); |
|
1017 | 1032 | |
|
1018 | 1033 | Ok(()) |
|
1019 | 1034 | } |
|
1020 | 1035 | |
|
1021 | 1036 | #[test] |
|
1022 | 1037 | fn test_invalidate_all() -> Result<(), NodeMapError> { |
|
1023 | 1038 | let mut idx = TestNtIndex::new(); |
|
1024 | 1039 | idx.insert(0, "1234")?; |
|
1025 | 1040 | idx.insert(1, "1235")?; |
|
1026 | 1041 | idx.insert(2, "131")?; |
|
1027 | 1042 | idx.insert(3, "cafe")?; |
|
1028 | 1043 | let mut idx = idx.commit(); |
|
1029 | 1044 | |
|
1030 | 1045 | idx.nt.invalidate_all(); |
|
1031 | 1046 | |
|
1032 | 1047 | assert_eq!(idx.find_hex("1234")?, None); |
|
1033 | 1048 | assert_eq!(idx.find_hex("1235")?, None); |
|
1034 | 1049 | assert_eq!(idx.find_hex("131")?, None); |
|
1035 | 1050 | assert_eq!(idx.find_hex("cafe")?, None); |
|
1036 | 1051 | // all the readonly blocks have been masked, this is the |
|
1037 | 1052 | // conventional expected response |
|
1038 | 1053 | assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1); |
|
1039 | 1054 | Ok(()) |
|
1040 | 1055 | } |
|
1041 | 1056 | |
|
1042 | 1057 | #[test] |
|
1043 | 1058 | fn test_into_added_empty() { |
|
1044 | 1059 | assert!(sample_nodetree().into_readonly_and_added().1.is_empty()); |
|
1045 | 1060 | assert!(sample_nodetree() |
|
1046 | 1061 | .into_readonly_and_added_bytes() |
|
1047 | 1062 | .1 |
|
1048 | 1063 | .is_empty()); |
|
1049 | 1064 | } |
|
1050 | 1065 | |
|
1051 | 1066 | #[test] |
|
1052 | 1067 | fn test_into_added_bytes() -> Result<(), NodeMapError> { |
|
1053 | 1068 | let mut idx = TestNtIndex::new(); |
|
1054 | 1069 | idx.insert(0, "1234")?; |
|
1055 | 1070 | let mut idx = idx.commit(); |
|
1056 | 1071 | idx.insert(4, "cafe")?; |
|
1057 | 1072 | let (_, bytes) = idx.nt.into_readonly_and_added_bytes(); |
|
1058 | 1073 | |
|
1059 | 1074 | // only the root block has been changed |
|
1060 | 1075 | assert_eq!(bytes.len(), size_of::<Block>()); |
|
1061 | 1076 | // big endian for -2 |
|
1062 | 1077 | assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]); |
|
1063 | 1078 | // big endian for -6 |
|
1064 | 1079 | assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]); |
|
1065 | 1080 | Ok(()) |
|
1066 | 1081 | } |
|
1067 | 1082 | } |
@@ -1,69 +1,69 b'' | |||
|
1 | 1 | //! The revset query language |
|
2 | 2 | //! |
|
3 | 3 | //! <https://www.mercurial-scm.org/repo/hg/help/revsets> |
|
4 | 4 | |
|
5 | 5 | use crate::errors::HgError; |
|
6 | 6 | use crate::repo::Repo; |
|
7 | 7 | use crate::revlog::NodePrefix; |
|
8 | 8 | use crate::revlog::{Revision, NULL_REVISION, WORKING_DIRECTORY_HEX}; |
|
9 | 9 | use crate::revlog::{Revlog, RevlogError}; |
|
10 | 10 | use crate::Node; |
|
11 | 11 | |
|
12 | 12 | /// Resolve a query string into a single revision. |
|
13 | 13 | /// |
|
14 | 14 | /// Only some of the revset language is implemented yet. |
|
15 | 15 | pub fn resolve_single( |
|
16 | 16 | input: &str, |
|
17 | 17 | repo: &Repo, |
|
18 | 18 | ) -> Result<Revision, RevlogError> { |
|
19 | 19 | let changelog = repo.changelog()?; |
|
20 | 20 | |
|
21 | 21 | match input { |
|
22 | 22 | "." => { |
|
23 | 23 | let p1 = repo.dirstate_parents()?.p1; |
|
24 | 24 | return changelog.revlog.rev_from_node(p1.into()); |
|
25 | 25 | } |
|
26 | 26 | "null" => return Ok(NULL_REVISION), |
|
27 | 27 | _ => {} |
|
28 | 28 | } |
|
29 | 29 | |
|
30 | 30 | match resolve_rev_number_or_hex_prefix(input, &changelog.revlog) { |
|
31 | 31 | Err(RevlogError::InvalidRevision) => { |
|
32 | 32 | // TODO: support for the rest of the language here. |
|
33 | 33 | let msg = format!("cannot parse revset '{}'", input); |
|
34 | 34 | Err(HgError::unsupported(msg).into()) |
|
35 | 35 | } |
|
36 | 36 | result => result, |
|
37 | 37 | } |
|
38 | 38 | } |
|
39 | 39 | |
|
40 | 40 | /// Resolve the small subset of the language suitable for revlogs other than |
|
41 | 41 | /// the changelog, such as in `hg debugdata --manifest` CLI argument. |
|
42 | 42 | /// |
|
43 | 43 | /// * A non-negative decimal integer for a revision number, or |
|
44 | 44 | /// * An hexadecimal string, for the unique node ID that starts with this |
|
45 | 45 | /// prefix |
|
46 | 46 | pub fn resolve_rev_number_or_hex_prefix( |
|
47 | 47 | input: &str, |
|
48 | 48 | revlog: &Revlog, |
|
49 | 49 | ) -> Result<Revision, RevlogError> { |
|
50 | 50 | // The Python equivalent of this is part of `revsymbol` in |
|
51 | 51 | // `mercurial/scmutil.py` |
|
52 | 52 | |
|
53 | 53 | if let Ok(integer) = input.parse::<i32>() { |
|
54 | 54 | if integer.to_string() == input |
|
55 | 55 | && integer >= 0 |
|
56 | && revlog.has_rev(integer) | |
|
56 | && revlog.has_rev(integer.into()) | |
|
57 | 57 | { |
|
58 | 58 | return Ok(integer); |
|
59 | 59 | } |
|
60 | 60 | } |
|
61 | 61 | if let Ok(prefix) = NodePrefix::from_hex(input) { |
|
62 | 62 | if prefix.is_prefix_of(&Node::from_hex(WORKING_DIRECTORY_HEX).unwrap()) |
|
63 | 63 | { |
|
64 | 64 | return Err(RevlogError::WDirUnsupported); |
|
65 | 65 | } |
|
66 | 66 | return revlog.rev_from_node(prefix); |
|
67 | 67 | } |
|
68 | 68 | Err(RevlogError::InvalidRevision) |
|
69 | 69 | } |
@@ -1,515 +1,518 b'' | |||
|
1 | 1 | // revlog.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use crate::{ |
|
9 | 9 | cindex, |
|
10 | 10 | utils::{node_from_py_bytes, node_from_py_object}, |
|
11 | 11 | }; |
|
12 | 12 | use cpython::{ |
|
13 | 13 | buffer::{Element, PyBuffer}, |
|
14 | 14 | exc::{IndexError, ValueError}, |
|
15 | 15 | ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule, |
|
16 | 16 | PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, |
|
17 | 17 | }; |
|
18 | 18 | use hg::{ |
|
19 | 19 | nodemap::{Block, NodeMapError, NodeTree}, |
|
20 | 20 | revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, |
|
21 | Revision, | |
|
21 | Revision, UncheckedRevision, | |
|
22 | 22 | }; |
|
23 | 23 | use std::cell::RefCell; |
|
24 | 24 | |
|
25 | 25 | /// Return a Struct implementing the Graph trait |
|
26 | 26 | pub(crate) fn pyindex_to_graph( |
|
27 | 27 | py: Python, |
|
28 | 28 | index: PyObject, |
|
29 | 29 | ) -> PyResult<cindex::Index> { |
|
30 | 30 | match index.extract::<MixedIndex>(py) { |
|
31 | 31 | Ok(midx) => Ok(midx.clone_cindex(py)), |
|
32 | 32 | Err(_) => cindex::Index::new(py, index), |
|
33 | 33 | } |
|
34 | 34 | } |
|
35 | 35 | |
|
36 | 36 | py_class!(pub class MixedIndex |py| { |
|
37 | 37 | data cindex: RefCell<cindex::Index>; |
|
38 | 38 | data nt: RefCell<Option<NodeTree>>; |
|
39 | 39 | data docket: RefCell<Option<PyObject>>; |
|
40 | 40 | // Holds a reference to the mmap'ed persistent nodemap data |
|
41 | 41 | data mmap: RefCell<Option<PyBuffer>>; |
|
42 | 42 | |
|
43 | 43 | def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> { |
|
44 | 44 | Self::new(py, cindex) |
|
45 | 45 | } |
|
46 | 46 | |
|
47 | 47 | /// Compatibility layer used for Python consumers needing access to the C index |
|
48 | 48 | /// |
|
49 | 49 | /// Only use case so far is `scmutil.shortesthexnodeidprefix`, |
|
50 | 50 | /// that may need to build a custom `nodetree`, based on a specified revset. |
|
51 | 51 | /// With a Rust implementation of the nodemap, we will be able to get rid of |
|
52 | 52 | /// this, by exposing our own standalone nodemap class, |
|
53 | 53 | /// ready to accept `MixedIndex`. |
|
54 | 54 | def get_cindex(&self) -> PyResult<PyObject> { |
|
55 | 55 | Ok(self.cindex(py).borrow().inner().clone_ref(py)) |
|
56 | 56 | } |
|
57 | 57 | |
|
58 | 58 | // Index API involving nodemap, as defined in mercurial/pure/parsers.py |
|
59 | 59 | |
|
60 | 60 | /// Return Revision if found, raises a bare `error.RevlogError` |
|
61 | 61 | /// in case of ambiguity, same as C version does |
|
62 | 62 | def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> { |
|
63 | 63 | let opt = self.get_nodetree(py)?.borrow(); |
|
64 | 64 | let nt = opt.as_ref().unwrap(); |
|
65 | 65 | let idx = &*self.cindex(py).borrow(); |
|
66 | 66 | let node = node_from_py_bytes(py, &node)?; |
|
67 | 67 | nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e)) |
|
68 | 68 | } |
|
69 | 69 | |
|
70 | 70 | /// same as `get_rev()` but raises a bare `error.RevlogError` if node |
|
71 | 71 | /// is not found. |
|
72 | 72 | /// |
|
73 | 73 | /// No need to repeat `node` in the exception, `mercurial/revlog.py` |
|
74 | 74 | /// will catch and rewrap with it |
|
75 | 75 | def rev(&self, node: PyBytes) -> PyResult<Revision> { |
|
76 | 76 | self.get_rev(py, node)?.ok_or_else(|| revlog_error(py)) |
|
77 | 77 | } |
|
78 | 78 | |
|
79 | 79 | /// return True if the node exist in the index |
|
80 | 80 | def has_node(&self, node: PyBytes) -> PyResult<bool> { |
|
81 | 81 | self.get_rev(py, node).map(|opt| opt.is_some()) |
|
82 | 82 | } |
|
83 | 83 | |
|
84 | 84 | /// find length of shortest hex nodeid of a binary ID |
|
85 | 85 | def shortest(&self, node: PyBytes) -> PyResult<usize> { |
|
86 | 86 | let opt = self.get_nodetree(py)?.borrow(); |
|
87 | 87 | let nt = opt.as_ref().unwrap(); |
|
88 | 88 | let idx = &*self.cindex(py).borrow(); |
|
89 | 89 | match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) |
|
90 | 90 | { |
|
91 | 91 | Ok(Some(l)) => Ok(l), |
|
92 | 92 | Ok(None) => Err(revlog_error(py)), |
|
93 | 93 | Err(e) => Err(nodemap_error(py, e)), |
|
94 | 94 | } |
|
95 | 95 | } |
|
96 | 96 | |
|
97 | 97 | def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> { |
|
98 | 98 | let opt = self.get_nodetree(py)?.borrow(); |
|
99 | 99 | let nt = opt.as_ref().unwrap(); |
|
100 | 100 | let idx = &*self.cindex(py).borrow(); |
|
101 | 101 | |
|
102 | 102 | let node_as_string = if cfg!(feature = "python3-sys") { |
|
103 | 103 | node.cast_as::<PyString>(py)?.to_string(py)?.to_string() |
|
104 | 104 | } |
|
105 | 105 | else { |
|
106 | 106 | let node = node.extract::<PyBytes>(py)?; |
|
107 | 107 | String::from_utf8_lossy(node.data(py)).to_string() |
|
108 | 108 | }; |
|
109 | 109 | |
|
110 | 110 | let prefix = NodePrefix::from_hex(&node_as_string) |
|
111 | 111 | .map_err(|_| PyErr::new::<ValueError, _>( |
|
112 | 112 | py, format!("Invalid node or prefix '{}'", node_as_string)) |
|
113 | 113 | )?; |
|
114 | 114 | |
|
115 | 115 | nt.find_bin(idx, prefix) |
|
116 | 116 | // TODO make an inner API returning the node directly |
|
117 | 117 | .map(|opt| opt.map( |
|
118 | 118 | |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))) |
|
119 | 119 | .map_err(|e| nodemap_error(py, e)) |
|
120 | 120 | |
|
121 | 121 | } |
|
122 | 122 | |
|
    /// append an index entry
    ///
    /// `tup` is the raw entry tuple as built by the Python revlog code;
    /// its 8th element (index 7) is the binary node id.
    def append(&self, tup: PyTuple) -> PyResult<PyObject> {
        if tup.len(py) < 8 {
            // this is better than the panic promised by tup.get_item()
            return Err(
                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
        }
        let node_bytes = tup.get_item(py, 7).extract(py)?;
        let node = node_from_py_object(py, &node_bytes)?;

        // The new entry's revision number is the current index length,
        // captured before the append.
        let mut idx = self.cindex(py).borrow_mut();
        let rev = idx.len() as Revision;

        // Append to the C index first, then keep the nodetree in sync.
        idx.append(py, tup)?;
        self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
            .insert(&*idx, &node, rev)
            .map_err(|e| nodemap_error(py, e))?;
        Ok(py.None())
    }
|
142 | 142 | |
|
    def __delitem__(&self, key: PyObject) -> PyResult<()> {
        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
        self.cindex(py).borrow().inner().del_item(py, key)?;
        // The nodetree has no removal primitive here: invalidate it
        // entirely and rebuild it from the (now truncated) index.
        let mut opt = self.get_nodetree(py)?.borrow_mut();
        let nt = opt.as_mut().unwrap();
        nt.invalidate_all();
        self.fill_nodemap(py, nt)?;
        Ok(())
    }
|
152 | 152 | |
|
    //
    // Reforwarded C index API
    //
    // Each method below simply forwards to the method of the same name on
    // the underlying C index object via `call_cindex`, passing positional
    // and keyword arguments through unchanged.

    // index_methods (tp_methods). Same ordering as in revlog.c

    /// return the gca set of the given revs
    def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "ancestors", args, kw)
    }

    /// return the heads of the common ancestors of the given revs
    def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "commonancestorsheads", args, kw)
    }

    /// Clear the index caches and inner py_class data.
    /// It is Python's responsibility to call `update_nodemap_data` again.
    def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
        // Drop the Rust-side caches (nodetree, docket, mmap handle)
        // before clearing the C index caches.
        self.nt(py).borrow_mut().take();
        self.docket(py).borrow_mut().take();
        self.mmap(py).borrow_mut().take();
        self.call_cindex(py, "clearcaches", args, kw)
    }

    /// return the raw binary string representing a revision
    def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "entry_binary", args, kw)
    }

    /// return a binary packed version of the header
    def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "pack_header", args, kw)
    }

    /// get an index entry
    def get(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "get", args, kw)
    }

    /// compute phases
    def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "computephasesmapsets", args, kw)
    }

    /// reachableroots
    def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "reachableroots2", args, kw)
    }

    /// get head revisions
    def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "headrevs", args, kw)
    }

    /// get filtered head revisions
    def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "headrevsfiltered", args, kw)
    }

    /// True if the object is a snapshot
    def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "issnapshot", args, kw)
    }

    /// Gather snapshot data in a cache dict
    def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "findsnapshots", args, kw)
    }

    /// determine revisions with deltas to reconstruct fulltext
    def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "deltachain", args, kw)
    }

    /// slice planned chunk read to reach a density threshold
    def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "slicechunktodensity", args, kw)
    }

    /// stats for the index
    def stats(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "stats", args, kw)
    }
|
237 | 237 | |
|
    // index_sequence_methods and index_mapping_methods.
    //
    // Since we call back through the high level Python API,
    // there's no point making a distinction between index_get
    // and index_getitem.

    def __len__(&self) -> PyResult<usize> {
        self.cindex(py).borrow().inner().len(py)
    }

    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
        // this conversion seems needless, but that's actually because
        // `index_getitem` does not handle conversion from PyLong,
        // which expressions such as [e for e in index] internally use.
        // Note that we don't seem to have a direct way to call
        // PySequence_GetItem (does the job), which would possibly be better
        // for performance
        let key = match key.extract::<i32>(py) {
            Ok(rev) => rev.to_py_object(py).into_object(),
            // not an integer: pass the key through unchanged (e.g. a node)
            Err(_) => key,
        };
        self.cindex(py).borrow().inner().get_item(py, key)
    }

    def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
        self.cindex(py).borrow().inner().set_item(py, key, value)
    }

    def __contains__(&self, item: PyObject) -> PyResult<bool> {
        // ObjectProtocol does not seem to provide contains(), so
        // this is an equivalent implementation of the index_contains()
        // defined in revlog.c
        let cindex = self.cindex(py).borrow();
        match item.extract::<i32>(py) {
            Ok(rev) => {
                // A revision number is contained iff it is -1 or smaller
                // than the index length.
                Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
            }
            Err(_) => {
                // Not an integer: treat it as a node and ask the C index.
                cindex.inner().call_method(
                    py,
                    "has_node",
                    PyTuple::new(py, &[item]),
                    None)?
                .extract(py)
            }
        }
    }
|
285 | 285 | |
|
    /// Full persistent-nodemap bytes, as written to disk.
    def nodemap_data_all(&self) -> PyResult<PyBytes> {
        self.inner_nodemap_data_all(py)
    }

    /// Incremental nodemap data: (docket, changed byte count, new bytes).
    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
        self.inner_nodemap_data_incremental(py)
    }

    /// Replace the nodemap contents from mmap'ed on-disk data.
    def update_nodemap_data(
        &self,
        docket: PyObject,
        nm_data: PyObject
    ) -> PyResult<PyObject> {
        self.inner_update_nodemap_data(py, docket, nm_data)
    }

    // Attribute forwarded from the underlying C index.
    @property
    def entry_size(&self) -> PyResult<PyInt> {
        self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
    }

    // Attribute forwarded from the underlying C index.
    @property
    def rust_ext_compat(&self) -> PyResult<PyInt> {
        self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
    }
|
310 | 310 | |
|
311 | 311 | }); |
|
312 | 312 | |
|
impl MixedIndex {
    /// Wrap a Python-level C index object; the nodetree, docket and mmap
    /// caches all start out empty (`None`).
    fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
        Self::create_instance(
            py,
            RefCell::new(cindex::Index::new(py, cindex)?),
            RefCell::new(None),
            RefCell::new(None),
            RefCell::new(None),
        )
    }

    /// This is scaffolding at this point, but it could also become
    /// a way to start a persistent nodemap or perform a
    /// vacuum / repack operation
    fn fill_nodemap(
        &self,
        py: Python,
        nt: &mut NodeTree,
    ) -> PyResult<PyObject> {
        let index = self.cindex(py).borrow();
        // Insert every revision of the index into the node tree.
        for r in 0..index.len() {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*index, index.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }
        Ok(py.None())
    }

    /// Return the cached nodetree, building and filling it from the index
    /// on first access.
    fn get_nodetree<'a>(
        &'a self,
        py: Python<'a>,
    ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
        if self.nt(py).borrow().is_none() {
            let readonly = Box::new(Vec::new());
            let mut nt = NodeTree::load_bytes(readonly, 0);
            self.fill_nodemap(py, &mut nt)?;
            self.nt(py).borrow_mut().replace(nt);
        }
        Ok(self.nt(py))
    }

    /// forward a method call to the underlying C index
    fn call_cindex(
        &self,
        py: Python,
        name: &str,
        args: &PyTuple,
        kwargs: Option<&PyDict>,
    ) -> PyResult<PyObject> {
        self.cindex(py)
            .borrow()
            .inner()
            .call_method(py, name, args, kwargs)
    }

    /// Cheap reference-counted clone of the inner C index wrapper.
    pub fn clone_cindex(&self, py: Python) -> cindex::Index {
        self.cindex(py).borrow().clone_ref(py)
    }

    /// Returns the full nodemap bytes to be written as-is to disk
    fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
        // NOTE: `take()` empties the cached nodetree; it will be rebuilt
        // lazily by the next `get_nodetree` call.
        let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
        let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();

        // If there's anything readonly, we need to build the data again from
        // scratch
        let bytes = if readonly.len() > 0 {
            let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
            self.fill_nodemap(py, &mut nt)?;

            let (readonly, bytes) = nt.into_readonly_and_added_bytes();
            // A freshly-built tree has no readonly part left.
            assert_eq!(readonly.len(), 0);

            bytes
        } else {
            bytes
        };

        let bytes = PyBytes::new(py, &bytes);
        Ok(bytes)
    }

    /// Returns the last saved docket along with the size of any changed data
    /// (in number of blocks), and said data as bytes.
    fn inner_nodemap_data_incremental(
        &self,
        py: Python,
    ) -> PyResult<PyObject> {
        // Without a docket there is no on-disk baseline: return None.
        let docket = self.docket(py).borrow();
        let docket = match docket.as_ref() {
            Some(d) => d,
            None => return Ok(py.None()),
        };

        // As above, `take()` consumes the cached nodetree.
        let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
        let masked_blocks = node_tree.masked_readonly_blocks();
        let (_, data) = node_tree.into_readonly_and_added_bytes();
        let changed = masked_blocks * std::mem::size_of::<Block>();

        Ok((docket, changed, PyBytes::new(py, &data))
            .to_py_object(py)
            .into_object())
    }

    /// Update the nodemap from the new (mmaped) data.
    /// The docket is kept as a reference for later incremental calls.
    fn inner_update_nodemap_data(
        &self,
        py: Python,
        docket: PyObject,
        nm_data: PyObject,
    ) -> PyResult<PyObject> {
        let buf = PyBuffer::get(py, &nm_data)?;
        let len = buf.item_count();

        // Build a slice from the mmap'ed buffer data
        let cbuf = buf.buf_ptr();
        // SAFETY preconditions checked here: byte-sized items, C-contiguous
        // layout, u8-compatible format. Otherwise we refuse the buffer.
        let bytes = if std::mem::size_of::<u8>() == buf.item_size()
            && buf.is_c_contiguous()
            && u8::is_compatible_format(buf.format())
        {
            unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
        } else {
            return Err(PyErr::new::<ValueError, _>(
                py,
                "Nodemap data buffer has an invalid memory representation"
                    .to_string(),
            ));
        };

        // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
        // pointer.
        self.mmap(py).borrow_mut().replace(buf);

        let mut nt = NodeTree::load_bytes(Box::new(bytes), len);

        let data_tip =
            docket.getattr(py, "tip_rev")?.extract::<i32>(py)?.into();
        self.docket(py).borrow_mut().replace(docket.clone_ref(py));
        let idx = self.cindex(py).borrow();
        // Reject a docket tip that is not present in the current index.
        let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
            nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
        })?;
        let current_tip = idx.len();

        // Catch up: insert all revisions added since the data was saved.
        for r in (data_tip + 1)..current_tip as Revision {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }

        *self.nt(py).borrow_mut() = Some(nt);

        Ok(py.None())
    }
}
|
468 | 471 | |
|
/// Build a Python `mercurial.error.RevlogError` exception.
///
/// If importing the class fails, the import error itself is returned
/// instead, so the caller always gets *some* PyErr.
fn revlog_error(py: Python) -> PyErr {
    match py
        .import("mercurial.error")
        .and_then(|m| m.get(py, "RevlogError"))
    {
        Err(e) => e,
        Ok(cls) => PyErr::from_instance(
            py,
            // `.ok()` turns a failed constructor call into Python None,
            // which `into_py_object` then passes as the instance.
            cls.call(py, (py.None(),), None).ok().into_py_object(py),
        ),
    }
}
|
481 | 484 | |
|
/// Build the ValueError raised when a revision found in the nodemap
/// does not exist in the revlog index (an on-disk inconsistency).
fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
    PyErr::new::<ValueError, _>(
        py,
        format!(
            "Inconsistency: Revision {} found in nodemap \
             is not in revlog index",
            rev
        ),
    )
}
|
492 | 495 | |
|
/// Standard treatment of NodeMapError
fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
    match err {
        // An ambiguous result is reported as the generic RevlogError.
        NodeMapError::MultipleResults => revlog_error(py),
        // A nodemap/index mismatch is reported as a ValueError.
        NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
    }
}
|
500 | 503 | |
|
501 | 504 | /// Create the module, with __package__ given from parent |
|
502 | 505 | pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> { |
|
503 | 506 | let dotted_name = &format!("{}.revlog", package); |
|
504 | 507 | let m = PyModule::new(py, dotted_name)?; |
|
505 | 508 | m.add(py, "__package__", package)?; |
|
506 | 509 | m.add(py, "__doc__", "RevLog - Rust implementations")?; |
|
507 | 510 | |
|
508 | 511 | m.add_class::<MixedIndex>(py)?; |
|
509 | 512 | |
|
510 | 513 | let sys = PyModule::import(py, "sys")?; |
|
511 | 514 | let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; |
|
512 | 515 | sys_modules.set_item(py, dotted_name, &m)?; |
|
513 | 516 | |
|
514 | 517 | Ok(m) |
|
515 | 518 | } |
General Comments 0
You need to be logged in to leave comments.
Login now