##// END OF EJS Templates
rust: use the new `UncheckedRevision` everywhere applicable...
Raphaël Gomès -
r51870:1928b770 default
parent child Browse files
Show More
@@ -1,115 +1,115 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::repo::Repo;
9 9 use crate::revlog::Node;
10 10 use crate::revlog::RevlogError;
11 11
12 12 use crate::utils::hg_path::HgPath;
13 13
14 14 use crate::errors::HgError;
15 15 use crate::manifest::Manifest;
16 16 use crate::manifest::ManifestEntry;
17 17 use itertools::put_back;
18 18 use itertools::PutBack;
19 19 use std::cmp::Ordering;
20 20
21 21 pub struct CatOutput<'a> {
22 22 /// Whether any file in the manifest matched the paths given as CLI
23 23 /// arguments
24 24 pub found_any: bool,
25 25 /// The contents of matching files, in manifest order
26 26 pub results: Vec<(&'a HgPath, Vec<u8>)>,
27 27 /// Which of the CLI arguments did not match any manifest file
28 28 pub missing: Vec<&'a HgPath>,
29 29 /// The node ID that the given revset was resolved to
30 30 pub node: Node,
31 31 }
32 32
33 33 // Find an item in an iterator over a sorted collection.
34 34 fn find_item<'a>(
35 35 i: &mut PutBack<impl Iterator<Item = Result<ManifestEntry<'a>, HgError>>>,
36 36 needle: &HgPath,
37 37 ) -> Result<Option<Node>, HgError> {
38 38 loop {
39 39 match i.next() {
40 40 None => return Ok(None),
41 41 Some(result) => {
42 42 let entry = result?;
43 43 match needle.as_bytes().cmp(entry.path.as_bytes()) {
44 44 Ordering::Less => {
45 45 i.put_back(Ok(entry));
46 46 return Ok(None);
47 47 }
48 48 Ordering::Greater => continue,
49 49 Ordering::Equal => return Ok(Some(entry.node_id()?)),
50 50 }
51 51 }
52 52 }
53 53 }
54 54 }
55 55
56 56 // Tuple of (missing, found) paths in the manifest
57 57 type ManifestQueryResponse<'a> = (Vec<(&'a HgPath, Node)>, Vec<&'a HgPath>);
58 58
59 59 fn find_files_in_manifest<'query>(
60 60 manifest: &Manifest,
61 61 query: impl Iterator<Item = &'query HgPath>,
62 62 ) -> Result<ManifestQueryResponse<'query>, HgError> {
63 63 let mut manifest = put_back(manifest.iter());
64 64 let mut res = vec![];
65 65 let mut missing = vec![];
66 66
67 67 for file in query {
68 68 match find_item(&mut manifest, file)? {
69 69 None => missing.push(file),
70 70 Some(item) => res.push((file, item)),
71 71 }
72 72 }
73 73 Ok((res, missing))
74 74 }
75 75
76 76 /// Output the given revision of files
77 77 ///
78 78 /// * `root`: Repository root
79 79 /// * `rev`: The revision to cat the files from.
80 80 /// * `files`: The files to output.
81 81 pub fn cat<'a>(
82 82 repo: &Repo,
83 83 revset: &str,
84 84 mut files: Vec<&'a HgPath>,
85 85 ) -> Result<CatOutput<'a>, RevlogError> {
86 86 let rev = crate::revset::resolve_single(revset, repo)?;
87 let manifest = repo.manifest_for_rev(rev)?;
87 let manifest = repo.manifest_for_rev(rev.into())?;
88 88 let node = *repo
89 89 .changelog()?
90 .node_from_rev(rev)
90 .node_from_rev(rev.into())
91 91 .expect("should succeed when repo.manifest did");
92 92 let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![];
93 93 let mut found_any = false;
94 94
95 95 files.sort_unstable();
96 96
97 97 let (found, missing) =
98 98 find_files_in_manifest(&manifest, files.into_iter())?;
99 99
100 100 for (file_path, file_node) in found {
101 101 found_any = true;
102 102 let file_log = repo.filelog(file_path)?;
103 103 results.push((
104 104 file_path,
105 105 file_log.data_for_node(file_node)?.into_file_data()?,
106 106 ));
107 107 }
108 108
109 109 Ok(CatOutput {
110 110 found_any,
111 111 results,
112 112 missing,
113 113 node,
114 114 })
115 115 }
@@ -1,38 +1,38 b''
1 1 // debugdata.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::repo::Repo;
9 9 use crate::requirements;
10 10 use crate::revlog::{Revlog, RevlogError};
11 11
12 12 /// Kind of data to debug
13 13 #[derive(Debug, Copy, Clone)]
14 14 pub enum DebugDataKind {
15 15 Changelog,
16 16 Manifest,
17 17 }
18 18
19 19 /// Dump the contents data of a revision.
20 20 pub fn debug_data(
21 21 repo: &Repo,
22 22 revset: &str,
23 23 kind: DebugDataKind,
24 24 ) -> Result<Vec<u8>, RevlogError> {
25 25 let index_file = match kind {
26 26 DebugDataKind::Changelog => "00changelog.i",
27 27 DebugDataKind::Manifest => "00manifest.i",
28 28 };
29 29 let use_nodemap = repo
30 30 .requirements()
31 31 .contains(requirements::NODEMAP_REQUIREMENT);
32 32 let revlog =
33 33 Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?;
34 34 let rev =
35 35 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
36 let data = revlog.get_rev_data(rev)?;
36 let data = revlog.get_rev_data_for_checked_rev(rev)?;
37 37 Ok(data.into_owned())
38 38 }
@@ -1,45 +1,45 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::errors::HgError;
9 9 use crate::matchers::Matcher;
10 10 use crate::repo::Repo;
11 11 use crate::revlog::manifest::Manifest;
12 12 use crate::revlog::RevlogError;
13 13 use crate::utils::filter_map_results;
14 14 use crate::utils::hg_path::HgPath;
15 15
16 16 /// List files under Mercurial control at a given revision.
17 17 pub fn list_rev_tracked_files(
18 18 repo: &Repo,
19 19 revset: &str,
20 20 narrow_matcher: Box<dyn Matcher>,
21 21 ) -> Result<FilesForRev, RevlogError> {
22 22 let rev = crate::revset::resolve_single(revset, repo)?;
23 23 Ok(FilesForRev {
24 manifest: repo.manifest_for_rev(rev)?,
24 manifest: repo.manifest_for_rev(rev.into())?,
25 25 narrow_matcher,
26 26 })
27 27 }
28 28
29 29 pub struct FilesForRev {
30 30 manifest: Manifest,
31 31 narrow_matcher: Box<dyn Matcher>,
32 32 }
33 33
34 34 impl FilesForRev {
35 35 pub fn iter(&self) -> impl Iterator<Item = Result<&HgPath, HgError>> {
36 36 filter_map_results(self.manifest.iter(), |entry| {
37 37 let path = entry.path;
38 38 Ok(if self.narrow_matcher.matches(path) {
39 39 Some(path)
40 40 } else {
41 41 None
42 42 })
43 43 })
44 44 }
45 45 }
@@ -1,782 +1,782 b''
1 1 use crate::changelog::Changelog;
2 2 use crate::config::{Config, ConfigError, ConfigParseError};
3 3 use crate::dirstate::DirstateParents;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
5 5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
6 6 use crate::dirstate_tree::owning::OwningDirstateMap;
7 7 use crate::errors::HgResultExt;
8 8 use crate::errors::{HgError, IoResultExt};
9 9 use crate::lock::{try_with_lock_no_wait, LockError};
10 10 use crate::manifest::{Manifest, Manifestlog};
11 11 use crate::revlog::filelog::Filelog;
12 12 use crate::revlog::RevlogError;
13 13 use crate::utils::debug::debug_wait_for_file_or_print;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::utils::SliceExt;
17 17 use crate::vfs::{is_dir, is_file, Vfs};
18 use crate::{requirements, NodePrefix};
19 use crate::{DirstateError, Revision};
18 use crate::DirstateError;
19 use crate::{requirements, NodePrefix, UncheckedRevision};
20 20 use std::cell::{Ref, RefCell, RefMut};
21 21 use std::collections::HashSet;
22 22 use std::io::Seek;
23 23 use std::io::SeekFrom;
24 24 use std::io::Write as IoWrite;
25 25 use std::path::{Path, PathBuf};
26 26
27 27 const V2_MAX_READ_ATTEMPTS: usize = 5;
28 28
29 29 type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
30 30
31 31 /// A repository on disk
32 32 pub struct Repo {
33 33 working_directory: PathBuf,
34 34 dot_hg: PathBuf,
35 35 store: PathBuf,
36 36 requirements: HashSet<String>,
37 37 config: Config,
38 38 dirstate_parents: LazyCell<DirstateParents>,
39 39 dirstate_map: LazyCell<OwningDirstateMap>,
40 40 changelog: LazyCell<Changelog>,
41 41 manifestlog: LazyCell<Manifestlog>,
42 42 }
43 43
44 44 #[derive(Debug, derive_more::From)]
45 45 pub enum RepoError {
46 46 NotFound {
47 47 at: PathBuf,
48 48 },
49 49 #[from]
50 50 ConfigParseError(ConfigParseError),
51 51 #[from]
52 52 Other(HgError),
53 53 }
54 54
55 55 impl From<ConfigError> for RepoError {
56 56 fn from(error: ConfigError) -> Self {
57 57 match error {
58 58 ConfigError::Parse(error) => error.into(),
59 59 ConfigError::Other(error) => error.into(),
60 60 }
61 61 }
62 62 }
63 63
64 64 impl Repo {
65 65 /// tries to find nearest repository root in current working directory or
66 66 /// its ancestors
67 67 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
68 68 let current_directory = crate::utils::current_dir()?;
69 69 // ancestors() is inclusive: it first yields `current_directory`
70 70 // as-is.
71 71 for ancestor in current_directory.ancestors() {
72 72 if is_dir(ancestor.join(".hg"))? {
73 73 return Ok(ancestor.to_path_buf());
74 74 }
75 75 }
76 76 Err(RepoError::NotFound {
77 77 at: current_directory,
78 78 })
79 79 }
80 80
81 81 /// Find a repository, either at the given path (which must contain a `.hg`
82 82 /// sub-directory) or by searching the current directory and its
83 83 /// ancestors.
84 84 ///
85 85 /// A method with two very different "modes" like this usually a code smell
86 86 /// to make two methods instead, but in this case an `Option` is what rhg
87 87 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
88 88 /// Having two methods would just move that `if` to almost all callers.
89 89 pub fn find(
90 90 config: &Config,
91 91 explicit_path: Option<PathBuf>,
92 92 ) -> Result<Self, RepoError> {
93 93 if let Some(root) = explicit_path {
94 94 if is_dir(root.join(".hg"))? {
95 95 Self::new_at_path(root, config)
96 96 } else if is_file(&root)? {
97 97 Err(HgError::unsupported("bundle repository").into())
98 98 } else {
99 99 Err(RepoError::NotFound { at: root })
100 100 }
101 101 } else {
102 102 let root = Self::find_repo_root()?;
103 103 Self::new_at_path(root, config)
104 104 }
105 105 }
106 106
107 107 /// To be called after checking that `.hg` is a sub-directory
108 108 fn new_at_path(
109 109 working_directory: PathBuf,
110 110 config: &Config,
111 111 ) -> Result<Self, RepoError> {
112 112 let dot_hg = working_directory.join(".hg");
113 113
114 114 let mut repo_config_files =
115 115 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
116 116
117 117 let hg_vfs = Vfs { base: &dot_hg };
118 118 let mut reqs = requirements::load_if_exists(hg_vfs)?;
119 119 let relative =
120 120 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
121 121 let shared =
122 122 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
123 123
124 124 // From `mercurial/localrepo.py`:
125 125 //
126 126 // if .hg/requires contains the sharesafe requirement, it means
127 127 // there exists a `.hg/store/requires` too and we should read it
128 128 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
129 129 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
130 130 // is not present, refer checkrequirementscompat() for that
131 131 //
132 132 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
133 133 // repository was shared the old way. We check the share source
134 134 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
135 135 // current repository needs to be reshared
136 136 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
137 137
138 138 let store_path;
139 139 if !shared {
140 140 store_path = dot_hg.join("store");
141 141 } else {
142 142 let bytes = hg_vfs.read("sharedpath")?;
143 143 let mut shared_path =
144 144 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
145 145 .to_owned();
146 146 if relative {
147 147 shared_path = dot_hg.join(shared_path)
148 148 }
149 149 if !is_dir(&shared_path)? {
150 150 return Err(HgError::corrupted(format!(
151 151 ".hg/sharedpath points to nonexistent directory {}",
152 152 shared_path.display()
153 153 ))
154 154 .into());
155 155 }
156 156
157 157 store_path = shared_path.join("store");
158 158
159 159 let source_is_share_safe =
160 160 requirements::load(Vfs { base: &shared_path })?
161 161 .contains(requirements::SHARESAFE_REQUIREMENT);
162 162
163 163 if share_safe != source_is_share_safe {
164 164 return Err(HgError::unsupported("share-safe mismatch").into());
165 165 }
166 166
167 167 if share_safe {
168 168 repo_config_files.insert(0, shared_path.join("hgrc"))
169 169 }
170 170 }
171 171 if share_safe {
172 172 reqs.extend(requirements::load(Vfs { base: &store_path })?);
173 173 }
174 174
175 175 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
176 176 config.combine_with_repo(&repo_config_files)?
177 177 } else {
178 178 config.clone()
179 179 };
180 180
181 181 let repo = Self {
182 182 requirements: reqs,
183 183 working_directory,
184 184 store: store_path,
185 185 dot_hg,
186 186 config: repo_config,
187 187 dirstate_parents: LazyCell::new(),
188 188 dirstate_map: LazyCell::new(),
189 189 changelog: LazyCell::new(),
190 190 manifestlog: LazyCell::new(),
191 191 };
192 192
193 193 requirements::check(&repo)?;
194 194
195 195 Ok(repo)
196 196 }
197 197
198 198 pub fn working_directory_path(&self) -> &Path {
199 199 &self.working_directory
200 200 }
201 201
202 202 pub fn requirements(&self) -> &HashSet<String> {
203 203 &self.requirements
204 204 }
205 205
206 206 pub fn config(&self) -> &Config {
207 207 &self.config
208 208 }
209 209
210 210 /// For accessing repository files (in `.hg`), except for the store
211 211 /// (`.hg/store`).
212 212 pub fn hg_vfs(&self) -> Vfs<'_> {
213 213 Vfs { base: &self.dot_hg }
214 214 }
215 215
216 216 /// For accessing repository store files (in `.hg/store`)
217 217 pub fn store_vfs(&self) -> Vfs<'_> {
218 218 Vfs { base: &self.store }
219 219 }
220 220
221 221 /// For accessing the working copy
222 222 pub fn working_directory_vfs(&self) -> Vfs<'_> {
223 223 Vfs {
224 224 base: &self.working_directory,
225 225 }
226 226 }
227 227
228 228 pub fn try_with_wlock_no_wait<R>(
229 229 &self,
230 230 f: impl FnOnce() -> R,
231 231 ) -> Result<R, LockError> {
232 232 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
233 233 }
234 234
235 235 /// Whether this repo should use dirstate-v2.
236 236 /// The presence of `dirstate-v2` in the requirements does not mean that
237 237 /// the on-disk dirstate is necessarily in version 2. In most cases,
238 238 /// a dirstate-v2 file will indeed be found, but in rare cases (like the
239 239 /// upgrade mechanism being cut short), the on-disk version will be a
240 240 /// v1 file.
241 241 /// Semantically, having a requirement only means that a client cannot
242 242 /// properly understand or properly update the repo if it lacks the support
243 243 /// for the required feature, but not that that feature is actually used
244 244 /// in all occasions.
245 245 pub fn use_dirstate_v2(&self) -> bool {
246 246 self.requirements
247 247 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
248 248 }
249 249
250 250 pub fn has_sparse(&self) -> bool {
251 251 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
252 252 }
253 253
254 254 pub fn has_narrow(&self) -> bool {
255 255 self.requirements.contains(requirements::NARROW_REQUIREMENT)
256 256 }
257 257
258 258 pub fn has_nodemap(&self) -> bool {
259 259 self.requirements
260 260 .contains(requirements::NODEMAP_REQUIREMENT)
261 261 }
262 262
263 263 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
264 264 Ok(self
265 265 .hg_vfs()
266 266 .read("dirstate")
267 267 .io_not_found_as_none()?
268 268 .unwrap_or_default())
269 269 }
270 270
271 271 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
272 272 use std::os::unix::fs::MetadataExt;
273 273 Ok(self
274 274 .hg_vfs()
275 275 .symlink_metadata("dirstate")
276 276 .io_not_found_as_none()?
277 277 .map(|meta| meta.ino()))
278 278 }
279 279
280 280 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
281 281 Ok(*self
282 282 .dirstate_parents
283 283 .get_or_init(|| self.read_dirstate_parents())?)
284 284 }
285 285
286 286 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
287 287 let dirstate = self.dirstate_file_contents()?;
288 288 let parents = if dirstate.is_empty() {
289 289 DirstateParents::NULL
290 290 } else if self.use_dirstate_v2() {
291 291 let docket_res =
292 292 crate::dirstate_tree::on_disk::read_docket(&dirstate);
293 293 match docket_res {
294 294 Ok(docket) => docket.parents(),
295 295 Err(_) => {
296 296 log::info!(
297 297 "Parsing dirstate docket failed, \
298 298 falling back to dirstate-v1"
299 299 );
300 300 *crate::dirstate::parsers::parse_dirstate_parents(
301 301 &dirstate,
302 302 )?
303 303 }
304 304 }
305 305 } else {
306 306 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
307 307 };
308 308 self.dirstate_parents.set(parents);
309 309 Ok(parents)
310 310 }
311 311
312 312 /// Returns the information read from the dirstate docket necessary to
313 313 /// check if the data file has been updated/deleted by another process
314 314 /// since we last read the dirstate.
315 315 /// Namely, the inode, data file uuid and the data size.
316 316 fn get_dirstate_data_file_integrity(
317 317 &self,
318 318 ) -> Result<DirstateMapIdentity, HgError> {
319 319 assert!(
320 320 self.use_dirstate_v2(),
321 321 "accessing dirstate data file ID without dirstate-v2"
322 322 );
323 323 // Get the identity before the contents since we could have a race
324 324 // between the two. Having an identity that is too old is fine, but
325 325 // one that is younger than the content change is bad.
326 326 let identity = self.dirstate_identity()?;
327 327 let dirstate = self.dirstate_file_contents()?;
328 328 if dirstate.is_empty() {
329 329 self.dirstate_parents.set(DirstateParents::NULL);
330 330 Ok((identity, None, 0))
331 331 } else {
332 332 let docket_res =
333 333 crate::dirstate_tree::on_disk::read_docket(&dirstate);
334 334 match docket_res {
335 335 Ok(docket) => {
336 336 self.dirstate_parents.set(docket.parents());
337 337 Ok((
338 338 identity,
339 339 Some(docket.uuid.to_owned()),
340 340 docket.data_size(),
341 341 ))
342 342 }
343 343 Err(_) => {
344 344 log::info!(
345 345 "Parsing dirstate docket failed, \
346 346 falling back to dirstate-v1"
347 347 );
348 348 let parents =
349 349 *crate::dirstate::parsers::parse_dirstate_parents(
350 350 &dirstate,
351 351 )?;
352 352 self.dirstate_parents.set(parents);
353 353 Ok((identity, None, 0))
354 354 }
355 355 }
356 356 }
357 357 }
358 358
359 359 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
360 360 if self.use_dirstate_v2() {
361 361 // The v2 dirstate is split into a docket and a data file.
362 362 // Since we don't always take the `wlock` to read it
363 363 // (like in `hg status`), it is susceptible to races.
364 364 // A simple retry method should be enough since full rewrites
365 365 // only happen when too much garbage data is present and
366 366 // this race is unlikely.
367 367 let mut tries = 0;
368 368
369 369 while tries < V2_MAX_READ_ATTEMPTS {
370 370 tries += 1;
371 371 match self.read_docket_and_data_file() {
372 372 Ok(m) => {
373 373 return Ok(m);
374 374 }
375 375 Err(e) => match e {
376 376 DirstateError::Common(HgError::RaceDetected(
377 377 context,
378 378 )) => {
379 379 log::info!(
380 380 "dirstate read race detected {} (retry {}/{})",
381 381 context,
382 382 tries,
383 383 V2_MAX_READ_ATTEMPTS,
384 384 );
385 385 continue;
386 386 }
387 387 _ => {
388 388 log::info!(
389 389 "Reading dirstate v2 failed, \
390 390 falling back to v1"
391 391 );
392 392 return self.new_dirstate_map_v1();
393 393 }
394 394 },
395 395 }
396 396 }
397 397 let error = HgError::abort(
398 398 format!("dirstate read race happened {tries} times in a row"),
399 399 255,
400 400 None,
401 401 );
402 402 Err(DirstateError::Common(error))
403 403 } else {
404 404 self.new_dirstate_map_v1()
405 405 }
406 406 }
407 407
408 408 fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
409 409 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
410 410 let identity = self.dirstate_identity()?;
411 411 let dirstate_file_contents = self.dirstate_file_contents()?;
412 412 if dirstate_file_contents.is_empty() {
413 413 self.dirstate_parents.set(DirstateParents::NULL);
414 414 Ok(OwningDirstateMap::new_empty(Vec::new()))
415 415 } else {
416 416 let (map, parents) =
417 417 OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
418 418 self.dirstate_parents.set(parents);
419 419 Ok(map)
420 420 }
421 421 }
422 422
423 423 fn read_docket_and_data_file(
424 424 &self,
425 425 ) -> Result<OwningDirstateMap, DirstateError> {
426 426 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
427 427 let dirstate_file_contents = self.dirstate_file_contents()?;
428 428 let identity = self.dirstate_identity()?;
429 429 if dirstate_file_contents.is_empty() {
430 430 self.dirstate_parents.set(DirstateParents::NULL);
431 431 return Ok(OwningDirstateMap::new_empty(Vec::new()));
432 432 }
433 433 let docket = crate::dirstate_tree::on_disk::read_docket(
434 434 &dirstate_file_contents,
435 435 )?;
436 436 debug_wait_for_file_or_print(
437 437 self.config(),
438 438 "dirstate.post-docket-read-file",
439 439 );
440 440 self.dirstate_parents.set(docket.parents());
441 441 let uuid = docket.uuid.to_owned();
442 442 let data_size = docket.data_size();
443 443
444 444 let context = "between reading dirstate docket and data file";
445 445 let race_error = HgError::RaceDetected(context.into());
446 446 let metadata = docket.tree_metadata();
447 447
448 448 let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
449 449 // Don't mmap on NFS to prevent `SIGBUS` error on deletion
450 450 let contents = self.hg_vfs().read(docket.data_filename());
451 451 let contents = match contents {
452 452 Ok(c) => c,
453 453 Err(HgError::IoError { error, context }) => {
454 454 match error.raw_os_error().expect("real os error") {
455 455 // 2 = ENOENT, No such file or directory
456 456 // 116 = ESTALE, Stale NFS file handle
457 457 //
458 458 // TODO match on `error.kind()` when
459 459 // `ErrorKind::StaleNetworkFileHandle` is stable.
460 460 2 | 116 => {
461 461 // Race where the data file was deleted right after
462 462 // we read the docket, try again
463 463 return Err(race_error.into());
464 464 }
465 465 _ => {
466 466 return Err(
467 467 HgError::IoError { error, context }.into()
468 468 )
469 469 }
470 470 }
471 471 }
472 472 Err(e) => return Err(e.into()),
473 473 };
474 474 OwningDirstateMap::new_v2(
475 475 contents, data_size, metadata, uuid, identity,
476 476 )
477 477 } else {
478 478 match self
479 479 .hg_vfs()
480 480 .mmap_open(docket.data_filename())
481 481 .io_not_found_as_none()
482 482 {
483 483 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
484 484 data_mmap, data_size, metadata, uuid, identity,
485 485 ),
486 486 Ok(None) => {
487 487 // Race where the data file was deleted right after we
488 488 // read the docket, try again
489 489 return Err(race_error.into());
490 490 }
491 491 Err(e) => return Err(e.into()),
492 492 }
493 493 }?;
494 494
495 495 let write_mode_config = self
496 496 .config()
497 497 .get_str(b"devel", b"dirstate.v2.data_update_mode")
498 498 .unwrap_or(Some("auto"))
499 499 .unwrap_or("auto"); // don't bother for devel options
500 500 let write_mode = match write_mode_config {
501 501 "auto" => DirstateMapWriteMode::Auto,
502 502 "force-new" => DirstateMapWriteMode::ForceNewDataFile,
503 503 "force-append" => DirstateMapWriteMode::ForceAppend,
504 504 _ => DirstateMapWriteMode::Auto,
505 505 };
506 506
507 507 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
508 508
509 509 Ok(map)
510 510 }
511 511
512 512 pub fn dirstate_map(
513 513 &self,
514 514 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
515 515 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
516 516 }
517 517
518 518 pub fn dirstate_map_mut(
519 519 &self,
520 520 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
521 521 self.dirstate_map
522 522 .get_mut_or_init(|| self.new_dirstate_map())
523 523 }
524 524
525 525 fn new_changelog(&self) -> Result<Changelog, HgError> {
526 526 Changelog::open(&self.store_vfs(), self.has_nodemap())
527 527 }
528 528
529 529 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
530 530 self.changelog.get_or_init(|| self.new_changelog())
531 531 }
532 532
533 533 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
534 534 self.changelog.get_mut_or_init(|| self.new_changelog())
535 535 }
536 536
537 537 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
538 538 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
539 539 }
540 540
541 541 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
542 542 self.manifestlog.get_or_init(|| self.new_manifestlog())
543 543 }
544 544
545 545 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
546 546 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
547 547 }
548 548
549 549 /// Returns the manifest of the *changeset* with the given node ID
550 550 pub fn manifest_for_node(
551 551 &self,
552 552 node: impl Into<NodePrefix>,
553 553 ) -> Result<Manifest, RevlogError> {
554 554 self.manifestlog()?.data_for_node(
555 555 self.changelog()?
556 556 .data_for_node(node.into())?
557 557 .manifest_node()?
558 558 .into(),
559 559 )
560 560 }
561 561
562 562 /// Returns the manifest of the *changeset* with the given revision number
563 563 pub fn manifest_for_rev(
564 564 &self,
565 revision: Revision,
565 revision: UncheckedRevision,
566 566 ) -> Result<Manifest, RevlogError> {
567 567 self.manifestlog()?.data_for_node(
568 568 self.changelog()?
569 569 .data_for_rev(revision)?
570 570 .manifest_node()?
571 571 .into(),
572 572 )
573 573 }
574 574
575 575 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
576 576 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
577 577 Ok(entry.tracked())
578 578 } else {
579 579 Ok(false)
580 580 }
581 581 }
582 582
583 583 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
584 584 Filelog::open(self, path)
585 585 }
586 586
587 587 /// Write to disk any updates that were made through `dirstate_map_mut`.
588 588 ///
589 589 /// The "wlock" must be held while calling this.
590 590 /// See for example `try_with_wlock_no_wait`.
591 591 ///
592 592 /// TODO: have a `WritableRepo` type only accessible while holding the
593 593 /// lock?
594 594 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
595 595 let map = self.dirstate_map()?;
596 596 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
597 597 // it’s unset
598 598 let parents = self.dirstate_parents()?;
599 599 let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
600 600 let (identity, uuid, data_size) =
601 601 self.get_dirstate_data_file_integrity()?;
602 602 let identity_changed = identity != map.old_identity();
603 603 let uuid_changed = uuid.as_deref() != map.old_uuid();
604 604 let data_length_changed = data_size != map.old_data_size();
605 605
606 606 if identity_changed || uuid_changed || data_length_changed {
607 607 // If any of identity, uuid or length have changed since
608 608 // last disk read, don't write.
609 609 // This is fine because either we're in a command that doesn't
610 610 // write anything too important (like `hg status`), or we're in
611 611 // `hg add` and we're supposed to have taken the lock before
612 612 // reading anyway.
613 613 //
614 614 // TODO complain loudly if we've changed anything important
615 615 // without taking the lock.
616 616 // (see `hg help config.format.use-dirstate-tracked-hint`)
617 617 log::debug!(
618 618 "dirstate has changed since last read, not updating."
619 619 );
620 620 return Ok(());
621 621 }
622 622
623 623 let uuid_opt = map.old_uuid();
624 624 let write_mode = if uuid_opt.is_some() {
625 625 DirstateMapWriteMode::Auto
626 626 } else {
627 627 DirstateMapWriteMode::ForceNewDataFile
628 628 };
629 629 let (data, tree_metadata, append, old_data_size) =
630 630 map.pack_v2(write_mode)?;
631 631
632 632 // Reuse the uuid, or generate a new one, keeping the old for
633 633 // deletion.
634 634 let (uuid, old_uuid) = match uuid_opt {
635 635 Some(uuid) => {
636 636 let as_str = std::str::from_utf8(uuid)
637 637 .map_err(|_| {
638 638 HgError::corrupted(
639 639 "non-UTF-8 dirstate data file ID",
640 640 )
641 641 })?
642 642 .to_owned();
643 643 if append {
644 644 (as_str, None)
645 645 } else {
646 646 (DirstateDocket::new_uid(), Some(as_str))
647 647 }
648 648 }
649 649 None => (DirstateDocket::new_uid(), None),
650 650 };
651 651
652 652 let data_filename = format!("dirstate.{}", uuid);
653 653 let data_filename = self.hg_vfs().join(data_filename);
654 654 let mut options = std::fs::OpenOptions::new();
655 655 options.write(true);
656 656
657 657 // Why are we not using the O_APPEND flag when appending?
658 658 //
659 659 // - O_APPEND makes it trickier to deal with garbage at the end of
660 660 // the file, left by a previous uncommitted transaction. By
661 661 // starting the write at [old_data_size] we make sure we erase
662 662 // all such garbage.
663 663 //
664 664 // - O_APPEND requires to special-case 0-byte writes, whereas we
665 665 // don't need that.
666 666 //
667 667 // - Some OSes have bugs in implementation O_APPEND:
668 668 // revlog.py talks about a Solaris bug, but we also saw some ZFS
669 669 // bug: https://github.com/openzfs/zfs/pull/3124,
670 670 // https://github.com/openzfs/zfs/issues/13370
671 671 //
672 672 if !append {
673 673 log::trace!("creating a new dirstate data file");
674 674 options.create_new(true);
675 675 } else {
676 676 log::trace!("appending to the dirstate data file");
677 677 }
678 678
679 679 let data_size = (|| {
680 680 // TODO: loop and try another random ID if !append and this
681 681 // returns `ErrorKind::AlreadyExists`? Collision chance of two
682 682 // random IDs is one in 2**32
683 683 let mut file = options.open(&data_filename)?;
684 684 if append {
685 685 file.seek(SeekFrom::Start(old_data_size as u64))?;
686 686 }
687 687 file.write_all(&data)?;
688 688 file.flush()?;
689 689 file.seek(SeekFrom::Current(0))
690 690 })()
691 691 .when_writing_file(&data_filename)?;
692 692
693 693 let packed_dirstate = DirstateDocket::serialize(
694 694 parents,
695 695 tree_metadata,
696 696 data_size,
697 697 uuid.as_bytes(),
698 698 )
699 699 .map_err(|_: std::num::TryFromIntError| {
700 700 HgError::corrupted("overflow in dirstate docket serialization")
701 701 })?;
702 702
703 703 (packed_dirstate, old_uuid)
704 704 } else {
705 705 let identity = self.dirstate_identity()?;
706 706 if identity != map.old_identity() {
707 707 // If identity changed since last disk read, don't write.
708 708 // This is fine because either we're in a command that doesn't
709 709 // write anything too important (like `hg status`), or we're in
710 710 // `hg add` and we're supposed to have taken the lock before
711 711 // reading anyway.
712 712 //
713 713 // TODO complain loudly if we've changed anything important
714 714 // without taking the lock.
715 715 // (see `hg help config.format.use-dirstate-tracked-hint`)
716 716 log::debug!(
717 717 "dirstate has changed since last read, not updating."
718 718 );
719 719 return Ok(());
720 720 }
721 721 (map.pack_v1(parents)?, None)
722 722 };
723 723
724 724 let vfs = self.hg_vfs();
725 725 vfs.atomic_write("dirstate", &packed_dirstate)?;
726 726 if let Some(uuid) = old_uuid_to_remove {
727 727 // Remove the old data file after the new docket pointing to the
728 728 // new data file was written.
729 729 vfs.remove_file(format!("dirstate.{}", uuid))?;
730 730 }
731 731 Ok(())
732 732 }
733 733 }
734 734
735 735 /// Lazily-initialized component of `Repo` with interior mutability
736 736 ///
737 737 /// This differs from `OnceCell` in that the value can still be "deinitialized"
738 738 /// later by setting its inner `Option` to `None`. It also takes the
739 739 /// initialization function as an argument when the value is requested, not
740 740 /// when the instance is created.
741 741 struct LazyCell<T> {
742 742 value: RefCell<Option<T>>,
743 743 }
744 744
745 745 impl<T> LazyCell<T> {
746 746 fn new() -> Self {
747 747 Self {
748 748 value: RefCell::new(None),
749 749 }
750 750 }
751 751
752 752 fn set(&self, value: T) {
753 753 *self.value.borrow_mut() = Some(value)
754 754 }
755 755
756 756 fn get_or_init<E>(
757 757 &self,
758 758 init: impl Fn() -> Result<T, E>,
759 759 ) -> Result<Ref<T>, E> {
760 760 let mut borrowed = self.value.borrow();
761 761 if borrowed.is_none() {
762 762 drop(borrowed);
763 763 // Only use `borrow_mut` if it is really needed to avoid panic in
764 764 // case there is another outstanding borrow but mutation is not
765 765 // needed.
766 766 *self.value.borrow_mut() = Some(init()?);
767 767 borrowed = self.value.borrow()
768 768 }
769 769 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
770 770 }
771 771
772 772 fn get_mut_or_init<E>(
773 773 &self,
774 774 init: impl Fn() -> Result<T, E>,
775 775 ) -> Result<RefMut<T>, E> {
776 776 let mut borrowed = self.value.borrow_mut();
777 777 if borrowed.is_none() {
778 778 *borrowed = Some(init()?);
779 779 }
780 780 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
781 781 }
782 782 }
@@ -1,343 +1,353 b''
1 1 use crate::errors::HgError;
2 2 use crate::revlog::Revision;
3 3 use crate::revlog::{Node, NodePrefix};
4 4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 5 use crate::utils::hg_path::HgPath;
6 6 use crate::vfs::Vfs;
7 use crate::UncheckedRevision;
7 8 use itertools::Itertools;
8 9 use std::ascii::escape_default;
9 10 use std::borrow::Cow;
10 11 use std::fmt::{Debug, Formatter};
11 12
12 13 /// A specialized `Revlog` to work with changelog data format.
13 14 pub struct Changelog {
14 15 /// The generic `revlog` format.
15 16 pub(crate) revlog: Revlog,
16 17 }
17 18
18 19 impl Changelog {
19 20 /// Open the `changelog` of a repository given by its root.
20 21 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 22 let revlog =
22 23 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 24 Ok(Self { revlog })
24 25 }
25 26
26 27 /// Return the `ChangelogRevisionData` for the given node ID.
27 28 pub fn data_for_node(
28 29 &self,
29 30 node: NodePrefix,
30 31 ) -> Result<ChangelogRevisionData, RevlogError> {
31 32 let rev = self.revlog.rev_from_node(node)?;
32 self.data_for_rev(rev)
33 self.entry_for_checked_rev(rev)?.data()
33 34 }
34 35
35 36 /// Return the [`ChangelogEntry`] for the given revision number.
36 37 pub fn entry_for_rev(
37 38 &self,
39 rev: UncheckedRevision,
40 ) -> Result<ChangelogEntry, RevlogError> {
41 let revlog_entry = self.revlog.get_entry(rev)?;
42 Ok(ChangelogEntry { revlog_entry })
43 }
44
45 /// Same as [`Self::entry_for_rev`] for checked revisions.
46 fn entry_for_checked_rev(
47 &self,
38 48 rev: Revision,
39 49 ) -> Result<ChangelogEntry, RevlogError> {
40 let revlog_entry = self.revlog.get_entry(rev)?;
50 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
41 51 Ok(ChangelogEntry { revlog_entry })
42 52 }
43 53
44 54 /// Return the [`ChangelogRevisionData`] for the given revision number.
45 55 ///
46 56 /// This is a useful shortcut in case the caller does not need the
47 57 /// generic revlog information (parents, hashes etc). Otherwise
48 58 /// consider taking a [`ChangelogEntry`] with
49 59 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
50 60 pub fn data_for_rev(
51 61 &self,
52 rev: Revision,
62 rev: UncheckedRevision,
53 63 ) -> Result<ChangelogRevisionData, RevlogError> {
54 64 self.entry_for_rev(rev)?.data()
55 65 }
56 66
57 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
67 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
58 68 self.revlog.node_from_rev(rev)
59 69 }
60 70
61 71 pub fn rev_from_node(
62 72 &self,
63 73 node: NodePrefix,
64 74 ) -> Result<Revision, RevlogError> {
65 75 self.revlog.rev_from_node(node)
66 76 }
67 77 }
68 78
69 79 /// A specialized `RevlogEntry` for `changelog` data format
70 80 ///
71 81 /// This is a `RevlogEntry` with the added semantics that the associated
72 82 /// data should meet the requirements for `changelog`, materialized by
73 83 /// the fact that `data()` constructs a `ChangelogRevisionData`.
74 84 /// In case that promise would be broken, the `data` method returns an error.
75 85 #[derive(Clone)]
76 86 pub struct ChangelogEntry<'changelog> {
77 87 /// Same data, as a generic `RevlogEntry`.
78 88 pub(crate) revlog_entry: RevlogEntry<'changelog>,
79 89 }
80 90
81 91 impl<'changelog> ChangelogEntry<'changelog> {
82 92 pub fn data<'a>(
83 93 &'a self,
84 94 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
85 95 let bytes = self.revlog_entry.data()?;
86 96 if bytes.is_empty() {
87 97 Ok(ChangelogRevisionData::null())
88 98 } else {
89 99 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
90 100 RevlogError::Other(HgError::CorruptedRepository(format!(
91 101 "Invalid changelog data for revision {}: {:?}",
92 102 self.revlog_entry.revision(),
93 103 err
94 104 )))
95 105 })?)
96 106 }
97 107 }
98 108
99 109 /// Obtain a reference to the underlying `RevlogEntry`.
100 110 ///
101 111 /// This allows the caller to access the information that is common
102 112 /// to all revlog entries: revision number, node id, parent revisions etc.
103 113 pub fn as_revlog_entry(&self) -> &RevlogEntry {
104 114 &self.revlog_entry
105 115 }
106 116
107 117 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
108 118 Ok(self
109 119 .revlog_entry
110 120 .p1_entry()?
111 121 .map(|revlog_entry| Self { revlog_entry }))
112 122 }
113 123
114 124 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
115 125 Ok(self
116 126 .revlog_entry
117 127 .p2_entry()?
118 128 .map(|revlog_entry| Self { revlog_entry }))
119 129 }
120 130 }
121 131
122 132 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
123 133 #[derive(PartialEq)]
124 134 pub struct ChangelogRevisionData<'changelog> {
125 135 /// The data bytes of the `changelog` entry.
126 136 bytes: Cow<'changelog, [u8]>,
127 137 /// The end offset for the hex manifest (not including the newline)
128 138 manifest_end: usize,
129 139 /// The end offset for the user+email (not including the newline)
130 140 user_end: usize,
131 141 /// The end offset for the timestamp+timezone+extras (not including the
132 142 /// newline)
133 143 timestamp_end: usize,
134 144 /// The end offset for the file list (not including the newline)
135 145 files_end: usize,
136 146 }
137 147
138 148 impl<'changelog> ChangelogRevisionData<'changelog> {
139 149 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
140 150 let mut line_iter = bytes.split(|b| b == &b'\n');
141 151 let manifest_end = line_iter
142 152 .next()
143 153 .expect("Empty iterator from split()?")
144 154 .len();
145 155 let user_slice = line_iter.next().ok_or_else(|| {
146 156 HgError::corrupted("Changeset data truncated after manifest line")
147 157 })?;
148 158 let user_end = manifest_end + 1 + user_slice.len();
149 159 let timestamp_slice = line_iter.next().ok_or_else(|| {
150 160 HgError::corrupted("Changeset data truncated after user line")
151 161 })?;
152 162 let timestamp_end = user_end + 1 + timestamp_slice.len();
153 163 let mut files_end = timestamp_end + 1;
154 164 loop {
155 165 let line = line_iter.next().ok_or_else(|| {
156 166 HgError::corrupted("Changeset data truncated in files list")
157 167 })?;
158 168 if line.is_empty() {
159 169 if files_end == bytes.len() {
160 170 // The list of files ended with a single newline (there
161 171 // should be two)
162 172 return Err(HgError::corrupted(
163 173 "Changeset data truncated after files list",
164 174 ));
165 175 }
166 176 files_end -= 1;
167 177 break;
168 178 }
169 179 files_end += line.len() + 1;
170 180 }
171 181
172 182 Ok(Self {
173 183 bytes,
174 184 manifest_end,
175 185 user_end,
176 186 timestamp_end,
177 187 files_end,
178 188 })
179 189 }
180 190
181 191 fn null() -> Self {
182 192 Self::new(Cow::Borrowed(
183 193 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
184 194 ))
185 195 .unwrap()
186 196 }
187 197
188 198 /// Return an iterator over the lines of the entry.
189 199 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
190 200 self.bytes.split(|b| b == &b'\n')
191 201 }
192 202
193 203 /// Return the node id of the `manifest` referenced by this `changelog`
194 204 /// entry.
195 205 pub fn manifest_node(&self) -> Result<Node, HgError> {
196 206 let manifest_node_hex = &self.bytes[..self.manifest_end];
197 207 Node::from_hex_for_repo(manifest_node_hex)
198 208 }
199 209
200 210 /// The full user string (usually a name followed by an email enclosed in
201 211 /// angle brackets)
202 212 pub fn user(&self) -> &[u8] {
203 213 &self.bytes[self.manifest_end + 1..self.user_end]
204 214 }
205 215
206 216 /// The full timestamp line (timestamp in seconds, offset in seconds, and
207 217 /// possibly extras)
208 218 // TODO: We should expose this in a more useful way
209 219 pub fn timestamp_line(&self) -> &[u8] {
210 220 &self.bytes[self.user_end + 1..self.timestamp_end]
211 221 }
212 222
213 223 /// The files changed in this revision.
214 224 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
215 225 self.bytes[self.timestamp_end + 1..self.files_end]
216 226 .split(|b| b == &b'\n')
217 227 .map(HgPath::new)
218 228 }
219 229
220 230 /// The change description.
221 231 pub fn description(&self) -> &[u8] {
222 232 &self.bytes[self.files_end + 2..]
223 233 }
224 234 }
225 235
226 236 impl Debug for ChangelogRevisionData<'_> {
227 237 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
228 238 f.debug_struct("ChangelogRevisionData")
229 239 .field("bytes", &debug_bytes(&self.bytes))
230 240 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
231 241 .field(
232 242 "user",
233 243 &debug_bytes(
234 244 &self.bytes[self.manifest_end + 1..self.user_end],
235 245 ),
236 246 )
237 247 .field(
238 248 "timestamp",
239 249 &debug_bytes(
240 250 &self.bytes[self.user_end + 1..self.timestamp_end],
241 251 ),
242 252 )
243 253 .field(
244 254 "files",
245 255 &debug_bytes(
246 256 &self.bytes[self.timestamp_end + 1..self.files_end],
247 257 ),
248 258 )
249 259 .field(
250 260 "description",
251 261 &debug_bytes(&self.bytes[self.files_end + 2..]),
252 262 )
253 263 .finish()
254 264 }
255 265 }
256 266
257 267 fn debug_bytes(bytes: &[u8]) -> String {
258 268 String::from_utf8_lossy(
259 269 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
260 270 )
261 271 .to_string()
262 272 }
263 273
264 274 #[cfg(test)]
265 275 mod tests {
266 276 use super::*;
267 277 use crate::vfs::Vfs;
268 278 use crate::NULL_REVISION;
269 279 use pretty_assertions::assert_eq;
270 280
271 281 #[test]
272 282 fn test_create_changelogrevisiondata_invalid() {
273 283 // Completely empty
274 284 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
275 285 // No newline after manifest
276 286 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
277 287 // No newline after user
278 288 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
279 289 // No newline after timestamp
280 290 assert!(
281 291 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
282 292 );
283 293 // Missing newline after files
284 294 assert!(ChangelogRevisionData::new(Cow::Borrowed(
285 295 b"abcd\n\n0 0\nfile1\nfile2"
286 296 ))
287 297 .is_err(),);
288 298 // Only one newline after files
289 299 assert!(ChangelogRevisionData::new(Cow::Borrowed(
290 300 b"abcd\n\n0 0\nfile1\nfile2\n"
291 301 ))
292 302 .is_err(),);
293 303 }
294 304
295 305 #[test]
296 306 fn test_create_changelogrevisiondata() {
297 307 let data = ChangelogRevisionData::new(Cow::Borrowed(
298 308 b"0123456789abcdef0123456789abcdef01234567
299 309 Some One <someone@example.com>
300 310 0 0
301 311 file1
302 312 file2
303 313
304 314 some
305 315 commit
306 316 message",
307 317 ))
308 318 .unwrap();
309 319 assert_eq!(
310 320 data.manifest_node().unwrap(),
311 321 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
312 322 .unwrap()
313 323 );
314 324 assert_eq!(data.user(), b"Some One <someone@example.com>");
315 325 assert_eq!(data.timestamp_line(), b"0 0");
316 326 assert_eq!(
317 327 data.files().collect_vec(),
318 328 vec![HgPath::new("file1"), HgPath::new("file2")]
319 329 );
320 330 assert_eq!(data.description(), b"some\ncommit\nmessage");
321 331 }
322 332
323 333 #[test]
324 334 fn test_data_from_rev_null() -> Result<(), RevlogError> {
325 335 // an empty revlog will be enough for this case
326 336 let temp = tempfile::tempdir().unwrap();
327 337 let vfs = Vfs { base: temp.path() };
328 338 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
329 339 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
330 340
331 341 let changelog = Changelog { revlog };
332 342 assert_eq!(
333 changelog.data_for_rev(NULL_REVISION)?,
343 changelog.data_for_rev(NULL_REVISION.into())?,
334 344 ChangelogRevisionData::null()
335 345 );
336 346 // same with the intermediate entry object
337 347 assert_eq!(
338 changelog.entry_for_rev(NULL_REVISION)?.data()?,
348 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
339 349 ChangelogRevisionData::null()
340 350 );
341 351 Ok(())
342 352 }
343 353 }
@@ -1,208 +1,231 b''
1 1 use crate::errors::HgError;
2 use crate::exit_codes;
2 3 use crate::repo::Repo;
3 4 use crate::revlog::path_encode::path_encode;
4 5 use crate::revlog::NodePrefix;
5 6 use crate::revlog::Revision;
6 7 use crate::revlog::RevlogEntry;
7 8 use crate::revlog::{Revlog, RevlogError};
8 9 use crate::utils::files::get_path_from_bytes;
9 10 use crate::utils::hg_path::HgPath;
10 11 use crate::utils::SliceExt;
12 use crate::UncheckedRevision;
11 13 use std::path::PathBuf;
12 14
13 15 /// A specialized `Revlog` to work with file data logs.
14 16 pub struct Filelog {
15 17 /// The generic `revlog` format.
16 18 revlog: Revlog,
17 19 }
18 20
19 21 impl Filelog {
20 22 pub fn open_vfs(
21 23 store_vfs: &crate::vfs::Vfs<'_>,
22 24 file_path: &HgPath,
23 25 ) -> Result<Self, HgError> {
24 26 let index_path = store_path(file_path, b".i");
25 27 let data_path = store_path(file_path, b".d");
26 28 let revlog =
27 29 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
28 30 Ok(Self { revlog })
29 31 }
30 32
31 33 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
32 34 Self::open_vfs(&repo.store_vfs(), file_path)
33 35 }
34 36
35 37 /// The given node ID is that of the file as found in a filelog, not of a
36 38 /// changeset.
37 39 pub fn data_for_node(
38 40 &self,
39 41 file_node: impl Into<NodePrefix>,
40 42 ) -> Result<FilelogRevisionData, RevlogError> {
41 43 let file_rev = self.revlog.rev_from_node(file_node.into())?;
42 self.data_for_rev(file_rev)
44 self.data_for_rev(file_rev.into())
43 45 }
44 46
45 47 /// The given revision is that of the file as found in a filelog, not of a
46 48 /// changeset.
47 49 pub fn data_for_rev(
48 50 &self,
49 file_rev: Revision,
51 file_rev: UncheckedRevision,
50 52 ) -> Result<FilelogRevisionData, RevlogError> {
51 53 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
52 54 Ok(FilelogRevisionData(data))
53 55 }
54 56
55 57 /// The given node ID is that of the file as found in a filelog, not of a
56 58 /// changeset.
57 59 pub fn entry_for_node(
58 60 &self,
59 61 file_node: impl Into<NodePrefix>,
60 62 ) -> Result<FilelogEntry, RevlogError> {
61 63 let file_rev = self.revlog.rev_from_node(file_node.into())?;
62 self.entry_for_rev(file_rev)
64 self.entry_for_checked_rev(file_rev)
63 65 }
64 66
65 67 /// The given revision is that of the file as found in a filelog, not of a
66 68 /// changeset.
67 69 pub fn entry_for_rev(
68 70 &self,
71 file_rev: UncheckedRevision,
72 ) -> Result<FilelogEntry, RevlogError> {
73 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
74 }
75
76 fn entry_for_checked_rev(
77 &self,
69 78 file_rev: Revision,
70 79 ) -> Result<FilelogEntry, RevlogError> {
71 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
80 Ok(FilelogEntry(
81 self.revlog.get_entry_for_checked_rev(file_rev)?,
82 ))
72 83 }
73 84 }
74 85
75 86 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
76 87 let encoded_bytes =
77 88 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
78 89 get_path_from_bytes(&encoded_bytes).into()
79 90 }
80 91
81 92 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
82 93
83 94 impl FilelogEntry<'_> {
84 95 /// `self.data()` can be expensive, with decompression and delta
85 96 /// resolution.
86 97 ///
87 98 /// *Without* paying this cost, based on revlog index information
88 99 /// including `RevlogEntry::uncompressed_len`:
89 100 ///
90 101 /// * Returns `true` if the length that `self.data().file_data().len()`
91 102 /// would return is definitely **not equal** to `other_len`.
92 103 /// * Returns `false` if available information is inconclusive.
93 104 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
94 105 // Relevant code that implement this behavior in Python code:
95 106 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
96 107 // revlog.size, revlog.rawsize
97 108
98 109 // Let’s call `file_data_len` what would be returned by
99 110 // `self.data().file_data().len()`.
100 111
101 112 if self.0.is_censored() {
102 113 let file_data_len = 0;
103 114 return other_len != file_data_len;
104 115 }
105 116
106 117 if self.0.has_length_affecting_flag_processor() {
107 118 // We can’t conclude anything about `file_data_len`.
108 119 return false;
109 120 }
110 121
111 122 // Revlog revisions (usually) have metadata for the size of
112 123 // their data after decompression and delta resolution
113 124 // as would be returned by `Revlog::get_rev_data`.
114 125 //
115 126 // For filelogs this is the file’s contents preceded by an optional
116 127 // metadata block.
117 128 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
118 129 l as u64
119 130 } else {
120 131 // The field was set to -1, the actual uncompressed len is unknown.
121 132 // We need to decompress to say more.
122 133 return false;
123 134 };
124 135 // `uncompressed_len = file_data_len + optional_metadata_len`,
125 136 // so `file_data_len <= uncompressed_len`.
126 137 if uncompressed_len < other_len {
127 138 // Transitively, `file_data_len < other_len`.
128 139 // So `other_len != file_data_len` definitely.
129 140 return true;
130 141 }
131 142
132 143 if uncompressed_len == other_len + 4 {
133 144 // It’s possible that `file_data_len == other_len` with an empty
134 145 // metadata block (2 start marker bytes + 2 end marker bytes).
135 146 // This happens when there wouldn’t otherwise be metadata, but
136 147 // the first 2 bytes of file data happen to match a start marker
137 148 // and would be ambiguous.
138 149 return false;
139 150 }
140 151
141 152 if !self.0.has_p1() {
142 153 // There may or may not be copy metadata, so we can’t deduce more
143 154 // about `file_data_len` without computing file data.
144 155 return false;
145 156 }
146 157
147 158 // Filelog ancestry is not meaningful in the way changelog ancestry is.
148 159 // It only provides hints to delta generation.
149 160 // p1 and p2 are set to null when making a copy or rename since
150 161 // contents are likely unrelatedto what might have previously existed
151 162 // at the destination path.
152 163 //
153 164 // Conversely, since here p1 is non-null, there is no copy metadata.
154 165 // Note that this reasoning may be invalidated in the presence of
155 166 // merges made by some previous versions of Mercurial that
156 167 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
157 168 // and `tests/test-issue6528.t`.
158 169 //
159 170 // Since copy metadata is currently the only kind of metadata
160 171 // kept in revlog data of filelogs,
161 172 // this `FilelogEntry` does not have such metadata:
162 173 let file_data_len = uncompressed_len;
163 174
164 175 file_data_len != other_len
165 176 }
166 177
167 178 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
168 Ok(FilelogRevisionData(self.0.data()?.into_owned()))
179 let data = self.0.data();
180 match data {
181 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
182 // Errors other than `HgError` should not happen at this point
183 Err(e) => match e {
184 RevlogError::Other(hg_error) => Err(hg_error),
185 revlog_error => Err(HgError::abort(
186 revlog_error.to_string(),
187 exit_codes::ABORT,
188 None,
189 )),
190 },
191 }
169 192 }
170 193 }
171 194
172 195 /// The data for one revision in a filelog, uncompressed and delta-resolved.
173 196 pub struct FilelogRevisionData(Vec<u8>);
174 197
175 198 impl FilelogRevisionData {
176 199 /// Split into metadata and data
177 200 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
178 201 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
179 202
180 203 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
181 204 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
182 205 Ok((Some(metadata), data))
183 206 } else {
184 207 Err(HgError::corrupted(
185 208 "Missing metadata end delimiter in filelog entry",
186 209 ))
187 210 }
188 211 } else {
189 212 Ok((None, &self.0))
190 213 }
191 214 }
192 215
193 216 /// Returns the file contents at this revision, stripped of any metadata
194 217 pub fn file_data(&self) -> Result<&[u8], HgError> {
195 218 let (_metadata, data) = self.split()?;
196 219 Ok(data)
197 220 }
198 221
199 222 /// Consume the entry, and convert it into data, discarding any metadata,
200 223 /// if present.
201 224 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
202 225 if let (Some(_metadata), data) = self.split()? {
203 226 Ok(data.to_owned())
204 227 } else {
205 228 Ok(self.0)
206 229 }
207 230 }
208 231 }
@@ -1,615 +1,622 b''
1 use std::fmt::Debug;
1 2 use std::ops::Deref;
2 3
3 4 use byteorder::{BigEndian, ByteOrder};
4 5
5 6 use crate::errors::HgError;
6 7 use crate::revlog::node::Node;
7 8 use crate::revlog::{Revision, NULL_REVISION};
9 use crate::UncheckedRevision;
8 10
9 11 pub const INDEX_ENTRY_SIZE: usize = 64;
10 12
11 13 pub struct IndexHeader {
12 14 header_bytes: [u8; 4],
13 15 }
14 16
15 17 #[derive(Copy, Clone)]
16 18 pub struct IndexHeaderFlags {
17 19 flags: u16,
18 20 }
19 21
20 22 /// Corresponds to the high bits of `_format_flags` in python
21 23 impl IndexHeaderFlags {
22 24 /// Corresponds to FLAG_INLINE_DATA in python
23 25 pub fn is_inline(self) -> bool {
24 26 self.flags & 1 != 0
25 27 }
26 28 /// Corresponds to FLAG_GENERALDELTA in python
27 29 pub fn uses_generaldelta(self) -> bool {
28 30 self.flags & 2 != 0
29 31 }
30 32 }
31 33
32 34 /// Corresponds to the INDEX_HEADER structure,
33 35 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
34 36 impl IndexHeader {
35 37 fn format_flags(&self) -> IndexHeaderFlags {
36 38 // No "unknown flags" check here, unlike in python. Maybe there should
37 39 // be.
38 40 IndexHeaderFlags {
39 41 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
40 42 }
41 43 }
42 44
43 45 /// The only revlog version currently supported by rhg.
44 46 const REVLOGV1: u16 = 1;
45 47
46 48 /// Corresponds to `_format_version` in Python.
47 49 fn format_version(&self) -> u16 {
48 50 BigEndian::read_u16(&self.header_bytes[2..4])
49 51 }
50 52
51 53 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
52 54 // We treat an empty file as a valid index with no entries.
53 55 // Here we make an arbitrary choice of what we assume the format of the
54 56 // index to be (V1, using generaldelta).
55 57 // This doesn't matter too much, since we're only doing read-only
56 58 // access. but the value corresponds to the `new_header` variable in
57 59 // `revlog.py`, `_loadindex`
58 60 header_bytes: [0, 3, 0, 1],
59 61 };
60 62
61 63 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
62 64 if index_bytes.is_empty() {
63 65 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
64 66 }
65 67 if index_bytes.len() < 4 {
66 68 return Err(HgError::corrupted(
67 69 "corrupted revlog: can't read the index format header",
68 70 ));
69 71 }
70 72 Ok(IndexHeader {
71 73 header_bytes: {
72 74 let bytes: [u8; 4] =
73 75 index_bytes[0..4].try_into().expect("impossible");
74 76 bytes
75 77 },
76 78 })
77 79 }
78 80 }
79 81
80 82 /// A Revlog index
81 83 pub struct Index {
82 84 bytes: Box<dyn Deref<Target = [u8]> + Send>,
83 85 /// Offsets of starts of index blocks.
84 86 /// Only needed when the index is interleaved with data.
85 87 offsets: Option<Vec<usize>>,
86 88 uses_generaldelta: bool,
87 89 }
88 90
91 impl Debug for Index {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 f.debug_struct("Index")
94 .field("offsets", &self.offsets)
95 .field("uses_generaldelta", &self.uses_generaldelta)
96 .finish()
97 }
98 }
99
89 100 impl Index {
90 101 /// Create an index from bytes.
91 102 /// Calculate the start of each entry when is_inline is true.
92 103 pub fn new(
93 104 bytes: Box<dyn Deref<Target = [u8]> + Send>,
94 105 ) -> Result<Self, HgError> {
95 106 let header = IndexHeader::parse(bytes.as_ref())?;
96 107
97 108 if header.format_version() != IndexHeader::REVLOGV1 {
98 109 // A proper new version should have had a repo/store
99 110 // requirement.
100 111 return Err(HgError::corrupted("unsupported revlog version"));
101 112 }
102 113
103 114 // This is only correct because we know version is REVLOGV1.
104 115 // In v2 we always use generaldelta, while in v0 we never use
105 116 // generaldelta. Similar for [is_inline] (it's only used in v1).
106 117 let uses_generaldelta = header.format_flags().uses_generaldelta();
107 118
108 119 if header.format_flags().is_inline() {
109 120 let mut offset: usize = 0;
110 121 let mut offsets = Vec::new();
111 122
112 123 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
113 124 offsets.push(offset);
114 125 let end = offset + INDEX_ENTRY_SIZE;
115 126 let entry = IndexEntry {
116 127 bytes: &bytes[offset..end],
117 128 offset_override: None,
118 129 };
119 130
120 131 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
121 132 }
122 133
123 134 if offset == bytes.len() {
124 135 Ok(Self {
125 136 bytes,
126 137 offsets: Some(offsets),
127 138 uses_generaldelta,
128 139 })
129 140 } else {
130 141 Err(HgError::corrupted("unexpected inline revlog length"))
131 142 }
132 143 } else {
133 144 Ok(Self {
134 145 bytes,
135 146 offsets: None,
136 147 uses_generaldelta,
137 148 })
138 149 }
139 150 }
140 151
141 152 pub fn uses_generaldelta(&self) -> bool {
142 153 self.uses_generaldelta
143 154 }
144 155
145 156 /// Value of the inline flag.
146 157 pub fn is_inline(&self) -> bool {
147 158 self.offsets.is_some()
148 159 }
149 160
150 161 /// Return a slice of bytes if `revlog` is inline. Panic if not.
151 162 pub fn data(&self, start: usize, end: usize) -> &[u8] {
152 163 if !self.is_inline() {
153 164 panic!("tried to access data in the index of a revlog that is not inline");
154 165 }
155 166 &self.bytes[start..end]
156 167 }
157 168
158 169 /// Return number of entries of the revlog index.
159 170 pub fn len(&self) -> usize {
160 171 if let Some(offsets) = &self.offsets {
161 172 offsets.len()
162 173 } else {
163 174 self.bytes.len() / INDEX_ENTRY_SIZE
164 175 }
165 176 }
166 177
167 178 /// Returns `true` if the `Index` has zero `entries`.
168 179 pub fn is_empty(&self) -> bool {
169 180 self.len() == 0
170 181 }
171 182
172 183 /// Return the index entry corresponding to the given revision if it
173 184 /// exists.
174 185 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
175 186 if rev == NULL_REVISION {
176 187 return None;
177 188 }
178 if let Some(offsets) = &self.offsets {
189 Some(if let Some(offsets) = &self.offsets {
179 190 self.get_entry_inline(rev, offsets)
180 191 } else {
181 192 self.get_entry_separated(rev)
182 }
193 })
183 194 }
184 195
185 196 fn get_entry_inline(
186 197 &self,
187 198 rev: Revision,
188 199 offsets: &[usize],
189 ) -> Option<IndexEntry> {
190 let start = *offsets.get(rev as usize)?;
191 let end = start.checked_add(INDEX_ENTRY_SIZE)?;
200 ) -> IndexEntry {
201 let start = offsets[rev as usize];
202 let end = start + INDEX_ENTRY_SIZE;
192 203 let bytes = &self.bytes[start..end];
193 204
194 205 // See IndexEntry for an explanation of this override.
195 206 let offset_override = Some(end);
196 207
197 Some(IndexEntry {
208 IndexEntry {
198 209 bytes,
199 210 offset_override,
200 })
211 }
201 212 }
202 213
203 fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> {
204 let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE;
205 if rev as usize >= max_rev {
206 return None;
207 }
214 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
208 215 let start = rev as usize * INDEX_ENTRY_SIZE;
209 216 let end = start + INDEX_ENTRY_SIZE;
210 217 let bytes = &self.bytes[start..end];
211 218
212 219 // Override the offset of the first revision as its bytes are used
213 220 // for the index's metadata (saving space because it is always 0)
214 221 let offset_override = if rev == 0 { Some(0) } else { None };
215 222
216 Some(IndexEntry {
223 IndexEntry {
217 224 bytes,
218 225 offset_override,
219 })
226 }
220 227 }
221 228 }
222 229
223 230 impl super::RevlogIndex for Index {
224 231 fn len(&self) -> usize {
225 232 self.len()
226 233 }
227 234
228 235 fn node(&self, rev: Revision) -> Option<&Node> {
229 236 self.get_entry(rev).map(|entry| entry.hash())
230 237 }
231 238 }
232 239
233 240 #[derive(Debug)]
234 241 pub struct IndexEntry<'a> {
235 242 bytes: &'a [u8],
236 243 /// Allows to override the offset value of the entry.
237 244 ///
238 245 /// For interleaved index and data, the offset stored in the index
239 246 /// corresponds to the separated data offset.
240 247 /// It has to be overridden with the actual offset in the interleaved
241 248 /// index which is just after the index block.
242 249 ///
243 250 /// For separated index and data, the offset stored in the first index
244 251 /// entry is mixed with the index headers.
245 252 /// It has to be overridden with 0.
246 253 offset_override: Option<usize>,
247 254 }
248 255
249 256 impl<'a> IndexEntry<'a> {
250 257 /// Return the offset of the data.
251 258 pub fn offset(&self) -> usize {
252 259 if let Some(offset_override) = self.offset_override {
253 260 offset_override
254 261 } else {
255 262 let mut bytes = [0; 8];
256 263 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
257 264 BigEndian::read_u64(&bytes[..]) as usize
258 265 }
259 266 }
260 267
261 268 pub fn flags(&self) -> u16 {
262 269 BigEndian::read_u16(&self.bytes[6..=7])
263 270 }
264 271
265 272 /// Return the compressed length of the data.
266 273 pub fn compressed_len(&self) -> u32 {
267 274 BigEndian::read_u32(&self.bytes[8..=11])
268 275 }
269 276
270 277 /// Return the uncompressed length of the data.
271 278 pub fn uncompressed_len(&self) -> i32 {
272 279 BigEndian::read_i32(&self.bytes[12..=15])
273 280 }
274 281
275 282 /// Return the revision upon which the data has been derived.
276 pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
283 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
277 284 // TODO Maybe return an Option when base_revision == rev?
278 285 // Requires to add rev to IndexEntry
279 286
280 BigEndian::read_i32(&self.bytes[16..])
287 BigEndian::read_i32(&self.bytes[16..]).into()
281 288 }
282 289
283 pub fn link_revision(&self) -> Revision {
284 BigEndian::read_i32(&self.bytes[20..])
290 pub fn link_revision(&self) -> UncheckedRevision {
291 BigEndian::read_i32(&self.bytes[20..]).into()
285 292 }
286 293
287 pub fn p1(&self) -> Revision {
288 BigEndian::read_i32(&self.bytes[24..])
294 pub fn p1(&self) -> UncheckedRevision {
295 BigEndian::read_i32(&self.bytes[24..]).into()
289 296 }
290 297
291 pub fn p2(&self) -> Revision {
292 BigEndian::read_i32(&self.bytes[28..])
298 pub fn p2(&self) -> UncheckedRevision {
299 BigEndian::read_i32(&self.bytes[28..]).into()
293 300 }
294 301
295 302 /// Return the hash of revision's full text.
296 303 ///
297 304 /// Currently, SHA-1 is used and only the first 20 bytes of this field
298 305 /// are used.
299 306 pub fn hash(&self) -> &'a Node {
300 307 (&self.bytes[32..52]).try_into().unwrap()
301 308 }
302 309 }
303 310
304 311 #[cfg(test)]
305 312 mod tests {
306 313 use super::*;
307 314 use crate::node::NULL_NODE;
308 315
309 316 #[cfg(test)]
310 317 #[derive(Debug, Copy, Clone)]
311 318 pub struct IndexEntryBuilder {
312 319 is_first: bool,
313 320 is_inline: bool,
314 321 is_general_delta: bool,
315 322 version: u16,
316 323 offset: usize,
317 324 compressed_len: usize,
318 325 uncompressed_len: usize,
319 326 base_revision_or_base_of_delta_chain: Revision,
320 327 link_revision: Revision,
321 328 p1: Revision,
322 329 p2: Revision,
323 330 node: Node,
324 331 }
325 332
326 333 #[cfg(test)]
327 334 impl IndexEntryBuilder {
328 335 #[allow(clippy::new_without_default)]
329 336 pub fn new() -> Self {
330 337 Self {
331 338 is_first: false,
332 339 is_inline: false,
333 340 is_general_delta: true,
334 341 version: 1,
335 342 offset: 0,
336 343 compressed_len: 0,
337 344 uncompressed_len: 0,
338 345 base_revision_or_base_of_delta_chain: 0,
339 346 link_revision: 0,
340 347 p1: NULL_REVISION,
341 348 p2: NULL_REVISION,
342 349 node: NULL_NODE,
343 350 }
344 351 }
345 352
346 353 pub fn is_first(&mut self, value: bool) -> &mut Self {
347 354 self.is_first = value;
348 355 self
349 356 }
350 357
351 358 pub fn with_inline(&mut self, value: bool) -> &mut Self {
352 359 self.is_inline = value;
353 360 self
354 361 }
355 362
356 363 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
357 364 self.is_general_delta = value;
358 365 self
359 366 }
360 367
361 368 pub fn with_version(&mut self, value: u16) -> &mut Self {
362 369 self.version = value;
363 370 self
364 371 }
365 372
366 373 pub fn with_offset(&mut self, value: usize) -> &mut Self {
367 374 self.offset = value;
368 375 self
369 376 }
370 377
371 378 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
372 379 self.compressed_len = value;
373 380 self
374 381 }
375 382
376 383 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
377 384 self.uncompressed_len = value;
378 385 self
379 386 }
380 387
381 388 pub fn with_base_revision_or_base_of_delta_chain(
382 389 &mut self,
383 390 value: Revision,
384 391 ) -> &mut Self {
385 392 self.base_revision_or_base_of_delta_chain = value;
386 393 self
387 394 }
388 395
389 396 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
390 397 self.link_revision = value;
391 398 self
392 399 }
393 400
394 401 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
395 402 self.p1 = value;
396 403 self
397 404 }
398 405
399 406 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
400 407 self.p2 = value;
401 408 self
402 409 }
403 410
404 411 pub fn with_node(&mut self, value: Node) -> &mut Self {
405 412 self.node = value;
406 413 self
407 414 }
408 415
409 416 pub fn build(&self) -> Vec<u8> {
410 417 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
411 418 if self.is_first {
412 419 bytes.extend(&match (self.is_general_delta, self.is_inline) {
413 420 (false, false) => [0u8, 0],
414 421 (false, true) => [0u8, 1],
415 422 (true, false) => [0u8, 2],
416 423 (true, true) => [0u8, 3],
417 424 });
418 425 bytes.extend(&self.version.to_be_bytes());
419 426 // Remaining offset bytes.
420 427 bytes.extend(&[0u8; 2]);
421 428 } else {
422 429 // Offset stored on 48 bits (6 bytes)
423 430 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
424 431 }
425 432 bytes.extend(&[0u8; 2]); // Revision flags.
426 433 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
427 434 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
428 435 bytes.extend(
429 436 &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
430 437 );
431 438 bytes.extend(&self.link_revision.to_be_bytes());
432 439 bytes.extend(&self.p1.to_be_bytes());
433 440 bytes.extend(&self.p2.to_be_bytes());
434 441 bytes.extend(self.node.as_bytes());
435 442 bytes.extend(vec![0u8; 12]);
436 443 bytes
437 444 }
438 445 }
439 446
440 447 pub fn is_inline(index_bytes: &[u8]) -> bool {
441 448 IndexHeader::parse(index_bytes)
442 449 .expect("too short")
443 450 .format_flags()
444 451 .is_inline()
445 452 }
446 453
447 454 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
448 455 IndexHeader::parse(index_bytes)
449 456 .expect("too short")
450 457 .format_flags()
451 458 .uses_generaldelta()
452 459 }
453 460
454 461 pub fn get_version(index_bytes: &[u8]) -> u16 {
455 462 IndexHeader::parse(index_bytes)
456 463 .expect("too short")
457 464 .format_version()
458 465 }
459 466
460 467 #[test]
461 468 fn flags_when_no_inline_flag_test() {
462 469 let bytes = IndexEntryBuilder::new()
463 470 .is_first(true)
464 471 .with_general_delta(false)
465 472 .with_inline(false)
466 473 .build();
467 474
468 475 assert!(!is_inline(&bytes));
469 476 assert!(!uses_generaldelta(&bytes));
470 477 }
471 478
472 479 #[test]
473 480 fn flags_when_inline_flag_test() {
474 481 let bytes = IndexEntryBuilder::new()
475 482 .is_first(true)
476 483 .with_general_delta(false)
477 484 .with_inline(true)
478 485 .build();
479 486
480 487 assert!(is_inline(&bytes));
481 488 assert!(!uses_generaldelta(&bytes));
482 489 }
483 490
484 491 #[test]
485 492 fn flags_when_inline_and_generaldelta_flags_test() {
486 493 let bytes = IndexEntryBuilder::new()
487 494 .is_first(true)
488 495 .with_general_delta(true)
489 496 .with_inline(true)
490 497 .build();
491 498
492 499 assert!(is_inline(&bytes));
493 500 assert!(uses_generaldelta(&bytes));
494 501 }
495 502
496 503 #[test]
497 504 fn test_offset() {
498 505 let bytes = IndexEntryBuilder::new().with_offset(1).build();
499 506 let entry = IndexEntry {
500 507 bytes: &bytes,
501 508 offset_override: None,
502 509 };
503 510
504 511 assert_eq!(entry.offset(), 1)
505 512 }
506 513
507 514 #[test]
508 515 fn test_with_overridden_offset() {
509 516 let bytes = IndexEntryBuilder::new().with_offset(1).build();
510 517 let entry = IndexEntry {
511 518 bytes: &bytes,
512 519 offset_override: Some(2),
513 520 };
514 521
515 522 assert_eq!(entry.offset(), 2)
516 523 }
517 524
518 525 #[test]
519 526 fn test_compressed_len() {
520 527 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
521 528 let entry = IndexEntry {
522 529 bytes: &bytes,
523 530 offset_override: None,
524 531 };
525 532
526 533 assert_eq!(entry.compressed_len(), 1)
527 534 }
528 535
529 536 #[test]
530 537 fn test_uncompressed_len() {
531 538 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
532 539 let entry = IndexEntry {
533 540 bytes: &bytes,
534 541 offset_override: None,
535 542 };
536 543
537 544 assert_eq!(entry.uncompressed_len(), 1)
538 545 }
539 546
540 547 #[test]
541 548 fn test_base_revision_or_base_of_delta_chain() {
542 549 let bytes = IndexEntryBuilder::new()
543 550 .with_base_revision_or_base_of_delta_chain(1)
544 551 .build();
545 552 let entry = IndexEntry {
546 553 bytes: &bytes,
547 554 offset_override: None,
548 555 };
549 556
550 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
557 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
551 558 }
552 559
553 560 #[test]
554 561 fn link_revision_test() {
555 562 let bytes = IndexEntryBuilder::new().with_link_revision(123).build();
556 563
557 564 let entry = IndexEntry {
558 565 bytes: &bytes,
559 566 offset_override: None,
560 567 };
561 568
562 assert_eq!(entry.link_revision(), 123);
569 assert_eq!(entry.link_revision(), 123.into());
563 570 }
564 571
565 572 #[test]
566 573 fn p1_test() {
567 574 let bytes = IndexEntryBuilder::new().with_p1(123).build();
568 575
569 576 let entry = IndexEntry {
570 577 bytes: &bytes,
571 578 offset_override: None,
572 579 };
573 580
574 assert_eq!(entry.p1(), 123);
581 assert_eq!(entry.p1(), 123.into());
575 582 }
576 583
577 584 #[test]
578 585 fn p2_test() {
579 586 let bytes = IndexEntryBuilder::new().with_p2(123).build();
580 587
581 588 let entry = IndexEntry {
582 589 bytes: &bytes,
583 590 offset_override: None,
584 591 };
585 592
586 assert_eq!(entry.p2(), 123);
593 assert_eq!(entry.p2(), 123.into());
587 594 }
588 595
589 596 #[test]
590 597 fn node_test() {
591 598 let node = Node::from_hex("0123456789012345678901234567890123456789")
592 599 .unwrap();
593 600 let bytes = IndexEntryBuilder::new().with_node(node).build();
594 601
595 602 let entry = IndexEntry {
596 603 bytes: &bytes,
597 604 offset_override: None,
598 605 };
599 606
600 607 assert_eq!(*entry.hash(), node);
601 608 }
602 609
603 610 #[test]
604 611 fn version_test() {
605 612 let bytes = IndexEntryBuilder::new()
606 613 .is_first(true)
607 614 .with_version(2)
608 615 .build();
609 616
610 617 assert_eq!(get_version(&bytes), 2)
611 618 }
612 619 }
613 620
614 621 #[cfg(test)]
615 622 pub use tests::IndexEntryBuilder;
@@ -1,194 +1,203 b''
1 1 use crate::errors::HgError;
2 use crate::revlog::Revision;
3 2 use crate::revlog::{Node, NodePrefix};
4 3 use crate::revlog::{Revlog, RevlogError};
5 4 use crate::utils::hg_path::HgPath;
6 5 use crate::utils::SliceExt;
7 6 use crate::vfs::Vfs;
7 use crate::{Revision, UncheckedRevision};
8 8
9 9 /// A specialized `Revlog` to work with `manifest` data format.
10 10 pub struct Manifestlog {
11 11 /// The generic `revlog` format.
12 12 revlog: Revlog,
13 13 }
14 14
15 15 impl Manifestlog {
16 16 /// Open the `manifest` of a repository given by its root.
17 17 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
18 18 let revlog =
19 19 Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
20 20 Ok(Self { revlog })
21 21 }
22 22
23 23 /// Return the `Manifest` for the given node ID.
24 24 ///
25 25 /// Note: this is a node ID in the manifestlog, typically found through
26 26 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
27 27 /// changeset.
28 28 ///
29 29 /// See also `Repo::manifest_for_node`
30 30 pub fn data_for_node(
31 31 &self,
32 32 node: NodePrefix,
33 33 ) -> Result<Manifest, RevlogError> {
34 34 let rev = self.revlog.rev_from_node(node)?;
35 self.data_for_rev(rev)
35 self.data_for_checked_rev(rev)
36 36 }
37 37
38 38 /// Return the `Manifest` of a given revision number.
39 39 ///
40 40 /// Note: this is a revision number in the manifestlog, *not* of any
41 41 /// changeset.
42 42 ///
43 43 /// See also `Repo::manifest_for_rev`
44 44 pub fn data_for_rev(
45 45 &self,
46 rev: UncheckedRevision,
47 ) -> Result<Manifest, RevlogError> {
48 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
49 Ok(Manifest { bytes })
50 }
51
52 pub fn data_for_checked_rev(
53 &self,
46 54 rev: Revision,
47 55 ) -> Result<Manifest, RevlogError> {
48 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
56 let bytes =
57 self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned();
49 58 Ok(Manifest { bytes })
50 59 }
51 60 }
52 61
53 62 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
54 63 #[derive(Debug)]
55 64 pub struct Manifest {
56 65 /// Format for a manifest: flat sequence of variable-size entries,
57 66 /// sorted by path, each as:
58 67 ///
59 68 /// ```text
60 69 /// <path> \0 <hex_node_id> <flags> \n
61 70 /// ```
62 71 ///
63 72 /// The last entry is also terminated by a newline character.
64 73 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
65 74 bytes: Vec<u8>,
66 75 }
67 76
68 77 impl Manifest {
69 78 pub fn iter(
70 79 &self,
71 80 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
72 81 self.bytes
73 82 .split(|b| b == &b'\n')
74 83 .filter(|line| !line.is_empty())
75 84 .map(ManifestEntry::from_raw)
76 85 }
77 86
78 87 /// If the given path is in this manifest, return its filelog node ID
79 88 pub fn find_by_path(
80 89 &self,
81 90 path: &HgPath,
82 91 ) -> Result<Option<ManifestEntry>, HgError> {
83 92 use std::cmp::Ordering::*;
84 93 let path = path.as_bytes();
85 94 // Both boundaries of this `&[u8]` slice are always at the boundary of
86 95 // an entry
87 96 let mut bytes = &*self.bytes;
88 97
89 98 // Binary search algorithm derived from `[T]::binary_search_by`
90 99 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
91 100 // except we don’t have a slice of entries. Instead we jump to the
92 101 // middle of the byte slice and look around for entry delimiters
93 102 // (newlines).
94 103 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
95 104 let (entry_path, rest) =
96 105 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
97 106 let cmp = entry_path.cmp(path);
98 107 if cmp == Less {
99 108 let after_newline = entry_range.end + 1;
100 109 bytes = &bytes[after_newline..];
101 110 } else if cmp == Greater {
102 111 bytes = &bytes[..entry_range.start];
103 112 } else {
104 113 return Ok(Some(ManifestEntry::from_path_and_rest(
105 114 entry_path, rest,
106 115 )));
107 116 }
108 117 }
109 118 Ok(None)
110 119 }
111 120
112 121 /// If there is at least one, return the byte range of an entry *excluding*
113 122 /// the final newline.
114 123 fn find_entry_near_middle_of(
115 124 bytes: &[u8],
116 125 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
117 126 let len = bytes.len();
118 127 if len > 0 {
119 128 let middle = bytes.len() / 2;
120 129 // Integer division rounds down, so `middle < len`.
121 130 let (before, after) = bytes.split_at(middle);
122 131 let is_newline = |&byte: &u8| byte == b'\n';
123 132 let entry_start = match before.iter().rposition(is_newline) {
124 133 Some(i) => i + 1,
125 134 None => 0, // We choose the first entry in `bytes`
126 135 };
127 136 let entry_end = match after.iter().position(is_newline) {
128 137 Some(i) => {
129 138 // No `+ 1` here to exclude this newline from the range
130 139 middle + i
131 140 }
132 141 None => {
133 142 // In a well-formed manifest:
134 143 //
135 144 // * Since `len > 0`, `bytes` contains at least one entry
136 145 // * Every entry ends with a newline
137 146 // * Since `middle < len`, `after` contains at least the
138 147 // newline at the end of the last entry of `bytes`.
139 148 //
140 149 // We didn’t find a newline, so this manifest is not
141 150 // well-formed.
142 151 return Err(HgError::corrupted(
143 152 "manifest entry without \\n delimiter",
144 153 ));
145 154 }
146 155 };
147 156 Ok(Some(entry_start..entry_end))
148 157 } else {
149 158 // len == 0
150 159 Ok(None)
151 160 }
152 161 }
153 162 }
154 163
155 164 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
156 165 #[derive(Debug)]
157 166 pub struct ManifestEntry<'manifest> {
158 167 pub path: &'manifest HgPath,
159 168 pub hex_node_id: &'manifest [u8],
160 169
161 170 /// `Some` values are b'x', b'l', or 't'
162 171 pub flags: Option<u8>,
163 172 }
164 173
165 174 impl<'a> ManifestEntry<'a> {
166 175 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
167 176 bytes.split_2(b'\0').ok_or_else(|| {
168 177 HgError::corrupted("manifest entry without \\0 delimiter")
169 178 })
170 179 }
171 180
172 181 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
173 182 let (hex_node_id, flags) = match rest.split_last() {
174 183 Some((&b'x', rest)) => (rest, Some(b'x')),
175 184 Some((&b'l', rest)) => (rest, Some(b'l')),
176 185 Some((&b't', rest)) => (rest, Some(b't')),
177 186 _ => (rest, None),
178 187 };
179 188 Self {
180 189 path: HgPath::new(path),
181 190 hex_node_id,
182 191 flags,
183 192 }
184 193 }
185 194
186 195 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
187 196 let (path, rest) = Self::split_path(bytes)?;
188 197 Ok(Self::from_path_and_rest(path, rest))
189 198 }
190 199
191 200 pub fn node_id(&self) -> Result<Node, HgError> {
192 201 Node::from_hex_for_repo(self.hex_node_id)
193 202 }
194 203 }
@@ -1,849 +1,904 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
36 36 /// Mercurial revision numbers
37 37 ///
38 38 /// As noted in revlog.c, revision numbers are actually encoded in
39 39 /// 4 bytes, and are liberally converted to ints, whence the i32
40 40 pub type Revision = i32;
41 41
42 42 /// Unchecked Mercurial revision numbers.
43 43 ///
44 44 /// Values of this type have no guarantee of being a valid revision number
45 45 /// in any context. Use method `check_revision` to get a valid revision within
46 46 /// the appropriate index object.
47 47 ///
48 48 /// As noted in revlog.c, revision numbers are actually encoded in
49 49 /// 4 bytes, and are liberally converted to ints, whence the i32
50 pub type UncheckedRevision = i32;
50 #[derive(
51 Debug,
52 derive_more::Display,
53 Clone,
54 Copy,
55 Hash,
56 PartialEq,
57 Eq,
58 PartialOrd,
59 Ord,
60 )]
61 pub struct UncheckedRevision(i32);
62
63 impl From<Revision> for UncheckedRevision {
64 fn from(value: Revision) -> Self {
65 Self(value)
66 }
67 }
51 68
52 69 /// Marker expressing the absence of a parent
53 70 ///
54 71 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
55 72 /// to be smaller than all existing revisions.
56 73 pub const NULL_REVISION: Revision = -1;
57 74
58 75 /// Same as `mercurial.node.wdirrev`
59 76 ///
60 77 /// This is also equal to `i32::max_value()`, but it's better to spell
61 78 /// it out explicitely, same as in `mercurial.node`
62 79 #[allow(clippy::unreadable_literal)]
63 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
80 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
81 UncheckedRevision(0x7fffffff);
64 82
65 83 pub const WORKING_DIRECTORY_HEX: &str =
66 84 "ffffffffffffffffffffffffffffffffffffffff";
67 85
68 86 /// The simplest expression of what we need of Mercurial DAGs.
69 87 pub trait Graph {
70 88 /// Return the two parents of the given `Revision`.
71 89 ///
72 90 /// Each of the parents can be independently `NULL_REVISION`
73 91 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
74 92 }
75 93
76 94 #[derive(Clone, Debug, PartialEq)]
77 95 pub enum GraphError {
78 96 ParentOutOfRange(Revision),
79 97 }
80 98
81 99 /// The Mercurial Revlog Index
82 100 ///
83 101 /// This is currently limited to the minimal interface that is needed for
84 102 /// the [`nodemap`](nodemap/index.html) module
85 103 pub trait RevlogIndex {
86 104 /// Total number of Revisions referenced in this index
87 105 fn len(&self) -> usize;
88 106
89 107 fn is_empty(&self) -> bool {
90 108 self.len() == 0
91 109 }
92 110
93 /// Return a reference to the Node or `None` if rev is out of bounds
94 ///
95 /// `NULL_REVISION` is not considered to be out of bounds.
111 /// Return a reference to the Node or `None` for `NULL_REVISION`
96 112 fn node(&self, rev: Revision) -> Option<&Node>;
97 113
98 114 /// Return a [`Revision`] if `rev` is a valid revision number for this
99 115 /// index
100 116 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
117 let rev = rev.0;
118
101 119 if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) {
102 120 Some(rev)
103 121 } else {
104 122 None
105 123 }
106 124 }
107 125 }
108 126
109 127 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
110 128 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
111 129 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
112 130 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
113 131
114 132 // Keep this in sync with REVIDX_KNOWN_FLAGS in
115 133 // mercurial/revlogutils/flagutil.py
116 134 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
117 135 | REVISION_FLAG_ELLIPSIS
118 136 | REVISION_FLAG_EXTSTORED
119 137 | REVISION_FLAG_HASCOPIESINFO;
120 138
121 139 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
122 140
123 #[derive(Debug, derive_more::From)]
141 #[derive(Debug, derive_more::From, derive_more::Display)]
124 142 pub enum RevlogError {
125 143 InvalidRevision,
126 144 /// Working directory is not supported
127 145 WDirUnsupported,
128 146 /// Found more than one entry whose ID match the requested prefix
129 147 AmbiguousPrefix,
130 148 #[from]
131 149 Other(HgError),
132 150 }
133 151
134 152 impl From<NodeMapError> for RevlogError {
135 153 fn from(error: NodeMapError) -> Self {
136 154 match error {
137 155 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
138 156 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
139 157 format!("nodemap point to revision {} not in index", rev),
140 158 ),
141 159 }
142 160 }
143 161 }
144 162
145 163 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
146 164 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
147 165 }
148 166
149 167 impl RevlogError {
150 168 fn corrupted<S: AsRef<str>>(context: S) -> Self {
151 169 RevlogError::Other(corrupted(context))
152 170 }
153 171 }
154 172
155 173 /// Read only implementation of revlog.
156 174 pub struct Revlog {
157 175 /// When index and data are not interleaved: bytes of the revlog index.
158 176 /// When index and data are interleaved: bytes of the revlog index and
159 177 /// data.
160 178 index: Index,
161 179 /// When index and data are not interleaved: bytes of the revlog data
162 180 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
163 181 /// When present on disk: the persistent nodemap for this revlog
164 182 nodemap: Option<nodemap::NodeTree>,
165 183 }
166 184
167 185 impl Revlog {
168 186 /// Open a revlog index file.
169 187 ///
170 188 /// It will also open the associated data file if index and data are not
171 189 /// interleaved.
172 190 pub fn open(
173 191 store_vfs: &Vfs,
174 192 index_path: impl AsRef<Path>,
175 193 data_path: Option<&Path>,
176 194 use_nodemap: bool,
177 195 ) -> Result<Self, HgError> {
178 196 let index_path = index_path.as_ref();
179 197 let index = {
180 198 match store_vfs.mmap_open_opt(&index_path)? {
181 199 None => Index::new(Box::new(vec![])),
182 200 Some(index_mmap) => {
183 201 let index = Index::new(Box::new(index_mmap))?;
184 202 Ok(index)
185 203 }
186 204 }
187 205 }?;
188 206
189 207 let default_data_path = index_path.with_extension("d");
190 208
191 209 // type annotation required
192 210 // won't recognize Mmap as Deref<Target = [u8]>
193 211 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
194 212 if index.is_inline() {
195 213 None
196 214 } else {
197 215 let data_path = data_path.unwrap_or(&default_data_path);
198 216 let data_mmap = store_vfs.mmap_open(data_path)?;
199 217 Some(Box::new(data_mmap))
200 218 };
201 219
202 220 let nodemap = if index.is_inline() || !use_nodemap {
203 221 None
204 222 } else {
205 223 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
206 224 |(docket, data)| {
207 225 nodemap::NodeTree::load_bytes(
208 226 Box::new(data),
209 227 docket.data_length,
210 228 )
211 229 },
212 230 )
213 231 };
214 232
215 233 Ok(Revlog {
216 234 index,
217 235 data_bytes,
218 236 nodemap,
219 237 })
220 238 }
221 239
222 240 /// Return number of entries of the `Revlog`.
223 241 pub fn len(&self) -> usize {
224 242 self.index.len()
225 243 }
226 244
227 245 /// Returns `true` if the `Revlog` has zero `entries`.
228 246 pub fn is_empty(&self) -> bool {
229 247 self.index.is_empty()
230 248 }
231 249
232 250 /// Returns the node ID for the given revision number, if it exists in this
233 251 /// revlog
234 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
235 if rev == NULL_REVISION {
252 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
253 if rev == NULL_REVISION.into() {
236 254 return Some(&NULL_NODE);
237 255 }
256 let rev = self.index.check_revision(rev)?;
238 257 Some(self.index.get_entry(rev)?.hash())
239 258 }
240 259
241 260 /// Return the revision number for the given node ID, if it exists in this
242 261 /// revlog
243 262 pub fn rev_from_node(
244 263 &self,
245 264 node: NodePrefix,
246 265 ) -> Result<Revision, RevlogError> {
247 266 let looked_up = if let Some(nodemap) = &self.nodemap {
248 267 nodemap
249 268 .find_bin(&self.index, node)?
250 269 .ok_or(RevlogError::InvalidRevision)
251 270 } else {
252 271 self.rev_from_node_no_persistent_nodemap(node)
253 272 };
254 273
255 274 if node.is_prefix_of(&NULL_NODE) {
256 275 return match looked_up {
257 276 Ok(_) => Err(RevlogError::AmbiguousPrefix),
258 277 Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
259 278 res => res,
260 279 };
261 280 };
262 281
263 282 looked_up
264 283 }
265 284
266 285 /// Same as `rev_from_node`, without using a persistent nodemap
267 286 ///
268 287 /// This is used as fallback when a persistent nodemap is not present.
269 288 /// This happens when the persistent-nodemap experimental feature is not
270 289 /// enabled, or for small revlogs.
271 290 fn rev_from_node_no_persistent_nodemap(
272 291 &self,
273 292 node: NodePrefix,
274 293 ) -> Result<Revision, RevlogError> {
275 294 // Linear scan of the revlog
276 295 // TODO: consider building a non-persistent nodemap in memory to
277 296 // optimize these cases.
278 297 let mut found_by_prefix = None;
279 298 for rev in (0..self.len() as Revision).rev() {
280 299 let index_entry = self.index.get_entry(rev).ok_or_else(|| {
281 300 HgError::corrupted(
282 301 "revlog references a revision not in the index",
283 302 )
284 303 })?;
285 304 if node == *index_entry.hash() {
286 305 return Ok(rev);
287 306 }
288 307 if node.is_prefix_of(index_entry.hash()) {
289 308 if found_by_prefix.is_some() {
290 309 return Err(RevlogError::AmbiguousPrefix);
291 310 }
292 311 found_by_prefix = Some(rev)
293 312 }
294 313 }
295 314 found_by_prefix.ok_or(RevlogError::InvalidRevision)
296 315 }
297 316
298 317 /// Returns whether the given revision exists in this revlog.
299 pub fn has_rev(&self, rev: Revision) -> bool {
300 self.index.get_entry(rev).is_some()
318 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
319 self.index.check_revision(rev).is_some()
301 320 }
302 321
303 322 /// Return the full data associated to a revision.
304 323 ///
305 324 /// All entries required to build the final data out of deltas will be
306 325 /// retrieved as needed, and the deltas will be applied to the inital
307 326 /// snapshot to rebuild the final data.
308 327 pub fn get_rev_data(
309 328 &self,
329 rev: UncheckedRevision,
330 ) -> Result<Cow<[u8]>, RevlogError> {
331 if rev == NULL_REVISION.into() {
332 return Ok(Cow::Borrowed(&[]));
333 };
334 self.get_entry(rev)?.data()
335 }
336
337 /// [`Self::get_rev_data`] for checked revisions.
338 pub fn get_rev_data_for_checked_rev(
339 &self,
310 340 rev: Revision,
311 341 ) -> Result<Cow<[u8]>, RevlogError> {
312 342 if rev == NULL_REVISION {
313 343 return Ok(Cow::Borrowed(&[]));
314 344 };
315 Ok(self.get_entry(rev)?.data()?)
345 self.get_entry_for_checked_rev(rev)?.data()
316 346 }
317 347
318 348 /// Check the hash of some given data against the recorded hash.
319 349 pub fn check_hash(
320 350 &self,
321 351 p1: Revision,
322 352 p2: Revision,
323 353 expected: &[u8],
324 354 data: &[u8],
325 355 ) -> bool {
326 356 let e1 = self.index.get_entry(p1);
327 357 let h1 = match e1 {
328 358 Some(ref entry) => entry.hash(),
329 359 None => &NULL_NODE,
330 360 };
331 361 let e2 = self.index.get_entry(p2);
332 362 let h2 = match e2 {
333 363 Some(ref entry) => entry.hash(),
334 364 None => &NULL_NODE,
335 365 };
336 366
337 367 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
338 368 }
339 369
340 370 /// Build the full data of a revision out its snapshot
341 371 /// and its deltas.
342 372 fn build_data_from_deltas(
343 373 snapshot: RevlogEntry,
344 374 deltas: &[RevlogEntry],
345 375 ) -> Result<Vec<u8>, HgError> {
346 376 let snapshot = snapshot.data_chunk()?;
347 377 let deltas = deltas
348 378 .iter()
349 379 .rev()
350 380 .map(RevlogEntry::data_chunk)
351 381 .collect::<Result<Vec<_>, _>>()?;
352 382 let patches: Vec<_> =
353 383 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
354 384 let patch = patch::fold_patch_lists(&patches);
355 385 Ok(patch.apply(&snapshot))
356 386 }
357 387
358 388 /// Return the revlog data.
359 389 fn data(&self) -> &[u8] {
360 390 match &self.data_bytes {
361 391 Some(data_bytes) => data_bytes,
362 392 None => panic!(
363 393 "forgot to load the data or trying to access inline data"
364 394 ),
365 395 }
366 396 }
367 397
368 398 pub fn make_null_entry(&self) -> RevlogEntry {
369 399 RevlogEntry {
370 400 revlog: self,
371 401 rev: NULL_REVISION,
372 402 bytes: b"",
373 403 compressed_len: 0,
374 404 uncompressed_len: 0,
375 405 base_rev_or_base_of_delta_chain: None,
376 406 p1: NULL_REVISION,
377 407 p2: NULL_REVISION,
378 408 flags: NULL_REVLOG_ENTRY_FLAGS,
379 409 hash: NULL_NODE,
380 410 }
381 411 }
382 412
383 /// Get an entry of the revlog.
384 pub fn get_entry(
413 fn get_entry_for_checked_rev(
385 414 &self,
386 415 rev: Revision,
387 416 ) -> Result<RevlogEntry, RevlogError> {
388 417 if rev == NULL_REVISION {
389 418 return Ok(self.make_null_entry());
390 419 }
391 420 let index_entry = self
392 421 .index
393 422 .get_entry(rev)
394 423 .ok_or(RevlogError::InvalidRevision)?;
395 424 let start = index_entry.offset();
396 425 let end = start + index_entry.compressed_len() as usize;
397 426 let data = if self.index.is_inline() {
398 427 self.index.data(start, end)
399 428 } else {
400 429 &self.data()[start..end]
401 430 };
431 let base_rev = self
432 .index
433 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
434 .ok_or_else(|| {
435 RevlogError::corrupted(format!(
436 "base revision for rev {} is invalid",
437 rev
438 ))
439 })?;
440 let p1 =
441 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
442 RevlogError::corrupted(format!(
443 "p1 for rev {} is invalid",
444 rev
445 ))
446 })?;
447 let p2 =
448 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
449 RevlogError::corrupted(format!(
450 "p2 for rev {} is invalid",
451 rev
452 ))
453 })?;
402 454 let entry = RevlogEntry {
403 455 revlog: self,
404 456 rev,
405 457 bytes: data,
406 458 compressed_len: index_entry.compressed_len(),
407 459 uncompressed_len: index_entry.uncompressed_len(),
408 base_rev_or_base_of_delta_chain: if index_entry
409 .base_revision_or_base_of_delta_chain()
410 == rev
411 {
460 base_rev_or_base_of_delta_chain: if base_rev == rev {
412 461 None
413 462 } else {
414 Some(index_entry.base_revision_or_base_of_delta_chain())
463 Some(base_rev)
415 464 },
416 p1: index_entry.p1(),
417 p2: index_entry.p2(),
465 p1,
466 p2,
418 467 flags: index_entry.flags(),
419 468 hash: *index_entry.hash(),
420 469 };
421 470 Ok(entry)
422 471 }
423 472
424 /// when resolving internal references within revlog, any errors
425 /// should be reported as corruption, instead of e.g. "invalid revision"
426 fn get_entry_internal(
473 /// Get an entry of the revlog.
474 pub fn get_entry(
427 475 &self,
428 rev: Revision,
429 ) -> Result<RevlogEntry, HgError> {
430 self.get_entry(rev)
431 .map_err(|_| corrupted(format!("revision {} out of range", rev)))
476 rev: UncheckedRevision,
477 ) -> Result<RevlogEntry, RevlogError> {
478 if rev == NULL_REVISION.into() {
479 return Ok(self.make_null_entry());
480 }
481 let rev = self.index.check_revision(rev).ok_or_else(|| {
482 RevlogError::corrupted(format!("rev {} is invalid", rev))
483 })?;
484 self.get_entry_for_checked_rev(rev)
432 485 }
433 486 }
434 487
435 488 /// The revlog entry's bytes and the necessary informations to extract
436 489 /// the entry's data.
437 490 #[derive(Clone)]
438 491 pub struct RevlogEntry<'revlog> {
439 492 revlog: &'revlog Revlog,
440 493 rev: Revision,
441 494 bytes: &'revlog [u8],
442 495 compressed_len: u32,
443 496 uncompressed_len: i32,
444 497 base_rev_or_base_of_delta_chain: Option<Revision>,
445 498 p1: Revision,
446 499 p2: Revision,
447 500 flags: u16,
448 501 hash: Node,
449 502 }
450 503
451 504 thread_local! {
452 505 // seems fine to [unwrap] here: this can only fail due to memory allocation
453 506 // failing, and it's normal for that to cause panic.
454 507 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
455 508 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
456 509 }
457 510
458 511 fn zstd_decompress_to_buffer(
459 512 bytes: &[u8],
460 513 buf: &mut Vec<u8>,
461 514 ) -> Result<usize, std::io::Error> {
462 515 ZSTD_DECODER
463 516 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
464 517 }
465 518
466 519 impl<'revlog> RevlogEntry<'revlog> {
467 520 pub fn revision(&self) -> Revision {
468 521 self.rev
469 522 }
470 523
471 524 pub fn node(&self) -> &Node {
472 525 &self.hash
473 526 }
474 527
475 528 pub fn uncompressed_len(&self) -> Option<u32> {
476 529 u32::try_from(self.uncompressed_len).ok()
477 530 }
478 531
479 532 pub fn has_p1(&self) -> bool {
480 533 self.p1 != NULL_REVISION
481 534 }
482 535
483 536 pub fn p1_entry(
484 537 &self,
485 538 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
486 539 if self.p1 == NULL_REVISION {
487 540 Ok(None)
488 541 } else {
489 Ok(Some(self.revlog.get_entry(self.p1)?))
542 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
490 543 }
491 544 }
492 545
493 546 pub fn p2_entry(
494 547 &self,
495 548 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
496 549 if self.p2 == NULL_REVISION {
497 550 Ok(None)
498 551 } else {
499 Ok(Some(self.revlog.get_entry(self.p2)?))
552 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
500 553 }
501 554 }
502 555
503 556 pub fn p1(&self) -> Option<Revision> {
504 557 if self.p1 == NULL_REVISION {
505 558 None
506 559 } else {
507 560 Some(self.p1)
508 561 }
509 562 }
510 563
511 564 pub fn p2(&self) -> Option<Revision> {
512 565 if self.p2 == NULL_REVISION {
513 566 None
514 567 } else {
515 568 Some(self.p2)
516 569 }
517 570 }
518 571
519 572 pub fn is_censored(&self) -> bool {
520 573 (self.flags & REVISION_FLAG_CENSORED) != 0
521 574 }
522 575
523 576 pub fn has_length_affecting_flag_processor(&self) -> bool {
524 577 // Relevant Python code: revlog.size()
525 578 // note: ELLIPSIS is known to not change the content
526 579 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
527 580 }
528 581
529 582 /// The data for this entry, after resolving deltas if any.
530 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
583 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
531 584 let mut entry = self.clone();
532 585 let mut delta_chain = vec![];
533 586
534 587 // The meaning of `base_rev_or_base_of_delta_chain` depends on
535 588 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
536 589 // `mercurial/revlogutils/constants.py` and the code in
537 590 // [_chaininfo] and in [index_deltachain].
538 591 let uses_generaldelta = self.revlog.index.uses_generaldelta();
539 592 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
540 593 entry = if uses_generaldelta {
541 594 delta_chain.push(entry);
542 self.revlog.get_entry_internal(base_rev)?
595 self.revlog.get_entry_for_checked_rev(base_rev)?
543 596 } else {
544 let base_rev = entry.rev - 1;
597 let base_rev = UncheckedRevision(entry.rev - 1);
545 598 delta_chain.push(entry);
546 self.revlog.get_entry_internal(base_rev)?
599 self.revlog.get_entry(base_rev)?
547 600 };
548 601 }
549 602
550 603 let data = if delta_chain.is_empty() {
551 604 entry.data_chunk()?
552 605 } else {
553 606 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
554 607 };
555 608
556 609 Ok(data)
557 610 }
558 611
559 612 fn check_data(
560 613 &self,
561 614 data: Cow<'revlog, [u8]>,
562 ) -> Result<Cow<'revlog, [u8]>, HgError> {
615 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
563 616 if self.revlog.check_hash(
564 617 self.p1,
565 618 self.p2,
566 619 self.hash.as_bytes(),
567 620 &data,
568 621 ) {
569 622 Ok(data)
570 623 } else {
571 624 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
572 625 return Err(HgError::unsupported(
573 626 "ellipsis revisions are not supported by rhg",
574 ));
627 )
628 .into());
575 629 }
576 630 Err(corrupted(format!(
577 631 "hash check failed for revision {}",
578 632 self.rev
579 )))
633 ))
634 .into())
580 635 }
581 636 }
582 637
583 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
638 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
584 639 let data = self.rawdata()?;
585 640 if self.rev == NULL_REVISION {
586 641 return Ok(data);
587 642 }
588 643 if self.is_censored() {
589 return Err(HgError::CensoredNodeError);
644 return Err(HgError::CensoredNodeError.into());
590 645 }
591 646 self.check_data(data)
592 647 }
593 648
594 649 /// Extract the data contained in the entry.
595 650 /// This may be a delta. (See `is_delta`.)
596 651 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
597 652 if self.bytes.is_empty() {
598 653 return Ok(Cow::Borrowed(&[]));
599 654 }
600 655 match self.bytes[0] {
601 656 // Revision data is the entirety of the entry, including this
602 657 // header.
603 658 b'\0' => Ok(Cow::Borrowed(self.bytes)),
604 659 // Raw revision data follows.
605 660 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
606 661 // zlib (RFC 1950) data.
607 662 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
608 663 // zstd data.
609 664 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
610 665 // A proper new format should have had a repo/store requirement.
611 666 format_type => Err(corrupted(format!(
612 667 "unknown compression header '{}'",
613 668 format_type
614 669 ))),
615 670 }
616 671 }
617 672
618 673 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
619 674 let mut decoder = ZlibDecoder::new(self.bytes);
620 675 if self.is_delta() {
621 676 let mut buf = Vec::with_capacity(self.compressed_len as usize);
622 677 decoder
623 678 .read_to_end(&mut buf)
624 679 .map_err(|e| corrupted(e.to_string()))?;
625 680 Ok(buf)
626 681 } else {
627 682 let cap = self.uncompressed_len.max(0) as usize;
628 683 let mut buf = vec![0; cap];
629 684 decoder
630 685 .read_exact(&mut buf)
631 686 .map_err(|e| corrupted(e.to_string()))?;
632 687 Ok(buf)
633 688 }
634 689 }
635 690
636 691 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
637 692 let cap = self.uncompressed_len.max(0) as usize;
638 693 if self.is_delta() {
639 694 // [cap] is usually an over-estimate of the space needed because
640 695 // it's the length of delta-decoded data, but we're interested
641 696 // in the size of the delta.
642 697 // This means we have to [shrink_to_fit] to avoid holding on
643 698 // to a large chunk of memory, but it also means we must have a
644 699 // fallback branch, for the case when the delta is longer than
645 700 // the original data (surprisingly, this does happen in practice)
646 701 let mut buf = Vec::with_capacity(cap);
647 702 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
648 703 Ok(_) => buf.shrink_to_fit(),
649 704 Err(_) => {
650 705 buf.clear();
651 706 zstd::stream::copy_decode(self.bytes, &mut buf)
652 707 .map_err(|e| corrupted(e.to_string()))?;
653 708 }
654 709 };
655 710 Ok(buf)
656 711 } else {
657 712 let mut buf = Vec::with_capacity(cap);
658 713 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
659 714 .map_err(|e| corrupted(e.to_string()))?;
660 715 if len != self.uncompressed_len as usize {
661 716 Err(corrupted("uncompressed length does not match"))
662 717 } else {
663 718 Ok(buf)
664 719 }
665 720 }
666 721 }
667 722
668 723 /// Tell if the entry is a snapshot or a delta
669 724 /// (influences on decompression).
670 725 fn is_delta(&self) -> bool {
671 726 self.base_rev_or_base_of_delta_chain.is_some()
672 727 }
673 728 }
674 729
675 730 /// Calculate the hash of a revision given its data and its parents.
676 731 fn hash(
677 732 data: &[u8],
678 733 p1_hash: &[u8],
679 734 p2_hash: &[u8],
680 735 ) -> [u8; NODE_BYTES_LENGTH] {
681 736 let mut hasher = Sha1::new();
682 737 let (a, b) = (p1_hash, p2_hash);
683 738 if a > b {
684 739 hasher.update(b);
685 740 hasher.update(a);
686 741 } else {
687 742 hasher.update(a);
688 743 hasher.update(b);
689 744 }
690 745 hasher.update(data);
691 746 *hasher.finalize().as_ref()
692 747 }
693 748
694 749 #[cfg(test)]
695 750 mod tests {
696 751 use super::*;
697 752 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
698 753 use itertools::Itertools;
699 754
700 755 #[test]
701 756 fn test_empty() {
702 757 let temp = tempfile::tempdir().unwrap();
703 758 let vfs = Vfs { base: temp.path() };
704 759 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
705 760 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
706 761 assert!(revlog.is_empty());
707 762 assert_eq!(revlog.len(), 0);
708 assert!(revlog.get_entry(0).is_err());
709 assert!(!revlog.has_rev(0));
763 assert!(revlog.get_entry(0.into()).is_err());
764 assert!(!revlog.has_rev(0.into()));
710 765 assert_eq!(
711 766 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
712 767 NULL_REVISION
713 768 );
714 let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap();
769 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
715 770 assert_eq!(null_entry.revision(), NULL_REVISION);
716 771 assert!(null_entry.data().unwrap().is_empty());
717 772 }
718 773
719 774 #[test]
720 775 fn test_inline() {
721 776 let temp = tempfile::tempdir().unwrap();
722 777 let vfs = Vfs { base: temp.path() };
723 778 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
724 779 .unwrap();
725 780 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
726 781 .unwrap();
727 782 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
728 783 .unwrap();
729 784 let entry0_bytes = IndexEntryBuilder::new()
730 785 .is_first(true)
731 786 .with_version(1)
732 787 .with_inline(true)
733 788 .with_offset(INDEX_ENTRY_SIZE)
734 789 .with_node(node0)
735 790 .build();
736 791 let entry1_bytes = IndexEntryBuilder::new()
737 792 .with_offset(INDEX_ENTRY_SIZE)
738 793 .with_node(node1)
739 794 .build();
740 795 let entry2_bytes = IndexEntryBuilder::new()
741 796 .with_offset(INDEX_ENTRY_SIZE)
742 797 .with_p1(0)
743 798 .with_p2(1)
744 799 .with_node(node2)
745 800 .build();
746 801 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
747 802 .into_iter()
748 803 .flatten()
749 804 .collect_vec();
750 805 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
751 806 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
752 807
753 let entry0 = revlog.get_entry(0).ok().unwrap();
808 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
754 809 assert_eq!(entry0.revision(), 0);
755 810 assert_eq!(*entry0.node(), node0);
756 811 assert!(!entry0.has_p1());
757 812 assert_eq!(entry0.p1(), None);
758 813 assert_eq!(entry0.p2(), None);
759 814 let p1_entry = entry0.p1_entry().unwrap();
760 815 assert!(p1_entry.is_none());
761 816 let p2_entry = entry0.p2_entry().unwrap();
762 817 assert!(p2_entry.is_none());
763 818
764 let entry1 = revlog.get_entry(1).ok().unwrap();
819 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
765 820 assert_eq!(entry1.revision(), 1);
766 821 assert_eq!(*entry1.node(), node1);
767 822 assert!(!entry1.has_p1());
768 823 assert_eq!(entry1.p1(), None);
769 824 assert_eq!(entry1.p2(), None);
770 825 let p1_entry = entry1.p1_entry().unwrap();
771 826 assert!(p1_entry.is_none());
772 827 let p2_entry = entry1.p2_entry().unwrap();
773 828 assert!(p2_entry.is_none());
774 829
775 let entry2 = revlog.get_entry(2).ok().unwrap();
830 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
776 831 assert_eq!(entry2.revision(), 2);
777 832 assert_eq!(*entry2.node(), node2);
778 833 assert!(entry2.has_p1());
779 834 assert_eq!(entry2.p1(), Some(0));
780 835 assert_eq!(entry2.p2(), Some(1));
781 836 let p1_entry = entry2.p1_entry().unwrap();
782 837 assert!(p1_entry.is_some());
783 838 assert_eq!(p1_entry.unwrap().revision(), 0);
784 839 let p2_entry = entry2.p2_entry().unwrap();
785 840 assert!(p2_entry.is_some());
786 841 assert_eq!(p2_entry.unwrap().revision(), 1);
787 842 }
788 843
789 844 #[test]
790 845 fn test_nodemap() {
791 846 let temp = tempfile::tempdir().unwrap();
792 847 let vfs = Vfs { base: temp.path() };
793 848
794 849 // building a revlog with a forced Node starting with zeros
795 850 // This is a corruption, but it does not preclude using the nodemap
796 851 // if we don't try and access the data
797 852 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
798 853 .unwrap();
799 854 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
800 855 .unwrap();
801 856 let entry0_bytes = IndexEntryBuilder::new()
802 857 .is_first(true)
803 858 .with_version(1)
804 859 .with_inline(true)
805 860 .with_offset(INDEX_ENTRY_SIZE)
806 861 .with_node(node0)
807 862 .build();
808 863 let entry1_bytes = IndexEntryBuilder::new()
809 864 .with_offset(INDEX_ENTRY_SIZE)
810 865 .with_node(node1)
811 866 .build();
812 867 let contents = vec![entry0_bytes, entry1_bytes]
813 868 .into_iter()
814 869 .flatten()
815 870 .collect_vec();
816 871 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
817 872 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
818 873
819 874 // accessing the data shows the corruption
820 revlog.get_entry(0).unwrap().data().unwrap_err();
875 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
821 876
822 877 assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
823 878 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
824 879 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
825 880 assert_eq!(
826 881 revlog
827 882 .rev_from_node(NodePrefix::from_hex("000").unwrap())
828 883 .unwrap(),
829 884 -1
830 885 );
831 886 assert_eq!(
832 887 revlog
833 888 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
834 889 .unwrap(),
835 890 1
836 891 );
837 892 // RevlogError does not implement PartialEq
838 893 // (ultimately because io::Error does not)
839 894 match revlog
840 895 .rev_from_node(NodePrefix::from_hex("00").unwrap())
841 896 .expect_err("Expected to give AmbiguousPrefix error")
842 897 {
843 898 RevlogError::AmbiguousPrefix => (),
844 899 e => {
845 900 panic!("Got another error than AmbiguousPrefix: {:?}", e);
846 901 }
847 902 };
848 903 }
849 904 }
@@ -1,1067 +1,1082 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 use crate::UncheckedRevision;
16
15 17 use super::{
16 18 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
17 19 };
18 20
19 21 use bytes_cast::{unaligned, BytesCast};
20 22 use std::cmp::max;
21 23 use std::fmt;
22 24 use std::mem::{self, align_of, size_of};
23 25 use std::ops::Deref;
24 26 use std::ops::Index;
25 27
26 28 #[derive(Debug, PartialEq)]
27 29 pub enum NodeMapError {
28 30 /// A `NodePrefix` matches several [`Revision`]s.
29 31 ///
30 32 /// This can be returned by methods meant for (at most) one match.
31 33 MultipleResults,
32 34 /// A `Revision` stored in the nodemap could not be found in the index
33 RevisionNotInIndex(Revision),
35 RevisionNotInIndex(UncheckedRevision),
34 36 }
35 37
36 38 /// Mapping system from Mercurial nodes to revision numbers.
37 39 ///
38 40 /// ## `RevlogIndex` and `NodeMap`
39 41 ///
40 42 /// One way to think about their relationship is that
41 43 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
42 44 /// information carried by a [`RevlogIndex`].
43 45 ///
44 46 /// Many of the methods in this trait take a `RevlogIndex` argument
45 47 /// which is used for validation of their results. This index must naturally
46 48 /// be the one the `NodeMap` is about, and it must be consistent.
47 49 ///
48 50 /// Notably, the `NodeMap` must not store
49 51 /// information about more `Revision` values than there are in the index.
50 52 /// In these methods, an encountered `Revision` is not in the index, a
51 53 /// [RevisionNotInIndex](NodeMapError) error is returned.
52 54 ///
53 55 /// In insert operations, the rule is thus that the `NodeMap` must always
54 56 /// be updated after the `RevlogIndex` it is about.
55 57 pub trait NodeMap {
56 58 /// Find the unique `Revision` having the given `Node`
57 59 ///
58 60 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
59 61 fn find_node(
60 62 &self,
61 63 index: &impl RevlogIndex,
62 64 node: &Node,
63 65 ) -> Result<Option<Revision>, NodeMapError> {
64 66 self.find_bin(index, node.into())
65 67 }
66 68
67 69 /// Find the unique Revision whose `Node` starts with a given binary prefix
68 70 ///
69 71 /// If no Revision matches the given prefix, `Ok(None)` is returned.
70 72 ///
71 73 /// If several Revisions match the given prefix, a
72 74 /// [MultipleResults](NodeMapError) error is returned.
73 75 fn find_bin(
74 76 &self,
75 77 idx: &impl RevlogIndex,
76 78 prefix: NodePrefix,
77 79 ) -> Result<Option<Revision>, NodeMapError>;
78 80
79 81 /// Give the size of the shortest node prefix that determines
80 82 /// the revision uniquely.
81 83 ///
82 84 /// From a binary node prefix, if it is matched in the node map, this
83 85 /// returns the number of hexadecimal digits that would had sufficed
84 86 /// to find the revision uniquely.
85 87 ///
86 88 /// Returns `None` if no [`Revision`] could be found for the prefix.
87 89 ///
88 90 /// If several Revisions match the given prefix, a
89 91 /// [MultipleResults](NodeMapError) error is returned.
90 92 fn unique_prefix_len_bin(
91 93 &self,
92 94 idx: &impl RevlogIndex,
93 95 node_prefix: NodePrefix,
94 96 ) -> Result<Option<usize>, NodeMapError>;
95 97
96 98 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
97 99 /// a full [`Node`] as input
98 100 fn unique_prefix_len_node(
99 101 &self,
100 102 idx: &impl RevlogIndex,
101 103 node: &Node,
102 104 ) -> Result<Option<usize>, NodeMapError> {
103 105 self.unique_prefix_len_bin(idx, node.into())
104 106 }
105 107 }
106 108
107 109 pub trait MutableNodeMap: NodeMap {
108 110 fn insert<I: RevlogIndex>(
109 111 &mut self,
110 112 index: &I,
111 113 node: &Node,
112 114 rev: Revision,
113 115 ) -> Result<(), NodeMapError>;
114 116 }
115 117
116 118 /// Low level NodeTree [`Block`] elements
117 119 ///
118 120 /// These are exactly as for instance on persistent storage.
119 121 type RawElement = unaligned::I32Be;
120 122
121 123 /// High level representation of values in NodeTree
122 124 /// [`Blocks`](struct.Block.html)
123 125 ///
124 126 /// This is the high level representation that most algorithms should
125 127 /// use.
126 128 #[derive(Clone, Debug, Eq, PartialEq)]
127 129 enum Element {
128 Rev(Revision),
130 // This is not a Mercurial revision. It's a `i32` because this is the
131 // right type for this structure.
132 Rev(i32),
129 133 Block(usize),
130 134 None,
131 135 }
132 136
133 137 impl From<RawElement> for Element {
134 138 /// Conversion from low level representation, after endianness conversion.
135 139 ///
136 140 /// See [`Block`](struct.Block.html) for explanation about the encoding.
137 141 fn from(raw: RawElement) -> Element {
138 142 let int = raw.get();
139 143 if int >= 0 {
140 144 Element::Block(int as usize)
141 145 } else if int == -1 {
142 146 Element::None
143 147 } else {
144 148 Element::Rev(-int - 2)
145 149 }
146 150 }
147 151 }
148 152
149 153 impl From<Element> for RawElement {
150 154 fn from(element: Element) -> RawElement {
151 155 RawElement::from(match element {
152 156 Element::None => 0,
153 157 Element::Block(i) => i as i32,
154 158 Element::Rev(rev) => -rev - 2,
155 159 })
156 160 }
157 161 }
158 162
159 163 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
160 164
161 165 /// A logical block of the [`NodeTree`], packed with a fixed size.
162 166 ///
163 167 /// These are always used in container types implementing `Index<Block>`,
164 168 /// such as `&Block`
165 169 ///
166 170 /// As an array of integers, its ith element encodes that the
167 171 /// ith potential edge from the block, representing the ith hexadecimal digit
168 172 /// (nybble) `i` is either:
169 173 ///
170 174 /// - absent (value -1)
171 175 /// - another `Block` in the same indexable container (value ≥ 0)
172 176 /// - a [`Revision`] leaf (value ≤ -2)
173 177 ///
174 178 /// Endianness has to be fixed for consistency on shared storage across
175 179 /// different architectures.
176 180 ///
177 181 /// A key difference with the C `nodetree` is that we need to be
178 182 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
179 183 /// rather than 0 and the [`Revision`] range upper limit of -2 instead of -1.
180 184 ///
181 185 /// Another related difference is that `NULL_REVISION` (-1) is not
182 186 /// represented at all, because we want an immutable empty nodetree
183 187 /// to be valid.
184 188 #[derive(Copy, Clone, BytesCast, PartialEq)]
185 189 #[repr(transparent)]
186 190 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
187 191
188 192 impl Block {
189 193 fn new() -> Self {
190 194 let absent_node = RawElement::from(-1);
191 195 Block([absent_node; ELEMENTS_PER_BLOCK])
192 196 }
193 197
194 198 fn get(&self, nybble: u8) -> Element {
195 199 self.0[nybble as usize].into()
196 200 }
197 201
198 202 fn set(&mut self, nybble: u8, element: Element) {
199 203 self.0[nybble as usize] = element.into()
200 204 }
201 205 }
202 206
203 207 impl fmt::Debug for Block {
204 208 /// sparse representation for testing and debugging purposes
205 209 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
206 210 f.debug_map()
207 211 .entries((0..16).filter_map(|i| match self.get(i) {
208 212 Element::None => None,
209 213 element => Some((i, element)),
210 214 }))
211 215 .finish()
212 216 }
213 217 }
214 218
215 219 /// A mutable 16-radix tree with the root block logically at the end
216 220 ///
217 221 /// Because of the append only nature of our node trees, we need to
218 222 /// keep the original untouched and store new blocks separately.
219 223 ///
220 224 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
221 225 /// it on each insertion.
222 226 pub struct NodeTree {
223 227 readonly: Box<dyn Deref<Target = [Block]> + Send>,
224 228 growable: Vec<Block>,
225 229 root: Block,
226 230 masked_inner_blocks: usize,
227 231 }
228 232
229 233 impl Index<usize> for NodeTree {
230 234 type Output = Block;
231 235
232 236 fn index(&self, i: usize) -> &Block {
233 237 let ro_len = self.readonly.len();
234 238 if i < ro_len {
235 239 &self.readonly[i]
236 240 } else if i == ro_len + self.growable.len() {
237 241 &self.root
238 242 } else {
239 243 &self.growable[i - ro_len]
240 244 }
241 245 }
242 246 }
243 247
244 248 /// Return `None` unless the [`Node`] for `rev` has given prefix in `idx`.
245 249 fn has_prefix_or_none(
246 250 idx: &impl RevlogIndex,
247 251 prefix: NodePrefix,
248 rev: Revision,
252 rev: UncheckedRevision,
249 253 ) -> Result<Option<Revision>, NodeMapError> {
250 idx.node(rev)
254 match idx.check_revision(rev) {
255 Some(checked) => idx
256 .node(checked)
251 257 .ok_or(NodeMapError::RevisionNotInIndex(rev))
252 258 .map(|node| {
253 259 if prefix.is_prefix_of(node) {
254 Some(rev)
260 Some(checked)
255 261 } else {
256 262 None
257 263 }
258 })
264 }),
265 None => Err(NodeMapError::RevisionNotInIndex(rev)),
266 }
259 267 }
260 268
261 269 /// validate that the candidate's node starts indeed with given prefix,
262 270 /// and treat ambiguities related to [`NULL_REVISION`].
263 271 ///
264 272 /// From the data in the NodeTree, one can only conclude that some
265 273 /// revision is the only one for a *subprefix* of the one being looked up.
266 274 fn validate_candidate(
267 275 idx: &impl RevlogIndex,
268 276 prefix: NodePrefix,
269 candidate: (Option<Revision>, usize),
277 candidate: (Option<UncheckedRevision>, usize),
270 278 ) -> Result<(Option<Revision>, usize), NodeMapError> {
271 279 let (rev, steps) = candidate;
272 280 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
273 281 rev.map_or(Ok((None, steps)), |r| {
274 282 has_prefix_or_none(idx, prefix, r)
275 283 .map(|opt| (opt, max(steps, nz_nybble + 1)))
276 284 })
277 285 } else {
278 286 // the prefix is only made of zeros; NULL_REVISION always matches it
279 287 // and any other *valid* result is an ambiguity
280 288 match rev {
281 289 None => Ok((Some(NULL_REVISION), steps + 1)),
282 290 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
283 291 None => Ok((Some(NULL_REVISION), steps + 1)),
284 292 _ => Err(NodeMapError::MultipleResults),
285 293 },
286 294 }
287 295 }
288 296 }
289 297
290 298 impl NodeTree {
291 299 /// Initiate a NodeTree from an immutable slice-like of `Block`
292 300 ///
293 301 /// We keep `readonly` and clone its root block if it isn't empty.
294 302 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
295 303 let root = readonly.last().cloned().unwrap_or_else(Block::new);
296 304 NodeTree {
297 305 readonly,
298 306 growable: Vec::new(),
299 307 root,
300 308 masked_inner_blocks: 0,
301 309 }
302 310 }
303 311
304 312 /// Create from an opaque bunch of bytes
305 313 ///
306 314 /// The created [`NodeTreeBytes`] from `bytes`,
307 315 /// of which exactly `amount` bytes are used.
308 316 ///
309 317 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
310 318 /// - `amount` is expressed in bytes, and is not automatically derived from
311 319 /// `bytes`, so that a caller that manages them atomically can perform
312 320 /// temporary disk serializations and still rollback easily if needed.
313 321 /// First use-case for this would be to support Mercurial shell hooks.
314 322 ///
315 323 /// panics if `buffer` is smaller than `amount`
316 324 pub fn load_bytes(
317 325 bytes: Box<dyn Deref<Target = [u8]> + Send>,
318 326 amount: usize,
319 327 ) -> Self {
320 328 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
321 329 }
322 330
323 331 /// Retrieve added [`Block`]s and the original immutable data
324 332 pub fn into_readonly_and_added(
325 333 self,
326 334 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
327 335 let mut vec = self.growable;
328 336 let readonly = self.readonly;
329 337 if readonly.last() != Some(&self.root) {
330 338 vec.push(self.root);
331 339 }
332 340 (readonly, vec)
333 341 }
334 342
335 343 /// Retrieve added [`Block]s as bytes, ready to be written to persistent
336 344 /// storage
337 345 pub fn into_readonly_and_added_bytes(
338 346 self,
339 347 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
340 348 let (readonly, vec) = self.into_readonly_and_added();
341 349 // Prevent running `v`'s destructor so we are in complete control
342 350 // of the allocation.
343 351 let vec = mem::ManuallyDrop::new(vec);
344 352
345 353 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
346 354 // bytes, so this is perfectly safe.
347 355 let bytes = unsafe {
348 356 // Check for compatible allocation layout.
349 357 // (Optimized away by constant-folding + dead code elimination.)
350 358 assert_eq!(size_of::<Block>(), 64);
351 359 assert_eq!(align_of::<Block>(), 1);
352 360
353 361 // /!\ Any use of `vec` after this is use-after-free.
354 362 // TODO: use `into_raw_parts` once stabilized
355 363 Vec::from_raw_parts(
356 364 vec.as_ptr() as *mut u8,
357 365 vec.len() * size_of::<Block>(),
358 366 vec.capacity() * size_of::<Block>(),
359 367 )
360 368 };
361 369 (readonly, bytes)
362 370 }
363 371
364 372 /// Total number of blocks
365 373 fn len(&self) -> usize {
366 374 self.readonly.len() + self.growable.len() + 1
367 375 }
368 376
369 377 /// Implemented for completeness
370 378 ///
371 379 /// A `NodeTree` always has at least the mutable root block.
372 380 #[allow(dead_code)]
373 381 fn is_empty(&self) -> bool {
374 382 false
375 383 }
376 384
377 385 /// Main working method for `NodeTree` searches
378 386 ///
379 387 /// The first returned value is the result of analysing `NodeTree` data
380 388 /// *alone*: whereas `None` guarantees that the given prefix is absent
381 389 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
382 390 /// `Some(rev)`, it is to be understood that `rev` is the unique
383 391 /// [`Revision`] that could match the prefix. Actually, all that can
384 392 /// be inferred from
385 393 /// the `NodeTree` data is that `rev` is the revision with the longest
386 394 /// common node prefix with the given prefix.
395 /// We return an [`UncheckedRevision`] because we have no guarantee that
396 /// the revision we found is valid for the index.
387 397 ///
388 398 /// The second returned value is the size of the smallest subprefix
389 399 /// of `prefix` that would give the same result, i.e. not the
390 400 /// [MultipleResults](NodeMapError) error variant (again, using only the
391 401 /// data of the [`NodeTree`]).
392 402 fn lookup(
393 403 &self,
394 404 prefix: NodePrefix,
395 ) -> Result<(Option<Revision>, usize), NodeMapError> {
405 ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> {
396 406 for (i, visit_item) in self.visit(prefix).enumerate() {
397 407 if let Some(opt) = visit_item.final_revision() {
398 408 return Ok((opt, i + 1));
399 409 }
400 410 }
401 411 Err(NodeMapError::MultipleResults)
402 412 }
403 413
404 414 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
405 415 NodeTreeVisitor {
406 416 nt: self,
407 417 prefix,
408 418 visit: self.len() - 1,
409 419 nybble_idx: 0,
410 420 done: false,
411 421 }
412 422 }
413 423 /// Return a mutable reference for `Block` at index `idx`.
414 424 ///
415 425 /// If `idx` lies in the immutable area, then the reference is to
416 426 /// a newly appended copy.
417 427 ///
418 428 /// Returns (new_idx, glen, mut_ref) where
419 429 ///
420 430 /// - `new_idx` is the index of the mutable `Block`
421 431 /// - `mut_ref` is a mutable reference to the mutable Block.
422 432 /// - `glen` is the new length of `self.growable`
423 433 ///
424 434 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
425 435 /// itself because of the mutable borrow taken with the returned `Block`
426 436 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
427 437 let ro_blocks = &self.readonly;
428 438 let ro_len = ro_blocks.len();
429 439 let glen = self.growable.len();
430 440 if idx < ro_len {
431 441 self.masked_inner_blocks += 1;
432 442 self.growable.push(ro_blocks[idx]);
433 443 (glen + ro_len, &mut self.growable[glen], glen + 1)
434 444 } else if glen + ro_len == idx {
435 445 (idx, &mut self.root, glen)
436 446 } else {
437 447 (idx, &mut self.growable[idx - ro_len], glen)
438 448 }
439 449 }
440 450
441 451 /// Main insertion method
442 452 ///
443 453 /// This will dive in the node tree to find the deepest `Block` for
444 454 /// `node`, split it as much as needed and record `node` in there.
445 455 /// The method then backtracks, updating references in all the visited
446 456 /// blocks from the root.
447 457 ///
448 458 /// All the mutated `Block` are copied first to the growable part if
449 459 /// needed. That happens for those in the immutable part except the root.
450 460 pub fn insert<I: RevlogIndex>(
451 461 &mut self,
452 462 index: &I,
453 463 node: &Node,
454 464 rev: Revision,
455 465 ) -> Result<(), NodeMapError> {
456 466 let ro_len = &self.readonly.len();
457 467
458 468 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
459 469 let read_nybbles = visit_steps.len();
460 470 // visit_steps cannot be empty, since we always visit the root block
461 471 let deepest = visit_steps.pop().unwrap();
462 472
463 473 let (mut block_idx, mut block, mut glen) =
464 474 self.mutable_block(deepest.block_idx);
465 475
466 476 if let Element::Rev(old_rev) = deepest.element {
// NOTE(review): diff hunk below — the three old-numbered lines (467-469)
// are the removed version; the new-numbered lines (477-479) wrap the
// missing revision in `UncheckedRevision` via `.into()` for the error.
467 let old_node = index
468 .node(old_rev)
469 .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?;
477 let old_node = index.node(old_rev).ok_or_else(|| {
478 NodeMapError::RevisionNotInIndex(old_rev.into())
479 })?;
470 480 if old_node == node {
471 481 return Ok(()); // avoid creating lots of useless blocks
472 482 }
473 483
474 484 // Looping over the tail of nybbles in both nodes, creating
475 485 // new blocks until we find the difference
476 486 let mut new_block_idx = ro_len + glen;
477 487 let mut nybble = deepest.nybble;
478 488 for nybble_pos in read_nybbles..node.nybbles_len() {
479 489 block.set(nybble, Element::Block(new_block_idx));
480 490
481 491 let new_nybble = node.get_nybble(nybble_pos);
482 492 let old_nybble = old_node.get_nybble(nybble_pos);
483 493
484 494 if old_nybble == new_nybble {
// Nybbles still agree: chain one more intermediate block and descend.
485 495 self.growable.push(Block::new());
486 496 block = &mut self.growable[glen];
487 497 glen += 1;
488 498 new_block_idx += 1;
489 499 nybble = new_nybble;
490 500 } else {
// First divergence: record both revisions in a fresh leaf block.
491 501 let mut new_block = Block::new();
492 502 new_block.set(old_nybble, Element::Rev(old_rev));
493 503 new_block.set(new_nybble, Element::Rev(rev));
494 504 self.growable.push(new_block);
495 505 break;
496 506 }
497 507 }
498 508 } else {
499 509 // Free slot in the deepest block: no splitting has to be done
500 510 block.set(deepest.nybble, Element::Rev(rev));
501 511 }
502 512
503 513 // Backtrack over visit steps to update references
504 514 while let Some(visited) = visit_steps.pop() {
505 515 let to_write = Element::Block(block_idx);
506 516 if visit_steps.is_empty() {
507 517 self.root.set(visited.nybble, to_write);
508 518 break;
509 519 }
// Stop early if the parent already points at the right block;
// nothing above it can have changed either.
510 520 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
511 521 if block.get(visited.nybble) == to_write {
512 522 break;
513 523 }
514 524 block.set(visited.nybble, to_write);
515 525 block_idx = new_idx;
516 526 }
517 527 Ok(())
518 528 }
519 529
520 530 /// Make the whole `NodeTree` logically empty, without touching the
521 531 /// immutable part.
522 532 pub fn invalidate_all(&mut self) {
523 533 self.root = Block::new();
524 534 self.growable = Vec::new();
// Conventionally mark every readonly block as masked so that
// `masked_readonly_blocks()` reports the whole immutable part as dead.
525 535 self.masked_inner_blocks = self.readonly.len();
526 536 }
527 537
528 538 /// Return the number of blocks in the readonly part that are currently
529 539 /// masked in the mutable part.
530 540 ///
531 541 /// The `NodeTree` structure has no efficient way to know how many blocks
532 542 /// are already unreachable in the readonly part.
533 543 ///
534 544 /// After a call to `invalidate_all()`, the returned number can be actually
535 545 /// bigger than the whole readonly part, a conventional way to mean that
536 546 /// all the readonly blocks have been masked. This is what is really
537 547 /// useful to the caller and does not require to know how many were
538 548 /// actually unreachable to begin with.
539 549 pub fn masked_readonly_blocks(&self) -> usize {
// If the mutable root is still identical to the last readonly block,
// nothing has been modified since loading: no block is masked.
540 550 if let Some(readonly_root) = self.readonly.last() {
541 551 if readonly_root == &self.root {
542 552 return 0;
543 553 }
544 554 } else {
545 555 return 0;
546 556 }
// +1 accounts for the readonly root itself, superseded by `self.root`.
547 557 self.masked_inner_blocks + 1
548 558 }
549 559 }
550 560
/// Byte buffer interpreted as a sequence of serialized `Block`s
/// (see the `Deref<Target = [Block]>` impl below).
551 561 pub struct NodeTreeBytes {
// Owned, possibly mmap-backed storage; `Send` so it can cross threads.
552 562 buffer: Box<dyn Deref<Target = [u8]> + Send>,
// Number of whole `Block`s covered, computed in `new` as
// `amount / size_of::<Block>()`.
553 563 len_in_blocks: usize,
554 564 }
555 565
556 566 impl NodeTreeBytes {
/// Wrap `buffer`, exposing its first `amount` bytes as `Block`s.
///
/// Panics if `buffer` is shorter than `amount`; the assertion is what
/// lets the `Deref` impl below unwrap safely.
557 567 fn new(
558 568 buffer: Box<dyn Deref<Target = [u8]> + Send>,
559 569 amount: usize,
560 570 ) -> Self {
561 571 assert!(buffer.len() >= amount);
// Integer division: any trailing partial block is ignored.
562 572 let len_in_blocks = amount / size_of::<Block>();
563 573 NodeTreeBytes {
564 574 buffer,
565 575 len_in_blocks,
566 576 }
567 577 }
568 578 }
569 579
/// View the raw bytes as a slice of `Block`s.
570 580 impl Deref for NodeTreeBytes {
571 581 type Target = [Block];
572 582
573 583 fn deref(&self) -> &[Block] {
574 584 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
575 585 // `NodeTreeBytes::new` already asserted that `self.buffer` is
576 586 // large enough.
577 587 .unwrap()
578 588 .0
579 589 }
580 590 }
581 591
/// Iterator state for walking a `NodeTree` along the nybbles of `prefix`
/// (see the `Iterator` impl below).
582 592 struct NodeTreeVisitor<'n> {
583 593 nt: &'n NodeTree,
// Node prefix being looked up, one nybble per iteration step.
584 594 prefix: NodePrefix,
// Index of the block to read on the next step.
585 595 visit: usize,
// Position of the next nybble of `prefix` to consume.
586 596 nybble_idx: usize,
// Set once a non-`Block` element is reached; ends the iteration.
587 597 done: bool,
589 599
/// One step of a `NodeTreeVisitor` walk: which block was read, at which
/// nybble, and what element was found there.
590 600 #[derive(Debug, PartialEq, Clone)]
591 601 struct NodeTreeVisitItem {
592 602 block_idx: usize,
593 603 nybble: u8,
594 604 element: Element,
595 605 }
596 606
597 607 impl<'n> Iterator for NodeTreeVisitor<'n> {
598 608 type Item = NodeTreeVisitItem;
599 609
/// Consume one nybble of the prefix and report what the current block
/// holds at that nybble, descending when it is another `Block`.
600 610 fn next(&mut self) -> Option<Self::Item> {
// Stop when a terminal element was hit or the prefix is exhausted.
601 611 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
602 612 return None;
603 613 }
604 614
605 615 let nybble = self.prefix.get_nybble(self.nybble_idx);
606 616 self.nybble_idx += 1;
607 617
608 618 let visit = self.visit;
609 619 let element = self.nt[visit].get(nybble);
610 620 if let Element::Block(idx) = element {
// Inner block: continue the walk there on the next call.
611 621 self.visit = idx;
612 622 } else {
// `Rev` or `None` is terminal: this is the last item yielded.
613 623 self.done = true;
614 624 }
615 625
616 626 Some(NodeTreeVisitItem {
617 627 block_idx: visit,
618 628 nybble,
619 629 element,
620 630 })
621 631 }
622 632 }
623 633
624 634 impl NodeTreeVisitItem {
625 635 // Return `Some(opt)` if this item is final, with `opt` being the
// NOTE(review): diff hunk — old-numbered lines (626, 629, 632) are the
// removed `Revision` version; new-numbered lines (636, 639, 642) switch
// the return type to `UncheckedRevision`, converting with `.into()`.
626 // `Revision` that it may represent.
636 // `UncheckedRevision` that it may represent.
627 637 //
628 638 // If the item is not terminal, return `None`
629 fn final_revision(&self) -> Option<Option<Revision>> {
639 fn final_revision(&self) -> Option<Option<UncheckedRevision>> {
630 640 match self.element {
631 641 Element::Block(_) => None,
632 Element::Rev(r) => Some(Some(r)),
642 Element::Rev(r) => Some(Some(r.into())),
633 643 Element::None => Some(None),
634 644 }
635 645 }
636 646 }
637 647
/// Build a fully-readonly `NodeTree` from an in-memory vector of blocks.
638 648 impl From<Vec<Block>> for NodeTree {
639 649 fn from(vec: Vec<Block>) -> Self {
640 650 Self::new(Box::new(vec))
641 651 }
642 652 }
643 653
/// Debug rendering showing the three storage areas separately
/// (readonly, growable, root) — relied upon by `test_nt_debug`.
644 654 impl fmt::Debug for NodeTree {
645 655 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
646 656 let readonly: &[Block] = &*self.readonly;
647 657 write!(
648 658 f,
649 659 "readonly: {:?}, growable: {:?}, root: {:?}",
650 660 readonly, self.growable, self.root
651 661 )
652 662 }
653 663 }
654 664
655 665 impl Default for NodeTree {
656 666 /// Create a fully mutable empty NodeTree
657 667 fn default() -> Self {
// An empty boxed Vec makes the readonly part empty; everything
// inserted afterwards lands in the growable part or the root.
658 668 NodeTree::new(Box::new(Vec::new()))
659 669 }
660 670 }
661 671
662 672 impl NodeMap for NodeTree {
/// Look up the unique revision whose node starts with `prefix`,
/// delegating ambiguity/validation to `validate_candidate`.
663 673 fn find_bin<'a>(
664 674 &self,
665 675 idx: &impl RevlogIndex,
666 676 prefix: NodePrefix,
667 677 ) -> Result<Option<Revision>, NodeMapError> {
668 678 validate_candidate(idx, prefix, self.lookup(prefix)?)
669 679 .map(|(opt, _shortest)| opt)
670 680 }
671 681
/// Same lookup, but return the length of the shortest unambiguous
/// prefix instead of the revision itself.
672 682 fn unique_prefix_len_bin<'a>(
673 683 &self,
674 684 idx: &impl RevlogIndex,
675 685 prefix: NodePrefix,
676 686 ) -> Result<Option<usize>, NodeMapError> {
677 687 validate_candidate(idx, prefix, self.lookup(prefix)?)
678 688 .map(|(opt, shortest)| opt.map(|_rev| shortest))
679 689 }
680 690 }
681 691
// NOTE(review): this module is shown as a diff; hunks below contain both
// the removed (old-number-only) and added (new-number-only) lines of the
// `Revision` -> `UncheckedRevision` migration.
682 692 #[cfg(test)]
683 693 mod tests {
684 694 use super::NodeMapError::*;
685 695 use super::*;
686 696 use crate::revlog::node::{hex_pad_right, Node};
687 697 use std::collections::HashMap;
688 698
689 699 /// Creates a `Block` using a syntax close to the `Debug` output
690 700 macro_rules! block {
691 701 {$($nybble:tt : $variant:ident($val:tt)),*} => (
692 702 {
693 703 let mut block = Block::new();
694 704 $(block.set($nybble, Element::$variant($val)));*;
695 705 block
696 706 }
697 707 )
698 708 }
699 709
700 710 #[test]
701 711 fn test_block_debug() {
702 712 let mut block = Block::new();
703 713 block.set(1, Element::Rev(3));
704 714 block.set(10, Element::Block(0));
705 715 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
706 716 }
707 717
708 718 #[test]
709 719 fn test_block_macro() {
710 720 let block = block! {5: Block(2)};
711 721 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
712 722
713 723 let block = block! {13: Rev(15), 5: Block(2)};
714 724 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
715 725 }
716 726
// Checks the on-disk decoding convention of `Block::from_bytes` against
// hand-written big-endian i32 values.
717 727 #[test]
718 728 fn test_raw_block() {
719 729 let mut raw = [255u8; 64];
720 730
721 731 let mut counter = 0;
722 732 for val in [0_i32, 15, -2, -1, -3].iter() {
723 733 for byte in val.to_be_bytes().iter() {
724 734 raw[counter] = *byte;
725 735 counter += 1;
726 736 }
727 737 }
728 738 let (block, _) = Block::from_bytes(&raw).unwrap();
729 739 assert_eq!(block.get(0), Element::Block(0));
730 740 assert_eq!(block.get(1), Element::Block(15));
731 741 assert_eq!(block.get(3), Element::None);
732 742 assert_eq!(block.get(2), Element::Rev(0));
733 743 assert_eq!(block.get(4), Element::Rev(1));
734 744 }
735 745
// Hunk: test index now keys on `UncheckedRevision` instead of `Revision`.
736 type TestIndex = HashMap<Revision, Node>;
746 type TestIndex = HashMap<UncheckedRevision, Node>;
737 747
738 748 impl RevlogIndex for TestIndex {
739 749 fn node(&self, rev: Revision) -> Option<&Node> {
740 self.get(&rev)
750 self.get(&rev.into())
741 751 }
742 752
743 753 fn len(&self) -> usize {
744 754 self.len()
745 755 }
756
// Added by this changeset: trivial check — a revision is valid iff the
// test map contains it.
757 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
758 self.get(&rev).map(|_| rev.0)
759 }
746 760 }
747 761
748 762 /// Pad hexadecimal Node prefix with zeros on the right
749 763 ///
750 764 /// This avoids having to repeatedly write very long hexadecimal
751 765 /// strings for test data, and brings actual hash size independency.
752 766 #[cfg(test)]
753 767 fn pad_node(hex: &str) -> Node {
754 768 Node::from_hex(&hex_pad_right(hex)).unwrap()
755 769 }
756 770
757 771 /// Pad hexadecimal Node prefix with zeros on the right, then insert
758 772 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
759 idx.insert(rev, pad_node(hex));
773 idx.insert(rev.into(), pad_node(hex));
760 774 }
761 775
762 776 fn sample_nodetree() -> NodeTree {
763 777 NodeTree::from(vec![
764 778 block![0: Rev(9)],
765 779 block![0: Rev(0), 1: Rev(9)],
766 780 block![0: Block(1), 1:Rev(1)],
767 781 ])
768 782 }
769 783
770 784 fn hex(s: &str) -> NodePrefix {
771 785 NodePrefix::from_hex(s).unwrap()
772 786 }
773 787
774 788 #[test]
775 789 fn test_nt_debug() {
776 790 let nt = sample_nodetree();
777 791 assert_eq!(
778 792 format!("{:?}", nt),
779 793 "readonly: \
780 794 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
781 795 growable: [], \
782 796 root: {0: Block(1), 1: Rev(1)}",
783 797 );
784 798 }
785 799
786 800 #[test]
787 801 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
788 802 let mut idx: TestIndex = HashMap::new();
789 803 pad_insert(&mut idx, 1, "1234deadcafe");
790 804
791 805 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
792 806 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(1));
793 807 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(1));
794 808 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(1));
795 809 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
796 810 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
797 811
798 812 // and with full binary Nodes
799 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
813 assert_eq!(nt.find_node(&idx, idx.get(&1.into()).unwrap())?, Some(1));
800 814 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
801 815 assert_eq!(nt.find_node(&idx, &unknown)?, None);
802 816 Ok(())
803 817 }
804 818
805 819 #[test]
806 820 fn test_immutable_find_one_jump() {
807 821 let mut idx = TestIndex::new();
808 822 pad_insert(&mut idx, 9, "012");
809 823 pad_insert(&mut idx, 0, "00a");
810 824
811 825 let nt = sample_nodetree();
812 826
813 827 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
814 828 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(9)));
815 829 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
816 830 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(0)));
817 831 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
818 832 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
819 833 }
820 834
821 835 #[test]
822 836 fn test_mutated_find() -> Result<(), NodeMapError> {
823 837 let mut idx = TestIndex::new();
824 838 pad_insert(&mut idx, 9, "012");
825 839 pad_insert(&mut idx, 0, "00a");
826 840 pad_insert(&mut idx, 2, "cafe");
827 841 pad_insert(&mut idx, 3, "15");
828 842 pad_insert(&mut idx, 1, "10");
829 843
// Hand-built tree mixing readonly and growable parts to exercise lookups
// across both storage areas.
830 844 let nt = NodeTree {
831 845 readonly: sample_nodetree().readonly,
832 846 growable: vec![block![0: Rev(1), 5: Rev(3)]],
833 847 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
834 848 masked_inner_blocks: 1,
835 849 };
836 850 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(1));
837 851 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(2));
838 852 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
839 853 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
840 854 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
841 855 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
842 856 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(9));
843 857 assert_eq!(nt.masked_readonly_blocks(), 2);
844 858 Ok(())
845 859 }
846 860
// Test harness pairing an index with a tree so both stay in sync on insert.
847 861 struct TestNtIndex {
848 862 index: TestIndex,
849 863 nt: NodeTree,
850 864 }
851 865
852 866 impl TestNtIndex {
853 867 fn new() -> Self {
854 868 TestNtIndex {
855 869 index: HashMap::new(),
856 870 nt: NodeTree::default(),
857 871 }
858 872 }
859 873
// Hunk: signature migrated to take a plain `i32`, convert it to
// `UncheckedRevision`, and run it through `check_revision` before
// feeding the tree.
860 fn insert(
861 &mut self,
862 rev: Revision,
863 hex: &str,
864 ) -> Result<(), NodeMapError> {
874 fn insert(&mut self, rev: i32, hex: &str) -> Result<(), NodeMapError> {
865 875 let node = pad_node(hex);
876 let rev: UncheckedRevision = rev.into();
866 877 self.index.insert(rev, node);
867 self.nt.insert(&self.index, &node, rev)?;
878 self.nt.insert(
879 &self.index,
880 &node,
881 self.index.check_revision(rev).unwrap(),
882 )?;
868 883 Ok(())
869 884 }
870 885
871 886 fn find_hex(
872 887 &self,
873 888 prefix: &str,
874 889 ) -> Result<Option<Revision>, NodeMapError> {
875 890 self.nt.find_bin(&self.index, hex(prefix))
876 891 }
877 892
878 893 fn unique_prefix_len_hex(
879 894 &self,
880 895 prefix: &str,
881 896 ) -> Result<Option<usize>, NodeMapError> {
882 897 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
883 898 }
884 899
885 900 /// Drain `added` and restart a new one
886 901 fn commit(self) -> Self {
887 902 let mut as_vec: Vec<Block> =
888 903 self.nt.readonly.iter().copied().collect();
889 904 as_vec.extend(self.nt.growable);
890 905 as_vec.push(self.nt.root);
891 906
892 907 Self {
893 908 index: self.index,
894 909 nt: NodeTree::from(as_vec),
895 910 }
896 911 }
897 912 }
898 913
899 914 #[test]
900 915 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
901 916 let mut idx = TestNtIndex::new();
902 917 idx.insert(0, "1234")?;
903 918 assert_eq!(idx.find_hex("1")?, Some(0));
904 919 assert_eq!(idx.find_hex("12")?, Some(0));
905 920
906 921 // let's trigger a simple split
907 922 idx.insert(1, "1a34")?;
908 923 assert_eq!(idx.nt.growable.len(), 1);
909 924 assert_eq!(idx.find_hex("12")?, Some(0));
910 925 assert_eq!(idx.find_hex("1a")?, Some(1));
911 926
912 927 // reinserting is a no_op
913 928 idx.insert(1, "1a34")?;
914 929 assert_eq!(idx.nt.growable.len(), 1);
915 930 assert_eq!(idx.find_hex("12")?, Some(0));
916 931 assert_eq!(idx.find_hex("1a")?, Some(1));
917 932
918 933 idx.insert(2, "1a01")?;
919 934 assert_eq!(idx.nt.growable.len(), 2);
920 935 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
921 936 assert_eq!(idx.find_hex("12")?, Some(0));
922 937 assert_eq!(idx.find_hex("1a3")?, Some(1));
923 938 assert_eq!(idx.find_hex("1a0")?, Some(2));
924 939 assert_eq!(idx.find_hex("1a12")?, None);
925 940
926 941 // now let's make it split and create more than one additional block
927 942 idx.insert(3, "1a345")?;
928 943 assert_eq!(idx.nt.growable.len(), 4);
929 944 assert_eq!(idx.find_hex("1a340")?, Some(1));
930 945 assert_eq!(idx.find_hex("1a345")?, Some(3));
931 946 assert_eq!(idx.find_hex("1a341")?, None);
932 947
933 948 // there's no readonly block to mask
934 949 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
935 950 Ok(())
936 951 }
937 952
938 953 #[test]
939 954 fn test_unique_prefix_len_zero_prefix() {
940 955 let mut idx = TestNtIndex::new();
941 956 idx.insert(0, "00000abcd").unwrap();
942 957
943 958 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
944 959 // in the nodetree proper, this will be found at the first nybble
945 960 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
946 961 // but the first difference with `NULL_NODE`
947 962 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
948 963 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
949 964
950 965 // same with odd result
951 966 idx.insert(1, "00123").unwrap();
952 967 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
953 968 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
954 969
955 970 // these are unchanged of course
956 971 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
957 972 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
958 973 }
959 974
960 975 #[test]
961 976 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
962 977 // check that the splitting loop is long enough
963 978 let mut nt_idx = TestNtIndex::new();
964 979 let nt = &mut nt_idx.nt;
965 980 let idx = &mut nt_idx.index;
966 981
967 982 let node0_hex = hex_pad_right("444444");
968 983 let mut node1_hex = hex_pad_right("444444");
969 984 node1_hex.pop();
970 985 node1_hex.push('5');
971 986 let node0 = Node::from_hex(&node0_hex).unwrap();
972 987 let node1 = Node::from_hex(&node1_hex).unwrap();
973 988
// Hunk: raw map keys now need explicit `.into()` conversion to
// `UncheckedRevision`.
974 idx.insert(0, node0);
989 idx.insert(0.into(), node0);
975 990 nt.insert(idx, &node0, 0)?;
976 idx.insert(1, node1);
991 idx.insert(1.into(), node1);
977 992 nt.insert(idx, &node1, 1)?;
978 993
979 994 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
980 995 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
981 996 Ok(())
982 997 }
983 998
984 999 #[test]
985 1000 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
986 1001 let mut idx = TestNtIndex::new();
987 1002 idx.insert(0, "1234")?;
988 1003 idx.insert(1, "1235")?;
989 1004 idx.insert(2, "131")?;
990 1005 idx.insert(3, "cafe")?;
991 1006 let mut idx = idx.commit();
992 1007 assert_eq!(idx.find_hex("1234")?, Some(0));
993 1008 assert_eq!(idx.find_hex("1235")?, Some(1));
994 1009 assert_eq!(idx.find_hex("131")?, Some(2));
995 1010 assert_eq!(idx.find_hex("cafe")?, Some(3));
996 1011 // we did not add anything since init from readonly
997 1012 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
998 1013
999 1014 idx.insert(4, "123A")?;
1000 1015 assert_eq!(idx.find_hex("1234")?, Some(0));
1001 1016 assert_eq!(idx.find_hex("1235")?, Some(1));
1002 1017 assert_eq!(idx.find_hex("131")?, Some(2));
1003 1018 assert_eq!(idx.find_hex("cafe")?, Some(3));
1004 1019 assert_eq!(idx.find_hex("123A")?, Some(4));
1005 1020 // we masked blocks for all prefixes of "123", including the root
1006 1021 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1007 1022
1008 1023 eprintln!("{:?}", idx.nt);
1009 1024 idx.insert(5, "c0")?;
1010 1025 assert_eq!(idx.find_hex("cafe")?, Some(3));
1011 1026 assert_eq!(idx.find_hex("c0")?, Some(5));
1012 1027 assert_eq!(idx.find_hex("c1")?, None);
1013 1028 assert_eq!(idx.find_hex("1234")?, Some(0));
1014 1029 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1015 1030 // it doesn't mask anything
1016 1031 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1017 1032
1018 1033 Ok(())
1019 1034 }
1020 1035
1021 1036 #[test]
1022 1037 fn test_invalidate_all() -> Result<(), NodeMapError> {
1023 1038 let mut idx = TestNtIndex::new();
1024 1039 idx.insert(0, "1234")?;
1025 1040 idx.insert(1, "1235")?;
1026 1041 idx.insert(2, "131")?;
1027 1042 idx.insert(3, "cafe")?;
1028 1043 let mut idx = idx.commit();
1029 1044
1030 1045 idx.nt.invalidate_all();
1031 1046
1032 1047 assert_eq!(idx.find_hex("1234")?, None);
1033 1048 assert_eq!(idx.find_hex("1235")?, None);
1034 1049 assert_eq!(idx.find_hex("131")?, None);
1035 1050 assert_eq!(idx.find_hex("cafe")?, None);
1036 1051 // all the readonly blocks have been masked, this is the
1037 1052 // conventional expected response
1038 1053 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1039 1054 Ok(())
1040 1055 }
1041 1056
1042 1057 #[test]
1043 1058 fn test_into_added_empty() {
1044 1059 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1045 1060 assert!(sample_nodetree()
1046 1061 .into_readonly_and_added_bytes()
1047 1062 .1
1048 1063 .is_empty());
1049 1064 }
1050 1065
1051 1066 #[test]
1052 1067 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1053 1068 let mut idx = TestNtIndex::new();
1054 1069 idx.insert(0, "1234")?;
1055 1070 let mut idx = idx.commit();
1056 1071 idx.insert(4, "cafe")?;
1057 1072 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1058 1073
1059 1074 // only the root block has been changed
1060 1075 assert_eq!(bytes.len(), size_of::<Block>());
1061 1076 // big endian for -2
1062 1077 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1063 1078 // big endian for -6
1064 1079 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1065 1080 Ok(())
1066 1081 }
1067 1082 }
@@ -1,69 +1,69 b''
1 1 //! The revset query language
2 2 //!
3 3 //! <https://www.mercurial-scm.org/repo/hg/help/revsets>
4 4
5 5 use crate::errors::HgError;
6 6 use crate::repo::Repo;
7 7 use crate::revlog::NodePrefix;
8 8 use crate::revlog::{Revision, NULL_REVISION, WORKING_DIRECTORY_HEX};
9 9 use crate::revlog::{Revlog, RevlogError};
10 10 use crate::Node;
11 11
12 12 /// Resolve a query string into a single revision.
13 13 ///
14 14 /// Only some of the revset language is implemented yet.
///
/// Currently supported inputs: `"."` (first dirstate parent), `"null"`,
/// plus whatever `resolve_rev_number_or_hex_prefix` accepts (revision
/// numbers and unambiguous hex node prefixes).
15 15 pub fn resolve_single(
16 16 input: &str,
17 17 repo: &Repo,
18 18 ) -> Result<Revision, RevlogError> {
19 19 let changelog = repo.changelog()?;
20 20
21 21 match input {
22 22 "." => {
// "." means the working directory's first parent.
23 23 let p1 = repo.dirstate_parents()?.p1;
24 24 return changelog.revlog.rev_from_node(p1.into());
25 25 }
26 26 "null" => return Ok(NULL_REVISION),
27 27 _ => {}
28 28 }
29 29
30 30 match resolve_rev_number_or_hex_prefix(input, &changelog.revlog) {
31 31 Err(RevlogError::InvalidRevision) => {
32 32 // TODO: support for the rest of the language here.
33 33 let msg = format!("cannot parse revset '{}'", input);
34 34 Err(HgError::unsupported(msg).into())
35 35 }
36 36 result => result,
37 37 }
38 38 }
39 39
40 40 /// Resolve the small subset of the language suitable for revlogs other than
41 41 /// the changelog, such as in `hg debugdata --manifest` CLI argument.
42 42 ///
43 43 /// * A non-negative decimal integer for a revision number, or
44 44 /// * An hexadecimal string, for the unique node ID that starts with this
45 45 /// prefix
46 46 pub fn resolve_rev_number_or_hex_prefix(
47 47 input: &str,
48 48 revlog: &Revlog,
49 49 ) -> Result<Revision, RevlogError> {
50 50 // The Python equivalent of this is part of `revsymbol` in
51 51 // `mercurial/scmutil.py`
52 52
53 53 if let Ok(integer) = input.parse::<i32>() {
// `to_string` round-trip rejects forms like "+1" or "007" that parse
// but are not canonical revision numbers.
54 54 if integer.to_string() == input
55 55 && integer >= 0
// NOTE(review): diff hunk — old line 56 passed the raw integer; the new
// line converts it into an `UncheckedRevision` for `has_rev`.
56 && revlog.has_rev(integer)
56 && revlog.has_rev(integer.into())
57 57 {
58 58 return Ok(integer);
59 59 }
60 60 }
61 61 if let Ok(prefix) = NodePrefix::from_hex(input) {
// The all-`f` working-directory pseudo-node is explicitly unsupported.
62 62 if prefix.is_prefix_of(&Node::from_hex(WORKING_DIRECTORY_HEX).unwrap())
63 63 {
64 64 return Err(RevlogError::WDirUnsupported);
65 65 }
66 66 return revlog.rev_from_node(prefix);
67 67 }
68 68 Err(RevlogError::InvalidRevision)
69 69 }
@@ -1,515 +1,518 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 buffer::{Element, PyBuffer},
14 14 exc::{IndexError, ValueError},
15 15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 17 };
18 18 use hg::{
19 19 nodemap::{Block, NodeMapError, NodeTree},
20 20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 Revision,
21 Revision, UncheckedRevision,
22 22 };
23 23 use std::cell::RefCell;
24 24
25 25 /// Return a Struct implementing the Graph trait
///
/// If `index` is already a Rust-backed `MixedIndex`, reuse its inner
/// C index; otherwise wrap the Python object in a new `cindex::Index`.
26 26 pub(crate) fn pyindex_to_graph(
27 27 py: Python,
28 28 index: PyObject,
29 29 ) -> PyResult<cindex::Index> {
30 30 match index.extract::<MixedIndex>(py) {
31 31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 32 Err(_) => cindex::Index::new(py, index),
33 33 }
34 34 }
35 35
36 36 py_class!(pub class MixedIndex |py| {
37 37 data cindex: RefCell<cindex::Index>;
38 38 data nt: RefCell<Option<NodeTree>>;
39 39 data docket: RefCell<Option<PyObject>>;
40 40 // Holds a reference to the mmap'ed persistent nodemap data
41 41 data mmap: RefCell<Option<PyBuffer>>;
42 42
43 43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 44 Self::new(py, cindex)
45 45 }
46 46
47 47 /// Compatibility layer used for Python consumers needing access to the C index
48 48 ///
49 49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 52 /// this, by exposing our own standalone nodemap class,
53 53 /// ready to accept `MixedIndex`.
54 54 def get_cindex(&self) -> PyResult<PyObject> {
55 55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 56 }
57 57
58 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 59
60 60 /// Return Revision if found, raises a bare `error.RevlogError`
61 61 /// in case of ambiguity, same as C version does
62 62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 63 let opt = self.get_nodetree(py)?.borrow();
64 64 let nt = opt.as_ref().unwrap();
65 65 let idx = &*self.cindex(py).borrow();
66 66 let node = node_from_py_bytes(py, &node)?;
67 67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
68 68 }
69 69
70 70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 71 /// is not found.
72 72 ///
73 73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 74 /// will catch and rewrap with it
75 75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 77 }
78 78
79 79 /// return True if the node exist in the index
80 80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 81 self.get_rev(py, node).map(|opt| opt.is_some())
82 82 }
83 83
84 84 /// find length of shortest hex nodeid of a binary ID
85 85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 86 let opt = self.get_nodetree(py)?.borrow();
87 87 let nt = opt.as_ref().unwrap();
88 88 let idx = &*self.cindex(py).borrow();
89 89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 90 {
91 91 Ok(Some(l)) => Ok(l),
92 92 Ok(None) => Err(revlog_error(py)),
93 93 Err(e) => Err(nodemap_error(py, e)),
94 94 }
95 95 }
96 96
97 97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 98 let opt = self.get_nodetree(py)?.borrow();
99 99 let nt = opt.as_ref().unwrap();
100 100 let idx = &*self.cindex(py).borrow();
101 101
102 102 let node_as_string = if cfg!(feature = "python3-sys") {
103 103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 104 }
105 105 else {
106 106 let node = node.extract::<PyBytes>(py)?;
107 107 String::from_utf8_lossy(node.data(py)).to_string()
108 108 };
109 109
110 110 let prefix = NodePrefix::from_hex(&node_as_string)
111 111 .map_err(|_| PyErr::new::<ValueError, _>(
112 112 py, format!("Invalid node or prefix '{}'", node_as_string))
113 113 )?;
114 114
115 115 nt.find_bin(idx, prefix)
116 116 // TODO make an inner API returning the node directly
117 117 .map(|opt| opt.map(
118 118 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
119 119 .map_err(|e| nodemap_error(py, e))
120 120
121 121 }
122 122
123 123 /// append an index entry
124 124 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
125 125 if tup.len(py) < 8 {
126 126 // this is better than the panic promised by tup.get_item()
127 127 return Err(
128 128 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
129 129 }
130 130 let node_bytes = tup.get_item(py, 7).extract(py)?;
131 131 let node = node_from_py_object(py, &node_bytes)?;
132 132
133 133 let mut idx = self.cindex(py).borrow_mut();
134 134 let rev = idx.len() as Revision;
135 135
136 136 idx.append(py, tup)?;
137 137 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
138 138 .insert(&*idx, &node, rev)
139 139 .map_err(|e| nodemap_error(py, e))?;
140 140 Ok(py.None())
141 141 }
142 142
143 143 def __delitem__(&self, key: PyObject) -> PyResult<()> {
144 144 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
145 145 self.cindex(py).borrow().inner().del_item(py, key)?;
146 146 let mut opt = self.get_nodetree(py)?.borrow_mut();
147 147 let nt = opt.as_mut().unwrap();
148 148 nt.invalidate_all();
149 149 self.fill_nodemap(py, nt)?;
150 150 Ok(())
151 151 }
152 152
153 153 //
154 154 // Reforwarded C index API
155 155 //
156 156
157 157 // index_methods (tp_methods). Same ordering as in revlog.c
158 158
159 159 /// return the gca set of the given revs
160 160 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
161 161 self.call_cindex(py, "ancestors", args, kw)
162 162 }
163 163
164 164 /// return the heads of the common ancestors of the given revs
165 165 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
166 166 self.call_cindex(py, "commonancestorsheads", args, kw)
167 167 }
168 168
169 169 /// Clear the index caches and inner py_class data.
170 170 /// It is Python's responsibility to call `update_nodemap_data` again.
171 171 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
172 172 self.nt(py).borrow_mut().take();
173 173 self.docket(py).borrow_mut().take();
174 174 self.mmap(py).borrow_mut().take();
175 175 self.call_cindex(py, "clearcaches", args, kw)
176 176 }
177 177
178 178 /// return the raw binary string representing a revision
179 179 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
180 180 self.call_cindex(py, "entry_binary", args, kw)
181 181 }
182 182
183 183 /// return a binary packed version of the header
184 184 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
185 185 self.call_cindex(py, "pack_header", args, kw)
186 186 }
187 187
188 188 /// get an index entry
189 189 def get(&self, *args, **kw) -> PyResult<PyObject> {
190 190 self.call_cindex(py, "get", args, kw)
191 191 }
192 192
193 193 /// compute phases
194 194 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
195 195 self.call_cindex(py, "computephasesmapsets", args, kw)
196 196 }
197 197
198 198 /// reachableroots
199 199 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
200 200 self.call_cindex(py, "reachableroots2", args, kw)
201 201 }
202 202
203 203 /// get head revisions
204 204 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
205 205 self.call_cindex(py, "headrevs", args, kw)
206 206 }
207 207
208 208 /// get filtered head revisions
209 209 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
210 210 self.call_cindex(py, "headrevsfiltered", args, kw)
211 211 }
212 212
213 213 /// True if the object is a snapshot
214 214 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
215 215 self.call_cindex(py, "issnapshot", args, kw)
216 216 }
217 217
218 218 /// Gather snapshot data in a cache dict
219 219 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
220 220 self.call_cindex(py, "findsnapshots", args, kw)
221 221 }
222 222
223 223 /// determine revisions with deltas to reconstruct fulltext
224 224 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
225 225 self.call_cindex(py, "deltachain", args, kw)
226 226 }
227 227
228 228 /// slice planned chunk read to reach a density threshold
229 229 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
230 230 self.call_cindex(py, "slicechunktodensity", args, kw)
231 231 }
232 232
233 233 /// stats for the index
234 234 def stats(&self, *args, **kw) -> PyResult<PyObject> {
235 235 self.call_cindex(py, "stats", args, kw)
236 236 }
237 237
238 238 // index_sequence_methods and index_mapping_methods.
239 239 //
240 240 // Since we call back through the high level Python API,
241 241 // there's no point making a distinction between index_get
242 242 // and index_getitem.
243 243
244 244 def __len__(&self) -> PyResult<usize> {
245 245 self.cindex(py).borrow().inner().len(py)
246 246 }
247 247
248 248 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
249 249 // this conversion seems needless, but that's actually because
250 250 // `index_getitem` does not handle conversion from PyLong,
251 251 // which expressions such as [e for e in index] internally use.
252 252 // Note that we don't seem to have a direct way to call
253 253 // PySequence_GetItem (does the job), which would possibly be better
254 254 // for performance
255 let key = match key.extract::<Revision>(py) {
255 let key = match key.extract::<i32>(py) {
256 256 Ok(rev) => rev.to_py_object(py).into_object(),
257 257 Err(_) => key,
258 258 };
259 259 self.cindex(py).borrow().inner().get_item(py, key)
260 260 }
261 261
262 262 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
263 263 self.cindex(py).borrow().inner().set_item(py, key, value)
264 264 }
265 265
266 266 def __contains__(&self, item: PyObject) -> PyResult<bool> {
267 267 // ObjectProtocol does not seem to provide contains(), so
268 268 // this is an equivalent implementation of the index_contains()
269 269 // defined in revlog.c
270 270 let cindex = self.cindex(py).borrow();
271 match item.extract::<Revision>(py) {
271 match item.extract::<i32>(py) {
272 272 Ok(rev) => {
273 273 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
274 274 }
275 275 Err(_) => {
276 276 cindex.inner().call_method(
277 277 py,
278 278 "has_node",
279 279 PyTuple::new(py, &[item]),
280 280 None)?
281 281 .extract(py)
282 282 }
283 283 }
284 284 }
285 285
286 286 def nodemap_data_all(&self) -> PyResult<PyBytes> {
287 287 self.inner_nodemap_data_all(py)
288 288 }
289 289
290 290 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
291 291 self.inner_nodemap_data_incremental(py)
292 292 }
293 293 def update_nodemap_data(
294 294 &self,
295 295 docket: PyObject,
296 296 nm_data: PyObject
297 297 ) -> PyResult<PyObject> {
298 298 self.inner_update_nodemap_data(py, docket, nm_data)
299 299 }
300 300
301 301 @property
302 302 def entry_size(&self) -> PyResult<PyInt> {
303 303 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
304 304 }
305 305
306 306 @property
307 307 def rust_ext_compat(&self) -> PyResult<PyInt> {
308 308 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
309 309 }
310 310
311 311 });
312 312
313 313 impl MixedIndex {
314 314 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
315 315 Self::create_instance(
316 316 py,
317 317 RefCell::new(cindex::Index::new(py, cindex)?),
318 318 RefCell::new(None),
319 319 RefCell::new(None),
320 320 RefCell::new(None),
321 321 )
322 322 }
323 323
324 324 /// This is scaffolding at this point, but it could also become
325 325 /// a way to start a persistent nodemap or perform a
326 326 /// vacuum / repack operation
327 327 fn fill_nodemap(
328 328 &self,
329 329 py: Python,
330 330 nt: &mut NodeTree,
331 331 ) -> PyResult<PyObject> {
332 332 let index = self.cindex(py).borrow();
333 333 for r in 0..index.len() {
334 334 let rev = r as Revision;
335 335 // in this case node() won't ever return None
336 336 nt.insert(&*index, index.node(rev).unwrap(), rev)
337 337 .map_err(|e| nodemap_error(py, e))?
338 338 }
339 339 Ok(py.None())
340 340 }
341 341
342 342 fn get_nodetree<'a>(
343 343 &'a self,
344 344 py: Python<'a>,
345 345 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
346 346 if self.nt(py).borrow().is_none() {
347 347 let readonly = Box::new(Vec::new());
348 348 let mut nt = NodeTree::load_bytes(readonly, 0);
349 349 self.fill_nodemap(py, &mut nt)?;
350 350 self.nt(py).borrow_mut().replace(nt);
351 351 }
352 352 Ok(self.nt(py))
353 353 }
354 354
355 355 /// forward a method call to the underlying C index
356 356 fn call_cindex(
357 357 &self,
358 358 py: Python,
359 359 name: &str,
360 360 args: &PyTuple,
361 361 kwargs: Option<&PyDict>,
362 362 ) -> PyResult<PyObject> {
363 363 self.cindex(py)
364 364 .borrow()
365 365 .inner()
366 366 .call_method(py, name, args, kwargs)
367 367 }
368 368
369 369 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
370 370 self.cindex(py).borrow().clone_ref(py)
371 371 }
372 372
373 373 /// Returns the full nodemap bytes to be written as-is to disk
374 374 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
375 375 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
376 376 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
377 377
378 378 // If there's anything readonly, we need to build the data again from
379 379 // scratch
380 380 let bytes = if readonly.len() > 0 {
381 381 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
382 382 self.fill_nodemap(py, &mut nt)?;
383 383
384 384 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
385 385 assert_eq!(readonly.len(), 0);
386 386
387 387 bytes
388 388 } else {
389 389 bytes
390 390 };
391 391
392 392 let bytes = PyBytes::new(py, &bytes);
393 393 Ok(bytes)
394 394 }
395 395
396 396 /// Returns the last saved docket along with the size of any changed data
397 397 /// (in number of blocks), and said data as bytes.
398 398 fn inner_nodemap_data_incremental(
399 399 &self,
400 400 py: Python,
401 401 ) -> PyResult<PyObject> {
402 402 let docket = self.docket(py).borrow();
403 403 let docket = match docket.as_ref() {
404 404 Some(d) => d,
405 405 None => return Ok(py.None()),
406 406 };
407 407
408 408 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
409 409 let masked_blocks = node_tree.masked_readonly_blocks();
410 410 let (_, data) = node_tree.into_readonly_and_added_bytes();
411 411 let changed = masked_blocks * std::mem::size_of::<Block>();
412 412
413 413 Ok((docket, changed, PyBytes::new(py, &data))
414 414 .to_py_object(py)
415 415 .into_object())
416 416 }
417 417
418 418 /// Update the nodemap from the new (mmaped) data.
419 419 /// The docket is kept as a reference for later incremental calls.
420 420 fn inner_update_nodemap_data(
421 421 &self,
422 422 py: Python,
423 423 docket: PyObject,
424 424 nm_data: PyObject,
425 425 ) -> PyResult<PyObject> {
426 426 let buf = PyBuffer::get(py, &nm_data)?;
427 427 let len = buf.item_count();
428 428
429 429 // Build a slice from the mmap'ed buffer data
430 430 let cbuf = buf.buf_ptr();
431 431 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
432 432 && buf.is_c_contiguous()
433 433 && u8::is_compatible_format(buf.format())
434 434 {
435 435 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
436 436 } else {
437 437 return Err(PyErr::new::<ValueError, _>(
438 438 py,
439 439 "Nodemap data buffer has an invalid memory representation"
440 440 .to_string(),
441 441 ));
442 442 };
443 443
444 444 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
445 445 // pointer.
446 446 self.mmap(py).borrow_mut().replace(buf);
447 447
448 448 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
449 449
450 450 let data_tip =
451 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
451 docket.getattr(py, "tip_rev")?.extract::<i32>(py)?.into();
452 452 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
453 453 let idx = self.cindex(py).borrow();
454 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
455 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
456 })?;
454 457 let current_tip = idx.len();
455 458
456 459 for r in (data_tip + 1)..current_tip as Revision {
457 460 let rev = r as Revision;
458 461 // in this case node() won't ever return None
459 462 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
460 463 .map_err(|e| nodemap_error(py, e))?
461 464 }
462 465
463 466 *self.nt(py).borrow_mut() = Some(nt);
464 467
465 468 Ok(py.None())
466 469 }
467 470 }
468 471
469 472 fn revlog_error(py: Python) -> PyErr {
470 473 match py
471 474 .import("mercurial.error")
472 475 .and_then(|m| m.get(py, "RevlogError"))
473 476 {
474 477 Err(e) => e,
475 478 Ok(cls) => PyErr::from_instance(
476 479 py,
477 480 cls.call(py, (py.None(),), None).ok().into_py_object(py),
478 481 ),
479 482 }
480 483 }
481 484
482 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
485 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
483 486 PyErr::new::<ValueError, _>(
484 487 py,
485 488 format!(
486 489 "Inconsistency: Revision {} found in nodemap \
487 490 is not in revlog index",
488 491 rev
489 492 ),
490 493 )
491 494 }
492 495
493 496 /// Standard treatment of NodeMapError
494 497 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
495 498 match err {
496 499 NodeMapError::MultipleResults => revlog_error(py),
497 500 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
498 501 }
499 502 }
500 503
501 504 /// Create the module, with __package__ given from parent
502 505 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
503 506 let dotted_name = &format!("{}.revlog", package);
504 507 let m = PyModule::new(py, dotted_name)?;
505 508 m.add(py, "__package__", package)?;
506 509 m.add(py, "__doc__", "RevLog - Rust implementations")?;
507 510
508 511 m.add_class::<MixedIndex>(py)?;
509 512
510 513 let sys = PyModule::import(py, "sys")?;
511 514 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
512 515 sys_modules.set_item(py, dotted_name, &m)?;
513 516
514 517 Ok(m)
515 518 }
General Comments 0
You need to be logged in to leave comments. Login now