##// END OF EJS Templates
rust-revlog: make `Changelog` and `ManifestLog` unaware of `Repo`...
Martin von Zweigbergk -
r49981:a5ef50be default
parent child Browse files
Show More
@@ -1,509 +1,523
1 1 use crate::changelog::Changelog;
2 2 use crate::config::{Config, ConfigError, ConfigParseError};
3 3 use crate::dirstate::DirstateParents;
4 4 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
5 5 use crate::dirstate_tree::owning::OwningDirstateMap;
6 6 use crate::errors::HgResultExt;
7 7 use crate::errors::{HgError, IoResultExt};
8 8 use crate::lock::{try_with_lock_no_wait, LockError};
9 9 use crate::manifest::{Manifest, Manifestlog};
10 10 use crate::revlog::filelog::Filelog;
11 11 use crate::revlog::revlog::RevlogError;
12 12 use crate::utils::files::get_path_from_bytes;
13 13 use crate::utils::hg_path::HgPath;
14 14 use crate::utils::SliceExt;
15 15 use crate::vfs::{is_dir, is_file, Vfs};
16 16 use crate::{requirements, NodePrefix};
17 17 use crate::{DirstateError, Revision};
18 18 use std::cell::{Ref, RefCell, RefMut};
19 19 use std::collections::HashSet;
20 20 use std::io::Seek;
21 21 use std::io::SeekFrom;
22 22 use std::io::Write as IoWrite;
23 23 use std::path::{Path, PathBuf};
24 24
25 25 /// A repository on disk
26 26 pub struct Repo {
27 27 working_directory: PathBuf,
28 28 dot_hg: PathBuf,
29 29 store: PathBuf,
30 30 requirements: HashSet<String>,
31 31 config: Config,
32 32 dirstate_parents: LazyCell<DirstateParents, HgError>,
33 33 dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>, HgError>,
34 34 dirstate_map: LazyCell<OwningDirstateMap, DirstateError>,
35 35 changelog: LazyCell<Changelog, HgError>,
36 36 manifestlog: LazyCell<Manifestlog, HgError>,
37 37 }
38 38
39 39 #[derive(Debug, derive_more::From)]
40 40 pub enum RepoError {
41 41 NotFound {
42 42 at: PathBuf,
43 43 },
44 44 #[from]
45 45 ConfigParseError(ConfigParseError),
46 46 #[from]
47 47 Other(HgError),
48 48 }
49 49
50 50 impl From<ConfigError> for RepoError {
51 51 fn from(error: ConfigError) -> Self {
52 52 match error {
53 53 ConfigError::Parse(error) => error.into(),
54 54 ConfigError::Other(error) => error.into(),
55 55 }
56 56 }
57 57 }
58 58
59 59 impl Repo {
60 60 /// tries to find nearest repository root in current working directory or
61 61 /// its ancestors
62 62 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
63 63 let current_directory = crate::utils::current_dir()?;
64 64 // ancestors() is inclusive: it first yields `current_directory`
65 65 // as-is.
66 66 for ancestor in current_directory.ancestors() {
67 67 if is_dir(ancestor.join(".hg"))? {
68 68 return Ok(ancestor.to_path_buf());
69 69 }
70 70 }
71 71 return Err(RepoError::NotFound {
72 72 at: current_directory,
73 73 });
74 74 }
75 75
76 76 /// Find a repository, either at the given path (which must contain a `.hg`
77 77 /// sub-directory) or by searching the current directory and its
78 78 /// ancestors.
79 79 ///
80 80 /// A method with two very different "modes" like this usually a code smell
81 81 /// to make two methods instead, but in this case an `Option` is what rhg
82 82 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
83 83 /// Having two methods would just move that `if` to almost all callers.
84 84 pub fn find(
85 85 config: &Config,
86 86 explicit_path: Option<PathBuf>,
87 87 ) -> Result<Self, RepoError> {
88 88 if let Some(root) = explicit_path {
89 89 if is_dir(root.join(".hg"))? {
90 90 Self::new_at_path(root.to_owned(), config)
91 91 } else if is_file(&root)? {
92 92 Err(HgError::unsupported("bundle repository").into())
93 93 } else {
94 94 Err(RepoError::NotFound {
95 95 at: root.to_owned(),
96 96 })
97 97 }
98 98 } else {
99 99 let root = Self::find_repo_root()?;
100 100 Self::new_at_path(root, config)
101 101 }
102 102 }
103 103
104 104 /// To be called after checking that `.hg` is a sub-directory
105 105 fn new_at_path(
106 106 working_directory: PathBuf,
107 107 config: &Config,
108 108 ) -> Result<Self, RepoError> {
109 109 let dot_hg = working_directory.join(".hg");
110 110
111 111 let mut repo_config_files = Vec::new();
112 112 repo_config_files.push(dot_hg.join("hgrc"));
113 113 repo_config_files.push(dot_hg.join("hgrc-not-shared"));
114 114
115 115 let hg_vfs = Vfs { base: &dot_hg };
116 116 let mut reqs = requirements::load_if_exists(hg_vfs)?;
117 117 let relative =
118 118 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
119 119 let shared =
120 120 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
121 121
122 122 // From `mercurial/localrepo.py`:
123 123 //
124 124 // if .hg/requires contains the sharesafe requirement, it means
125 125 // there exists a `.hg/store/requires` too and we should read it
126 126 // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
127 127 // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
128 128 // is not present, refer checkrequirementscompat() for that
129 129 //
130 130 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
131 131 // repository was shared the old way. We check the share source
132 132 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
133 133 // current repository needs to be reshared
134 134 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
135 135
136 136 let store_path;
137 137 if !shared {
138 138 store_path = dot_hg.join("store");
139 139 } else {
140 140 let bytes = hg_vfs.read("sharedpath")?;
141 141 let mut shared_path =
142 142 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
143 143 .to_owned();
144 144 if relative {
145 145 shared_path = dot_hg.join(shared_path)
146 146 }
147 147 if !is_dir(&shared_path)? {
148 148 return Err(HgError::corrupted(format!(
149 149 ".hg/sharedpath points to nonexistent directory {}",
150 150 shared_path.display()
151 151 ))
152 152 .into());
153 153 }
154 154
155 155 store_path = shared_path.join("store");
156 156
157 157 let source_is_share_safe =
158 158 requirements::load(Vfs { base: &shared_path })?
159 159 .contains(requirements::SHARESAFE_REQUIREMENT);
160 160
161 161 if share_safe != source_is_share_safe {
162 162 return Err(HgError::unsupported("share-safe mismatch").into());
163 163 }
164 164
165 165 if share_safe {
166 166 repo_config_files.insert(0, shared_path.join("hgrc"))
167 167 }
168 168 }
169 169 if share_safe {
170 170 reqs.extend(requirements::load(Vfs { base: &store_path })?);
171 171 }
172 172
173 173 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
174 174 config.combine_with_repo(&repo_config_files)?
175 175 } else {
176 176 config.clone()
177 177 };
178 178
179 179 let repo = Self {
180 180 requirements: reqs,
181 181 working_directory,
182 182 store: store_path,
183 183 dot_hg,
184 184 config: repo_config,
185 185 dirstate_parents: LazyCell::new(Self::read_dirstate_parents),
186 186 dirstate_data_file_uuid: LazyCell::new(
187 187 Self::read_dirstate_data_file_uuid,
188 188 ),
189 189 dirstate_map: LazyCell::new(Self::new_dirstate_map),
190 changelog: LazyCell::new(Changelog::open),
191 manifestlog: LazyCell::new(Manifestlog::open),
190 changelog: LazyCell::new(Self::new_changelog),
191 manifestlog: LazyCell::new(Self::new_manifestlog),
192 192 };
193 193
194 194 requirements::check(&repo)?;
195 195
196 196 Ok(repo)
197 197 }
198 198
199 199 pub fn working_directory_path(&self) -> &Path {
200 200 &self.working_directory
201 201 }
202 202
203 203 pub fn requirements(&self) -> &HashSet<String> {
204 204 &self.requirements
205 205 }
206 206
207 207 pub fn config(&self) -> &Config {
208 208 &self.config
209 209 }
210 210
211 211 /// For accessing repository files (in `.hg`), except for the store
212 212 /// (`.hg/store`).
213 213 pub fn hg_vfs(&self) -> Vfs<'_> {
214 214 Vfs { base: &self.dot_hg }
215 215 }
216 216
217 217 /// For accessing repository store files (in `.hg/store`)
218 218 pub fn store_vfs(&self) -> Vfs<'_> {
219 219 Vfs { base: &self.store }
220 220 }
221 221
222 222 /// For accessing the working copy
223 223 pub fn working_directory_vfs(&self) -> Vfs<'_> {
224 224 Vfs {
225 225 base: &self.working_directory,
226 226 }
227 227 }
228 228
229 229 pub fn try_with_wlock_no_wait<R>(
230 230 &self,
231 231 f: impl FnOnce() -> R,
232 232 ) -> Result<R, LockError> {
233 233 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
234 234 }
235 235
236 236 pub fn has_dirstate_v2(&self) -> bool {
237 237 self.requirements
238 238 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
239 239 }
240 240
241 241 pub fn has_sparse(&self) -> bool {
242 242 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
243 243 }
244 244
245 245 pub fn has_narrow(&self) -> bool {
246 246 self.requirements.contains(requirements::NARROW_REQUIREMENT)
247 247 }
248 248
249 249 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
250 250 Ok(self
251 251 .hg_vfs()
252 252 .read("dirstate")
253 253 .io_not_found_as_none()?
254 254 .unwrap_or(Vec::new()))
255 255 }
256 256
257 257 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
258 258 Ok(*self.dirstate_parents.get_or_init(self)?)
259 259 }
260 260
261 261 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
262 262 let dirstate = self.dirstate_file_contents()?;
263 263 let parents = if dirstate.is_empty() {
264 264 if self.has_dirstate_v2() {
265 265 self.dirstate_data_file_uuid.set(None);
266 266 }
267 267 DirstateParents::NULL
268 268 } else if self.has_dirstate_v2() {
269 269 let docket =
270 270 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
271 271 self.dirstate_data_file_uuid
272 272 .set(Some(docket.uuid.to_owned()));
273 273 docket.parents()
274 274 } else {
275 275 crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
276 276 .clone()
277 277 };
278 278 self.dirstate_parents.set(parents);
279 279 Ok(parents)
280 280 }
281 281
282 282 fn read_dirstate_data_file_uuid(
283 283 &self,
284 284 ) -> Result<Option<Vec<u8>>, HgError> {
285 285 assert!(
286 286 self.has_dirstate_v2(),
287 287 "accessing dirstate data file ID without dirstate-v2"
288 288 );
289 289 let dirstate = self.dirstate_file_contents()?;
290 290 if dirstate.is_empty() {
291 291 self.dirstate_parents.set(DirstateParents::NULL);
292 292 Ok(None)
293 293 } else {
294 294 let docket =
295 295 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
296 296 self.dirstate_parents.set(docket.parents());
297 297 Ok(Some(docket.uuid.to_owned()))
298 298 }
299 299 }
300 300
301 301 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
302 302 let dirstate_file_contents = self.dirstate_file_contents()?;
303 303 if dirstate_file_contents.is_empty() {
304 304 self.dirstate_parents.set(DirstateParents::NULL);
305 305 if self.has_dirstate_v2() {
306 306 self.dirstate_data_file_uuid.set(None);
307 307 }
308 308 Ok(OwningDirstateMap::new_empty(Vec::new()))
309 309 } else if self.has_dirstate_v2() {
310 310 let docket = crate::dirstate_tree::on_disk::read_docket(
311 311 &dirstate_file_contents,
312 312 )?;
313 313 self.dirstate_parents.set(docket.parents());
314 314 self.dirstate_data_file_uuid
315 315 .set(Some(docket.uuid.to_owned()));
316 316 let data_size = docket.data_size();
317 317 let metadata = docket.tree_metadata();
318 318 if let Some(data_mmap) = self
319 319 .hg_vfs()
320 320 .mmap_open(docket.data_filename())
321 321 .io_not_found_as_none()?
322 322 {
323 323 OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
324 324 } else {
325 325 OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
326 326 }
327 327 } else {
328 328 let (map, parents) =
329 329 OwningDirstateMap::new_v1(dirstate_file_contents)?;
330 330 self.dirstate_parents.set(parents);
331 331 Ok(map)
332 332 }
333 333 }
334 334
335 335 pub fn dirstate_map(
336 336 &self,
337 337 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
338 338 self.dirstate_map.get_or_init(self)
339 339 }
340 340
341 341 pub fn dirstate_map_mut(
342 342 &self,
343 343 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
344 344 self.dirstate_map.get_mut_or_init(self)
345 345 }
346 346
347 fn new_changelog(&self) -> Result<Changelog, HgError> {
348 let use_nodemap = self
349 .requirements
350 .contains(requirements::NODEMAP_REQUIREMENT);
351 Changelog::open(&self.store_vfs(), use_nodemap)
352 }
353
347 354 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
348 355 self.changelog.get_or_init(self)
349 356 }
350 357
351 358 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
352 359 self.changelog.get_mut_or_init(self)
353 360 }
354 361
362 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
363 let use_nodemap = self
364 .requirements
365 .contains(requirements::NODEMAP_REQUIREMENT);
366 Manifestlog::open(&self.store_vfs(), use_nodemap)
367 }
368
355 369 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
356 370 self.manifestlog.get_or_init(self)
357 371 }
358 372
359 373 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
360 374 self.manifestlog.get_mut_or_init(self)
361 375 }
362 376
363 377 /// Returns the manifest of the *changeset* with the given node ID
364 378 pub fn manifest_for_node(
365 379 &self,
366 380 node: impl Into<NodePrefix>,
367 381 ) -> Result<Manifest, RevlogError> {
368 382 self.manifestlog()?.data_for_node(
369 383 self.changelog()?
370 384 .data_for_node(node.into())?
371 385 .manifest_node()?
372 386 .into(),
373 387 )
374 388 }
375 389
376 390 /// Returns the manifest of the *changeset* with the given revision number
377 391 pub fn manifest_for_rev(
378 392 &self,
379 393 revision: Revision,
380 394 ) -> Result<Manifest, RevlogError> {
381 395 self.manifestlog()?.data_for_node(
382 396 self.changelog()?
383 397 .data_for_rev(revision)?
384 398 .manifest_node()?
385 399 .into(),
386 400 )
387 401 }
388 402
389 403 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
390 404 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
391 405 Ok(entry.state().is_tracked())
392 406 } else {
393 407 Ok(false)
394 408 }
395 409 }
396 410
397 411 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
398 412 Filelog::open(self, path)
399 413 }
400 414
401 415 /// Write to disk any updates that were made through `dirstate_map_mut`.
402 416 ///
403 417 /// The "wlock" must be held while calling this.
404 418 /// See for example `try_with_wlock_no_wait`.
405 419 ///
406 420 /// TODO: have a `WritableRepo` type only accessible while holding the
407 421 /// lock?
408 422 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
409 423 let map = self.dirstate_map()?;
410 424 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
411 425 // it’s unset
412 426 let parents = self.dirstate_parents()?;
413 427 let packed_dirstate = if self.has_dirstate_v2() {
414 428 let uuid = self.dirstate_data_file_uuid.get_or_init(self)?;
415 429 let mut uuid = uuid.as_ref();
416 430 let can_append = uuid.is_some();
417 431 let (data, tree_metadata, append) = map.pack_v2(can_append)?;
418 432 if !append {
419 433 uuid = None
420 434 }
421 435 let uuid = if let Some(uuid) = uuid {
422 436 std::str::from_utf8(uuid)
423 437 .map_err(|_| {
424 438 HgError::corrupted("non-UTF-8 dirstate data file ID")
425 439 })?
426 440 .to_owned()
427 441 } else {
428 442 DirstateDocket::new_uid()
429 443 };
430 444 let data_filename = format!("dirstate.{}", uuid);
431 445 let data_filename = self.hg_vfs().join(data_filename);
432 446 let mut options = std::fs::OpenOptions::new();
433 447 if append {
434 448 options.append(true);
435 449 } else {
436 450 options.write(true).create_new(true);
437 451 }
438 452 let data_size = (|| {
439 453 // TODO: loop and try another random ID if !append and this
440 454 // returns `ErrorKind::AlreadyExists`? Collision chance of two
441 455 // random IDs is one in 2**32
442 456 let mut file = options.open(&data_filename)?;
443 457 file.write_all(&data)?;
444 458 file.flush()?;
445 459 // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
446 460 file.seek(SeekFrom::Current(0))
447 461 })()
448 462 .when_writing_file(&data_filename)?;
449 463 DirstateDocket::serialize(
450 464 parents,
451 465 tree_metadata,
452 466 data_size,
453 467 uuid.as_bytes(),
454 468 )
455 469 .map_err(|_: std::num::TryFromIntError| {
456 470 HgError::corrupted("overflow in dirstate docket serialization")
457 471 })?
458 472 } else {
459 473 map.pack_v1(parents)?
460 474 };
461 475 self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?;
462 476 Ok(())
463 477 }
464 478 }
465 479
466 480 /// Lazily-initialized component of `Repo` with interior mutability
467 481 ///
468 482 /// This differs from `OnceCell` in that the value can still be "deinitialized"
469 483 /// later by setting its inner `Option` to `None`.
470 484 struct LazyCell<T, E> {
471 485 value: RefCell<Option<T>>,
472 486 // `Fn`s that don’t capture environment are zero-size, so this box does
473 487 // not allocate:
474 488 init: Box<dyn Fn(&Repo) -> Result<T, E>>,
475 489 }
476 490
477 491 impl<T, E> LazyCell<T, E> {
478 492 fn new(init: impl Fn(&Repo) -> Result<T, E> + 'static) -> Self {
479 493 Self {
480 494 value: RefCell::new(None),
481 495 init: Box::new(init),
482 496 }
483 497 }
484 498
485 499 fn set(&self, value: T) {
486 500 *self.value.borrow_mut() = Some(value)
487 501 }
488 502
489 503 fn get_or_init(&self, repo: &Repo) -> Result<Ref<T>, E> {
490 504 let mut borrowed = self.value.borrow();
491 505 if borrowed.is_none() {
492 506 drop(borrowed);
493 507 // Only use `borrow_mut` if it is really needed to avoid panic in
494 508 // case there is another outstanding borrow but mutation is not
495 509 // needed.
496 510 *self.value.borrow_mut() = Some((self.init)(repo)?);
497 511 borrowed = self.value.borrow()
498 512 }
499 513 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
500 514 }
501 515
502 516 fn get_mut_or_init(&self, repo: &Repo) -> Result<RefMut<T>, E> {
503 517 let mut borrowed = self.value.borrow_mut();
504 518 if borrowed.is_none() {
505 519 *borrowed = Some((self.init)(repo)?);
506 520 }
507 521 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
508 522 }
509 523 }
@@ -1,277 +1,269
1 1 use crate::errors::HgError;
2 use crate::repo::Repo;
3 use crate::requirements;
4 2 use crate::revlog::revlog::{Revlog, RevlogEntry, RevlogError};
5 3 use crate::revlog::Revision;
6 4 use crate::revlog::{Node, NodePrefix};
7 5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
8 7 use itertools::Itertools;
9 8 use std::ascii::escape_default;
10 9 use std::fmt::{Debug, Formatter};
11 10
12 11 /// A specialized `Revlog` to work with `changelog` data format.
13 12 pub struct Changelog {
14 13 /// The generic `revlog` format.
15 14 pub(crate) revlog: Revlog,
16 15 }
17 16
18 17 impl Changelog {
19 18 /// Open the `changelog` of a repository given by its root.
20 pub fn open(repo: &Repo) -> Result<Self, HgError> {
21 let use_nodemap = repo
22 .requirements()
23 .contains(requirements::NODEMAP_REQUIREMENT);
24 let revlog = Revlog::open(
25 &repo.store_vfs(),
26 "00changelog.i",
27 None,
28 use_nodemap,
29 )?;
19 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
20 let revlog =
21 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
30 22 Ok(Self { revlog })
31 23 }
32 24
33 25 /// Return the `ChangelogEntry` for the given node ID.
34 26 pub fn data_for_node(
35 27 &self,
36 28 node: NodePrefix,
37 29 ) -> Result<ChangelogRevisionData, RevlogError> {
38 30 let rev = self.revlog.rev_from_node(node)?;
39 31 self.data_for_rev(rev)
40 32 }
41 33
42 34 /// Return the `RevlogEntry` of the given revision number.
43 35 pub fn entry_for_rev(
44 36 &self,
45 37 rev: Revision,
46 38 ) -> Result<RevlogEntry, RevlogError> {
47 39 self.revlog.get_entry(rev)
48 40 }
49 41
50 42 /// Return the `ChangelogEntry` of the given revision number.
51 43 pub fn data_for_rev(
52 44 &self,
53 45 rev: Revision,
54 46 ) -> Result<ChangelogRevisionData, RevlogError> {
55 47 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
56 48 if bytes.is_empty() {
57 49 Ok(ChangelogRevisionData::null())
58 50 } else {
59 51 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
60 52 RevlogError::Other(HgError::CorruptedRepository(format!(
61 53 "Invalid changelog data for revision {}: {:?}",
62 54 rev, err
63 55 )))
64 56 })?)
65 57 }
66 58 }
67 59
68 60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
69 61 self.revlog.node_from_rev(rev)
70 62 }
71 63
72 64 pub fn rev_from_node(
73 65 &self,
74 66 node: NodePrefix,
75 67 ) -> Result<Revision, RevlogError> {
76 68 self.revlog.rev_from_node(node)
77 69 }
78 70 }
79 71
/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
///
/// The raw bytes are kept as-is; the other fields are offsets into them,
/// computed once when the data is parsed.
#[derive(PartialEq)]
pub struct ChangelogRevisionData {
    /// The data bytes of the `changelog` entry.
    bytes: Vec<u8>,
    /// The end offset for the hex manifest (not including the newline)
    manifest_end: usize,
    /// The end offset for the user+email (not including the newline)
    user_end: usize,
    /// The end offset for the timestamp+timezone+extras (not including the
    /// newline)
    timestamp_end: usize,
    /// The end offset for the file list (not including the newline). Equal
    /// to `timestamp_end` when the file list is empty.
    files_end: usize,
}
95 87
96 88 impl ChangelogRevisionData {
97 89 fn new(bytes: Vec<u8>) -> Result<Self, HgError> {
98 90 let mut line_iter = bytes.split(|b| b == &b'\n');
99 91 let manifest_end = line_iter
100 92 .next()
101 93 .expect("Empty iterator from split()?")
102 94 .len();
103 95 let user_slice = line_iter.next().ok_or_else(|| {
104 96 HgError::corrupted("Changeset data truncated after manifest line")
105 97 })?;
106 98 let user_end = manifest_end + 1 + user_slice.len();
107 99 let timestamp_slice = line_iter.next().ok_or_else(|| {
108 100 HgError::corrupted("Changeset data truncated after user line")
109 101 })?;
110 102 let timestamp_end = user_end + 1 + timestamp_slice.len();
111 103 let mut files_end = timestamp_end + 1;
112 104 loop {
113 105 let line = line_iter.next().ok_or_else(|| {
114 106 HgError::corrupted("Changeset data truncated in files list")
115 107 })?;
116 108 if line.is_empty() {
117 109 if files_end == bytes.len() {
118 110 // The list of files ended with a single newline (there
119 111 // should be two)
120 112 return Err(HgError::corrupted(
121 113 "Changeset data truncated after files list",
122 114 ));
123 115 }
124 116 files_end -= 1;
125 117 break;
126 118 }
127 119 files_end += line.len() + 1;
128 120 }
129 121
130 122 Ok(Self {
131 123 bytes,
132 124 manifest_end,
133 125 user_end,
134 126 timestamp_end,
135 127 files_end,
136 128 })
137 129 }
138 130
139 131 fn null() -> Self {
140 132 Self::new(
141 133 b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
142 134 )
143 135 .unwrap()
144 136 }
145 137
146 138 /// Return an iterator over the lines of the entry.
147 139 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
148 140 self.bytes.split(|b| b == &b'\n')
149 141 }
150 142
151 143 /// Return the node id of the `manifest` referenced by this `changelog`
152 144 /// entry.
153 145 pub fn manifest_node(&self) -> Result<Node, HgError> {
154 146 let manifest_node_hex = &self.bytes[..self.manifest_end];
155 147 Node::from_hex_for_repo(manifest_node_hex)
156 148 }
157 149
158 150 /// The full user string (usually a name followed by an email enclosed in
159 151 /// angle brackets)
160 152 pub fn user(&self) -> &[u8] {
161 153 &self.bytes[self.manifest_end + 1..self.user_end]
162 154 }
163 155
164 156 /// The full timestamp line (timestamp in seconds, offset in seconds, and
165 157 /// possibly extras)
166 158 // TODO: We should expose this in a more useful way
167 159 pub fn timestamp_line(&self) -> &[u8] {
168 160 &self.bytes[self.user_end + 1..self.timestamp_end]
169 161 }
170 162
171 163 /// The files changed in this revision.
172 164 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
173 165 self.bytes[self.timestamp_end + 1..self.files_end]
174 166 .split(|b| b == &b'\n')
175 167 .map(|path| HgPath::new(path))
176 168 }
177 169
178 170 /// The change description.
179 171 pub fn description(&self) -> &[u8] {
180 172 &self.bytes[self.files_end + 2..]
181 173 }
182 174 }
183 175
184 176 impl Debug for ChangelogRevisionData {
185 177 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
186 178 f.debug_struct("ChangelogRevisionData")
187 179 .field("bytes", &debug_bytes(&self.bytes))
188 180 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
189 181 .field(
190 182 "user",
191 183 &debug_bytes(
192 184 &self.bytes[self.manifest_end + 1..self.user_end],
193 185 ),
194 186 )
195 187 .field(
196 188 "timestamp",
197 189 &debug_bytes(
198 190 &self.bytes[self.user_end + 1..self.timestamp_end],
199 191 ),
200 192 )
201 193 .field(
202 194 "files",
203 195 &debug_bytes(
204 196 &self.bytes[self.timestamp_end + 1..self.files_end],
205 197 ),
206 198 )
207 199 .field(
208 200 "description",
209 201 &debug_bytes(&self.bytes[self.files_end + 2..]),
210 202 )
211 203 .finish()
212 204 }
213 205 }
214 206
/// Render `bytes` as printable ASCII, escaping each byte the way
/// `std::ascii::escape_default` does.
fn debug_bytes(bytes: &[u8]) -> String {
    // Escaped output is pure ASCII, so each byte maps to exactly one
    // `char`: no intermediate buffer or UTF-8 validation is needed.
    bytes
        .iter()
        .flat_map(|&byte| escape_default(byte))
        .map(char::from)
        .collect()
}
221 213
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Truncated changeset data must be rejected, wherever it is cut.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(b"".to_vec()).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(b"abcd\n".to_vec()).is_err());
        // No newline after timestamp
        assert!(ChangelogRevisionData::new(b"abcd\n\n0 0".to_vec()).is_err());
        // Missing newline after files
        assert!(ChangelogRevisionData::new(
            b"abcd\n\n0 0\nfile1\nfile2".to_vec()
        )
        .is_err(),);
        // Only one newline after files
        assert!(ChangelogRevisionData::new(
            b"abcd\n\n0 0\nfile1\nfile2\n".to_vec()
        )
        .is_err(),);
    }

    /// Well-formed changeset data is split into its sections.
    #[test]
    fn test_create_changelogrevisiondata() {
        let data = ChangelogRevisionData::new(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message"
                .to_vec(),
        )
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }
}
@@ -1,202 +1,194
1 1 use crate::errors::HgError;
2 use crate::repo::Repo;
3 use crate::requirements;
4 2 use crate::revlog::revlog::{Revlog, RevlogError};
5 3 use crate::revlog::Revision;
6 4 use crate::revlog::{Node, NodePrefix};
7 5 use crate::utils::hg_path::HgPath;
8 6 use crate::utils::SliceExt;
7 use crate::vfs::Vfs;
9 8
10 9 /// A specialized `Revlog` to work with `manifest` data format.
11 10 pub struct Manifestlog {
12 11 /// The generic `revlog` format.
13 12 revlog: Revlog,
14 13 }
15 14
16 15 impl Manifestlog {
17 16 /// Open the `manifest` of a repository given by its root.
18 pub fn open(repo: &Repo) -> Result<Self, HgError> {
19 let use_nodemap = repo
20 .requirements()
21 .contains(requirements::NODEMAP_REQUIREMENT);
22 let revlog = Revlog::open(
23 &repo.store_vfs(),
24 "00manifest.i",
25 None,
26 use_nodemap,
27 )?;
17 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
18 let revlog =
19 Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
28 20 Ok(Self { revlog })
29 21 }
30 22
31 23 /// Return the `Manifest` for the given node ID.
32 24 ///
33 25 /// Note: this is a node ID in the manifestlog, typically found through
34 26 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
35 27 /// changeset.
36 28 ///
37 29 /// See also `Repo::manifest_for_node`
38 30 pub fn data_for_node(
39 31 &self,
40 32 node: NodePrefix,
41 33 ) -> Result<Manifest, RevlogError> {
42 34 let rev = self.revlog.rev_from_node(node)?;
43 35 self.data_for_rev(rev)
44 36 }
45 37
46 38 /// Return the `Manifest` of a given revision number.
47 39 ///
48 40 /// Note: this is a revision number in the manifestlog, *not* of any
49 41 /// changeset.
50 42 ///
51 43 /// See also `Repo::manifest_for_rev`
52 44 pub fn data_for_rev(
53 45 &self,
54 46 rev: Revision,
55 47 ) -> Result<Manifest, RevlogError> {
56 48 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
57 49 Ok(Manifest { bytes })
58 50 }
59 51 }
60 52
/// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
///
/// Entries are parsed on demand, either one by one with `iter` or by
/// searching for a single path with `find_by_path`.
#[derive(Debug)]
pub struct Manifest {
    /// Format for a manifest: flat sequence of variable-size entries,
    /// sorted by path, each as:
    ///
    /// ```text
    /// <path> \0 <hex_node_id> <flags> \n
    /// ```
    ///
    /// The last entry is also terminated by a newline character.
    /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
    bytes: Vec<u8>,
}
75 67
76 68 impl Manifest {
77 69 pub fn iter(
78 70 &self,
79 71 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
80 72 self.bytes
81 73 .split(|b| b == &b'\n')
82 74 .filter(|line| !line.is_empty())
83 75 .map(ManifestEntry::from_raw)
84 76 }
85 77
86 78 /// If the given path is in this manifest, return its filelog node ID
87 79 pub fn find_by_path(
88 80 &self,
89 81 path: &HgPath,
90 82 ) -> Result<Option<ManifestEntry>, HgError> {
91 83 use std::cmp::Ordering::*;
92 84 let path = path.as_bytes();
93 85 // Both boundaries of this `&[u8]` slice are always at the boundary of
94 86 // an entry
95 87 let mut bytes = &*self.bytes;
96 88
97 89 // Binary search algorithm derived from `[T]::binary_search_by`
98 90 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
99 91 // except we don’t have a slice of entries. Instead we jump to the
100 92 // middle of the byte slice and look around for entry delimiters
101 93 // (newlines).
102 94 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
103 95 let (entry_path, rest) =
104 96 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
105 97 let cmp = entry_path.cmp(path);
106 98 if cmp == Less {
107 99 let after_newline = entry_range.end + 1;
108 100 bytes = &bytes[after_newline..];
109 101 } else if cmp == Greater {
110 102 bytes = &bytes[..entry_range.start];
111 103 } else {
112 104 return Ok(Some(ManifestEntry::from_path_and_rest(
113 105 entry_path, rest,
114 106 )));
115 107 }
116 108 }
117 109 Ok(None)
118 110 }
119 111
120 112 /// If there is at least one, return the byte range of an entry *excluding*
121 113 /// the final newline.
122 114 fn find_entry_near_middle_of(
123 115 bytes: &[u8],
124 116 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
125 117 let len = bytes.len();
126 118 if len > 0 {
127 119 let middle = bytes.len() / 2;
128 120 // Integer division rounds down, so `middle < len`.
129 121 let (before, after) = bytes.split_at(middle);
130 122 let is_newline = |&byte: &u8| byte == b'\n';
131 123 let entry_start = match before.iter().rposition(is_newline) {
132 124 Some(i) => i + 1,
133 125 None => 0, // We choose the first entry in `bytes`
134 126 };
135 127 let entry_end = match after.iter().position(is_newline) {
136 128 Some(i) => {
137 129 // No `+ 1` here to exclude this newline from the range
138 130 middle + i
139 131 }
140 132 None => {
141 133 // In a well-formed manifest:
142 134 //
143 135 // * Since `len > 0`, `bytes` contains at least one entry
144 136 // * Every entry ends with a newline
145 137 // * Since `middle < len`, `after` contains at least the
146 138 // newline at the end of the last entry of `bytes`.
147 139 //
148 140 // We didn’t find a newline, so this manifest is not
149 141 // well-formed.
150 142 return Err(HgError::corrupted(
151 143 "manifest entry without \\n delimiter",
152 144 ));
153 145 }
154 146 };
155 147 Ok(Some(entry_start..entry_end))
156 148 } else {
157 149 // len == 0
158 150 Ok(None)
159 151 }
160 152 }
161 153 }
162 154
163 155 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
164 156 #[derive(Debug)]
165 157 pub struct ManifestEntry<'manifest> {
166 158 pub path: &'manifest HgPath,
167 159 pub hex_node_id: &'manifest [u8],
168 160
169 161 /// `Some` values are b'x', b'l', or 't'
170 162 pub flags: Option<u8>,
171 163 }
172 164
173 165 impl<'a> ManifestEntry<'a> {
174 166 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
175 167 bytes.split_2(b'\0').ok_or_else(|| {
176 168 HgError::corrupted("manifest entry without \\0 delimiter")
177 169 })
178 170 }
179 171
180 172 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
181 173 let (hex_node_id, flags) = match rest.split_last() {
182 174 Some((&b'x', rest)) => (rest, Some(b'x')),
183 175 Some((&b'l', rest)) => (rest, Some(b'l')),
184 176 Some((&b't', rest)) => (rest, Some(b't')),
185 177 _ => (rest, None),
186 178 };
187 179 Self {
188 180 path: HgPath::new(path),
189 181 hex_node_id,
190 182 flags,
191 183 }
192 184 }
193 185
194 186 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
195 187 let (path, rest) = Self::split_path(bytes)?;
196 188 Ok(Self::from_path_and_rest(path, rest))
197 189 }
198 190
199 191 pub fn node_id(&self) -> Result<Node, HgError> {
200 192 Node::from_hex_for_repo(self.hex_node_id)
201 193 }
202 194 }
General Comments 0
You need to be logged in to leave comments. Login now