##// END OF EJS Templates
rhg: do not fail when the repo is empty...
Arseniy Alekseyev -
r49013:9d0e5629 default
parent child Browse files
Show More
@@ -1,423 +1,429 b''
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use byteorder::{BigEndian, ByteOrder};
7 7 use flate2::read::ZlibDecoder;
8 8 use micro_timer::timed;
9 9 use sha1::{Digest, Sha1};
10 10 use zstd;
11 11
12 12 use super::index::Index;
13 13 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
14 14 use super::nodemap;
15 15 use super::nodemap::{NodeMap, NodeMapError};
16 16 use super::nodemap_docket::NodeMapDocket;
17 17 use super::patch;
18 18 use crate::errors::HgError;
19 19 use crate::repo::Repo;
20 20 use crate::revlog::Revision;
21 21 use crate::{Node, NULL_REVISION};
22 22
23 23 #[derive(derive_more::From)]
24 24 pub enum RevlogError {
25 25 InvalidRevision,
26 26 /// Working directory is not supported
27 27 WDirUnsupported,
28 28 /// Found more than one entry whose ID matches the requested prefix
29 29 AmbiguousPrefix,
30 30 #[from]
31 31 Other(HgError),
32 32 }
33 33
34 34 impl From<NodeMapError> for RevlogError {
35 35 fn from(error: NodeMapError) -> Self {
36 36 match error {
37 37 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
38 38 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
39 39 }
40 40 }
41 41 }
42 42
43 43 impl RevlogError {
44 44 fn corrupted() -> Self {
45 45 RevlogError::Other(HgError::corrupted("corrupted revlog"))
46 46 }
47 47 }
48 48
49 49 /// Read only implementation of revlog.
50 50 pub struct Revlog {
51 51 /// When index and data are not interleaved: bytes of the revlog index.
52 52 /// When index and data are interleaved: bytes of the revlog index and
53 53 /// data.
54 54 index: Index,
55 55 /// When index and data are not interleaved: bytes of the revlog data
56 56 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
57 57 /// When present on disk: the persistent nodemap for this revlog
58 58 nodemap: Option<nodemap::NodeTree>,
59 59 }
60 60
61 61 impl Revlog {
62 62 /// Open a revlog index file.
63 63 ///
64 64 /// It will also open the associated data file if index and data are not
65 65 /// interleaved.
66 66 #[timed]
67 67 pub fn open(
68 68 repo: &Repo,
69 69 index_path: impl AsRef<Path>,
70 70 data_path: Option<&Path>,
71 71 ) -> Result<Self, HgError> {
72 72 let index_path = index_path.as_ref();
73 let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
74
73 let index = {
74 match repo.store_vfs().mmap_open_opt(&index_path)? {
75 None => Index::new(Box::new(vec![])),
76 Some(index_mmap) => {
75 77 let version = get_version(&index_mmap)?;
76 78 if version != 1 {
77 79 // A proper new version should have had a repo/store requirement.
78 80 return Err(HgError::corrupted("corrupted revlog"));
79 81 }
80 82
81 83 let index = Index::new(Box::new(index_mmap))?;
84 Ok(index)
85 }
86 }
87 }?;
82 88
83 89 let default_data_path = index_path.with_extension("d");
84 90
85 91 // type annotation required
86 92 // won't recognize Mmap as Deref<Target = [u8]>
87 93 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
88 94 if index.is_inline() {
89 95 None
90 96 } else {
91 97 let data_path = data_path.unwrap_or(&default_data_path);
92 98 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
93 99 Some(Box::new(data_mmap))
94 100 };
95 101
96 102 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
97 103 |(docket, data)| {
98 104 nodemap::NodeTree::load_bytes(
99 105 Box::new(data),
100 106 docket.data_length,
101 107 )
102 108 },
103 109 );
104 110
105 111 Ok(Revlog {
106 112 index,
107 113 data_bytes,
108 114 nodemap,
109 115 })
110 116 }
111 117
112 118 /// Return number of entries of the `Revlog`.
113 119 pub fn len(&self) -> usize {
114 120 self.index.len()
115 121 }
116 122
117 123 /// Returns `true` if the `Revlog` has zero `entries`.
118 124 pub fn is_empty(&self) -> bool {
119 125 self.index.is_empty()
120 126 }
121 127
122 128 /// Returns the node ID for the given revision number, if it exists in this
123 129 /// revlog
124 130 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
125 131 Some(self.index.get_entry(rev)?.hash())
126 132 }
127 133
128 134 /// Return the revision number for the given node ID, if it exists in this
129 135 /// revlog
130 136 #[timed]
131 137 pub fn rev_from_node(
132 138 &self,
133 139 node: NodePrefix,
134 140 ) -> Result<Revision, RevlogError> {
135 141 if node.is_prefix_of(&NULL_NODE) {
136 142 return Ok(NULL_REVISION);
137 143 }
138 144
139 145 if let Some(nodemap) = &self.nodemap {
140 146 return nodemap
141 147 .find_bin(&self.index, node)?
142 148 .ok_or(RevlogError::InvalidRevision);
143 149 }
144 150
145 151 // Fallback to linear scan when a persistent nodemap is not present.
146 152 // This happens when the persistent-nodemap experimental feature is not
147 153 // enabled, or for small revlogs.
148 154 //
149 155 // TODO: consider building a non-persistent nodemap in memory to
150 156 // optimize these cases.
151 157 let mut found_by_prefix = None;
152 158 for rev in (0..self.len() as Revision).rev() {
153 159 let index_entry =
154 160 self.index.get_entry(rev).ok_or(HgError::corrupted(
155 161 "revlog references a revision not in the index",
156 162 ))?;
157 163 if node == *index_entry.hash() {
158 164 return Ok(rev);
159 165 }
160 166 if node.is_prefix_of(index_entry.hash()) {
161 167 if found_by_prefix.is_some() {
162 168 return Err(RevlogError::AmbiguousPrefix);
163 169 }
164 170 found_by_prefix = Some(rev)
165 171 }
166 172 }
167 173 found_by_prefix.ok_or(RevlogError::InvalidRevision)
168 174 }
169 175
170 176 /// Returns whether the given revision exists in this revlog.
171 177 pub fn has_rev(&self, rev: Revision) -> bool {
172 178 self.index.get_entry(rev).is_some()
173 179 }
174 180
175 181 /// Return the full data associated to a revision.
176 182 ///
177 183 /// All entries required to build the final data out of deltas will be
178 184 /// retrieved as needed, and the deltas will be applied to the initial
179 185 /// snapshot to rebuild the final data.
180 186 #[timed]
181 187 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
182 188 if rev == NULL_REVISION {
183 189 return Ok(vec![]);
184 190 };
185 191 // Todo return -> Cow
186 192 let mut entry = self.get_entry(rev)?;
187 193 let mut delta_chain = vec![];
188 194 while let Some(base_rev) = entry.base_rev {
189 195 delta_chain.push(entry);
190 196 entry = self
191 197 .get_entry(base_rev)
192 198 .map_err(|_| RevlogError::corrupted())?;
193 199 }
194 200
195 201 // TODO do not look twice in the index
196 202 let index_entry = self
197 203 .index
198 204 .get_entry(rev)
199 205 .ok_or(RevlogError::InvalidRevision)?;
200 206
201 207 let data: Vec<u8> = if delta_chain.is_empty() {
202 208 entry.data()?.into()
203 209 } else {
204 210 Revlog::build_data_from_deltas(entry, &delta_chain)?
205 211 };
206 212
207 213 if self.check_hash(
208 214 index_entry.p1(),
209 215 index_entry.p2(),
210 216 index_entry.hash().as_bytes(),
211 217 &data,
212 218 ) {
213 219 Ok(data)
214 220 } else {
215 221 Err(RevlogError::corrupted())
216 222 }
217 223 }
218 224
219 225 /// Check the hash of some given data against the recorded hash.
220 226 pub fn check_hash(
221 227 &self,
222 228 p1: Revision,
223 229 p2: Revision,
224 230 expected: &[u8],
225 231 data: &[u8],
226 232 ) -> bool {
227 233 let e1 = self.index.get_entry(p1);
228 234 let h1 = match e1 {
229 235 Some(ref entry) => entry.hash(),
230 236 None => &NULL_NODE,
231 237 };
232 238 let e2 = self.index.get_entry(p2);
233 239 let h2 = match e2 {
234 240 Some(ref entry) => entry.hash(),
235 241 None => &NULL_NODE,
236 242 };
237 243
238 244 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
239 245 }
240 246
241 247 /// Build the full data of a revision out of its snapshot
242 248 /// and its deltas.
243 249 #[timed]
244 250 fn build_data_from_deltas(
245 251 snapshot: RevlogEntry,
246 252 deltas: &[RevlogEntry],
247 253 ) -> Result<Vec<u8>, RevlogError> {
248 254 let snapshot = snapshot.data()?;
249 255 let deltas = deltas
250 256 .iter()
251 257 .rev()
252 258 .map(RevlogEntry::data)
253 259 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
254 260 let patches: Vec<_> =
255 261 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
256 262 let patch = patch::fold_patch_lists(&patches);
257 263 Ok(patch.apply(&snapshot))
258 264 }
259 265
260 266 /// Return the revlog data.
261 267 fn data(&self) -> &[u8] {
262 268 match self.data_bytes {
263 269 Some(ref data_bytes) => &data_bytes,
264 270 None => panic!(
265 271 "forgot to load the data or trying to access inline data"
266 272 ),
267 273 }
268 274 }
269 275
270 276 /// Get an entry of the revlog.
271 277 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
272 278 let index_entry = self
273 279 .index
274 280 .get_entry(rev)
275 281 .ok_or(RevlogError::InvalidRevision)?;
276 282 let start = index_entry.offset();
277 283 let end = start + index_entry.compressed_len();
278 284 let data = if self.index.is_inline() {
279 285 self.index.data(start, end)
280 286 } else {
281 287 &self.data()[start..end]
282 288 };
283 289 let entry = RevlogEntry {
284 290 rev,
285 291 bytes: data,
286 292 compressed_len: index_entry.compressed_len(),
287 293 uncompressed_len: index_entry.uncompressed_len(),
288 294 base_rev: if index_entry.base_revision() == rev {
289 295 None
290 296 } else {
291 297 Some(index_entry.base_revision())
292 298 },
293 299 };
294 300 Ok(entry)
295 301 }
296 302 }
297 303
298 304 /// The revlog entry's bytes and the necessary information to extract
299 305 /// the entry's data.
300 306 #[derive(Debug)]
301 307 pub struct RevlogEntry<'a> {
302 308 rev: Revision,
303 309 bytes: &'a [u8],
304 310 compressed_len: usize,
305 311 uncompressed_len: usize,
306 312 base_rev: Option<Revision>,
307 313 }
308 314
309 315 impl<'a> RevlogEntry<'a> {
310 316 pub fn revision(&self) -> Revision {
311 317 self.rev
312 318 }
313 319
314 320 /// Extract the data contained in the entry.
315 321 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
316 322 if self.bytes.is_empty() {
317 323 return Ok(Cow::Borrowed(&[]));
318 324 }
319 325 match self.bytes[0] {
320 326 // Revision data is the entirety of the entry, including this
321 327 // header.
322 328 b'\0' => Ok(Cow::Borrowed(self.bytes)),
323 329 // Raw revision data follows.
324 330 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
325 331 // zlib (RFC 1950) data.
326 332 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
327 333 // zstd data.
328 334 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
329 335 // A proper new format should have had a repo/store requirement.
330 336 _format_type => Err(RevlogError::corrupted()),
331 337 }
332 338 }
333 339
334 340 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
335 341 let mut decoder = ZlibDecoder::new(self.bytes);
336 342 if self.is_delta() {
337 343 let mut buf = Vec::with_capacity(self.compressed_len);
338 344 decoder
339 345 .read_to_end(&mut buf)
340 346 .map_err(|_| RevlogError::corrupted())?;
341 347 Ok(buf)
342 348 } else {
343 349 let mut buf = vec![0; self.uncompressed_len];
344 350 decoder
345 351 .read_exact(&mut buf)
346 352 .map_err(|_| RevlogError::corrupted())?;
347 353 Ok(buf)
348 354 }
349 355 }
350 356
351 357 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
352 358 if self.is_delta() {
353 359 let mut buf = Vec::with_capacity(self.compressed_len);
354 360 zstd::stream::copy_decode(self.bytes, &mut buf)
355 361 .map_err(|_| RevlogError::corrupted())?;
356 362 Ok(buf)
357 363 } else {
358 364 let mut buf = vec![0; self.uncompressed_len];
359 365 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
360 366 .map_err(|_| RevlogError::corrupted())?;
361 367 if len != self.uncompressed_len {
362 368 Err(RevlogError::corrupted())
363 369 } else {
364 370 Ok(buf)
365 371 }
366 372 }
367 373 }
368 374
369 375 /// Tell if the entry is a snapshot or a delta
370 376 /// (influences decompression).
371 377 fn is_delta(&self) -> bool {
372 378 self.base_rev.is_some()
373 379 }
374 380 }
375 381
376 382 /// Format version of the revlog.
377 383 pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> {
378 384 if index_bytes.len() == 0 {
379 385 return Ok(1);
380 386 };
381 387 if index_bytes.len() < 4 {
382 388 return Err(HgError::corrupted(
383 389 "corrupted revlog: can't read the index format header",
384 390 ));
385 391 };
386 392 Ok(BigEndian::read_u16(&index_bytes[2..=3]))
387 393 }
388 394
389 395 /// Calculate the hash of a revision given its data and its parents.
390 396 fn hash(
391 397 data: &[u8],
392 398 p1_hash: &[u8],
393 399 p2_hash: &[u8],
394 400 ) -> [u8; NODE_BYTES_LENGTH] {
395 401 let mut hasher = Sha1::new();
396 402 let (a, b) = (p1_hash, p2_hash);
397 403 if a > b {
398 404 hasher.update(b);
399 405 hasher.update(a);
400 406 } else {
401 407 hasher.update(a);
402 408 hasher.update(b);
403 409 }
404 410 hasher.update(data);
405 411 *hasher.finalize().as_ref()
406 412 }
407 413
408 414 #[cfg(test)]
409 415 mod tests {
410 416 use super::*;
411 417
412 418 use super::super::index::IndexEntryBuilder;
413 419
414 420 #[test]
415 421 fn version_test() {
416 422 let bytes = IndexEntryBuilder::new()
417 423 .is_first(true)
418 424 .with_version(1)
419 425 .build();
420 426
421 assert_eq!(get_version(&bytes), 1)
427 assert_eq!(get_version(&bytes).map_err(|_err|()), Ok(1))
422 428 }
423 429 }
@@ -1,73 +1,100 b''
1 1 use crate::errors::{HgError, IoErrorContext, IoResultExt};
2 2 use memmap2::{Mmap, MmapOptions};
3 3 use std::io::ErrorKind;
4 4 use std::path::{Path, PathBuf};
5 5
6 6 /// Filesystem access abstraction for the contents of a given "base" directory
7 7 #[derive(Clone, Copy)]
8 8 pub struct Vfs<'a> {
9 9 pub(crate) base: &'a Path,
10 10 }
11 11
12 struct FileNotFound(std::io::Error, PathBuf);
13
12 14 impl Vfs<'_> {
13 15 pub fn join(&self, relative_path: impl AsRef<Path>) -> PathBuf {
14 16 self.base.join(relative_path)
15 17 }
16 18
17 19 pub fn read(
18 20 &self,
19 21 relative_path: impl AsRef<Path>,
20 22 ) -> Result<Vec<u8>, HgError> {
21 23 let path = self.join(relative_path);
22 24 std::fs::read(&path).when_reading_file(&path)
23 25 }
24 26
27 fn mmap_open_gen(
28 &self,
29 relative_path: impl AsRef<Path>,
30 ) -> Result<Result<Mmap, FileNotFound>, HgError> {
31 let path = self.join(relative_path);
32 let file = match std::fs::File::open(&path) {
33 Err(err) => {
34 if let ErrorKind::NotFound = err.kind() {
35 return Ok(Err(FileNotFound(err, path)));
36 };
37 return (Err(err)).when_reading_file(&path);
38 }
39 Ok(file) => file,
40 };
41 // TODO: what are the safety requirements here?
42 let mmap = unsafe { MmapOptions::new().map(&file) }
43 .when_reading_file(&path)?;
44 Ok(Ok(mmap))
45 }
46
47 pub fn mmap_open_opt(
48 &self,
49 relative_path: impl AsRef<Path>,
50 ) -> Result<Option<Mmap>, HgError> {
51 self.mmap_open_gen(relative_path).map(|res| res.ok())
52 }
53
25 54 pub fn mmap_open(
26 55 &self,
27 56 relative_path: impl AsRef<Path>,
28 57 ) -> Result<Mmap, HgError> {
29 let path = self.base.join(relative_path);
30 let file = std::fs::File::open(&path).when_reading_file(&path)?;
31 // TODO: what are the safety requirements here?
32 let mmap = unsafe { MmapOptions::new().map(&file) }
33 .when_reading_file(&path)?;
34 Ok(mmap)
58 match self.mmap_open_gen(relative_path)? {
59 Err(FileNotFound(err, path)) => Err(err).when_reading_file(&path),
60 Ok(res) => Ok(res),
61 }
35 62 }
36 63
37 64 pub fn rename(
38 65 &self,
39 66 relative_from: impl AsRef<Path>,
40 67 relative_to: impl AsRef<Path>,
41 68 ) -> Result<(), HgError> {
42 69 let from = self.join(relative_from);
43 70 let to = self.join(relative_to);
44 71 std::fs::rename(&from, &to)
45 72 .with_context(|| IoErrorContext::RenamingFile { from, to })
46 73 }
47 74 }
48 75
49 76 fn fs_metadata(
50 77 path: impl AsRef<Path>,
51 78 ) -> Result<Option<std::fs::Metadata>, HgError> {
52 79 let path = path.as_ref();
53 80 match std::fs::metadata(path) {
54 81 Ok(meta) => Ok(Some(meta)),
55 82 Err(error) => match error.kind() {
56 83 // TODO: when we require a Rust version where `NotADirectory` is
57 84 // stable, invert this logic and return None for it and `NotFound`
58 85 // and propagate any other error.
59 86 ErrorKind::PermissionDenied => Err(error).with_context(|| {
60 87 IoErrorContext::ReadingMetadata(path.to_owned())
61 88 }),
62 89 _ => Ok(None),
63 90 },
64 91 }
65 92 }
66 93
67 94 pub(crate) fn is_dir(path: impl AsRef<Path>) -> Result<bool, HgError> {
68 95 Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_dir()))
69 96 }
70 97
71 98 pub(crate) fn is_file(path: impl AsRef<Path>) -> Result<bool, HgError> {
72 99 Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_file()))
73 100 }
@@ -1,23 +1,27 b''
1 Create a repo such that the changelog entry refers to a null manifest node:
1 Test null revisions (node 0000000000000000000000000000000000000000, aka rev -1)
2 in various circumstances.
3
4 Make an empty repo:
2 5
3 6 $ hg init a
4 7 $ cd a
5 $ hg log
6 $ touch x
7 $ hg add x
8 $ hg commit -m "init"
9 $ hg rm x
10 $ hg commit -q --amend
11 8
12 $ wc -c < .hg/store/00manifest.i
13 0
14
15 Make sure that the manifest can be read (and is empty):
16
17 $ hg --config rhg.on-unsupported=abort files -r .
9 $ hg files -r 0000000000000000000000000000000000000000
10 [1]
11 $ hg files -r .
18 12 [1]
19 13
20 Test a null changelog rev, too:
14 Add an empty commit (this makes the changelog refer to a null manifest node):
15
16
17 $ hg commit -m "init" --config ui.allowemptycommit=true
21 18
22 $ hg --config rhg.on-unsupported=abort files -r 0000000000000000000000000000000000000000
19 $ hg files -r .
23 20 [1]
21
22 Strip that empty commit (this makes the changelog file empty, as opposed to missing):
23
24 $ hg --config 'extensions.strip=' strip . > /dev/null
25
26 $ hg files -r .
27 [1]
General Comments 0
You need to be logged in to leave comments. Login now