##// END OF EJS Templates
rust: Reformat source code...
Simon Sapin -
r49041:4518d91f default
parent child Browse files
Show More
@@ -1,433 +1,434 b''
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use byteorder::{BigEndian, ByteOrder};
7 7 use flate2::read::ZlibDecoder;
8 8 use micro_timer::timed;
9 9 use sha1::{Digest, Sha1};
10 10 use zstd;
11 11
12 12 use super::index::Index;
13 13 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
14 14 use super::nodemap;
15 15 use super::nodemap::{NodeMap, NodeMapError};
16 16 use super::nodemap_docket::NodeMapDocket;
17 17 use super::patch;
18 18 use crate::errors::HgError;
19 19 use crate::repo::Repo;
20 20 use crate::revlog::Revision;
21 21 use crate::{Node, NULL_REVISION};
22 22
23 23 #[derive(derive_more::From)]
24 24 pub enum RevlogError {
25 25 InvalidRevision,
26 26 /// Working directory is not supported
27 27 WDirUnsupported,
28 28 /// Found more than one entry whose ID match the requested prefix
29 29 AmbiguousPrefix,
30 30 #[from]
31 31 Other(HgError),
32 32 }
33 33
34 34 impl From<NodeMapError> for RevlogError {
35 35 fn from(error: NodeMapError) -> Self {
36 36 match error {
37 37 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
38 38 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
39 39 }
40 40 }
41 41 }
42 42
43 43 impl RevlogError {
44 44 fn corrupted() -> Self {
45 45 RevlogError::Other(HgError::corrupted("corrupted revlog"))
46 46 }
47 47 }
48 48
49 49 /// Read only implementation of revlog.
50 50 pub struct Revlog {
51 51 /// When index and data are not interleaved: bytes of the revlog index.
52 52 /// When index and data are interleaved: bytes of the revlog index and
53 53 /// data.
54 54 index: Index,
55 55 /// When index and data are not interleaved: bytes of the revlog data
56 56 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
57 57 /// When present on disk: the persistent nodemap for this revlog
58 58 nodemap: Option<nodemap::NodeTree>,
59 59 }
60 60
61 61 impl Revlog {
62 62 /// Open a revlog index file.
63 63 ///
64 64 /// It will also open the associated data file if index and data are not
65 65 /// interleaved.
66 66 #[timed]
67 67 pub fn open(
68 68 repo: &Repo,
69 69 index_path: impl AsRef<Path>,
70 70 data_path: Option<&Path>,
71 71 ) -> Result<Self, HgError> {
72 72 let index_path = index_path.as_ref();
73 73 let index = {
74 74 match repo.store_vfs().mmap_open_opt(&index_path)? {
75 75 None => Index::new(Box::new(vec![])),
76 76 Some(index_mmap) => {
77 77 let version = get_version(&index_mmap)?;
78 78 if version != 1 {
79 // A proper new version should have had a repo/store requirement.
79 // A proper new version should have had a repo/store
80 // requirement.
80 81 return Err(HgError::corrupted("corrupted revlog"));
81 82 }
82 83
83 84 let index = Index::new(Box::new(index_mmap))?;
84 85 Ok(index)
85 86 }
86 87 }
87 88 }?;
88 89
89 90 let default_data_path = index_path.with_extension("d");
90 91
91 92 // type annotation required
92 93 // won't recognize Mmap as Deref<Target = [u8]>
93 94 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
94 95 if index.is_inline() {
95 96 None
96 97 } else {
97 98 let data_path = data_path.unwrap_or(&default_data_path);
98 99 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
99 100 Some(Box::new(data_mmap))
100 101 };
101 102
102 103 let nodemap = if index.is_inline() {
103 104 None
104 105 } else {
105 106 NodeMapDocket::read_from_file(repo, index_path)?.map(
106 107 |(docket, data)| {
107 108 nodemap::NodeTree::load_bytes(
108 109 Box::new(data),
109 110 docket.data_length,
110 111 )
111 112 },
112 113 )
113 114 };
114 115
115 116 Ok(Revlog {
116 117 index,
117 118 data_bytes,
118 119 nodemap,
119 120 })
120 121 }
121 122
122 123 /// Return number of entries of the `Revlog`.
123 124 pub fn len(&self) -> usize {
124 125 self.index.len()
125 126 }
126 127
127 128 /// Returns `true` if the `Revlog` has zero `entries`.
128 129 pub fn is_empty(&self) -> bool {
129 130 self.index.is_empty()
130 131 }
131 132
132 133 /// Returns the node ID for the given revision number, if it exists in this
133 134 /// revlog
134 135 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
135 136 Some(self.index.get_entry(rev)?.hash())
136 137 }
137 138
138 139 /// Return the revision number for the given node ID, if it exists in this
139 140 /// revlog
140 141 #[timed]
141 142 pub fn rev_from_node(
142 143 &self,
143 144 node: NodePrefix,
144 145 ) -> Result<Revision, RevlogError> {
145 146 if node.is_prefix_of(&NULL_NODE) {
146 147 return Ok(NULL_REVISION);
147 148 }
148 149
149 150 if let Some(nodemap) = &self.nodemap {
150 151 return nodemap
151 152 .find_bin(&self.index, node)?
152 153 .ok_or(RevlogError::InvalidRevision);
153 154 }
154 155
155 156 // Fallback to linear scan when a persistent nodemap is not present.
156 157 // This happens when the persistent-nodemap experimental feature is not
157 158 // enabled, or for small revlogs.
158 159 //
159 160 // TODO: consider building a non-persistent nodemap in memory to
160 161 // optimize these cases.
161 162 let mut found_by_prefix = None;
162 163 for rev in (0..self.len() as Revision).rev() {
163 164 let index_entry =
164 165 self.index.get_entry(rev).ok_or(HgError::corrupted(
165 166 "revlog references a revision not in the index",
166 167 ))?;
167 168 if node == *index_entry.hash() {
168 169 return Ok(rev);
169 170 }
170 171 if node.is_prefix_of(index_entry.hash()) {
171 172 if found_by_prefix.is_some() {
172 173 return Err(RevlogError::AmbiguousPrefix);
173 174 }
174 175 found_by_prefix = Some(rev)
175 176 }
176 177 }
177 178 found_by_prefix.ok_or(RevlogError::InvalidRevision)
178 179 }
179 180
180 181 /// Returns whether the given revision exists in this revlog.
181 182 pub fn has_rev(&self, rev: Revision) -> bool {
182 183 self.index.get_entry(rev).is_some()
183 184 }
184 185
185 186 /// Return the full data associated to a revision.
186 187 ///
187 188 /// All entries required to build the final data out of deltas will be
188 189 /// retrieved as needed, and the deltas will be applied to the inital
189 190 /// snapshot to rebuild the final data.
190 191 #[timed]
191 192 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
192 193 if rev == NULL_REVISION {
193 194 return Ok(vec![]);
194 195 };
195 196 // Todo return -> Cow
196 197 let mut entry = self.get_entry(rev)?;
197 198 let mut delta_chain = vec![];
198 199 while let Some(base_rev) = entry.base_rev {
199 200 delta_chain.push(entry);
200 201 entry = self
201 202 .get_entry(base_rev)
202 203 .map_err(|_| RevlogError::corrupted())?;
203 204 }
204 205
205 206 // TODO do not look twice in the index
206 207 let index_entry = self
207 208 .index
208 209 .get_entry(rev)
209 210 .ok_or(RevlogError::InvalidRevision)?;
210 211
211 212 let data: Vec<u8> = if delta_chain.is_empty() {
212 213 entry.data()?.into()
213 214 } else {
214 215 Revlog::build_data_from_deltas(entry, &delta_chain)?
215 216 };
216 217
217 218 if self.check_hash(
218 219 index_entry.p1(),
219 220 index_entry.p2(),
220 221 index_entry.hash().as_bytes(),
221 222 &data,
222 223 ) {
223 224 Ok(data)
224 225 } else {
225 226 Err(RevlogError::corrupted())
226 227 }
227 228 }
228 229
229 230 /// Check the hash of some given data against the recorded hash.
230 231 pub fn check_hash(
231 232 &self,
232 233 p1: Revision,
233 234 p2: Revision,
234 235 expected: &[u8],
235 236 data: &[u8],
236 237 ) -> bool {
237 238 let e1 = self.index.get_entry(p1);
238 239 let h1 = match e1 {
239 240 Some(ref entry) => entry.hash(),
240 241 None => &NULL_NODE,
241 242 };
242 243 let e2 = self.index.get_entry(p2);
243 244 let h2 = match e2 {
244 245 Some(ref entry) => entry.hash(),
245 246 None => &NULL_NODE,
246 247 };
247 248
248 249 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
249 250 }
250 251
251 252 /// Build the full data of a revision out its snapshot
252 253 /// and its deltas.
253 254 #[timed]
254 255 fn build_data_from_deltas(
255 256 snapshot: RevlogEntry,
256 257 deltas: &[RevlogEntry],
257 258 ) -> Result<Vec<u8>, RevlogError> {
258 259 let snapshot = snapshot.data()?;
259 260 let deltas = deltas
260 261 .iter()
261 262 .rev()
262 263 .map(RevlogEntry::data)
263 264 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
264 265 let patches: Vec<_> =
265 266 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
266 267 let patch = patch::fold_patch_lists(&patches);
267 268 Ok(patch.apply(&snapshot))
268 269 }
269 270
270 271 /// Return the revlog data.
271 272 fn data(&self) -> &[u8] {
272 273 match self.data_bytes {
273 274 Some(ref data_bytes) => &data_bytes,
274 275 None => panic!(
275 276 "forgot to load the data or trying to access inline data"
276 277 ),
277 278 }
278 279 }
279 280
280 281 /// Get an entry of the revlog.
281 282 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
282 283 let index_entry = self
283 284 .index
284 285 .get_entry(rev)
285 286 .ok_or(RevlogError::InvalidRevision)?;
286 287 let start = index_entry.offset();
287 288 let end = start + index_entry.compressed_len();
288 289 let data = if self.index.is_inline() {
289 290 self.index.data(start, end)
290 291 } else {
291 292 &self.data()[start..end]
292 293 };
293 294 let entry = RevlogEntry {
294 295 rev,
295 296 bytes: data,
296 297 compressed_len: index_entry.compressed_len(),
297 298 uncompressed_len: index_entry.uncompressed_len(),
298 299 base_rev: if index_entry.base_revision() == rev {
299 300 None
300 301 } else {
301 302 Some(index_entry.base_revision())
302 303 },
303 304 };
304 305 Ok(entry)
305 306 }
306 307 }
307 308
308 309 /// The revlog entry's bytes and the necessary informations to extract
309 310 /// the entry's data.
310 311 #[derive(Debug)]
311 312 pub struct RevlogEntry<'a> {
312 313 rev: Revision,
313 314 bytes: &'a [u8],
314 315 compressed_len: usize,
315 316 uncompressed_len: usize,
316 317 base_rev: Option<Revision>,
317 318 }
318 319
319 320 impl<'a> RevlogEntry<'a> {
320 321 pub fn revision(&self) -> Revision {
321 322 self.rev
322 323 }
323 324
324 325 /// Extract the data contained in the entry.
325 326 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
326 327 if self.bytes.is_empty() {
327 328 return Ok(Cow::Borrowed(&[]));
328 329 }
329 330 match self.bytes[0] {
330 331 // Revision data is the entirety of the entry, including this
331 332 // header.
332 333 b'\0' => Ok(Cow::Borrowed(self.bytes)),
333 334 // Raw revision data follows.
334 335 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
335 336 // zlib (RFC 1950) data.
336 337 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
337 338 // zstd data.
338 339 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
339 340 // A proper new format should have had a repo/store requirement.
340 341 _format_type => Err(RevlogError::corrupted()),
341 342 }
342 343 }
343 344
344 345 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
345 346 let mut decoder = ZlibDecoder::new(self.bytes);
346 347 if self.is_delta() {
347 348 let mut buf = Vec::with_capacity(self.compressed_len);
348 349 decoder
349 350 .read_to_end(&mut buf)
350 351 .map_err(|_| RevlogError::corrupted())?;
351 352 Ok(buf)
352 353 } else {
353 354 let mut buf = vec![0; self.uncompressed_len];
354 355 decoder
355 356 .read_exact(&mut buf)
356 357 .map_err(|_| RevlogError::corrupted())?;
357 358 Ok(buf)
358 359 }
359 360 }
360 361
361 362 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
362 363 if self.is_delta() {
363 364 let mut buf = Vec::with_capacity(self.compressed_len);
364 365 zstd::stream::copy_decode(self.bytes, &mut buf)
365 366 .map_err(|_| RevlogError::corrupted())?;
366 367 Ok(buf)
367 368 } else {
368 369 let mut buf = vec![0; self.uncompressed_len];
369 370 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
370 371 .map_err(|_| RevlogError::corrupted())?;
371 372 if len != self.uncompressed_len {
372 373 Err(RevlogError::corrupted())
373 374 } else {
374 375 Ok(buf)
375 376 }
376 377 }
377 378 }
378 379
379 380 /// Tell if the entry is a snapshot or a delta
380 381 /// (influences on decompression).
381 382 fn is_delta(&self) -> bool {
382 383 self.base_rev.is_some()
383 384 }
384 385 }
385 386
386 387 /// Format version of the revlog.
387 388 pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> {
388 389 if index_bytes.len() == 0 {
389 390 return Ok(1);
390 391 };
391 392 if index_bytes.len() < 4 {
392 393 return Err(HgError::corrupted(
393 394 "corrupted revlog: can't read the index format header",
394 395 ));
395 396 };
396 397 Ok(BigEndian::read_u16(&index_bytes[2..=3]))
397 398 }
398 399
399 400 /// Calculate the hash of a revision given its data and its parents.
400 401 fn hash(
401 402 data: &[u8],
402 403 p1_hash: &[u8],
403 404 p2_hash: &[u8],
404 405 ) -> [u8; NODE_BYTES_LENGTH] {
405 406 let mut hasher = Sha1::new();
406 407 let (a, b) = (p1_hash, p2_hash);
407 408 if a > b {
408 409 hasher.update(b);
409 410 hasher.update(a);
410 411 } else {
411 412 hasher.update(a);
412 413 hasher.update(b);
413 414 }
414 415 hasher.update(data);
415 416 *hasher.finalize().as_ref()
416 417 }
417 418
#[cfg(test)]
mod tests {
    use super::*;

    use super::super::index::IndexEntryBuilder;

    /// The version written by the entry builder is read back by
    /// `get_version`.
    #[test]
    fn version_test() {
        let first_entry = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .build();

        // The error is mapped to `()` before comparing the result.
        let version = get_version(&first_entry).map_err(|_err| ());
        assert_eq!(version, Ok(1))
    }
}
General Comments 0
You need to be logged in to leave comments. Login now