##// END OF EJS Templates
rhg: correctly handle the case where diffs are encoded relative to nullrev...
Arseniy Alekseyev -
r50105:cc92ad0e stable
parent child Browse files
Show More
@@ -1,468 +1,488 b''
1 1 use std::borrow::Cow;
2 2 use std::convert::TryFrom;
3 3 use std::io::Read;
4 4 use std::ops::Deref;
5 5 use std::path::Path;
6 6
7 7 use flate2::read::ZlibDecoder;
8 8 use micro_timer::timed;
9 9 use sha1::{Digest, Sha1};
10 10 use zstd;
11 11
12 12 use super::index::Index;
13 13 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
14 14 use super::nodemap;
15 15 use super::nodemap::{NodeMap, NodeMapError};
16 16 use super::nodemap_docket::NodeMapDocket;
17 17 use super::patch;
18 18 use crate::errors::HgError;
19 19 use crate::repo::Repo;
20 20 use crate::revlog::Revision;
21 21 use crate::{Node, NULL_REVISION};
22 22
// Bit masks tested against the `flags` field (`u16`) of a `RevlogEntry`.
//
// `RevlogEntry::is_cencored` checks this bit.
23 23 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
// Masked out of the set checked by
// `RevlogEntry::has_length_affecting_flag_processor`.
24 24 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
// NOTE(review): presumably mirrors the Python-side "externally stored" flag;
// only used here as part of `REVIDX_KNOWN_FLAGS` — confirm.
25 25 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
// NOTE(review): presumably mirrors the Python-side "has copies info" flag;
// only used here as part of `REVIDX_KNOWN_FLAGS` — confirm.
26 26 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
27 27
28 28 // Keep this in sync with REVIDX_KNOWN_FLAGS in
29 29 // mercurial/revlogutils/flagutil.py
//
// Union of the four flag bits declared above.
30 30 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
31 31 | REVISION_FLAG_ELLIPSIS
32 32 | REVISION_FLAG_EXTSTORED
33 33 | REVISION_FLAG_HASCOPIESINFO;
34 34
// Flags carried by the synthetic entry that `Revlog::make_null_entry` builds
// for the null revision: no flag bit is set.
35 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
36
/// Errors returned by the lookup and read operations of `Revlog`.
35 37 #[derive(derive_more::From)]
36 38 pub enum RevlogError {
/// No entry matches the requested revision number or node prefix.
37 39 InvalidRevision,
38 40 /// Working directory is not supported
39 41 WDirUnsupported,
40 42 /// Found more than one entry whose ID matches the requested prefix
41 43 AmbiguousPrefix,
/// Any other failure, carried as an `HgError`. The `#[from]` attribute
/// lets `?` convert an `HgError` into this variant.
42 44 #[from]
43 45 Other(HgError),
44 46 }
45 47
46 48 impl From<NodeMapError> for RevlogError {
47 49 fn from(error: NodeMapError) -> Self {
48 50 match error {
49 51 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
50 52 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
51 53 }
52 54 }
53 55 }
54 56
/// Build the generic "corrupted revlog" `HgError` used throughout this
/// module whenever stored data turns out to be inconsistent.
55 57 fn corrupted() -> HgError {
56 58 HgError::corrupted("corrupted revlog")
57 59 }
58 60
59 61 impl RevlogError {
60 62 fn corrupted() -> Self {
61 63 RevlogError::Other(corrupted())
62 64 }
63 65 }
64 66
65 67 /// Read-only implementation of revlog.
66 68 pub struct Revlog {
67 69 /// When index and data are not interleaved: bytes of the revlog index.
68 70 /// When index and data are interleaved: bytes of the revlog index and
69 71 /// data.
70 72 index: Index,
71 73 /// When index and data are not interleaved: bytes of the revlog data
/// (`None` when the index is inline).
72 74 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
73 75 /// When present on disk: the persistent nodemap for this revlog
/// (`Revlog::open` never loads one for an inline index).
74 76 nodemap: Option<nodemap::NodeTree>,
75 77 }
76 78
77 79 impl Revlog {
78 80 /// Open a revlog index file.
79 81 ///
80 82 /// It will also open the associated data file if index and data are not
81 83 /// interleaved.
82 84 #[timed]
83 85 pub fn open(
84 86 repo: &Repo,
85 87 index_path: impl AsRef<Path>,
86 88 data_path: Option<&Path>,
87 89 ) -> Result<Self, HgError> {
88 90 let index_path = index_path.as_ref();
89 91 let index = {
90 92 match repo.store_vfs().mmap_open_opt(&index_path)? {
91 93 None => Index::new(Box::new(vec![])),
92 94 Some(index_mmap) => {
93 95 let index = Index::new(Box::new(index_mmap))?;
94 96 Ok(index)
95 97 }
96 98 }
97 99 }?;
98 100
99 101 let default_data_path = index_path.with_extension("d");
100 102
101 103 // type annotation required
102 104 // won't recognize Mmap as Deref<Target = [u8]>
103 105 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
104 106 if index.is_inline() {
105 107 None
106 108 } else {
107 109 let data_path = data_path.unwrap_or(&default_data_path);
108 110 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
109 111 Some(Box::new(data_mmap))
110 112 };
111 113
112 114 let nodemap = if index.is_inline() {
113 115 None
114 116 } else {
115 117 NodeMapDocket::read_from_file(repo, index_path)?.map(
116 118 |(docket, data)| {
117 119 nodemap::NodeTree::load_bytes(
118 120 Box::new(data),
119 121 docket.data_length,
120 122 )
121 123 },
122 124 )
123 125 };
124 126
125 127 Ok(Revlog {
126 128 index,
127 129 data_bytes,
128 130 nodemap,
129 131 })
130 132 }
131 133
132 134 /// Return number of entries of the `Revlog`.
133 135 pub fn len(&self) -> usize {
134 136 self.index.len()
135 137 }
136 138
137 139 /// Returns `true` if the `Revlog` has zero `entries`.
138 140 pub fn is_empty(&self) -> bool {
139 141 self.index.is_empty()
140 142 }
141 143
142 144 /// Returns the node ID for the given revision number, if it exists in this
143 145 /// revlog
144 146 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
145 147 if rev == NULL_REVISION {
146 148 return Some(&NULL_NODE);
147 149 }
148 150 Some(self.index.get_entry(rev)?.hash())
149 151 }
150 152
151 153 /// Return the revision number for the given node ID, if it exists in this
152 154 /// revlog
153 155 #[timed]
154 156 pub fn rev_from_node(
155 157 &self,
156 158 node: NodePrefix,
157 159 ) -> Result<Revision, RevlogError> {
158 160 if node.is_prefix_of(&NULL_NODE) {
159 161 return Ok(NULL_REVISION);
160 162 }
161 163
162 164 if let Some(nodemap) = &self.nodemap {
163 165 return nodemap
164 166 .find_bin(&self.index, node)?
165 167 .ok_or(RevlogError::InvalidRevision);
166 168 }
167 169
168 170 // Fallback to linear scan when a persistent nodemap is not present.
169 171 // This happens when the persistent-nodemap experimental feature is not
170 172 // enabled, or for small revlogs.
171 173 //
172 174 // TODO: consider building a non-persistent nodemap in memory to
173 175 // optimize these cases.
174 176 let mut found_by_prefix = None;
175 177 for rev in (0..self.len() as Revision).rev() {
176 178 let index_entry =
177 179 self.index.get_entry(rev).ok_or(HgError::corrupted(
178 180 "revlog references a revision not in the index",
179 181 ))?;
180 182 if node == *index_entry.hash() {
181 183 return Ok(rev);
182 184 }
183 185 if node.is_prefix_of(index_entry.hash()) {
184 186 if found_by_prefix.is_some() {
185 187 return Err(RevlogError::AmbiguousPrefix);
186 188 }
187 189 found_by_prefix = Some(rev)
188 190 }
189 191 }
190 192 found_by_prefix.ok_or(RevlogError::InvalidRevision)
191 193 }
192 194
193 195 /// Returns whether the given revision exists in this revlog.
194 196 pub fn has_rev(&self, rev: Revision) -> bool {
195 197 self.index.get_entry(rev).is_some()
196 198 }
197 199
198 200 /// Return the full data associated to a revision.
199 201 ///
200 202 /// All entries required to build the final data out of deltas will be
201 203 /// retrieved as needed, and the deltas will be applied to the inital
202 204 /// snapshot to rebuild the final data.
203 205 #[timed]
204 206 pub fn get_rev_data(
205 207 &self,
206 208 rev: Revision,
207 209 ) -> Result<Cow<[u8]>, RevlogError> {
208 210 if rev == NULL_REVISION {
209 211 return Ok(Cow::Borrowed(&[]));
210 212 };
211 213 Ok(self.get_entry(rev)?.data()?)
212 214 }
213 215
214 216 /// Check the hash of some given data against the recorded hash.
215 217 pub fn check_hash(
216 218 &self,
217 219 p1: Revision,
218 220 p2: Revision,
219 221 expected: &[u8],
220 222 data: &[u8],
221 223 ) -> bool {
222 224 let e1 = self.index.get_entry(p1);
223 225 let h1 = match e1 {
224 226 Some(ref entry) => entry.hash(),
225 227 None => &NULL_NODE,
226 228 };
227 229 let e2 = self.index.get_entry(p2);
228 230 let h2 = match e2 {
229 231 Some(ref entry) => entry.hash(),
230 232 None => &NULL_NODE,
231 233 };
232 234
233 235 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
234 236 }
235 237
236 238 /// Build the full data of a revision out its snapshot
237 239 /// and its deltas.
238 240 #[timed]
239 241 fn build_data_from_deltas(
240 242 snapshot: RevlogEntry,
241 243 deltas: &[RevlogEntry],
242 244 ) -> Result<Vec<u8>, HgError> {
243 245 let snapshot = snapshot.data_chunk()?;
244 246 let deltas = deltas
245 247 .iter()
246 248 .rev()
247 249 .map(RevlogEntry::data_chunk)
248 250 .collect::<Result<Vec<_>, _>>()?;
249 251 let patches: Vec<_> =
250 252 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
251 253 let patch = patch::fold_patch_lists(&patches);
252 254 Ok(patch.apply(&snapshot))
253 255 }
254 256
255 257 /// Return the revlog data.
256 258 fn data(&self) -> &[u8] {
257 259 match self.data_bytes {
258 260 Some(ref data_bytes) => &data_bytes,
259 261 None => panic!(
260 262 "forgot to load the data or trying to access inline data"
261 263 ),
262 264 }
263 265 }
264 266
267 pub fn make_null_entry(&self) -> RevlogEntry {
268 RevlogEntry {
269 revlog: self,
270 rev: NULL_REVISION,
271 bytes: b"",
272 compressed_len: 0,
273 uncompressed_len: 0,
274 base_rev_or_base_of_delta_chain: None,
275 p1: NULL_REVISION,
276 p2: NULL_REVISION,
277 flags: NULL_REVLOG_ENTRY_FLAGS,
278 hash: NULL_NODE,
279 }
280 }
281
265 282 /// Get an entry of the revlog.
266 283 pub fn get_entry(
267 284 &self,
268 285 rev: Revision,
269 286 ) -> Result<RevlogEntry, RevlogError> {
287 if rev == NULL_REVISION {
288 return Ok(self.make_null_entry());
289 }
270 290 let index_entry = self
271 291 .index
272 292 .get_entry(rev)
273 293 .ok_or(RevlogError::InvalidRevision)?;
274 294 let start = index_entry.offset();
275 295 let end = start + index_entry.compressed_len() as usize;
276 296 let data = if self.index.is_inline() {
277 297 self.index.data(start, end)
278 298 } else {
279 299 &self.data()[start..end]
280 300 };
281 301 let entry = RevlogEntry {
282 302 revlog: self,
283 303 rev,
284 304 bytes: data,
285 305 compressed_len: index_entry.compressed_len(),
286 306 uncompressed_len: index_entry.uncompressed_len(),
287 307 base_rev_or_base_of_delta_chain: if index_entry
288 308 .base_revision_or_base_of_delta_chain()
289 309 == rev
290 310 {
291 311 None
292 312 } else {
293 313 Some(index_entry.base_revision_or_base_of_delta_chain())
294 314 },
295 315 p1: index_entry.p1(),
296 316 p2: index_entry.p2(),
297 317 flags: index_entry.flags(),
298 318 hash: *index_entry.hash(),
299 319 };
300 320 Ok(entry)
301 321 }
302 322
303 323 /// when resolving internal references within revlog, any errors
304 324 /// should be reported as corruption, instead of e.g. "invalid revision"
305 325 fn get_entry_internal(
306 326 &self,
307 327 rev: Revision,
308 328 ) -> Result<RevlogEntry, HgError> {
309 329 return self.get_entry(rev).map_err(|_| corrupted());
310 330 }
311 331 }
312 332
313 333 /// The revlog entry's bytes and the necessary information to extract
314 334 /// the entry's data.
315 335 #[derive(Clone)]
316 336 pub struct RevlogEntry<'a> {
// The revlog this entry was read from; used to resolve delta bases and
// to check the hash.
317 337 revlog: &'a Revlog,
318 338 rev: Revision,
// The stored chunk for this entry; `data_chunk` decodes it according to
// its first byte.
319 339 bytes: &'a [u8],
320 340 compressed_len: u32,
// Signed: `uncompressed_len()` returns `None` when this is negative.
321 341 uncompressed_len: i32,
// `None` when the index records this entry as its own base, i.e. the
// entry is not a delta (see `is_delta`).
322 342 base_rev_or_base_of_delta_chain: Option<Revision>,
323 343 p1: Revision,
324 344 p2: Revision,
// Tested against the `REVISION_FLAG_*` bit masks.
325 345 flags: u16,
326 346 hash: Node,
327 347 }
328 348
329 349 impl<'a> RevlogEntry<'a> {
330 350 pub fn revision(&self) -> Revision {
331 351 self.rev
332 352 }
333 353
334 354 pub fn uncompressed_len(&self) -> Option<u32> {
335 355 u32::try_from(self.uncompressed_len).ok()
336 356 }
337 357
338 358 pub fn has_p1(&self) -> bool {
339 359 self.p1 != NULL_REVISION
340 360 }
341 361
342 362 pub fn is_cencored(&self) -> bool {
343 363 (self.flags & REVISION_FLAG_CENSORED) != 0
344 364 }
345 365
346 366 pub fn has_length_affecting_flag_processor(&self) -> bool {
347 367 // Relevant Python code: revlog.size()
348 368 // note: ELLIPSIS is known to not change the content
349 369 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
350 370 }
351 371
352 372 /// The data for this entry, after resolving deltas if any.
353 373 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
354 374 let mut entry = self.clone();
355 375 let mut delta_chain = vec![];
356 376
357 377 // The meaning of `base_rev_or_base_of_delta_chain` depends on
358 378 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
359 379 // `mercurial/revlogutils/constants.py` and the code in
360 380 // [_chaininfo] and in [index_deltachain].
361 381 let uses_generaldelta = self.revlog.index.uses_generaldelta();
362 382 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
363 383 let base_rev = if uses_generaldelta {
364 384 base_rev
365 385 } else {
366 386 entry.rev - 1
367 387 };
368 388 delta_chain.push(entry);
369 389 entry = self.revlog.get_entry_internal(base_rev)?;
370 390 }
371 391
372 392 let data = if delta_chain.is_empty() {
373 393 entry.data_chunk()?
374 394 } else {
375 395 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
376 396 };
377 397
378 398 if self.revlog.check_hash(
379 399 self.p1,
380 400 self.p2,
381 401 self.hash.as_bytes(),
382 402 &data,
383 403 ) {
384 404 Ok(data)
385 405 } else {
386 406 Err(corrupted())
387 407 }
388 408 }
389 409
390 410 /// Extract the data contained in the entry.
391 411 /// This may be a delta. (See `is_delta`.)
392 412 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
393 413 if self.bytes.is_empty() {
394 414 return Ok(Cow::Borrowed(&[]));
395 415 }
396 416 match self.bytes[0] {
397 417 // Revision data is the entirety of the entry, including this
398 418 // header.
399 419 b'\0' => Ok(Cow::Borrowed(self.bytes)),
400 420 // Raw revision data follows.
401 421 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
402 422 // zlib (RFC 1950) data.
403 423 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
404 424 // zstd data.
405 425 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
406 426 // A proper new format should have had a repo/store requirement.
407 427 _format_type => Err(corrupted()),
408 428 }
409 429 }
410 430
411 431 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
412 432 let mut decoder = ZlibDecoder::new(self.bytes);
413 433 if self.is_delta() {
414 434 let mut buf = Vec::with_capacity(self.compressed_len as usize);
415 435 decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
416 436 Ok(buf)
417 437 } else {
418 438 let cap = self.uncompressed_len.max(0) as usize;
419 439 let mut buf = vec![0; cap];
420 440 decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
421 441 Ok(buf)
422 442 }
423 443 }
424 444
425 445 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
426 446 if self.is_delta() {
427 447 let mut buf = Vec::with_capacity(self.compressed_len as usize);
428 448 zstd::stream::copy_decode(self.bytes, &mut buf)
429 449 .map_err(|_| corrupted())?;
430 450 Ok(buf)
431 451 } else {
432 452 let cap = self.uncompressed_len.max(0) as usize;
433 453 let mut buf = vec![0; cap];
434 454 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
435 455 .map_err(|_| corrupted())?;
436 456 if len != self.uncompressed_len as usize {
437 457 Err(corrupted())
438 458 } else {
439 459 Ok(buf)
440 460 }
441 461 }
442 462 }
443 463
444 464 /// Tell if the entry is a snapshot or a delta
445 465 /// (influences on decompression).
446 466 fn is_delta(&self) -> bool {
447 467 self.base_rev_or_base_of_delta_chain.is_some()
448 468 }
449 469 }
450 470
451 471 /// Calculate the hash of a revision given its data and its parents.
452 472 fn hash(
453 473 data: &[u8],
454 474 p1_hash: &[u8],
455 475 p2_hash: &[u8],
456 476 ) -> [u8; NODE_BYTES_LENGTH] {
457 477 let mut hasher = Sha1::new();
458 478 let (a, b) = (p1_hash, p2_hash);
459 479 if a > b {
460 480 hasher.update(b);
461 481 hasher.update(a);
462 482 } else {
463 483 hasher.update(a);
464 484 hasher.update(b);
465 485 }
466 486 hasher.update(data);
467 487 *hasher.finalize().as_ref()
468 488 }
@@ -1,89 +1,83 b''
1 1 $ hg init empty-repo
2 2 $ cd empty-repo
3 3
4 4 Flags on revlog version 0 are rejected
5 5
6 6 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
7 7 ... fh.write(b'\x00\x01\x00\x00') and None
8 8
9 9 $ hg log
10 10 abort: unknown flags (0x01) in version 0 revlog 00changelog
11 11 [50]
12 12
13 13 Unknown flags on revlog version 1 are rejected
14 14
15 15 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
16 16 ... fh.write(b'\x00\x04\x00\x01') and None
17 17
18 18 $ hg log
19 19 abort: unknown flags (0x04) in version 1 revlog 00changelog
20 20 [50]
21 21
22 22 Unknown version is rejected
23 23
24 24 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
25 25 ... fh.write(b'\x00\x00\xbe\xef') and None
26 26
27 27 $ hg log
28 28 abort: unknown version (48879) in revlog 00changelog
29 29 [50]
30 30
31 31 $ cd ..
32 32
33 33 Test for CVE-2016-3630
34 34
35 35 $ mkdir test2; cd test2
36 36 $ hg init
37 37
38 38 >>> import codecs
39 39 >>> open("a.i", "wb").write(codecs.decode(codecs.decode(
40 40 ... b"""eJxjYGZgZIAAYQYGxhgom+k/FMx8YKx9ZUaKSOyqo4cnuKb8mbqHV5cBCVTMWb1Cwqkhe4Gsg9AD
41 41 ... Joa3dYtcYYYBAQ8Qr4OqZAYRICPTSr5WKd/42rV36d+8/VmrNpv7NP1jQAXrQE4BqQUARngwVA==""",
42 42 ... "base64"), "zlib")) and None
43 43
44 44 $ hg debugrevlogindex a.i
45 45 rev linkrev nodeid p1 p2
46 46 0 2 99e0332bd498 000000000000 000000000000
47 47 1 3 6674f57a23d8 99e0332bd498 000000000000
48 48
49 49 >>> from mercurial.revlogutils.constants import KIND_OTHER
50 50 >>> from mercurial import revlog, vfs
51 51 >>> tvfs = vfs.vfs(b'.')
52 52 >>> tvfs.options = {b'revlogv1': True}
53 53 >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), radix=b'a')
54 54 >>> rl.revision(1)
55 55 mpatchError(*'patch cannot be decoded'*) (glob)
56 56
57 57 $ cd ..
58 58
59 59
60 60 Regression test for support for the old repos with strange diff encoding.
61 61 Apparently it used to be possible (maybe it's still possible, but we don't know how)
62 62 to create commits whose diffs are encoded relative to a nullrev.
63 63 This test checks that a repo with that encoding can still be read.
64 64
65 65 This is what we did to produce the repo in test-revlog-diff-relative-to-nullrev.tar:
66 66
67 67 - tweak the code in mercurial/revlogutils/deltas.py to produce such "trivial" deltas:
68 68 > if deltainfo is None:
69 69 > - deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
70 70 > + deltainfo = self._builddeltainfo(revinfo, nullrev, fh)
71 71 - hg init
72 72 - echo hi > a
73 73 - hg commit -Am_
74 74 - remove some cache files
75 75
76 76 $ tar --force-local -xf "$TESTDIR"/bundles/test-revlog-diff-relative-to-nullrev.tar
77 77 $ cd nullrev-diff
78 78 $ hg debugdeltachain a
79 79 rev chain# chainlen prev delta size rawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity srchunks
80 80 0 1 2 -1 p1 15 3 15 5.00000 15 0 0.00000 15 15 1.00000 1
81 #if rhg
82 $ hg cat --config rhg.cat=true -r 0 a
83 abort: corrupted revlog
84 [255]
85 #else
86 81 $ hg cat --config rhg.cat=true -r 0 a
87 82 hi
88 #endif
89 83 $ cd ..
General Comments 0
You need to be logged in to leave comments. Login now