##// END OF EJS Templates
rhg: correctly handle the case where diffs are encoded relative to nullrev...
Arseniy Alekseyev -
r50105:cc92ad0e stable
parent child Browse files
Show More
@@ -1,468 +1,488 b''
1 use std::borrow::Cow;
1 use std::borrow::Cow;
2 use std::convert::TryFrom;
2 use std::convert::TryFrom;
3 use std::io::Read;
3 use std::io::Read;
4 use std::ops::Deref;
4 use std::ops::Deref;
5 use std::path::Path;
5 use std::path::Path;
6
6
7 use flate2::read::ZlibDecoder;
7 use flate2::read::ZlibDecoder;
8 use micro_timer::timed;
8 use micro_timer::timed;
9 use sha1::{Digest, Sha1};
9 use sha1::{Digest, Sha1};
10 use zstd;
10 use zstd;
11
11
12 use super::index::Index;
12 use super::index::Index;
13 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
13 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
14 use super::nodemap;
14 use super::nodemap;
15 use super::nodemap::{NodeMap, NodeMapError};
15 use super::nodemap::{NodeMap, NodeMapError};
16 use super::nodemap_docket::NodeMapDocket;
16 use super::nodemap_docket::NodeMapDocket;
17 use super::patch;
17 use super::patch;
18 use crate::errors::HgError;
18 use crate::errors::HgError;
19 use crate::repo::Repo;
19 use crate::repo::Repo;
20 use crate::revlog::Revision;
20 use crate::revlog::Revision;
21 use crate::{Node, NULL_REVISION};
21 use crate::{Node, NULL_REVISION};
22
22
23 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
23 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
24 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
24 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
25 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
25 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
26 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
26 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
27
27
28 // Keep this in sync with REVIDX_KNOWN_FLAGS in
28 // Keep this in sync with REVIDX_KNOWN_FLAGS in
29 // mercurial/revlogutils/flagutil.py
29 // mercurial/revlogutils/flagutil.py
30 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
30 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
31 | REVISION_FLAG_ELLIPSIS
31 | REVISION_FLAG_ELLIPSIS
32 | REVISION_FLAG_EXTSTORED
32 | REVISION_FLAG_EXTSTORED
33 | REVISION_FLAG_HASCOPIESINFO;
33 | REVISION_FLAG_HASCOPIESINFO;
34
34
35 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
36
35 #[derive(derive_more::From)]
37 #[derive(derive_more::From)]
36 pub enum RevlogError {
38 pub enum RevlogError {
37 InvalidRevision,
39 InvalidRevision,
38 /// Working directory is not supported
40 /// Working directory is not supported
39 WDirUnsupported,
41 WDirUnsupported,
40 /// Found more than one entry whose ID matches the requested prefix
42 /// Found more than one entry whose ID matches the requested prefix
41 AmbiguousPrefix,
43 AmbiguousPrefix,
42 #[from]
44 #[from]
43 Other(HgError),
45 Other(HgError),
44 }
46 }
45
47
46 impl From<NodeMapError> for RevlogError {
48 impl From<NodeMapError> for RevlogError {
47 fn from(error: NodeMapError) -> Self {
49 fn from(error: NodeMapError) -> Self {
48 match error {
50 match error {
49 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
51 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
50 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
52 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
51 }
53 }
52 }
54 }
53 }
55 }
54
56
55 fn corrupted() -> HgError {
57 fn corrupted() -> HgError {
56 HgError::corrupted("corrupted revlog")
58 HgError::corrupted("corrupted revlog")
57 }
59 }
58
60
59 impl RevlogError {
61 impl RevlogError {
60 fn corrupted() -> Self {
62 fn corrupted() -> Self {
61 RevlogError::Other(corrupted())
63 RevlogError::Other(corrupted())
62 }
64 }
63 }
65 }
64
66
65 /// Read only implementation of revlog.
67 /// Read only implementation of revlog.
66 pub struct Revlog {
68 pub struct Revlog {
67 /// When index and data are not interleaved: bytes of the revlog index.
69 /// When index and data are not interleaved: bytes of the revlog index.
68 /// When index and data are interleaved: bytes of the revlog index and
70 /// When index and data are interleaved: bytes of the revlog index and
69 /// data.
71 /// data.
70 index: Index,
72 index: Index,
71 /// When index and data are not interleaved: bytes of the revlog data
73 /// When index and data are not interleaved: bytes of the revlog data
72 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
74 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
73 /// When present on disk: the persistent nodemap for this revlog
75 /// When present on disk: the persistent nodemap for this revlog
74 nodemap: Option<nodemap::NodeTree>,
76 nodemap: Option<nodemap::NodeTree>,
75 }
77 }
76
78
77 impl Revlog {
79 impl Revlog {
78 /// Open a revlog index file.
80 /// Open a revlog index file.
79 ///
81 ///
80 /// It will also open the associated data file if index and data are not
82 /// It will also open the associated data file if index and data are not
81 /// interleaved.
83 /// interleaved.
82 #[timed]
84 #[timed]
83 pub fn open(
85 pub fn open(
84 repo: &Repo,
86 repo: &Repo,
85 index_path: impl AsRef<Path>,
87 index_path: impl AsRef<Path>,
86 data_path: Option<&Path>,
88 data_path: Option<&Path>,
87 ) -> Result<Self, HgError> {
89 ) -> Result<Self, HgError> {
88 let index_path = index_path.as_ref();
90 let index_path = index_path.as_ref();
89 let index = {
91 let index = {
90 match repo.store_vfs().mmap_open_opt(&index_path)? {
92 match repo.store_vfs().mmap_open_opt(&index_path)? {
91 None => Index::new(Box::new(vec![])),
93 None => Index::new(Box::new(vec![])),
92 Some(index_mmap) => {
94 Some(index_mmap) => {
93 let index = Index::new(Box::new(index_mmap))?;
95 let index = Index::new(Box::new(index_mmap))?;
94 Ok(index)
96 Ok(index)
95 }
97 }
96 }
98 }
97 }?;
99 }?;
98
100
99 let default_data_path = index_path.with_extension("d");
101 let default_data_path = index_path.with_extension("d");
100
102
101 // type annotation required
103 // type annotation required
102 // won't recognize Mmap as Deref<Target = [u8]>
104 // won't recognize Mmap as Deref<Target = [u8]>
103 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
105 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
104 if index.is_inline() {
106 if index.is_inline() {
105 None
107 None
106 } else {
108 } else {
107 let data_path = data_path.unwrap_or(&default_data_path);
109 let data_path = data_path.unwrap_or(&default_data_path);
108 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
110 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
109 Some(Box::new(data_mmap))
111 Some(Box::new(data_mmap))
110 };
112 };
111
113
112 let nodemap = if index.is_inline() {
114 let nodemap = if index.is_inline() {
113 None
115 None
114 } else {
116 } else {
115 NodeMapDocket::read_from_file(repo, index_path)?.map(
117 NodeMapDocket::read_from_file(repo, index_path)?.map(
116 |(docket, data)| {
118 |(docket, data)| {
117 nodemap::NodeTree::load_bytes(
119 nodemap::NodeTree::load_bytes(
118 Box::new(data),
120 Box::new(data),
119 docket.data_length,
121 docket.data_length,
120 )
122 )
121 },
123 },
122 )
124 )
123 };
125 };
124
126
125 Ok(Revlog {
127 Ok(Revlog {
126 index,
128 index,
127 data_bytes,
129 data_bytes,
128 nodemap,
130 nodemap,
129 })
131 })
130 }
132 }
131
133
132 /// Return number of entries of the `Revlog`.
134 /// Return number of entries of the `Revlog`.
133 pub fn len(&self) -> usize {
135 pub fn len(&self) -> usize {
134 self.index.len()
136 self.index.len()
135 }
137 }
136
138
137 /// Returns `true` if the `Revlog` has zero `entries`.
139 /// Returns `true` if the `Revlog` has zero `entries`.
138 pub fn is_empty(&self) -> bool {
140 pub fn is_empty(&self) -> bool {
139 self.index.is_empty()
141 self.index.is_empty()
140 }
142 }
141
143
142 /// Returns the node ID for the given revision number, if it exists in this
144 /// Returns the node ID for the given revision number, if it exists in this
143 /// revlog
145 /// revlog
144 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
146 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
145 if rev == NULL_REVISION {
147 if rev == NULL_REVISION {
146 return Some(&NULL_NODE);
148 return Some(&NULL_NODE);
147 }
149 }
148 Some(self.index.get_entry(rev)?.hash())
150 Some(self.index.get_entry(rev)?.hash())
149 }
151 }
150
152
151 /// Return the revision number for the given node ID, if it exists in this
153 /// Return the revision number for the given node ID, if it exists in this
152 /// revlog
154 /// revlog
153 #[timed]
155 #[timed]
154 pub fn rev_from_node(
156 pub fn rev_from_node(
155 &self,
157 &self,
156 node: NodePrefix,
158 node: NodePrefix,
157 ) -> Result<Revision, RevlogError> {
159 ) -> Result<Revision, RevlogError> {
158 if node.is_prefix_of(&NULL_NODE) {
160 if node.is_prefix_of(&NULL_NODE) {
159 return Ok(NULL_REVISION);
161 return Ok(NULL_REVISION);
160 }
162 }
161
163
162 if let Some(nodemap) = &self.nodemap {
164 if let Some(nodemap) = &self.nodemap {
163 return nodemap
165 return nodemap
164 .find_bin(&self.index, node)?
166 .find_bin(&self.index, node)?
165 .ok_or(RevlogError::InvalidRevision);
167 .ok_or(RevlogError::InvalidRevision);
166 }
168 }
167
169
168 // Fallback to linear scan when a persistent nodemap is not present.
170 // Fallback to linear scan when a persistent nodemap is not present.
169 // This happens when the persistent-nodemap experimental feature is not
171 // This happens when the persistent-nodemap experimental feature is not
170 // enabled, or for small revlogs.
172 // enabled, or for small revlogs.
171 //
173 //
172 // TODO: consider building a non-persistent nodemap in memory to
174 // TODO: consider building a non-persistent nodemap in memory to
173 // optimize these cases.
175 // optimize these cases.
174 let mut found_by_prefix = None;
176 let mut found_by_prefix = None;
175 for rev in (0..self.len() as Revision).rev() {
177 for rev in (0..self.len() as Revision).rev() {
176 let index_entry =
178 let index_entry =
177 self.index.get_entry(rev).ok_or(HgError::corrupted(
179 self.index.get_entry(rev).ok_or(HgError::corrupted(
178 "revlog references a revision not in the index",
180 "revlog references a revision not in the index",
179 ))?;
181 ))?;
180 if node == *index_entry.hash() {
182 if node == *index_entry.hash() {
181 return Ok(rev);
183 return Ok(rev);
182 }
184 }
183 if node.is_prefix_of(index_entry.hash()) {
185 if node.is_prefix_of(index_entry.hash()) {
184 if found_by_prefix.is_some() {
186 if found_by_prefix.is_some() {
185 return Err(RevlogError::AmbiguousPrefix);
187 return Err(RevlogError::AmbiguousPrefix);
186 }
188 }
187 found_by_prefix = Some(rev)
189 found_by_prefix = Some(rev)
188 }
190 }
189 }
191 }
190 found_by_prefix.ok_or(RevlogError::InvalidRevision)
192 found_by_prefix.ok_or(RevlogError::InvalidRevision)
191 }
193 }
192
194
193 /// Returns whether the given revision exists in this revlog.
195 /// Returns whether the given revision exists in this revlog.
194 pub fn has_rev(&self, rev: Revision) -> bool {
196 pub fn has_rev(&self, rev: Revision) -> bool {
195 self.index.get_entry(rev).is_some()
197 self.index.get_entry(rev).is_some()
196 }
198 }
197
199
198 /// Return the full data associated to a revision.
200 /// Return the full data associated to a revision.
199 ///
201 ///
200 /// All entries required to build the final data out of deltas will be
202 /// All entries required to build the final data out of deltas will be
201 /// retrieved as needed, and the deltas will be applied to the initial
203 /// retrieved as needed, and the deltas will be applied to the initial
202 /// snapshot to rebuild the final data.
204 /// snapshot to rebuild the final data.
203 #[timed]
205 #[timed]
204 pub fn get_rev_data(
206 pub fn get_rev_data(
205 &self,
207 &self,
206 rev: Revision,
208 rev: Revision,
207 ) -> Result<Cow<[u8]>, RevlogError> {
209 ) -> Result<Cow<[u8]>, RevlogError> {
208 if rev == NULL_REVISION {
210 if rev == NULL_REVISION {
209 return Ok(Cow::Borrowed(&[]));
211 return Ok(Cow::Borrowed(&[]));
210 };
212 };
211 Ok(self.get_entry(rev)?.data()?)
213 Ok(self.get_entry(rev)?.data()?)
212 }
214 }
213
215
214 /// Check the hash of some given data against the recorded hash.
216 /// Check the hash of some given data against the recorded hash.
215 pub fn check_hash(
217 pub fn check_hash(
216 &self,
218 &self,
217 p1: Revision,
219 p1: Revision,
218 p2: Revision,
220 p2: Revision,
219 expected: &[u8],
221 expected: &[u8],
220 data: &[u8],
222 data: &[u8],
221 ) -> bool {
223 ) -> bool {
222 let e1 = self.index.get_entry(p1);
224 let e1 = self.index.get_entry(p1);
223 let h1 = match e1 {
225 let h1 = match e1 {
224 Some(ref entry) => entry.hash(),
226 Some(ref entry) => entry.hash(),
225 None => &NULL_NODE,
227 None => &NULL_NODE,
226 };
228 };
227 let e2 = self.index.get_entry(p2);
229 let e2 = self.index.get_entry(p2);
228 let h2 = match e2 {
230 let h2 = match e2 {
229 Some(ref entry) => entry.hash(),
231 Some(ref entry) => entry.hash(),
230 None => &NULL_NODE,
232 None => &NULL_NODE,
231 };
233 };
232
234
233 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
235 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
234 }
236 }
235
237
236 /// Build the full data of a revision out of its snapshot
238 /// Build the full data of a revision out of its snapshot
237 /// and its deltas.
239 /// and its deltas.
238 #[timed]
240 #[timed]
239 fn build_data_from_deltas(
241 fn build_data_from_deltas(
240 snapshot: RevlogEntry,
242 snapshot: RevlogEntry,
241 deltas: &[RevlogEntry],
243 deltas: &[RevlogEntry],
242 ) -> Result<Vec<u8>, HgError> {
244 ) -> Result<Vec<u8>, HgError> {
243 let snapshot = snapshot.data_chunk()?;
245 let snapshot = snapshot.data_chunk()?;
244 let deltas = deltas
246 let deltas = deltas
245 .iter()
247 .iter()
246 .rev()
248 .rev()
247 .map(RevlogEntry::data_chunk)
249 .map(RevlogEntry::data_chunk)
248 .collect::<Result<Vec<_>, _>>()?;
250 .collect::<Result<Vec<_>, _>>()?;
249 let patches: Vec<_> =
251 let patches: Vec<_> =
250 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
252 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
251 let patch = patch::fold_patch_lists(&patches);
253 let patch = patch::fold_patch_lists(&patches);
252 Ok(patch.apply(&snapshot))
254 Ok(patch.apply(&snapshot))
253 }
255 }
254
256
255 /// Return the revlog data.
257 /// Return the revlog data.
256 fn data(&self) -> &[u8] {
258 fn data(&self) -> &[u8] {
257 match self.data_bytes {
259 match self.data_bytes {
258 Some(ref data_bytes) => &data_bytes,
260 Some(ref data_bytes) => &data_bytes,
259 None => panic!(
261 None => panic!(
260 "forgot to load the data or trying to access inline data"
262 "forgot to load the data or trying to access inline data"
261 ),
263 ),
262 }
264 }
263 }
265 }
264
266
267 pub fn make_null_entry(&self) -> RevlogEntry {
268 RevlogEntry {
269 revlog: self,
270 rev: NULL_REVISION,
271 bytes: b"",
272 compressed_len: 0,
273 uncompressed_len: 0,
274 base_rev_or_base_of_delta_chain: None,
275 p1: NULL_REVISION,
276 p2: NULL_REVISION,
277 flags: NULL_REVLOG_ENTRY_FLAGS,
278 hash: NULL_NODE,
279 }
280 }
281
265 /// Get an entry of the revlog.
282 /// Get an entry of the revlog.
266 pub fn get_entry(
283 pub fn get_entry(
267 &self,
284 &self,
268 rev: Revision,
285 rev: Revision,
269 ) -> Result<RevlogEntry, RevlogError> {
286 ) -> Result<RevlogEntry, RevlogError> {
287 if rev == NULL_REVISION {
288 return Ok(self.make_null_entry());
289 }
270 let index_entry = self
290 let index_entry = self
271 .index
291 .index
272 .get_entry(rev)
292 .get_entry(rev)
273 .ok_or(RevlogError::InvalidRevision)?;
293 .ok_or(RevlogError::InvalidRevision)?;
274 let start = index_entry.offset();
294 let start = index_entry.offset();
275 let end = start + index_entry.compressed_len() as usize;
295 let end = start + index_entry.compressed_len() as usize;
276 let data = if self.index.is_inline() {
296 let data = if self.index.is_inline() {
277 self.index.data(start, end)
297 self.index.data(start, end)
278 } else {
298 } else {
279 &self.data()[start..end]
299 &self.data()[start..end]
280 };
300 };
281 let entry = RevlogEntry {
301 let entry = RevlogEntry {
282 revlog: self,
302 revlog: self,
283 rev,
303 rev,
284 bytes: data,
304 bytes: data,
285 compressed_len: index_entry.compressed_len(),
305 compressed_len: index_entry.compressed_len(),
286 uncompressed_len: index_entry.uncompressed_len(),
306 uncompressed_len: index_entry.uncompressed_len(),
287 base_rev_or_base_of_delta_chain: if index_entry
307 base_rev_or_base_of_delta_chain: if index_entry
288 .base_revision_or_base_of_delta_chain()
308 .base_revision_or_base_of_delta_chain()
289 == rev
309 == rev
290 {
310 {
291 None
311 None
292 } else {
312 } else {
293 Some(index_entry.base_revision_or_base_of_delta_chain())
313 Some(index_entry.base_revision_or_base_of_delta_chain())
294 },
314 },
295 p1: index_entry.p1(),
315 p1: index_entry.p1(),
296 p2: index_entry.p2(),
316 p2: index_entry.p2(),
297 flags: index_entry.flags(),
317 flags: index_entry.flags(),
298 hash: *index_entry.hash(),
318 hash: *index_entry.hash(),
299 };
319 };
300 Ok(entry)
320 Ok(entry)
301 }
321 }
302
322
303 /// When resolving internal references within the revlog, any errors
323 /// When resolving internal references within the revlog, any errors
304 /// should be reported as corruption, instead of e.g. "invalid revision"
324 /// should be reported as corruption, instead of e.g. "invalid revision"
305 fn get_entry_internal(
325 fn get_entry_internal(
306 &self,
326 &self,
307 rev: Revision,
327 rev: Revision,
308 ) -> Result<RevlogEntry, HgError> {
328 ) -> Result<RevlogEntry, HgError> {
309 return self.get_entry(rev).map_err(|_| corrupted());
329 return self.get_entry(rev).map_err(|_| corrupted());
310 }
330 }
311 }
331 }
312
332
313 /// The revlog entry's bytes and the necessary information to extract
333 /// The revlog entry's bytes and the necessary information to extract
314 /// the entry's data.
334 /// the entry's data.
315 #[derive(Clone)]
335 #[derive(Clone)]
316 pub struct RevlogEntry<'a> {
336 pub struct RevlogEntry<'a> {
317 revlog: &'a Revlog,
337 revlog: &'a Revlog,
318 rev: Revision,
338 rev: Revision,
319 bytes: &'a [u8],
339 bytes: &'a [u8],
320 compressed_len: u32,
340 compressed_len: u32,
321 uncompressed_len: i32,
341 uncompressed_len: i32,
322 base_rev_or_base_of_delta_chain: Option<Revision>,
342 base_rev_or_base_of_delta_chain: Option<Revision>,
323 p1: Revision,
343 p1: Revision,
324 p2: Revision,
344 p2: Revision,
325 flags: u16,
345 flags: u16,
326 hash: Node,
346 hash: Node,
327 }
347 }
328
348
329 impl<'a> RevlogEntry<'a> {
349 impl<'a> RevlogEntry<'a> {
330 pub fn revision(&self) -> Revision {
350 pub fn revision(&self) -> Revision {
331 self.rev
351 self.rev
332 }
352 }
333
353
334 pub fn uncompressed_len(&self) -> Option<u32> {
354 pub fn uncompressed_len(&self) -> Option<u32> {
335 u32::try_from(self.uncompressed_len).ok()
355 u32::try_from(self.uncompressed_len).ok()
336 }
356 }
337
357
338 pub fn has_p1(&self) -> bool {
358 pub fn has_p1(&self) -> bool {
339 self.p1 != NULL_REVISION
359 self.p1 != NULL_REVISION
340 }
360 }
341
361
342 pub fn is_cencored(&self) -> bool {
362 pub fn is_cencored(&self) -> bool {
343 (self.flags & REVISION_FLAG_CENSORED) != 0
363 (self.flags & REVISION_FLAG_CENSORED) != 0
344 }
364 }
345
365
346 pub fn has_length_affecting_flag_processor(&self) -> bool {
366 pub fn has_length_affecting_flag_processor(&self) -> bool {
347 // Relevant Python code: revlog.size()
367 // Relevant Python code: revlog.size()
348 // note: ELLIPSIS is known to not change the content
368 // note: ELLIPSIS is known to not change the content
349 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
369 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
350 }
370 }
351
371
352 /// The data for this entry, after resolving deltas if any.
372 /// The data for this entry, after resolving deltas if any.
353 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
373 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
354 let mut entry = self.clone();
374 let mut entry = self.clone();
355 let mut delta_chain = vec![];
375 let mut delta_chain = vec![];
356
376
357 // The meaning of `base_rev_or_base_of_delta_chain` depends on
377 // The meaning of `base_rev_or_base_of_delta_chain` depends on
358 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
378 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
359 // `mercurial/revlogutils/constants.py` and the code in
379 // `mercurial/revlogutils/constants.py` and the code in
360 // [_chaininfo] and in [index_deltachain].
380 // [_chaininfo] and in [index_deltachain].
361 let uses_generaldelta = self.revlog.index.uses_generaldelta();
381 let uses_generaldelta = self.revlog.index.uses_generaldelta();
362 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
382 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
363 let base_rev = if uses_generaldelta {
383 let base_rev = if uses_generaldelta {
364 base_rev
384 base_rev
365 } else {
385 } else {
366 entry.rev - 1
386 entry.rev - 1
367 };
387 };
368 delta_chain.push(entry);
388 delta_chain.push(entry);
369 entry = self.revlog.get_entry_internal(base_rev)?;
389 entry = self.revlog.get_entry_internal(base_rev)?;
370 }
390 }
371
391
372 let data = if delta_chain.is_empty() {
392 let data = if delta_chain.is_empty() {
373 entry.data_chunk()?
393 entry.data_chunk()?
374 } else {
394 } else {
375 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
395 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
376 };
396 };
377
397
378 if self.revlog.check_hash(
398 if self.revlog.check_hash(
379 self.p1,
399 self.p1,
380 self.p2,
400 self.p2,
381 self.hash.as_bytes(),
401 self.hash.as_bytes(),
382 &data,
402 &data,
383 ) {
403 ) {
384 Ok(data)
404 Ok(data)
385 } else {
405 } else {
386 Err(corrupted())
406 Err(corrupted())
387 }
407 }
388 }
408 }
389
409
390 /// Extract the data contained in the entry.
410 /// Extract the data contained in the entry.
391 /// This may be a delta. (See `is_delta`.)
411 /// This may be a delta. (See `is_delta`.)
392 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
412 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
393 if self.bytes.is_empty() {
413 if self.bytes.is_empty() {
394 return Ok(Cow::Borrowed(&[]));
414 return Ok(Cow::Borrowed(&[]));
395 }
415 }
396 match self.bytes[0] {
416 match self.bytes[0] {
397 // Revision data is the entirety of the entry, including this
417 // Revision data is the entirety of the entry, including this
398 // header.
418 // header.
399 b'\0' => Ok(Cow::Borrowed(self.bytes)),
419 b'\0' => Ok(Cow::Borrowed(self.bytes)),
400 // Raw revision data follows.
420 // Raw revision data follows.
401 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
421 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
402 // zlib (RFC 1950) data.
422 // zlib (RFC 1950) data.
403 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
423 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
404 // zstd data.
424 // zstd data.
405 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
425 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
406 // A proper new format should have had a repo/store requirement.
426 // A proper new format should have had a repo/store requirement.
407 _format_type => Err(corrupted()),
427 _format_type => Err(corrupted()),
408 }
428 }
409 }
429 }
410
430
411 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
431 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
412 let mut decoder = ZlibDecoder::new(self.bytes);
432 let mut decoder = ZlibDecoder::new(self.bytes);
413 if self.is_delta() {
433 if self.is_delta() {
414 let mut buf = Vec::with_capacity(self.compressed_len as usize);
434 let mut buf = Vec::with_capacity(self.compressed_len as usize);
415 decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
435 decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
416 Ok(buf)
436 Ok(buf)
417 } else {
437 } else {
418 let cap = self.uncompressed_len.max(0) as usize;
438 let cap = self.uncompressed_len.max(0) as usize;
419 let mut buf = vec![0; cap];
439 let mut buf = vec![0; cap];
420 decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
440 decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
421 Ok(buf)
441 Ok(buf)
422 }
442 }
423 }
443 }
424
444
425 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
445 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
426 if self.is_delta() {
446 if self.is_delta() {
427 let mut buf = Vec::with_capacity(self.compressed_len as usize);
447 let mut buf = Vec::with_capacity(self.compressed_len as usize);
428 zstd::stream::copy_decode(self.bytes, &mut buf)
448 zstd::stream::copy_decode(self.bytes, &mut buf)
429 .map_err(|_| corrupted())?;
449 .map_err(|_| corrupted())?;
430 Ok(buf)
450 Ok(buf)
431 } else {
451 } else {
432 let cap = self.uncompressed_len.max(0) as usize;
452 let cap = self.uncompressed_len.max(0) as usize;
433 let mut buf = vec![0; cap];
453 let mut buf = vec![0; cap];
434 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
454 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
435 .map_err(|_| corrupted())?;
455 .map_err(|_| corrupted())?;
436 if len != self.uncompressed_len as usize {
456 if len != self.uncompressed_len as usize {
437 Err(corrupted())
457 Err(corrupted())
438 } else {
458 } else {
439 Ok(buf)
459 Ok(buf)
440 }
460 }
441 }
461 }
442 }
462 }
443
463
444 /// Tell if the entry is a snapshot or a delta
464 /// Tell if the entry is a snapshot or a delta
445 /// (this influences decompression).
465 /// (this influences decompression).
446 fn is_delta(&self) -> bool {
466 fn is_delta(&self) -> bool {
447 self.base_rev_or_base_of_delta_chain.is_some()
467 self.base_rev_or_base_of_delta_chain.is_some()
448 }
468 }
449 }
469 }
450
470
451 /// Calculate the hash of a revision given its data and its parents.
471 /// Calculate the hash of a revision given its data and its parents.
452 fn hash(
472 fn hash(
453 data: &[u8],
473 data: &[u8],
454 p1_hash: &[u8],
474 p1_hash: &[u8],
455 p2_hash: &[u8],
475 p2_hash: &[u8],
456 ) -> [u8; NODE_BYTES_LENGTH] {
476 ) -> [u8; NODE_BYTES_LENGTH] {
457 let mut hasher = Sha1::new();
477 let mut hasher = Sha1::new();
458 let (a, b) = (p1_hash, p2_hash);
478 let (a, b) = (p1_hash, p2_hash);
459 if a > b {
479 if a > b {
460 hasher.update(b);
480 hasher.update(b);
461 hasher.update(a);
481 hasher.update(a);
462 } else {
482 } else {
463 hasher.update(a);
483 hasher.update(a);
464 hasher.update(b);
484 hasher.update(b);
465 }
485 }
466 hasher.update(data);
486 hasher.update(data);
467 *hasher.finalize().as_ref()
487 *hasher.finalize().as_ref()
468 }
488 }
@@ -1,89 +1,83 b''
1 $ hg init empty-repo
1 $ hg init empty-repo
2 $ cd empty-repo
2 $ cd empty-repo
3
3
4 Flags on revlog version 0 are rejected
4 Flags on revlog version 0 are rejected
5
5
6 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
6 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
7 ... fh.write(b'\x00\x01\x00\x00') and None
7 ... fh.write(b'\x00\x01\x00\x00') and None
8
8
9 $ hg log
9 $ hg log
10 abort: unknown flags (0x01) in version 0 revlog 00changelog
10 abort: unknown flags (0x01) in version 0 revlog 00changelog
11 [50]
11 [50]
12
12
13 Unknown flags on revlog version 1 are rejected
13 Unknown flags on revlog version 1 are rejected
14
14
15 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
15 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
16 ... fh.write(b'\x00\x04\x00\x01') and None
16 ... fh.write(b'\x00\x04\x00\x01') and None
17
17
18 $ hg log
18 $ hg log
19 abort: unknown flags (0x04) in version 1 revlog 00changelog
19 abort: unknown flags (0x04) in version 1 revlog 00changelog
20 [50]
20 [50]
21
21
22 Unknown version is rejected
22 Unknown version is rejected
23
23
24 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
24 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
25 ... fh.write(b'\x00\x00\xbe\xef') and None
25 ... fh.write(b'\x00\x00\xbe\xef') and None
26
26
27 $ hg log
27 $ hg log
28 abort: unknown version (48879) in revlog 00changelog
28 abort: unknown version (48879) in revlog 00changelog
29 [50]
29 [50]
30
30
31 $ cd ..
31 $ cd ..
32
32
33 Test for CVE-2016-3630
33 Test for CVE-2016-3630
34
34
35 $ mkdir test2; cd test2
35 $ mkdir test2; cd test2
36 $ hg init
36 $ hg init
37
37
38 >>> import codecs
38 >>> import codecs
39 >>> open("a.i", "wb").write(codecs.decode(codecs.decode(
39 >>> open("a.i", "wb").write(codecs.decode(codecs.decode(
40 ... b"""eJxjYGZgZIAAYQYGxhgom+k/FMx8YKx9ZUaKSOyqo4cnuKb8mbqHV5cBCVTMWb1Cwqkhe4Gsg9AD
40 ... b"""eJxjYGZgZIAAYQYGxhgom+k/FMx8YKx9ZUaKSOyqo4cnuKb8mbqHV5cBCVTMWb1Cwqkhe4Gsg9AD
41 ... Joa3dYtcYYYBAQ8Qr4OqZAYRICPTSr5WKd/42rV36d+8/VmrNpv7NP1jQAXrQE4BqQUARngwVA==""",
41 ... Joa3dYtcYYYBAQ8Qr4OqZAYRICPTSr5WKd/42rV36d+8/VmrNpv7NP1jQAXrQE4BqQUARngwVA==""",
42 ... "base64"), "zlib")) and None
42 ... "base64"), "zlib")) and None
43
43
44 $ hg debugrevlogindex a.i
44 $ hg debugrevlogindex a.i
45 rev linkrev nodeid p1 p2
45 rev linkrev nodeid p1 p2
46 0 2 99e0332bd498 000000000000 000000000000
46 0 2 99e0332bd498 000000000000 000000000000
47 1 3 6674f57a23d8 99e0332bd498 000000000000
47 1 3 6674f57a23d8 99e0332bd498 000000000000
48
48
49 >>> from mercurial.revlogutils.constants import KIND_OTHER
49 >>> from mercurial.revlogutils.constants import KIND_OTHER
50 >>> from mercurial import revlog, vfs
50 >>> from mercurial import revlog, vfs
51 >>> tvfs = vfs.vfs(b'.')
51 >>> tvfs = vfs.vfs(b'.')
52 >>> tvfs.options = {b'revlogv1': True}
52 >>> tvfs.options = {b'revlogv1': True}
53 >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), radix=b'a')
53 >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), radix=b'a')
54 >>> rl.revision(1)
54 >>> rl.revision(1)
55 mpatchError(*'patch cannot be decoded'*) (glob)
55 mpatchError(*'patch cannot be decoded'*) (glob)
56
56
57 $ cd ..
57 $ cd ..
58
58
59
59
60 Regression test for support for the old repos with strange diff encoding.
60 Regression test for support for the old repos with strange diff encoding.
61 Apparently it used to be possible (maybe it's still possible, but we don't know how)
61 Apparently it used to be possible (maybe it's still possible, but we don't know how)
62 to create commits whose diffs are encoded relative to a nullrev.
62 to create commits whose diffs are encoded relative to a nullrev.
63 This test checks that a repo with that encoding can still be read.
63 This test checks that a repo with that encoding can still be read.
64
64
65 This is what we did to produce the repo in test-revlog-diff-relative-to-nullrev.tar:
65 This is what we did to produce the repo in test-revlog-diff-relative-to-nullrev.tar:
66
66
67 - tweak the code in mercurial/revlogutils/deltas.py to produce such "trivial" deltas:
67 - tweak the code in mercurial/revlogutils/deltas.py to produce such "trivial" deltas:
68 > if deltainfo is None:
68 > if deltainfo is None:
69 > - deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
69 > - deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
70 > + deltainfo = self._builddeltainfo(revinfo, nullrev, fh)
70 > + deltainfo = self._builddeltainfo(revinfo, nullrev, fh)
71 - hg init
71 - hg init
72 - echo hi > a
72 - echo hi > a
73 - hg commit -Am_
73 - hg commit -Am_
74 - remove some cache files
74 - remove some cache files
75
75
76 $ tar --force-local -xf "$TESTDIR"/bundles/test-revlog-diff-relative-to-nullrev.tar
76 $ tar --force-local -xf "$TESTDIR"/bundles/test-revlog-diff-relative-to-nullrev.tar
77 $ cd nullrev-diff
77 $ cd nullrev-diff
78 $ hg debugdeltachain a
78 $ hg debugdeltachain a
79 rev chain# chainlen prev delta size rawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity srchunks
79 rev chain# chainlen prev delta size rawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity srchunks
80 0 1 2 -1 p1 15 3 15 5.00000 15 0 0.00000 15 15 1.00000 1
80 0 1 2 -1 p1 15 3 15 5.00000 15 0 0.00000 15 15 1.00000 1
81 #if rhg
82 $ hg cat --config rhg.cat=true -r 0 a
83 abort: corrupted revlog
84 [255]
85 #else
86 $ hg cat --config rhg.cat=true -r 0 a
81 $ hg cat --config rhg.cat=true -r 0 a
87 hi
82 hi
88 #endif
89 $ cd ..
83 $ cd ..
General Comments 0
You need to be logged in to leave comments. Login now