##// END OF EJS Templates
rhg: Add RevlogEntry::data that does delta resolution...
Simon Sapin -
r49373:f2f57724 default
parent child Browse files
Show More
@@ -1,33 +1,33
1 1 // debugdata.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::repo::Repo;
9 9 use crate::revlog::revlog::{Revlog, RevlogError};
10 10
11 11 /// Kind of data to debug
12 12 #[derive(Debug, Copy, Clone)]
13 13 pub enum DebugDataKind {
14 14 Changelog,
15 15 Manifest,
16 16 }
17 17
18 18 /// Dump the data contents of a revision.
19 19 pub fn debug_data(
20 20 repo: &Repo,
21 21 revset: &str,
22 22 kind: DebugDataKind,
23 23 ) -> Result<Vec<u8>, RevlogError> {
24 24 let index_file = match kind {
25 25 DebugDataKind::Changelog => "00changelog.i",
26 26 DebugDataKind::Manifest => "00manifest.i",
27 27 };
28 28 let revlog = Revlog::open(repo, index_file, None)?;
29 29 let rev =
30 30 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
31 31 let data = revlog.get_rev_data(rev)?;
32 Ok(data)
32 Ok(data.into_owned())
33 33 }
@@ -1,67 +1,67
1 1 use crate::errors::HgError;
2 2 use crate::repo::Repo;
3 3 use crate::revlog::node::NULL_NODE;
4 4 use crate::revlog::revlog::{Revlog, RevlogError};
5 5 use crate::revlog::Revision;
6 6 use crate::revlog::{Node, NodePrefix};
7 7
8 8 /// A specialized `Revlog` to work with `changelog` data format.
9 9 pub struct Changelog {
10 10 /// The generic `revlog` format.
11 11 pub(crate) revlog: Revlog,
12 12 }
13 13
14 14 impl Changelog {
15 15 /// Open the `changelog` of a repository given by its root.
16 16 pub fn open(repo: &Repo) -> Result<Self, HgError> {
17 17 let revlog = Revlog::open(repo, "00changelog.i", None)?;
18 18 Ok(Self { revlog })
19 19 }
20 20
21 21 /// Return the `ChangelogRevisionData` for the given node ID.
22 22 pub fn data_for_node(
23 23 &self,
24 24 node: NodePrefix,
25 25 ) -> Result<ChangelogRevisionData, RevlogError> {
26 26 let rev = self.revlog.rev_from_node(node)?;
27 27 self.data_for_rev(rev)
28 28 }
29 29
30 30 /// Return the `ChangelogRevisionData` of the given revision number.
31 31 pub fn data_for_rev(
32 32 &self,
33 33 rev: Revision,
34 34 ) -> Result<ChangelogRevisionData, RevlogError> {
35 let bytes = self.revlog.get_rev_data(rev)?;
35 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
36 36 Ok(ChangelogRevisionData { bytes })
37 37 }
38 38
39 39 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
40 40 self.revlog.node_from_rev(rev)
41 41 }
42 42 }
43 43
44 44 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
45 45 #[derive(Debug)]
46 46 pub struct ChangelogRevisionData {
47 47 /// The data bytes of the `changelog` entry.
48 48 bytes: Vec<u8>,
49 49 }
50 50
51 51 impl ChangelogRevisionData {
52 52 /// Return an iterator over the lines of the entry.
53 53 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
54 54 self.bytes
55 55 .split(|b| b == &b'\n')
56 56 .filter(|line| !line.is_empty())
57 57 }
58 58
59 59 /// Return the node id of the `manifest` referenced by this `changelog`
60 60 /// entry.
61 61 pub fn manifest_node(&self) -> Result<Node, HgError> {
62 62 match self.lines().next() {
63 63 None => Ok(NULL_NODE),
64 64 Some(x) => Node::from_hex_for_repo(x),
65 65 }
66 66 }
67 67 }
@@ -1,89 +1,89
1 1 use crate::errors::HgError;
2 2 use crate::repo::Repo;
3 3 use crate::revlog::path_encode::path_encode;
4 4 use crate::revlog::revlog::{Revlog, RevlogError};
5 5 use crate::revlog::NodePrefix;
6 6 use crate::revlog::Revision;
7 7 use crate::utils::files::get_path_from_bytes;
8 8 use crate::utils::hg_path::HgPath;
9 9 use crate::utils::SliceExt;
10 10 use std::path::PathBuf;
11 11
12 12 /// A specialized `Revlog` to work with file data logs.
13 13 pub struct Filelog {
14 14 /// The generic `revlog` format.
15 15 revlog: Revlog,
16 16 }
17 17
18 18 impl Filelog {
19 19 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
20 20 let index_path = store_path(file_path, b".i");
21 21 let data_path = store_path(file_path, b".d");
22 22 let revlog = Revlog::open(repo, index_path, Some(&data_path))?;
23 23 Ok(Self { revlog })
24 24 }
25 25
26 26 /// The given node ID is that of the file as found in a manifest, not of a
27 27 /// changeset.
28 28 pub fn data_for_node(
29 29 &self,
30 30 file_node: impl Into<NodePrefix>,
31 31 ) -> Result<FilelogRevisionData, RevlogError> {
32 32 let file_rev = self.revlog.rev_from_node(file_node.into())?;
33 33 self.data_for_rev(file_rev)
34 34 }
35 35
36 36 /// The given revision is that of the file as found in a manifest, not of a
37 37 /// changeset.
38 38 pub fn data_for_rev(
39 39 &self,
40 40 file_rev: Revision,
41 41 ) -> Result<FilelogRevisionData, RevlogError> {
42 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?;
42 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
43 43 Ok(FilelogRevisionData(data.into()))
44 44 }
45 45 }
46 46
47 47 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
48 48 let encoded_bytes =
49 49 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
50 50 get_path_from_bytes(&encoded_bytes).into()
51 51 }
52 52
53 53 /// The data for one revision in a filelog, uncompressed and delta-resolved.
54 54 pub struct FilelogRevisionData(Vec<u8>);
55 55
56 56 impl FilelogRevisionData {
57 57 /// Split into metadata and data
58 58 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
59 59 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
60 60
61 61 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
62 62 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
63 63 Ok((Some(metadata), data))
64 64 } else {
65 65 Err(HgError::corrupted(
66 66 "Missing metadata end delimiter in filelog entry",
67 67 ))
68 68 }
69 69 } else {
70 70 Ok((None, &self.0))
71 71 }
72 72 }
73 73
74 74 /// Returns the file contents at this revision, stripped of any metadata
75 75 pub fn file_data(&self) -> Result<&[u8], HgError> {
76 76 let (_metadata, data) = self.split()?;
77 77 Ok(data)
78 78 }
79 79
80 80 /// Consume the entry, and convert it into data, discarding any metadata,
81 81 /// if present.
82 82 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
83 83 if let (Some(_metadata), data) = self.split()? {
84 84 Ok(data.to_owned())
85 85 } else {
86 86 Ok(self.0)
87 87 }
88 88 }
89 89 }
@@ -1,193 +1,193
1 1 use crate::errors::HgError;
2 2 use crate::repo::Repo;
3 3 use crate::revlog::revlog::{Revlog, RevlogError};
4 4 use crate::revlog::Revision;
5 5 use crate::revlog::{Node, NodePrefix};
6 6 use crate::utils::hg_path::HgPath;
7 7 use crate::utils::SliceExt;
8 8
9 9 /// A specialized `Revlog` to work with `manifest` data format.
10 10 pub struct Manifestlog {
11 11 /// The generic `revlog` format.
12 12 revlog: Revlog,
13 13 }
14 14
15 15 impl Manifestlog {
16 16 /// Open the `manifest` of a repository given by its root.
17 17 pub fn open(repo: &Repo) -> Result<Self, HgError> {
18 18 let revlog = Revlog::open(repo, "00manifest.i", None)?;
19 19 Ok(Self { revlog })
20 20 }
21 21
22 22 /// Return the `Manifest` for the given node ID.
23 23 ///
24 24 /// Note: this is a node ID in the manifestlog, typically found through
25 25 /// `ChangelogRevisionData::manifest_node`. It is *not* the node ID of any
26 26 /// changeset.
27 27 ///
28 28 /// See also `Repo::manifest_for_node`
29 29 pub fn data_for_node(
30 30 &self,
31 31 node: NodePrefix,
32 32 ) -> Result<Manifest, RevlogError> {
33 33 let rev = self.revlog.rev_from_node(node)?;
34 34 self.data_for_rev(rev)
35 35 }
36 36
37 37 /// Return the `Manifest` of a given revision number.
38 38 ///
39 39 /// Note: this is a revision number in the manifestlog, *not* of any
40 40 /// changeset.
41 41 ///
42 42 /// See also `Repo::manifest_for_rev`
43 43 pub fn data_for_rev(
44 44 &self,
45 45 rev: Revision,
46 46 ) -> Result<Manifest, RevlogError> {
47 let bytes = self.revlog.get_rev_data(rev)?;
47 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
48 48 Ok(Manifest { bytes })
49 49 }
50 50 }
51 51
52 52 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
53 53 #[derive(Debug)]
54 54 pub struct Manifest {
55 55 /// Format for a manifest: flat sequence of variable-size entries,
56 56 /// sorted by path, each as:
57 57 ///
58 58 /// ```text
59 59 /// <path> \0 <hex_node_id> <flags> \n
60 60 /// ```
61 61 ///
62 62 /// The last entry is also terminated by a newline character.
63 63 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
64 64 bytes: Vec<u8>,
65 65 }
66 66
67 67 impl Manifest {
68 68 pub fn iter(
69 69 &self,
70 70 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
71 71 self.bytes
72 72 .split(|b| b == &b'\n')
73 73 .filter(|line| !line.is_empty())
74 74 .map(ManifestEntry::from_raw)
75 75 }
76 76
77 77 /// If the given path is in this manifest, return its `ManifestEntry`
78 78 pub fn find_by_path(
79 79 &self,
80 80 path: &HgPath,
81 81 ) -> Result<Option<ManifestEntry>, HgError> {
82 82 use std::cmp::Ordering::*;
83 83 let path = path.as_bytes();
84 84 // Both boundaries of this `&[u8]` slice are always at the boundary of
85 85 // an entry
86 86 let mut bytes = &*self.bytes;
87 87
88 88 // Binary search algorithm derived from `[T]::binary_search_by`
89 89 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
90 90 // except we don’t have a slice of entries. Instead we jump to the
91 91 // middle of the byte slice and look around for entry delimiters
92 92 // (newlines).
93 93 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
94 94 let (entry_path, rest) =
95 95 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
96 96 let cmp = entry_path.cmp(path);
97 97 if cmp == Less {
98 98 let after_newline = entry_range.end + 1;
99 99 bytes = &bytes[after_newline..];
100 100 } else if cmp == Greater {
101 101 bytes = &bytes[..entry_range.start];
102 102 } else {
103 103 return Ok(Some(ManifestEntry::from_path_and_rest(
104 104 entry_path, rest,
105 105 )));
106 106 }
107 107 }
108 108 Ok(None)
109 109 }
110 110
111 111 /// If there is at least one, return the byte range of an entry *excluding*
112 112 /// the final newline.
113 113 fn find_entry_near_middle_of(
114 114 bytes: &[u8],
115 115 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
116 116 let len = bytes.len();
117 117 if len > 0 {
118 118 let middle = bytes.len() / 2;
119 119 // Integer division rounds down, so `middle < len`.
120 120 let (before, after) = bytes.split_at(middle);
121 121 let is_newline = |&byte: &u8| byte == b'\n';
122 122 let entry_start = match before.iter().rposition(is_newline) {
123 123 Some(i) => i + 1,
124 124 None => 0, // We choose the first entry in `bytes`
125 125 };
126 126 let entry_end = match after.iter().position(is_newline) {
127 127 Some(i) => {
128 128 // No `+ 1` here to exclude this newline from the range
129 129 middle + i
130 130 }
131 131 None => {
132 132 // In a well-formed manifest:
133 133 //
134 134 // * Since `len > 0`, `bytes` contains at least one entry
135 135 // * Every entry ends with a newline
136 136 // * Since `middle < len`, `after` contains at least the
137 137 // newline at the end of the last entry of `bytes`.
138 138 //
139 139 // We didn’t find a newline, so this manifest is not
140 140 // well-formed.
141 141 return Err(HgError::corrupted(
142 142 "manifest entry without \\n delimiter",
143 143 ));
144 144 }
145 145 };
146 146 Ok(Some(entry_start..entry_end))
147 147 } else {
148 148 // len == 0
149 149 Ok(None)
150 150 }
151 151 }
152 152 }
153 153
154 154 /// A single entry of a `Manifest`: a path, its hex node ID, and optional flags.
155 155 #[derive(Debug)]
156 156 pub struct ManifestEntry<'manifest> {
157 157 pub path: &'manifest HgPath,
158 158 pub hex_node_id: &'manifest [u8],
159 159
160 160 /// `Some` values are b'x', b'l', or b't'
161 161 pub flags: Option<u8>,
162 162 }
163 163
164 164 impl<'a> ManifestEntry<'a> {
165 165 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
166 166 bytes.split_2(b'\0').ok_or_else(|| {
167 167 HgError::corrupted("manifest entry without \\0 delimiter")
168 168 })
169 169 }
170 170
171 171 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
172 172 let (hex_node_id, flags) = match rest.split_last() {
173 173 Some((&b'x', rest)) => (rest, Some(b'x')),
174 174 Some((&b'l', rest)) => (rest, Some(b'l')),
175 175 Some((&b't', rest)) => (rest, Some(b't')),
176 176 _ => (rest, None),
177 177 };
178 178 Self {
179 179 path: HgPath::new(path),
180 180 hex_node_id,
181 181 flags,
182 182 }
183 183 }
184 184
185 185 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
186 186 let (path, rest) = Self::split_path(bytes)?;
187 187 Ok(Self::from_path_and_rest(path, rest))
188 188 }
189 189
190 190 pub fn node_id(&self) -> Result<Node, HgError> {
191 191 Node::from_hex_for_repo(self.hex_node_id)
192 192 }
193 193 }
@@ -1,421 +1,431
1 1 use std::borrow::Cow;
2 2 use std::io::Read;
3 3 use std::ops::Deref;
4 4 use std::path::Path;
5 5
6 6 use flate2::read::ZlibDecoder;
7 7 use micro_timer::timed;
8 8 use sha1::{Digest, Sha1};
9 9 use zstd;
10 10
11 11 use super::index::Index;
12 12 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
13 13 use super::nodemap;
14 14 use super::nodemap::{NodeMap, NodeMapError};
15 15 use super::nodemap_docket::NodeMapDocket;
16 16 use super::patch;
17 17 use crate::errors::HgError;
18 18 use crate::repo::Repo;
19 19 use crate::revlog::Revision;
20 20 use crate::{Node, NULL_REVISION};
21 21
22 22 #[derive(derive_more::From)]
23 23 pub enum RevlogError {
24 24 InvalidRevision,
25 25 /// Working directory is not supported
26 26 WDirUnsupported,
27 27 /// Found more than one entry whose ID match the requested prefix
28 28 AmbiguousPrefix,
29 29 #[from]
30 30 Other(HgError),
31 31 }
32 32
33 33 impl From<NodeMapError> for RevlogError {
34 34 fn from(error: NodeMapError) -> Self {
35 35 match error {
36 36 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
37 37 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
38 38 }
39 39 }
40 40 }
41 41
42 42 impl RevlogError {
43 43 fn corrupted() -> Self {
44 44 RevlogError::Other(HgError::corrupted("corrupted revlog"))
45 45 }
46 46 }
47 47
48 48 /// Read only implementation of revlog.
49 49 pub struct Revlog {
50 50 /// When index and data are not interleaved: bytes of the revlog index.
51 51 /// When index and data are interleaved: bytes of the revlog index and
52 52 /// data.
53 53 index: Index,
54 54 /// When index and data are not interleaved: bytes of the revlog data
55 55 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
56 56 /// When present on disk: the persistent nodemap for this revlog
57 57 nodemap: Option<nodemap::NodeTree>,
58 58 }
59 59
60 60 impl Revlog {
61 61 /// Open a revlog index file.
62 62 ///
63 63 /// It will also open the associated data file if index and data are not
64 64 /// interleaved.
65 65 #[timed]
66 66 pub fn open(
67 67 repo: &Repo,
68 68 index_path: impl AsRef<Path>,
69 69 data_path: Option<&Path>,
70 70 ) -> Result<Self, HgError> {
71 71 let index_path = index_path.as_ref();
72 72 let index = {
73 73 match repo.store_vfs().mmap_open_opt(&index_path)? {
74 74 None => Index::new(Box::new(vec![])),
75 75 Some(index_mmap) => {
76 76 let index = Index::new(Box::new(index_mmap))?;
77 77 Ok(index)
78 78 }
79 79 }
80 80 }?;
81 81
82 82 let default_data_path = index_path.with_extension("d");
83 83
84 84 // type annotation required
85 85 // won't recognize Mmap as Deref<Target = [u8]>
86 86 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
87 87 if index.is_inline() {
88 88 None
89 89 } else {
90 90 let data_path = data_path.unwrap_or(&default_data_path);
91 91 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
92 92 Some(Box::new(data_mmap))
93 93 };
94 94
95 95 let nodemap = if index.is_inline() {
96 96 None
97 97 } else {
98 98 NodeMapDocket::read_from_file(repo, index_path)?.map(
99 99 |(docket, data)| {
100 100 nodemap::NodeTree::load_bytes(
101 101 Box::new(data),
102 102 docket.data_length,
103 103 )
104 104 },
105 105 )
106 106 };
107 107
108 108 Ok(Revlog {
109 109 index,
110 110 data_bytes,
111 111 nodemap,
112 112 })
113 113 }
114 114
115 115 /// Return number of entries of the `Revlog`.
116 116 pub fn len(&self) -> usize {
117 117 self.index.len()
118 118 }
119 119
120 120 /// Returns `true` if the `Revlog` has zero `entries`.
121 121 pub fn is_empty(&self) -> bool {
122 122 self.index.is_empty()
123 123 }
124 124
125 125 /// Returns the node ID for the given revision number, if it exists in this
126 126 /// revlog
127 127 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
128 128 if rev == NULL_REVISION {
129 129 return Some(&NULL_NODE);
130 130 }
131 131 Some(self.index.get_entry(rev)?.hash())
132 132 }
133 133
134 134 /// Return the revision number for the given node ID, if it exists in this
135 135 /// revlog
136 136 #[timed]
137 137 pub fn rev_from_node(
138 138 &self,
139 139 node: NodePrefix,
140 140 ) -> Result<Revision, RevlogError> {
141 141 if node.is_prefix_of(&NULL_NODE) {
142 142 return Ok(NULL_REVISION);
143 143 }
144 144
145 145 if let Some(nodemap) = &self.nodemap {
146 146 return nodemap
147 147 .find_bin(&self.index, node)?
148 148 .ok_or(RevlogError::InvalidRevision);
149 149 }
150 150
151 151 // Fallback to linear scan when a persistent nodemap is not present.
152 152 // This happens when the persistent-nodemap experimental feature is not
153 153 // enabled, or for small revlogs.
154 154 //
155 155 // TODO: consider building a non-persistent nodemap in memory to
156 156 // optimize these cases.
157 157 let mut found_by_prefix = None;
158 158 for rev in (0..self.len() as Revision).rev() {
159 159 let index_entry =
160 160 self.index.get_entry(rev).ok_or(HgError::corrupted(
161 161 "revlog references a revision not in the index",
162 162 ))?;
163 163 if node == *index_entry.hash() {
164 164 return Ok(rev);
165 165 }
166 166 if node.is_prefix_of(index_entry.hash()) {
167 167 if found_by_prefix.is_some() {
168 168 return Err(RevlogError::AmbiguousPrefix);
169 169 }
170 170 found_by_prefix = Some(rev)
171 171 }
172 172 }
173 173 found_by_prefix.ok_or(RevlogError::InvalidRevision)
174 174 }
175 175
176 176 /// Returns whether the given revision exists in this revlog.
177 177 pub fn has_rev(&self, rev: Revision) -> bool {
178 178 self.index.get_entry(rev).is_some()
179 179 }
180 180
181 181 /// Return the full data associated to a revision.
182 182 ///
183 183 /// All entries required to build the final data out of deltas will be
184 184 /// retrieved as needed, and the deltas will be applied to the initial
185 185 /// snapshot to rebuild the final data.
186 186 #[timed]
187 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
187 pub fn get_rev_data(
188 &self,
189 rev: Revision,
190 ) -> Result<Cow<[u8]>, RevlogError> {
188 191 if rev == NULL_REVISION {
189 return Ok(vec![]);
192 return Ok(Cow::Borrowed(&[]));
190 193 };
191 // Todo return -> Cow
192 let mut entry = self.get_entry(rev)?;
193 let mut delta_chain = vec![];
194
195 // The meaning of `base_rev_or_base_of_delta_chain` depends on
196 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
197 // `mercurial/revlogutils/constants.py` and the code in
198 // [_chaininfo] and in [index_deltachain].
199 let uses_generaldelta = self.index.uses_generaldelta();
200 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
201 let base_rev = if uses_generaldelta {
202 base_rev
203 } else {
204 entry.rev - 1
205 };
206 delta_chain.push(entry);
207 entry = self.get_entry_internal(base_rev)?;
208 }
209
210 // TODO do not look twice in the index
211 let index_entry = self
212 .index
213 .get_entry(rev)
214 .ok_or(RevlogError::InvalidRevision)?;
215
216 let data: Vec<u8> = if delta_chain.is_empty() {
217 entry.data_chunk()?.into()
218 } else {
219 Revlog::build_data_from_deltas(entry, &delta_chain)?
220 };
221
222 if self.check_hash(
223 index_entry.p1(),
224 index_entry.p2(),
225 index_entry.hash().as_bytes(),
226 &data,
227 ) {
228 Ok(data)
229 } else {
230 Err(RevlogError::corrupted())
231 }
194 self.get_entry(rev)?.data()
232 195 }
233 196
234 197 /// Check the hash of some given data against the recorded hash.
235 198 pub fn check_hash(
236 199 &self,
237 200 p1: Revision,
238 201 p2: Revision,
239 202 expected: &[u8],
240 203 data: &[u8],
241 204 ) -> bool {
242 205 let e1 = self.index.get_entry(p1);
243 206 let h1 = match e1 {
244 207 Some(ref entry) => entry.hash(),
245 208 None => &NULL_NODE,
246 209 };
247 210 let e2 = self.index.get_entry(p2);
248 211 let h2 = match e2 {
249 212 Some(ref entry) => entry.hash(),
250 213 None => &NULL_NODE,
251 214 };
252 215
253 216 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
254 217 }
255 218
256 219 /// Build the full data of a revision out of its snapshot
257 220 /// and its deltas.
258 221 #[timed]
259 222 fn build_data_from_deltas(
260 223 snapshot: RevlogEntry,
261 224 deltas: &[RevlogEntry],
262 225 ) -> Result<Vec<u8>, RevlogError> {
263 226 let snapshot = snapshot.data_chunk()?;
264 227 let deltas = deltas
265 228 .iter()
266 229 .rev()
267 230 .map(RevlogEntry::data_chunk)
268 231 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
269 232 let patches: Vec<_> =
270 233 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
271 234 let patch = patch::fold_patch_lists(&patches);
272 235 Ok(patch.apply(&snapshot))
273 236 }
274 237
275 238 /// Return the revlog data.
276 239 fn data(&self) -> &[u8] {
277 240 match self.data_bytes {
278 241 Some(ref data_bytes) => &data_bytes,
279 242 None => panic!(
280 243 "forgot to load the data or trying to access inline data"
281 244 ),
282 245 }
283 246 }
284 247
285 248 /// Get an entry of the revlog.
286 249 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
287 250 let index_entry = self
288 251 .index
289 252 .get_entry(rev)
290 253 .ok_or(RevlogError::InvalidRevision)?;
291 254 let start = index_entry.offset();
292 255 let end = start + index_entry.compressed_len();
293 256 let data = if self.index.is_inline() {
294 257 self.index.data(start, end)
295 258 } else {
296 259 &self.data()[start..end]
297 260 };
298 261 let entry = RevlogEntry {
262 revlog: self,
299 263 rev,
300 264 bytes: data,
301 265 compressed_len: index_entry.compressed_len(),
302 266 uncompressed_len: index_entry.uncompressed_len(),
303 267 base_rev_or_base_of_delta_chain: if index_entry
304 268 .base_revision_or_base_of_delta_chain()
305 269 == rev
306 270 {
307 271 None
308 272 } else {
309 273 Some(index_entry.base_revision_or_base_of_delta_chain())
310 274 },
311 275 };
312 276 Ok(entry)
313 277 }
314 278
315 279 /// When resolving internal references within revlog, any errors
316 280 /// should be reported as corruption, instead of e.g. "invalid revision"
317 281 fn get_entry_internal(
318 282 &self,
319 283 rev: Revision,
320 284 ) -> Result<RevlogEntry, RevlogError> {
321 285 return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
322 286 }
323 287 }
324 288
325 289 /// The revlog entry's bytes and the necessary information to extract
326 290 /// the entry's data.
327 #[derive(Debug)]
291 #[derive(Clone)]
328 292 pub struct RevlogEntry<'a> {
293 revlog: &'a Revlog,
329 294 rev: Revision,
330 295 bytes: &'a [u8],
331 296 compressed_len: usize,
332 297 uncompressed_len: usize,
333 298 base_rev_or_base_of_delta_chain: Option<Revision>,
334 299 }
335 300
336 301 impl<'a> RevlogEntry<'a> {
337 302 pub fn revision(&self) -> Revision {
338 303 self.rev
339 304 }
340 305
306 /// The data for this entry, after resolving deltas if any.
307 pub fn data(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
308 let mut entry = self.clone();
309 let mut delta_chain = vec![];
310
311 // The meaning of `base_rev_or_base_of_delta_chain` depends on
312 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
313 // `mercurial/revlogutils/constants.py` and the code in
314 // [_chaininfo] and in [index_deltachain].
315 let uses_generaldelta = self.revlog.index.uses_generaldelta();
316 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
317 let base_rev = if uses_generaldelta {
318 base_rev
319 } else {
320 entry.rev - 1
321 };
322 delta_chain.push(entry);
323 entry = self.revlog.get_entry_internal(base_rev)?;
324 }
325
326 // TODO do not look twice in the index
327 let index_entry = self
328 .revlog
329 .index
330 .get_entry(self.rev)
331 .ok_or(RevlogError::InvalidRevision)?;
332
333 let data = if delta_chain.is_empty() {
334 entry.data_chunk()?
335 } else {
336 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
337 };
338
339 if self.revlog.check_hash(
340 index_entry.p1(),
341 index_entry.p2(),
342 index_entry.hash().as_bytes(),
343 &data,
344 ) {
345 Ok(data)
346 } else {
347 Err(RevlogError::corrupted())
348 }
349 }
350
341 351 /// Extract the data contained in the entry.
342 352 /// This may be a delta. (See `is_delta`.)
343 fn data_chunk(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
353 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
344 354 if self.bytes.is_empty() {
345 355 return Ok(Cow::Borrowed(&[]));
346 356 }
347 357 match self.bytes[0] {
348 358 // Revision data is the entirety of the entry, including this
349 359 // header.
350 360 b'\0' => Ok(Cow::Borrowed(self.bytes)),
351 361 // Raw revision data follows.
352 362 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
353 363 // zlib (RFC 1950) data.
354 364 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
355 365 // zstd data.
356 366 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
357 367 // A proper new format should have had a repo/store requirement.
358 368 _format_type => Err(RevlogError::corrupted()),
359 369 }
360 370 }
361 371
362 372 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
363 373 let mut decoder = ZlibDecoder::new(self.bytes);
364 374 if self.is_delta() {
365 375 let mut buf = Vec::with_capacity(self.compressed_len);
366 376 decoder
367 377 .read_to_end(&mut buf)
368 378 .map_err(|_| RevlogError::corrupted())?;
369 379 Ok(buf)
370 380 } else {
371 381 let mut buf = vec![0; self.uncompressed_len];
372 382 decoder
373 383 .read_exact(&mut buf)
374 384 .map_err(|_| RevlogError::corrupted())?;
375 385 Ok(buf)
376 386 }
377 387 }
378 388
379 389 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
380 390 if self.is_delta() {
381 391 let mut buf = Vec::with_capacity(self.compressed_len);
382 392 zstd::stream::copy_decode(self.bytes, &mut buf)
383 393 .map_err(|_| RevlogError::corrupted())?;
384 394 Ok(buf)
385 395 } else {
386 396 let mut buf = vec![0; self.uncompressed_len];
387 397 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
388 398 .map_err(|_| RevlogError::corrupted())?;
389 399 if len != self.uncompressed_len {
390 400 Err(RevlogError::corrupted())
391 401 } else {
392 402 Ok(buf)
393 403 }
394 404 }
395 405 }
396 406
397 407 /// Tell if the entry is a snapshot or a delta
398 408 /// (influences decompression).
399 409 fn is_delta(&self) -> bool {
400 410 self.base_rev_or_base_of_delta_chain.is_some()
401 411 }
402 412 }
403 413
404 414 /// Calculate the hash of a revision given its data and its parents.
405 415 fn hash(
406 416 data: &[u8],
407 417 p1_hash: &[u8],
408 418 p2_hash: &[u8],
409 419 ) -> [u8; NODE_BYTES_LENGTH] {
410 420 let mut hasher = Sha1::new();
411 421 let (a, b) = (p1_hash, p2_hash);
412 422 if a > b {
413 423 hasher.update(b);
414 424 hasher.update(a);
415 425 } else {
416 426 hasher.update(a);
417 427 hasher.update(b);
418 428 }
419 429 hasher.update(data);
420 430 *hasher.finalize().as_ref()
421 431 }
General Comments 0
You need to be logged in to leave comments. Login now