##// END OF EJS Templates
rust-nodemap-docket: make unaware of `Repo`...
Martin von Zweigbergk -
r49977:704e993e default
parent child Browse files
Show More
@@ -1,110 +1,108 b''
1 use crate::errors::{HgError, HgResultExt};
1 use crate::errors::{HgError, HgResultExt};
2 use bytes_cast::{unaligned, BytesCast};
2 use bytes_cast::{unaligned, BytesCast};
3 use memmap2::Mmap;
3 use memmap2::Mmap;
4 use std::path::{Path, PathBuf};
4 use std::path::{Path, PathBuf};
5
5
6 use crate::repo::Repo;
7 use crate::utils::strip_suffix;
6 use crate::utils::strip_suffix;
7 use crate::vfs::Vfs;
8
8
9 const ONDISK_VERSION: u8 = 1;
9 const ONDISK_VERSION: u8 = 1;
10
10
11 pub(super) struct NodeMapDocket {
11 pub(super) struct NodeMapDocket {
12 pub data_length: usize,
12 pub data_length: usize,
13 // TODO: keep here more of the data from `parse()` when we need it
13 // TODO: keep here more of the data from `parse()` when we need it
14 }
14 }
15
15
16 #[derive(BytesCast)]
16 #[derive(BytesCast)]
17 #[repr(C)]
17 #[repr(C)]
18 struct DocketHeader {
18 struct DocketHeader {
19 uid_size: u8,
19 uid_size: u8,
20 _tip_rev: unaligned::U64Be,
20 _tip_rev: unaligned::U64Be,
21 data_length: unaligned::U64Be,
21 data_length: unaligned::U64Be,
22 _data_unused: unaligned::U64Be,
22 _data_unused: unaligned::U64Be,
23 tip_node_size: unaligned::U64Be,
23 tip_node_size: unaligned::U64Be,
24 }
24 }
25
25
26 impl NodeMapDocket {
26 impl NodeMapDocket {
27 /// Return `Ok(None)` when the caller should proceed without a persistent
27 /// Return `Ok(None)` when the caller should proceed without a persistent
28 /// nodemap:
28 /// nodemap:
29 ///
29 ///
30 /// * This revlog does not have a `.n` docket file (it is not generated for
30 /// * This revlog does not have a `.n` docket file (it is not generated for
31 /// small revlogs), or
31 /// small revlogs), or
32 /// * The docket has an unsupported version number (repositories created by
32 /// * The docket has an unsupported version number (repositories created by
33 /// later hg, maybe that should be a requirement instead?), or
33 /// later hg, maybe that should be a requirement instead?), or
34 /// * The docket file points to a missing (likely deleted) data file (this
34 /// * The docket file points to a missing (likely deleted) data file (this
35 /// can happen in a rare race condition).
35 /// can happen in a rare race condition).
36 pub fn read_from_file(
36 pub fn read_from_file(
37 repo: &Repo,
37 store_vfs: &Vfs,
38 index_path: &Path,
38 index_path: &Path,
39 ) -> Result<Option<(Self, Mmap)>, HgError> {
39 ) -> Result<Option<(Self, Mmap)>, HgError> {
40 let docket_path = index_path.with_extension("n");
40 let docket_path = index_path.with_extension("n");
41 let docket_bytes = if let Some(bytes) =
41 let docket_bytes = if let Some(bytes) =
42 repo.store_vfs().read(&docket_path).io_not_found_as_none()?
42 store_vfs.read(&docket_path).io_not_found_as_none()?
43 {
43 {
44 bytes
44 bytes
45 } else {
45 } else {
46 return Ok(None);
46 return Ok(None);
47 };
47 };
48
48
49 let input = if let Some((&ONDISK_VERSION, rest)) =
49 let input = if let Some((&ONDISK_VERSION, rest)) =
50 docket_bytes.split_first()
50 docket_bytes.split_first()
51 {
51 {
52 rest
52 rest
53 } else {
53 } else {
54 return Ok(None);
54 return Ok(None);
55 };
55 };
56
56
57 /// Treat any error as a parse error
57 /// Treat any error as a parse error
58 fn parse<T, E>(result: Result<T, E>) -> Result<T, HgError> {
58 fn parse<T, E>(result: Result<T, E>) -> Result<T, HgError> {
59 result
59 result
60 .map_err(|_| HgError::corrupted("nodemap docket parse error"))
60 .map_err(|_| HgError::corrupted("nodemap docket parse error"))
61 }
61 }
62
62
63 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
63 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
64 let uid_size = header.uid_size as usize;
64 let uid_size = header.uid_size as usize;
65 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
65 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
66 // systems?
66 // systems?
67 let tip_node_size = header.tip_node_size.get() as usize;
67 let tip_node_size = header.tip_node_size.get() as usize;
68 let data_length = header.data_length.get() as usize;
68 let data_length = header.data_length.get() as usize;
69 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
69 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
70 let (_tip_node, _rest) =
70 let (_tip_node, _rest) =
71 parse(u8::slice_from_bytes(rest, tip_node_size))?;
71 parse(u8::slice_from_bytes(rest, tip_node_size))?;
72 let uid = parse(std::str::from_utf8(uid))?;
72 let uid = parse(std::str::from_utf8(uid))?;
73 let docket = NodeMapDocket { data_length };
73 let docket = NodeMapDocket { data_length };
74
74
75 let data_path = rawdata_path(&docket_path, uid);
75 let data_path = rawdata_path(&docket_path, uid);
76 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
76 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
77 // config is false?
77 // config is false?
78 if let Some(mmap) = repo
78 if let Some(mmap) =
79 .store_vfs()
79 store_vfs.mmap_open(&data_path).io_not_found_as_none()?
80 .mmap_open(&data_path)
81 .io_not_found_as_none()?
82 {
80 {
83 if mmap.len() >= data_length {
81 if mmap.len() >= data_length {
84 Ok(Some((docket, mmap)))
82 Ok(Some((docket, mmap)))
85 } else {
83 } else {
86 Err(HgError::corrupted("persistent nodemap too short"))
84 Err(HgError::corrupted("persistent nodemap too short"))
87 }
85 }
88 } else {
86 } else {
89 // Even if .hg/requires opted in, some revlogs are deemed small
87 // Even if .hg/requires opted in, some revlogs are deemed small
90 // enough to not need a persistent nodemap.
88 // enough to not need a persistent nodemap.
91 Ok(None)
89 Ok(None)
92 }
90 }
93 }
91 }
94 }
92 }
95
93
96 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
94 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
97 let docket_name = docket_path
95 let docket_name = docket_path
98 .file_name()
96 .file_name()
99 .expect("expected a base name")
97 .expect("expected a base name")
100 .to_str()
98 .to_str()
101 .expect("expected an ASCII file name in the store");
99 .expect("expected an ASCII file name in the store");
102 let prefix = strip_suffix(docket_name, ".n.a")
100 let prefix = strip_suffix(docket_name, ".n.a")
103 .or_else(|| strip_suffix(docket_name, ".n"))
101 .or_else(|| strip_suffix(docket_name, ".n"))
104 .expect("expected docket path in .n or .n.a");
102 .expect("expected docket path in .n or .n.a");
105 let name = format!("{}-{}.nd", prefix, uid);
103 let name = format!("{}-{}.nd", prefix, uid);
106 docket_path
104 docket_path
107 .parent()
105 .parent()
108 .expect("expected a non-root path")
106 .expect("expected a non-root path")
109 .join(name)
107 .join(name)
110 }
108 }
@@ -1,510 +1,510 b''
1 use std::borrow::Cow;
1 use std::borrow::Cow;
2 use std::convert::TryFrom;
2 use std::convert::TryFrom;
3 use std::io::Read;
3 use std::io::Read;
4 use std::ops::Deref;
4 use std::ops::Deref;
5 use std::path::Path;
5 use std::path::Path;
6
6
7 use flate2::read::ZlibDecoder;
7 use flate2::read::ZlibDecoder;
8 use micro_timer::timed;
8 use micro_timer::timed;
9 use sha1::{Digest, Sha1};
9 use sha1::{Digest, Sha1};
10 use zstd;
10 use zstd;
11
11
12 use super::index::Index;
12 use super::index::Index;
13 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
13 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
14 use super::nodemap;
14 use super::nodemap;
15 use super::nodemap::{NodeMap, NodeMapError};
15 use super::nodemap::{NodeMap, NodeMapError};
16 use super::nodemap_docket::NodeMapDocket;
16 use super::nodemap_docket::NodeMapDocket;
17 use super::patch;
17 use super::patch;
18 use crate::errors::HgError;
18 use crate::errors::HgError;
19 use crate::repo::Repo;
19 use crate::repo::Repo;
20 use crate::revlog::Revision;
20 use crate::revlog::Revision;
21 use crate::{requirements, Node, NULL_REVISION};
21 use crate::{requirements, Node, NULL_REVISION};
22
22
23 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
23 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
24 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
24 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
25 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
25 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
26 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
26 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
27
27
28 // Keep this in sync with REVIDX_KNOWN_FLAGS in
28 // Keep this in sync with REVIDX_KNOWN_FLAGS in
29 // mercurial/revlogutils/flagutil.py
29 // mercurial/revlogutils/flagutil.py
30 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
30 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
31 | REVISION_FLAG_ELLIPSIS
31 | REVISION_FLAG_ELLIPSIS
32 | REVISION_FLAG_EXTSTORED
32 | REVISION_FLAG_EXTSTORED
33 | REVISION_FLAG_HASCOPIESINFO;
33 | REVISION_FLAG_HASCOPIESINFO;
34
34
35 #[derive(derive_more::From)]
35 #[derive(derive_more::From)]
36 pub enum RevlogError {
36 pub enum RevlogError {
37 InvalidRevision,
37 InvalidRevision,
38 /// Working directory is not supported
38 /// Working directory is not supported
39 WDirUnsupported,
39 WDirUnsupported,
40 /// Found more than one entry whose ID match the requested prefix
40 /// Found more than one entry whose ID match the requested prefix
41 AmbiguousPrefix,
41 AmbiguousPrefix,
42 #[from]
42 #[from]
43 Other(HgError),
43 Other(HgError),
44 }
44 }
45
45
46 impl From<NodeMapError> for RevlogError {
46 impl From<NodeMapError> for RevlogError {
47 fn from(error: NodeMapError) -> Self {
47 fn from(error: NodeMapError) -> Self {
48 match error {
48 match error {
49 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
49 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
50 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
50 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
51 }
51 }
52 }
52 }
53 }
53 }
54
54
55 fn corrupted() -> HgError {
55 fn corrupted() -> HgError {
56 HgError::corrupted("corrupted revlog")
56 HgError::corrupted("corrupted revlog")
57 }
57 }
58
58
59 impl RevlogError {
59 impl RevlogError {
60 fn corrupted() -> Self {
60 fn corrupted() -> Self {
61 RevlogError::Other(corrupted())
61 RevlogError::Other(corrupted())
62 }
62 }
63 }
63 }
64
64
65 /// Read only implementation of revlog.
65 /// Read only implementation of revlog.
66 pub struct Revlog {
66 pub struct Revlog {
67 /// When index and data are not interleaved: bytes of the revlog index.
67 /// When index and data are not interleaved: bytes of the revlog index.
68 /// When index and data are interleaved: bytes of the revlog index and
68 /// When index and data are interleaved: bytes of the revlog index and
69 /// data.
69 /// data.
70 index: Index,
70 index: Index,
71 /// When index and data are not interleaved: bytes of the revlog data
71 /// When index and data are not interleaved: bytes of the revlog data
72 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
72 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
73 /// When present on disk: the persistent nodemap for this revlog
73 /// When present on disk: the persistent nodemap for this revlog
74 nodemap: Option<nodemap::NodeTree>,
74 nodemap: Option<nodemap::NodeTree>,
75 }
75 }
76
76
77 impl Revlog {
77 impl Revlog {
78 /// Open a revlog index file.
78 /// Open a revlog index file.
79 ///
79 ///
80 /// It will also open the associated data file if index and data are not
80 /// It will also open the associated data file if index and data are not
81 /// interleaved.
81 /// interleaved.
82 #[timed]
82 #[timed]
83 pub fn open(
83 pub fn open(
84 repo: &Repo,
84 repo: &Repo,
85 index_path: impl AsRef<Path>,
85 index_path: impl AsRef<Path>,
86 data_path: Option<&Path>,
86 data_path: Option<&Path>,
87 ) -> Result<Self, HgError> {
87 ) -> Result<Self, HgError> {
88 let index_path = index_path.as_ref();
88 let index_path = index_path.as_ref();
89 let index = {
89 let index = {
90 match repo.store_vfs().mmap_open_opt(&index_path)? {
90 match repo.store_vfs().mmap_open_opt(&index_path)? {
91 None => Index::new(Box::new(vec![])),
91 None => Index::new(Box::new(vec![])),
92 Some(index_mmap) => {
92 Some(index_mmap) => {
93 let index = Index::new(Box::new(index_mmap))?;
93 let index = Index::new(Box::new(index_mmap))?;
94 Ok(index)
94 Ok(index)
95 }
95 }
96 }
96 }
97 }?;
97 }?;
98
98
99 let default_data_path = index_path.with_extension("d");
99 let default_data_path = index_path.with_extension("d");
100
100
101 // type annotation required
101 // type annotation required
102 // won't recognize Mmap as Deref<Target = [u8]>
102 // won't recognize Mmap as Deref<Target = [u8]>
103 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
103 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
104 if index.is_inline() {
104 if index.is_inline() {
105 None
105 None
106 } else {
106 } else {
107 let data_path = data_path.unwrap_or(&default_data_path);
107 let data_path = data_path.unwrap_or(&default_data_path);
108 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
108 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
109 Some(Box::new(data_mmap))
109 Some(Box::new(data_mmap))
110 };
110 };
111
111
112 let nodemap = if index.is_inline() {
112 let nodemap = if index.is_inline() {
113 None
113 None
114 } else if !repo
114 } else if !repo
115 .requirements()
115 .requirements()
116 .contains(requirements::NODEMAP_REQUIREMENT)
116 .contains(requirements::NODEMAP_REQUIREMENT)
117 {
117 {
118 // If .hg/requires does not opt in, don’t try to open a nodemap
118 // If .hg/requires does not opt in, don’t try to open a nodemap
119 None
119 None
120 } else {
120 } else {
121 NodeMapDocket::read_from_file(repo, index_path)?.map(
121 NodeMapDocket::read_from_file(&repo.store_vfs(), index_path)?.map(
122 |(docket, data)| {
122 |(docket, data)| {
123 nodemap::NodeTree::load_bytes(
123 nodemap::NodeTree::load_bytes(
124 Box::new(data),
124 Box::new(data),
125 docket.data_length,
125 docket.data_length,
126 )
126 )
127 },
127 },
128 )
128 )
129 };
129 };
130
130
131 Ok(Revlog {
131 Ok(Revlog {
132 index,
132 index,
133 data_bytes,
133 data_bytes,
134 nodemap,
134 nodemap,
135 })
135 })
136 }
136 }
137
137
138 /// Return number of entries of the `Revlog`.
138 /// Return number of entries of the `Revlog`.
139 pub fn len(&self) -> usize {
139 pub fn len(&self) -> usize {
140 self.index.len()
140 self.index.len()
141 }
141 }
142
142
143 /// Returns `true` if the `Revlog` has zero `entries`.
143 /// Returns `true` if the `Revlog` has zero `entries`.
144 pub fn is_empty(&self) -> bool {
144 pub fn is_empty(&self) -> bool {
145 self.index.is_empty()
145 self.index.is_empty()
146 }
146 }
147
147
148 /// Returns the node ID for the given revision number, if it exists in this
148 /// Returns the node ID for the given revision number, if it exists in this
149 /// revlog
149 /// revlog
150 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
150 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
151 if rev == NULL_REVISION {
151 if rev == NULL_REVISION {
152 return Some(&NULL_NODE);
152 return Some(&NULL_NODE);
153 }
153 }
154 Some(self.index.get_entry(rev)?.hash())
154 Some(self.index.get_entry(rev)?.hash())
155 }
155 }
156
156
157 /// Return the revision number for the given node ID, if it exists in this
157 /// Return the revision number for the given node ID, if it exists in this
158 /// revlog
158 /// revlog
159 #[timed]
159 #[timed]
160 pub fn rev_from_node(
160 pub fn rev_from_node(
161 &self,
161 &self,
162 node: NodePrefix,
162 node: NodePrefix,
163 ) -> Result<Revision, RevlogError> {
163 ) -> Result<Revision, RevlogError> {
164 if node.is_prefix_of(&NULL_NODE) {
164 if node.is_prefix_of(&NULL_NODE) {
165 return Ok(NULL_REVISION);
165 return Ok(NULL_REVISION);
166 }
166 }
167
167
168 if let Some(nodemap) = &self.nodemap {
168 if let Some(nodemap) = &self.nodemap {
169 return nodemap
169 return nodemap
170 .find_bin(&self.index, node)?
170 .find_bin(&self.index, node)?
171 .ok_or(RevlogError::InvalidRevision);
171 .ok_or(RevlogError::InvalidRevision);
172 }
172 }
173
173
174 // Fallback to linear scan when a persistent nodemap is not present.
174 // Fallback to linear scan when a persistent nodemap is not present.
175 // This happens when the persistent-nodemap experimental feature is not
175 // This happens when the persistent-nodemap experimental feature is not
176 // enabled, or for small revlogs.
176 // enabled, or for small revlogs.
177 //
177 //
178 // TODO: consider building a non-persistent nodemap in memory to
178 // TODO: consider building a non-persistent nodemap in memory to
179 // optimize these cases.
179 // optimize these cases.
180 let mut found_by_prefix = None;
180 let mut found_by_prefix = None;
181 for rev in (0..self.len() as Revision).rev() {
181 for rev in (0..self.len() as Revision).rev() {
182 let index_entry =
182 let index_entry =
183 self.index.get_entry(rev).ok_or(HgError::corrupted(
183 self.index.get_entry(rev).ok_or(HgError::corrupted(
184 "revlog references a revision not in the index",
184 "revlog references a revision not in the index",
185 ))?;
185 ))?;
186 if node == *index_entry.hash() {
186 if node == *index_entry.hash() {
187 return Ok(rev);
187 return Ok(rev);
188 }
188 }
189 if node.is_prefix_of(index_entry.hash()) {
189 if node.is_prefix_of(index_entry.hash()) {
190 if found_by_prefix.is_some() {
190 if found_by_prefix.is_some() {
191 return Err(RevlogError::AmbiguousPrefix);
191 return Err(RevlogError::AmbiguousPrefix);
192 }
192 }
193 found_by_prefix = Some(rev)
193 found_by_prefix = Some(rev)
194 }
194 }
195 }
195 }
196 found_by_prefix.ok_or(RevlogError::InvalidRevision)
196 found_by_prefix.ok_or(RevlogError::InvalidRevision)
197 }
197 }
198
198
199 /// Returns whether the given revision exists in this revlog.
199 /// Returns whether the given revision exists in this revlog.
200 pub fn has_rev(&self, rev: Revision) -> bool {
200 pub fn has_rev(&self, rev: Revision) -> bool {
201 self.index.get_entry(rev).is_some()
201 self.index.get_entry(rev).is_some()
202 }
202 }
203
203
204 /// Return the full data associated to a revision.
204 /// Return the full data associated to a revision.
205 ///
205 ///
206 /// All entries required to build the final data out of deltas will be
206 /// All entries required to build the final data out of deltas will be
207 /// retrieved as needed, and the deltas will be applied to the initial
207 /// retrieved as needed, and the deltas will be applied to the initial
208 /// snapshot to rebuild the final data.
208 /// snapshot to rebuild the final data.
209 #[timed]
209 #[timed]
210 pub fn get_rev_data(
210 pub fn get_rev_data(
211 &self,
211 &self,
212 rev: Revision,
212 rev: Revision,
213 ) -> Result<Cow<[u8]>, RevlogError> {
213 ) -> Result<Cow<[u8]>, RevlogError> {
214 if rev == NULL_REVISION {
214 if rev == NULL_REVISION {
215 return Ok(Cow::Borrowed(&[]));
215 return Ok(Cow::Borrowed(&[]));
216 };
216 };
217 Ok(self.get_entry(rev)?.data()?)
217 Ok(self.get_entry(rev)?.data()?)
218 }
218 }
219
219
220 /// Check the hash of some given data against the recorded hash.
220 /// Check the hash of some given data against the recorded hash.
221 pub fn check_hash(
221 pub fn check_hash(
222 &self,
222 &self,
223 p1: Revision,
223 p1: Revision,
224 p2: Revision,
224 p2: Revision,
225 expected: &[u8],
225 expected: &[u8],
226 data: &[u8],
226 data: &[u8],
227 ) -> bool {
227 ) -> bool {
228 let e1 = self.index.get_entry(p1);
228 let e1 = self.index.get_entry(p1);
229 let h1 = match e1 {
229 let h1 = match e1 {
230 Some(ref entry) => entry.hash(),
230 Some(ref entry) => entry.hash(),
231 None => &NULL_NODE,
231 None => &NULL_NODE,
232 };
232 };
233 let e2 = self.index.get_entry(p2);
233 let e2 = self.index.get_entry(p2);
234 let h2 = match e2 {
234 let h2 = match e2 {
235 Some(ref entry) => entry.hash(),
235 Some(ref entry) => entry.hash(),
236 None => &NULL_NODE,
236 None => &NULL_NODE,
237 };
237 };
238
238
239 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
239 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
240 }
240 }
241
241
242 /// Build the full data of a revision out of its snapshot
242 /// Build the full data of a revision out of its snapshot
243 /// and its deltas.
243 /// and its deltas.
244 #[timed]
244 #[timed]
245 fn build_data_from_deltas(
245 fn build_data_from_deltas(
246 snapshot: RevlogEntry,
246 snapshot: RevlogEntry,
247 deltas: &[RevlogEntry],
247 deltas: &[RevlogEntry],
248 ) -> Result<Vec<u8>, HgError> {
248 ) -> Result<Vec<u8>, HgError> {
249 let snapshot = snapshot.data_chunk()?;
249 let snapshot = snapshot.data_chunk()?;
250 let deltas = deltas
250 let deltas = deltas
251 .iter()
251 .iter()
252 .rev()
252 .rev()
253 .map(RevlogEntry::data_chunk)
253 .map(RevlogEntry::data_chunk)
254 .collect::<Result<Vec<_>, _>>()?;
254 .collect::<Result<Vec<_>, _>>()?;
255 let patches: Vec<_> =
255 let patches: Vec<_> =
256 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
256 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
257 let patch = patch::fold_patch_lists(&patches);
257 let patch = patch::fold_patch_lists(&patches);
258 Ok(patch.apply(&snapshot))
258 Ok(patch.apply(&snapshot))
259 }
259 }
260
260
261 /// Return the revlog data.
261 /// Return the revlog data.
262 fn data(&self) -> &[u8] {
262 fn data(&self) -> &[u8] {
263 match self.data_bytes {
263 match self.data_bytes {
264 Some(ref data_bytes) => &data_bytes,
264 Some(ref data_bytes) => &data_bytes,
265 None => panic!(
265 None => panic!(
266 "forgot to load the data or trying to access inline data"
266 "forgot to load the data or trying to access inline data"
267 ),
267 ),
268 }
268 }
269 }
269 }
270
270
271 /// Get an entry of the revlog.
271 /// Get an entry of the revlog.
272 pub fn get_entry(
272 pub fn get_entry(
273 &self,
273 &self,
274 rev: Revision,
274 rev: Revision,
275 ) -> Result<RevlogEntry, RevlogError> {
275 ) -> Result<RevlogEntry, RevlogError> {
276 let index_entry = self
276 let index_entry = self
277 .index
277 .index
278 .get_entry(rev)
278 .get_entry(rev)
279 .ok_or(RevlogError::InvalidRevision)?;
279 .ok_or(RevlogError::InvalidRevision)?;
280 let start = index_entry.offset();
280 let start = index_entry.offset();
281 let end = start + index_entry.compressed_len() as usize;
281 let end = start + index_entry.compressed_len() as usize;
282 let data = if self.index.is_inline() {
282 let data = if self.index.is_inline() {
283 self.index.data(start, end)
283 self.index.data(start, end)
284 } else {
284 } else {
285 &self.data()[start..end]
285 &self.data()[start..end]
286 };
286 };
287 let entry = RevlogEntry {
287 let entry = RevlogEntry {
288 revlog: self,
288 revlog: self,
289 rev,
289 rev,
290 bytes: data,
290 bytes: data,
291 compressed_len: index_entry.compressed_len(),
291 compressed_len: index_entry.compressed_len(),
292 uncompressed_len: index_entry.uncompressed_len(),
292 uncompressed_len: index_entry.uncompressed_len(),
293 base_rev_or_base_of_delta_chain: if index_entry
293 base_rev_or_base_of_delta_chain: if index_entry
294 .base_revision_or_base_of_delta_chain()
294 .base_revision_or_base_of_delta_chain()
295 == rev
295 == rev
296 {
296 {
297 None
297 None
298 } else {
298 } else {
299 Some(index_entry.base_revision_or_base_of_delta_chain())
299 Some(index_entry.base_revision_or_base_of_delta_chain())
300 },
300 },
301 p1: index_entry.p1(),
301 p1: index_entry.p1(),
302 p2: index_entry.p2(),
302 p2: index_entry.p2(),
303 flags: index_entry.flags(),
303 flags: index_entry.flags(),
304 hash: *index_entry.hash(),
304 hash: *index_entry.hash(),
305 };
305 };
306 Ok(entry)
306 Ok(entry)
307 }
307 }
308
308
309 /// when resolving internal references within revlog, any errors
309 /// when resolving internal references within revlog, any errors
310 /// should be reported as corruption, instead of e.g. "invalid revision"
310 /// should be reported as corruption, instead of e.g. "invalid revision"
311 fn get_entry_internal(
311 fn get_entry_internal(
312 &self,
312 &self,
313 rev: Revision,
313 rev: Revision,
314 ) -> Result<RevlogEntry, HgError> {
314 ) -> Result<RevlogEntry, HgError> {
315 return self.get_entry(rev).map_err(|_| corrupted());
315 return self.get_entry(rev).map_err(|_| corrupted());
316 }
316 }
317 }
317 }
318
318
319 /// The revlog entry's bytes and the necessary information to extract
319 /// The revlog entry's bytes and the necessary information to extract
320 /// the entry's data.
320 /// the entry's data.
321 #[derive(Clone)]
321 #[derive(Clone)]
322 pub struct RevlogEntry<'a> {
322 pub struct RevlogEntry<'a> {
323 revlog: &'a Revlog,
323 revlog: &'a Revlog,
324 rev: Revision,
324 rev: Revision,
325 bytes: &'a [u8],
325 bytes: &'a [u8],
326 compressed_len: u32,
326 compressed_len: u32,
327 uncompressed_len: i32,
327 uncompressed_len: i32,
328 base_rev_or_base_of_delta_chain: Option<Revision>,
328 base_rev_or_base_of_delta_chain: Option<Revision>,
329 p1: Revision,
329 p1: Revision,
330 p2: Revision,
330 p2: Revision,
331 flags: u16,
331 flags: u16,
332 hash: Node,
332 hash: Node,
333 }
333 }
334
334
335 impl<'a> RevlogEntry<'a> {
335 impl<'a> RevlogEntry<'a> {
336 pub fn revision(&self) -> Revision {
336 pub fn revision(&self) -> Revision {
337 self.rev
337 self.rev
338 }
338 }
339
339
340 pub fn node(&self) -> &Node {
340 pub fn node(&self) -> &Node {
341 &self.hash
341 &self.hash
342 }
342 }
343
343
344 pub fn uncompressed_len(&self) -> Option<u32> {
344 pub fn uncompressed_len(&self) -> Option<u32> {
345 u32::try_from(self.uncompressed_len).ok()
345 u32::try_from(self.uncompressed_len).ok()
346 }
346 }
347
347
348 pub fn has_p1(&self) -> bool {
348 pub fn has_p1(&self) -> bool {
349 self.p1 != NULL_REVISION
349 self.p1 != NULL_REVISION
350 }
350 }
351
351
352 pub fn p1_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
352 pub fn p1_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
353 if self.p1 == NULL_REVISION {
353 if self.p1 == NULL_REVISION {
354 Ok(None)
354 Ok(None)
355 } else {
355 } else {
356 Ok(Some(self.revlog.get_entry(self.p1)?))
356 Ok(Some(self.revlog.get_entry(self.p1)?))
357 }
357 }
358 }
358 }
359
359
360 pub fn p2_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
360 pub fn p2_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
361 if self.p2 == NULL_REVISION {
361 if self.p2 == NULL_REVISION {
362 Ok(None)
362 Ok(None)
363 } else {
363 } else {
364 Ok(Some(self.revlog.get_entry(self.p2)?))
364 Ok(Some(self.revlog.get_entry(self.p2)?))
365 }
365 }
366 }
366 }
367
367
368 pub fn p1(&self) -> Option<Revision> {
368 pub fn p1(&self) -> Option<Revision> {
369 if self.p1 == NULL_REVISION {
369 if self.p1 == NULL_REVISION {
370 None
370 None
371 } else {
371 } else {
372 Some(self.p1)
372 Some(self.p1)
373 }
373 }
374 }
374 }
375
375
376 pub fn p2(&self) -> Option<Revision> {
376 pub fn p2(&self) -> Option<Revision> {
377 if self.p2 == NULL_REVISION {
377 if self.p2 == NULL_REVISION {
378 None
378 None
379 } else {
379 } else {
380 Some(self.p2)
380 Some(self.p2)
381 }
381 }
382 }
382 }
383
383
384 pub fn is_cencored(&self) -> bool {
384 pub fn is_cencored(&self) -> bool {
385 (self.flags & REVISION_FLAG_CENSORED) != 0
385 (self.flags & REVISION_FLAG_CENSORED) != 0
386 }
386 }
387
387
388 pub fn has_length_affecting_flag_processor(&self) -> bool {
388 pub fn has_length_affecting_flag_processor(&self) -> bool {
389 // Relevant Python code: revlog.size()
389 // Relevant Python code: revlog.size()
390 // note: ELLIPSIS is known to not change the content
390 // note: ELLIPSIS is known to not change the content
391 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
391 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
392 }
392 }
393
393
394 /// The data for this entry, after resolving deltas if any.
394 /// The data for this entry, after resolving deltas if any.
395 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
395 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
396 let mut entry = self.clone();
396 let mut entry = self.clone();
397 let mut delta_chain = vec![];
397 let mut delta_chain = vec![];
398
398
399 // The meaning of `base_rev_or_base_of_delta_chain` depends on
399 // The meaning of `base_rev_or_base_of_delta_chain` depends on
400 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
400 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
401 // `mercurial/revlogutils/constants.py` and the code in
401 // `mercurial/revlogutils/constants.py` and the code in
402 // [_chaininfo] and in [index_deltachain].
402 // [_chaininfo] and in [index_deltachain].
403 let uses_generaldelta = self.revlog.index.uses_generaldelta();
403 let uses_generaldelta = self.revlog.index.uses_generaldelta();
404 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
404 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
405 let base_rev = if uses_generaldelta {
405 let base_rev = if uses_generaldelta {
406 base_rev
406 base_rev
407 } else {
407 } else {
408 entry.rev - 1
408 entry.rev - 1
409 };
409 };
410 delta_chain.push(entry);
410 delta_chain.push(entry);
411 entry = self.revlog.get_entry_internal(base_rev)?;
411 entry = self.revlog.get_entry_internal(base_rev)?;
412 }
412 }
413
413
414 let data = if delta_chain.is_empty() {
414 let data = if delta_chain.is_empty() {
415 entry.data_chunk()?
415 entry.data_chunk()?
416 } else {
416 } else {
417 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
417 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
418 };
418 };
419
419
420 if self.revlog.check_hash(
420 if self.revlog.check_hash(
421 self.p1,
421 self.p1,
422 self.p2,
422 self.p2,
423 self.hash.as_bytes(),
423 self.hash.as_bytes(),
424 &data,
424 &data,
425 ) {
425 ) {
426 Ok(data)
426 Ok(data)
427 } else {
427 } else {
428 Err(corrupted())
428 Err(corrupted())
429 }
429 }
430 }
430 }
431
431
432 /// Extract the data contained in the entry.
432 /// Extract the data contained in the entry.
433 /// This may be a delta. (See `is_delta`.)
433 /// This may be a delta. (See `is_delta`.)
434 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
434 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
435 if self.bytes.is_empty() {
435 if self.bytes.is_empty() {
436 return Ok(Cow::Borrowed(&[]));
436 return Ok(Cow::Borrowed(&[]));
437 }
437 }
438 match self.bytes[0] {
438 match self.bytes[0] {
439 // Revision data is the entirety of the entry, including this
439 // Revision data is the entirety of the entry, including this
440 // header.
440 // header.
441 b'\0' => Ok(Cow::Borrowed(self.bytes)),
441 b'\0' => Ok(Cow::Borrowed(self.bytes)),
442 // Raw revision data follows.
442 // Raw revision data follows.
443 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
443 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
444 // zlib (RFC 1950) data.
444 // zlib (RFC 1950) data.
445 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
445 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
446 // zstd data.
446 // zstd data.
447 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
447 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
448 // A proper new format should have had a repo/store requirement.
448 // A proper new format should have had a repo/store requirement.
449 _format_type => Err(corrupted()),
449 _format_type => Err(corrupted()),
450 }
450 }
451 }
451 }
452
452
453 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
453 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
454 let mut decoder = ZlibDecoder::new(self.bytes);
454 let mut decoder = ZlibDecoder::new(self.bytes);
455 if self.is_delta() {
455 if self.is_delta() {
456 let mut buf = Vec::with_capacity(self.compressed_len as usize);
456 let mut buf = Vec::with_capacity(self.compressed_len as usize);
457 decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
457 decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
458 Ok(buf)
458 Ok(buf)
459 } else {
459 } else {
460 let cap = self.uncompressed_len.max(0) as usize;
460 let cap = self.uncompressed_len.max(0) as usize;
461 let mut buf = vec![0; cap];
461 let mut buf = vec![0; cap];
462 decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
462 decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
463 Ok(buf)
463 Ok(buf)
464 }
464 }
465 }
465 }
466
466
467 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
467 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
468 if self.is_delta() {
468 if self.is_delta() {
469 let mut buf = Vec::with_capacity(self.compressed_len as usize);
469 let mut buf = Vec::with_capacity(self.compressed_len as usize);
470 zstd::stream::copy_decode(self.bytes, &mut buf)
470 zstd::stream::copy_decode(self.bytes, &mut buf)
471 .map_err(|_| corrupted())?;
471 .map_err(|_| corrupted())?;
472 Ok(buf)
472 Ok(buf)
473 } else {
473 } else {
474 let cap = self.uncompressed_len.max(0) as usize;
474 let cap = self.uncompressed_len.max(0) as usize;
475 let mut buf = vec![0; cap];
475 let mut buf = vec![0; cap];
476 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
476 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
477 .map_err(|_| corrupted())?;
477 .map_err(|_| corrupted())?;
478 if len != self.uncompressed_len as usize {
478 if len != self.uncompressed_len as usize {
479 Err(corrupted())
479 Err(corrupted())
480 } else {
480 } else {
481 Ok(buf)
481 Ok(buf)
482 }
482 }
483 }
483 }
484 }
484 }
485
485
486 /// Tell if the entry is a snapshot or a delta
486 /// Tell if the entry is a snapshot or a delta
487 /// (influences on decompression).
487 /// (influences on decompression).
488 fn is_delta(&self) -> bool {
488 fn is_delta(&self) -> bool {
489 self.base_rev_or_base_of_delta_chain.is_some()
489 self.base_rev_or_base_of_delta_chain.is_some()
490 }
490 }
491 }
491 }
492
492
493 /// Calculate the hash of a revision given its data and its parents.
493 /// Calculate the hash of a revision given its data and its parents.
494 fn hash(
494 fn hash(
495 data: &[u8],
495 data: &[u8],
496 p1_hash: &[u8],
496 p1_hash: &[u8],
497 p2_hash: &[u8],
497 p2_hash: &[u8],
498 ) -> [u8; NODE_BYTES_LENGTH] {
498 ) -> [u8; NODE_BYTES_LENGTH] {
499 let mut hasher = Sha1::new();
499 let mut hasher = Sha1::new();
500 let (a, b) = (p1_hash, p2_hash);
500 let (a, b) = (p1_hash, p2_hash);
501 if a > b {
501 if a > b {
502 hasher.update(b);
502 hasher.update(b);
503 hasher.update(a);
503 hasher.update(a);
504 } else {
504 } else {
505 hasher.update(a);
505 hasher.update(a);
506 hasher.update(b);
506 hasher.update(b);
507 }
507 }
508 hasher.update(data);
508 hasher.update(data);
509 *hasher.finalize().as_ref()
509 *hasher.finalize().as_ref()
510 }
510 }
General Comments 0
You need to be logged in to leave comments. Login now