##// END OF EJS Templates
rhg: Add RevlogEntry::data that does delta resolution...
Simon Sapin -
r49373:f2f57724 default
parent child Browse files
Show More
@@ -1,33 +1,33
1 // debugdata.rs
1 // debugdata.rs
2 //
2 //
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::repo::Repo;
8 use crate::repo::Repo;
9 use crate::revlog::revlog::{Revlog, RevlogError};
9 use crate::revlog::revlog::{Revlog, RevlogError};
10
10
11 /// Kind of data to debug
11 /// Kind of data to debug
12 #[derive(Debug, Copy, Clone)]
12 #[derive(Debug, Copy, Clone)]
13 pub enum DebugDataKind {
13 pub enum DebugDataKind {
14 Changelog,
14 Changelog,
15 Manifest,
15 Manifest,
16 }
16 }
17
17
18 /// Dump the contents data of a revision.
18 /// Dump the contents data of a revision.
19 pub fn debug_data(
19 pub fn debug_data(
20 repo: &Repo,
20 repo: &Repo,
21 revset: &str,
21 revset: &str,
22 kind: DebugDataKind,
22 kind: DebugDataKind,
23 ) -> Result<Vec<u8>, RevlogError> {
23 ) -> Result<Vec<u8>, RevlogError> {
24 let index_file = match kind {
24 let index_file = match kind {
25 DebugDataKind::Changelog => "00changelog.i",
25 DebugDataKind::Changelog => "00changelog.i",
26 DebugDataKind::Manifest => "00manifest.i",
26 DebugDataKind::Manifest => "00manifest.i",
27 };
27 };
28 let revlog = Revlog::open(repo, index_file, None)?;
28 let revlog = Revlog::open(repo, index_file, None)?;
29 let rev =
29 let rev =
30 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
30 crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
31 let data = revlog.get_rev_data(rev)?;
31 let data = revlog.get_rev_data(rev)?;
32 Ok(data)
32 Ok(data.into_owned())
33 }
33 }
@@ -1,67 +1,67
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::repo::Repo;
2 use crate::repo::Repo;
3 use crate::revlog::node::NULL_NODE;
3 use crate::revlog::node::NULL_NODE;
4 use crate::revlog::revlog::{Revlog, RevlogError};
4 use crate::revlog::revlog::{Revlog, RevlogError};
5 use crate::revlog::Revision;
5 use crate::revlog::Revision;
6 use crate::revlog::{Node, NodePrefix};
6 use crate::revlog::{Node, NodePrefix};
7
7
8 /// A specialized `Revlog` to work with `changelog` data format.
8 /// A specialized `Revlog` to work with `changelog` data format.
9 pub struct Changelog {
9 pub struct Changelog {
10 /// The generic `revlog` format.
10 /// The generic `revlog` format.
11 pub(crate) revlog: Revlog,
11 pub(crate) revlog: Revlog,
12 }
12 }
13
13
14 impl Changelog {
14 impl Changelog {
15 /// Open the `changelog` of a repository given by its root.
15 /// Open the `changelog` of a repository given by its root.
16 pub fn open(repo: &Repo) -> Result<Self, HgError> {
16 pub fn open(repo: &Repo) -> Result<Self, HgError> {
17 let revlog = Revlog::open(repo, "00changelog.i", None)?;
17 let revlog = Revlog::open(repo, "00changelog.i", None)?;
18 Ok(Self { revlog })
18 Ok(Self { revlog })
19 }
19 }
20
20
21 /// Return the `ChangelogEntry` for the given node ID.
21 /// Return the `ChangelogEntry` for the given node ID.
22 pub fn data_for_node(
22 pub fn data_for_node(
23 &self,
23 &self,
24 node: NodePrefix,
24 node: NodePrefix,
25 ) -> Result<ChangelogRevisionData, RevlogError> {
25 ) -> Result<ChangelogRevisionData, RevlogError> {
26 let rev = self.revlog.rev_from_node(node)?;
26 let rev = self.revlog.rev_from_node(node)?;
27 self.data_for_rev(rev)
27 self.data_for_rev(rev)
28 }
28 }
29
29
30 /// Return the `ChangelogEntry` of the given revision number.
30 /// Return the `ChangelogEntry` of the given revision number.
31 pub fn data_for_rev(
31 pub fn data_for_rev(
32 &self,
32 &self,
33 rev: Revision,
33 rev: Revision,
34 ) -> Result<ChangelogRevisionData, RevlogError> {
34 ) -> Result<ChangelogRevisionData, RevlogError> {
35 let bytes = self.revlog.get_rev_data(rev)?;
35 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
36 Ok(ChangelogRevisionData { bytes })
36 Ok(ChangelogRevisionData { bytes })
37 }
37 }
38
38
39 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
39 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
40 self.revlog.node_from_rev(rev)
40 self.revlog.node_from_rev(rev)
41 }
41 }
42 }
42 }
43
43
44 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
44 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
45 #[derive(Debug)]
45 #[derive(Debug)]
46 pub struct ChangelogRevisionData {
46 pub struct ChangelogRevisionData {
47 /// The data bytes of the `changelog` entry.
47 /// The data bytes of the `changelog` entry.
48 bytes: Vec<u8>,
48 bytes: Vec<u8>,
49 }
49 }
50
50
51 impl ChangelogRevisionData {
51 impl ChangelogRevisionData {
52 /// Return an iterator over the lines of the entry.
52 /// Return an iterator over the lines of the entry.
53 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
53 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
54 self.bytes
54 self.bytes
55 .split(|b| b == &b'\n')
55 .split(|b| b == &b'\n')
56 .filter(|line| !line.is_empty())
56 .filter(|line| !line.is_empty())
57 }
57 }
58
58
59 /// Return the node id of the `manifest` referenced by this `changelog`
59 /// Return the node id of the `manifest` referenced by this `changelog`
60 /// entry.
60 /// entry.
61 pub fn manifest_node(&self) -> Result<Node, HgError> {
61 pub fn manifest_node(&self) -> Result<Node, HgError> {
62 match self.lines().next() {
62 match self.lines().next() {
63 None => Ok(NULL_NODE),
63 None => Ok(NULL_NODE),
64 Some(x) => Node::from_hex_for_repo(x),
64 Some(x) => Node::from_hex_for_repo(x),
65 }
65 }
66 }
66 }
67 }
67 }
@@ -1,89 +1,89
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::repo::Repo;
2 use crate::repo::Repo;
3 use crate::revlog::path_encode::path_encode;
3 use crate::revlog::path_encode::path_encode;
4 use crate::revlog::revlog::{Revlog, RevlogError};
4 use crate::revlog::revlog::{Revlog, RevlogError};
5 use crate::revlog::NodePrefix;
5 use crate::revlog::NodePrefix;
6 use crate::revlog::Revision;
6 use crate::revlog::Revision;
7 use crate::utils::files::get_path_from_bytes;
7 use crate::utils::files::get_path_from_bytes;
8 use crate::utils::hg_path::HgPath;
8 use crate::utils::hg_path::HgPath;
9 use crate::utils::SliceExt;
9 use crate::utils::SliceExt;
10 use std::path::PathBuf;
10 use std::path::PathBuf;
11
11
12 /// A specialized `Revlog` to work with file data logs.
12 /// A specialized `Revlog` to work with file data logs.
13 pub struct Filelog {
13 pub struct Filelog {
14 /// The generic `revlog` format.
14 /// The generic `revlog` format.
15 revlog: Revlog,
15 revlog: Revlog,
16 }
16 }
17
17
18 impl Filelog {
18 impl Filelog {
19 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
19 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
20 let index_path = store_path(file_path, b".i");
20 let index_path = store_path(file_path, b".i");
21 let data_path = store_path(file_path, b".d");
21 let data_path = store_path(file_path, b".d");
22 let revlog = Revlog::open(repo, index_path, Some(&data_path))?;
22 let revlog = Revlog::open(repo, index_path, Some(&data_path))?;
23 Ok(Self { revlog })
23 Ok(Self { revlog })
24 }
24 }
25
25
26 /// The given node ID is that of the file as found in a manifest, not of a
26 /// The given node ID is that of the file as found in a manifest, not of a
27 /// changeset.
27 /// changeset.
28 pub fn data_for_node(
28 pub fn data_for_node(
29 &self,
29 &self,
30 file_node: impl Into<NodePrefix>,
30 file_node: impl Into<NodePrefix>,
31 ) -> Result<FilelogRevisionData, RevlogError> {
31 ) -> Result<FilelogRevisionData, RevlogError> {
32 let file_rev = self.revlog.rev_from_node(file_node.into())?;
32 let file_rev = self.revlog.rev_from_node(file_node.into())?;
33 self.data_for_rev(file_rev)
33 self.data_for_rev(file_rev)
34 }
34 }
35
35
36 /// The given revision is that of the file as found in a manifest, not of a
36 /// The given revision is that of the file as found in a manifest, not of a
37 /// changeset.
37 /// changeset.
38 pub fn data_for_rev(
38 pub fn data_for_rev(
39 &self,
39 &self,
40 file_rev: Revision,
40 file_rev: Revision,
41 ) -> Result<FilelogRevisionData, RevlogError> {
41 ) -> Result<FilelogRevisionData, RevlogError> {
42 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?;
42 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
43 Ok(FilelogRevisionData(data.into()))
43 Ok(FilelogRevisionData(data.into()))
44 }
44 }
45 }
45 }
46
46
47 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
47 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
48 let encoded_bytes =
48 let encoded_bytes =
49 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
49 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
50 get_path_from_bytes(&encoded_bytes).into()
50 get_path_from_bytes(&encoded_bytes).into()
51 }
51 }
52
52
53 /// The data for one revision in a filelog, uncompressed and delta-resolved.
53 /// The data for one revision in a filelog, uncompressed and delta-resolved.
54 pub struct FilelogRevisionData(Vec<u8>);
54 pub struct FilelogRevisionData(Vec<u8>);
55
55
56 impl FilelogRevisionData {
56 impl FilelogRevisionData {
57 /// Split into metadata and data
57 /// Split into metadata and data
58 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
58 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
59 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
59 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
60
60
61 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
61 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
62 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
62 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
63 Ok((Some(metadata), data))
63 Ok((Some(metadata), data))
64 } else {
64 } else {
65 Err(HgError::corrupted(
65 Err(HgError::corrupted(
66 "Missing metadata end delimiter in filelog entry",
66 "Missing metadata end delimiter in filelog entry",
67 ))
67 ))
68 }
68 }
69 } else {
69 } else {
70 Ok((None, &self.0))
70 Ok((None, &self.0))
71 }
71 }
72 }
72 }
73
73
74 /// Returns the file contents at this revision, stripped of any metadata
74 /// Returns the file contents at this revision, stripped of any metadata
75 pub fn file_data(&self) -> Result<&[u8], HgError> {
75 pub fn file_data(&self) -> Result<&[u8], HgError> {
76 let (_metadata, data) = self.split()?;
76 let (_metadata, data) = self.split()?;
77 Ok(data)
77 Ok(data)
78 }
78 }
79
79
80 /// Consume the entry, and convert it into data, discarding any metadata,
80 /// Consume the entry, and convert it into data, discarding any metadata,
81 /// if present.
81 /// if present.
82 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
82 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
83 if let (Some(_metadata), data) = self.split()? {
83 if let (Some(_metadata), data) = self.split()? {
84 Ok(data.to_owned())
84 Ok(data.to_owned())
85 } else {
85 } else {
86 Ok(self.0)
86 Ok(self.0)
87 }
87 }
88 }
88 }
89 }
89 }
@@ -1,193 +1,193
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::repo::Repo;
2 use crate::repo::Repo;
3 use crate::revlog::revlog::{Revlog, RevlogError};
3 use crate::revlog::revlog::{Revlog, RevlogError};
4 use crate::revlog::Revision;
4 use crate::revlog::Revision;
5 use crate::revlog::{Node, NodePrefix};
5 use crate::revlog::{Node, NodePrefix};
6 use crate::utils::hg_path::HgPath;
6 use crate::utils::hg_path::HgPath;
7 use crate::utils::SliceExt;
7 use crate::utils::SliceExt;
8
8
9 /// A specialized `Revlog` to work with `manifest` data format.
9 /// A specialized `Revlog` to work with `manifest` data format.
10 pub struct Manifestlog {
10 pub struct Manifestlog {
11 /// The generic `revlog` format.
11 /// The generic `revlog` format.
12 revlog: Revlog,
12 revlog: Revlog,
13 }
13 }
14
14
15 impl Manifestlog {
15 impl Manifestlog {
16 /// Open the `manifest` of a repository given by its root.
16 /// Open the `manifest` of a repository given by its root.
17 pub fn open(repo: &Repo) -> Result<Self, HgError> {
17 pub fn open(repo: &Repo) -> Result<Self, HgError> {
18 let revlog = Revlog::open(repo, "00manifest.i", None)?;
18 let revlog = Revlog::open(repo, "00manifest.i", None)?;
19 Ok(Self { revlog })
19 Ok(Self { revlog })
20 }
20 }
21
21
22 /// Return the `Manifest` for the given node ID.
22 /// Return the `Manifest` for the given node ID.
23 ///
23 ///
24 /// Note: this is a node ID in the manifestlog, typically found through
24 /// Note: this is a node ID in the manifestlog, typically found through
25 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
25 /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
26 /// changeset.
26 /// changeset.
27 ///
27 ///
28 /// See also `Repo::manifest_for_node`
28 /// See also `Repo::manifest_for_node`
29 pub fn data_for_node(
29 pub fn data_for_node(
30 &self,
30 &self,
31 node: NodePrefix,
31 node: NodePrefix,
32 ) -> Result<Manifest, RevlogError> {
32 ) -> Result<Manifest, RevlogError> {
33 let rev = self.revlog.rev_from_node(node)?;
33 let rev = self.revlog.rev_from_node(node)?;
34 self.data_for_rev(rev)
34 self.data_for_rev(rev)
35 }
35 }
36
36
37 /// Return the `Manifest` of a given revision number.
37 /// Return the `Manifest` of a given revision number.
38 ///
38 ///
39 /// Note: this is a revision number in the manifestlog, *not* of any
39 /// Note: this is a revision number in the manifestlog, *not* of any
40 /// changeset.
40 /// changeset.
41 ///
41 ///
42 /// See also `Repo::manifest_for_rev`
42 /// See also `Repo::manifest_for_rev`
43 pub fn data_for_rev(
43 pub fn data_for_rev(
44 &self,
44 &self,
45 rev: Revision,
45 rev: Revision,
46 ) -> Result<Manifest, RevlogError> {
46 ) -> Result<Manifest, RevlogError> {
47 let bytes = self.revlog.get_rev_data(rev)?;
47 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
48 Ok(Manifest { bytes })
48 Ok(Manifest { bytes })
49 }
49 }
50 }
50 }
51
51
52 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
52 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
53 #[derive(Debug)]
53 #[derive(Debug)]
54 pub struct Manifest {
54 pub struct Manifest {
55 /// Format for a manifest: flat sequence of variable-size entries,
55 /// Format for a manifest: flat sequence of variable-size entries,
56 /// sorted by path, each as:
56 /// sorted by path, each as:
57 ///
57 ///
58 /// ```text
58 /// ```text
59 /// <path> \0 <hex_node_id> <flags> \n
59 /// <path> \0 <hex_node_id> <flags> \n
60 /// ```
60 /// ```
61 ///
61 ///
62 /// The last entry is also terminated by a newline character.
62 /// The last entry is also terminated by a newline character.
63 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
63 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
64 bytes: Vec<u8>,
64 bytes: Vec<u8>,
65 }
65 }
66
66
67 impl Manifest {
67 impl Manifest {
68 pub fn iter(
68 pub fn iter(
69 &self,
69 &self,
70 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
70 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
71 self.bytes
71 self.bytes
72 .split(|b| b == &b'\n')
72 .split(|b| b == &b'\n')
73 .filter(|line| !line.is_empty())
73 .filter(|line| !line.is_empty())
74 .map(ManifestEntry::from_raw)
74 .map(ManifestEntry::from_raw)
75 }
75 }
76
76
77 /// If the given path is in this manifest, return its filelog node ID
77 /// If the given path is in this manifest, return its filelog node ID
78 pub fn find_by_path(
78 pub fn find_by_path(
79 &self,
79 &self,
80 path: &HgPath,
80 path: &HgPath,
81 ) -> Result<Option<ManifestEntry>, HgError> {
81 ) -> Result<Option<ManifestEntry>, HgError> {
82 use std::cmp::Ordering::*;
82 use std::cmp::Ordering::*;
83 let path = path.as_bytes();
83 let path = path.as_bytes();
84 // Both boundaries of this `&[u8]` slice are always at the boundary of
84 // Both boundaries of this `&[u8]` slice are always at the boundary of
85 // an entry
85 // an entry
86 let mut bytes = &*self.bytes;
86 let mut bytes = &*self.bytes;
87
87
88 // Binary search algorithm derived from `[T]::binary_search_by`
88 // Binary search algorithm derived from `[T]::binary_search_by`
89 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
89 // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
90 // except we don’t have a slice of entries. Instead we jump to the
90 // except we don’t have a slice of entries. Instead we jump to the
91 // middle of the byte slice and look around for entry delimiters
91 // middle of the byte slice and look around for entry delimiters
92 // (newlines).
92 // (newlines).
93 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
93 while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
94 let (entry_path, rest) =
94 let (entry_path, rest) =
95 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
95 ManifestEntry::split_path(&bytes[entry_range.clone()])?;
96 let cmp = entry_path.cmp(path);
96 let cmp = entry_path.cmp(path);
97 if cmp == Less {
97 if cmp == Less {
98 let after_newline = entry_range.end + 1;
98 let after_newline = entry_range.end + 1;
99 bytes = &bytes[after_newline..];
99 bytes = &bytes[after_newline..];
100 } else if cmp == Greater {
100 } else if cmp == Greater {
101 bytes = &bytes[..entry_range.start];
101 bytes = &bytes[..entry_range.start];
102 } else {
102 } else {
103 return Ok(Some(ManifestEntry::from_path_and_rest(
103 return Ok(Some(ManifestEntry::from_path_and_rest(
104 entry_path, rest,
104 entry_path, rest,
105 )));
105 )));
106 }
106 }
107 }
107 }
108 Ok(None)
108 Ok(None)
109 }
109 }
110
110
111 /// If there is at least one, return the byte range of an entry *excluding*
111 /// If there is at least one, return the byte range of an entry *excluding*
112 /// the final newline.
112 /// the final newline.
113 fn find_entry_near_middle_of(
113 fn find_entry_near_middle_of(
114 bytes: &[u8],
114 bytes: &[u8],
115 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
115 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
116 let len = bytes.len();
116 let len = bytes.len();
117 if len > 0 {
117 if len > 0 {
118 let middle = bytes.len() / 2;
118 let middle = bytes.len() / 2;
119 // Integer division rounds down, so `middle < len`.
119 // Integer division rounds down, so `middle < len`.
120 let (before, after) = bytes.split_at(middle);
120 let (before, after) = bytes.split_at(middle);
121 let is_newline = |&byte: &u8| byte == b'\n';
121 let is_newline = |&byte: &u8| byte == b'\n';
122 let entry_start = match before.iter().rposition(is_newline) {
122 let entry_start = match before.iter().rposition(is_newline) {
123 Some(i) => i + 1,
123 Some(i) => i + 1,
124 None => 0, // We choose the first entry in `bytes`
124 None => 0, // We choose the first entry in `bytes`
125 };
125 };
126 let entry_end = match after.iter().position(is_newline) {
126 let entry_end = match after.iter().position(is_newline) {
127 Some(i) => {
127 Some(i) => {
128 // No `+ 1` here to exclude this newline from the range
128 // No `+ 1` here to exclude this newline from the range
129 middle + i
129 middle + i
130 }
130 }
131 None => {
131 None => {
132 // In a well-formed manifest:
132 // In a well-formed manifest:
133 //
133 //
134 // * Since `len > 0`, `bytes` contains at least one entry
134 // * Since `len > 0`, `bytes` contains at least one entry
135 // * Every entry ends with a newline
135 // * Every entry ends with a newline
136 // * Since `middle < len`, `after` contains at least the
136 // * Since `middle < len`, `after` contains at least the
137 // newline at the end of the last entry of `bytes`.
137 // newline at the end of the last entry of `bytes`.
138 //
138 //
139 // We didn’t find a newline, so this manifest is not
139 // We didn’t find a newline, so this manifest is not
140 // well-formed.
140 // well-formed.
141 return Err(HgError::corrupted(
141 return Err(HgError::corrupted(
142 "manifest entry without \\n delimiter",
142 "manifest entry without \\n delimiter",
143 ));
143 ));
144 }
144 }
145 };
145 };
146 Ok(Some(entry_start..entry_end))
146 Ok(Some(entry_start..entry_end))
147 } else {
147 } else {
148 // len == 0
148 // len == 0
149 Ok(None)
149 Ok(None)
150 }
150 }
151 }
151 }
152 }
152 }
153
153
154 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
154 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
155 #[derive(Debug)]
155 #[derive(Debug)]
156 pub struct ManifestEntry<'manifest> {
156 pub struct ManifestEntry<'manifest> {
157 pub path: &'manifest HgPath,
157 pub path: &'manifest HgPath,
158 pub hex_node_id: &'manifest [u8],
158 pub hex_node_id: &'manifest [u8],
159
159
160 /// `Some` values are b'x', b'l', or 't'
160 /// `Some` values are b'x', b'l', or 't'
161 pub flags: Option<u8>,
161 pub flags: Option<u8>,
162 }
162 }
163
163
164 impl<'a> ManifestEntry<'a> {
164 impl<'a> ManifestEntry<'a> {
165 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
165 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
166 bytes.split_2(b'\0').ok_or_else(|| {
166 bytes.split_2(b'\0').ok_or_else(|| {
167 HgError::corrupted("manifest entry without \\0 delimiter")
167 HgError::corrupted("manifest entry without \\0 delimiter")
168 })
168 })
169 }
169 }
170
170
171 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
171 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
172 let (hex_node_id, flags) = match rest.split_last() {
172 let (hex_node_id, flags) = match rest.split_last() {
173 Some((&b'x', rest)) => (rest, Some(b'x')),
173 Some((&b'x', rest)) => (rest, Some(b'x')),
174 Some((&b'l', rest)) => (rest, Some(b'l')),
174 Some((&b'l', rest)) => (rest, Some(b'l')),
175 Some((&b't', rest)) => (rest, Some(b't')),
175 Some((&b't', rest)) => (rest, Some(b't')),
176 _ => (rest, None),
176 _ => (rest, None),
177 };
177 };
178 Self {
178 Self {
179 path: HgPath::new(path),
179 path: HgPath::new(path),
180 hex_node_id,
180 hex_node_id,
181 flags,
181 flags,
182 }
182 }
183 }
183 }
184
184
185 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
185 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
186 let (path, rest) = Self::split_path(bytes)?;
186 let (path, rest) = Self::split_path(bytes)?;
187 Ok(Self::from_path_and_rest(path, rest))
187 Ok(Self::from_path_and_rest(path, rest))
188 }
188 }
189
189
190 pub fn node_id(&self) -> Result<Node, HgError> {
190 pub fn node_id(&self) -> Result<Node, HgError> {
191 Node::from_hex_for_repo(self.hex_node_id)
191 Node::from_hex_for_repo(self.hex_node_id)
192 }
192 }
193 }
193 }
@@ -1,421 +1,431
1 use std::borrow::Cow;
1 use std::borrow::Cow;
2 use std::io::Read;
2 use std::io::Read;
3 use std::ops::Deref;
3 use std::ops::Deref;
4 use std::path::Path;
4 use std::path::Path;
5
5
6 use flate2::read::ZlibDecoder;
6 use flate2::read::ZlibDecoder;
7 use micro_timer::timed;
7 use micro_timer::timed;
8 use sha1::{Digest, Sha1};
8 use sha1::{Digest, Sha1};
9 use zstd;
9 use zstd;
10
10
11 use super::index::Index;
11 use super::index::Index;
12 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
12 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
13 use super::nodemap;
13 use super::nodemap;
14 use super::nodemap::{NodeMap, NodeMapError};
14 use super::nodemap::{NodeMap, NodeMapError};
15 use super::nodemap_docket::NodeMapDocket;
15 use super::nodemap_docket::NodeMapDocket;
16 use super::patch;
16 use super::patch;
17 use crate::errors::HgError;
17 use crate::errors::HgError;
18 use crate::repo::Repo;
18 use crate::repo::Repo;
19 use crate::revlog::Revision;
19 use crate::revlog::Revision;
20 use crate::{Node, NULL_REVISION};
20 use crate::{Node, NULL_REVISION};
21
21
22 #[derive(derive_more::From)]
22 #[derive(derive_more::From)]
23 pub enum RevlogError {
23 pub enum RevlogError {
24 InvalidRevision,
24 InvalidRevision,
25 /// Working directory is not supported
25 /// Working directory is not supported
26 WDirUnsupported,
26 WDirUnsupported,
27 /// Found more than one entry whose ID match the requested prefix
27 /// Found more than one entry whose ID match the requested prefix
28 AmbiguousPrefix,
28 AmbiguousPrefix,
29 #[from]
29 #[from]
30 Other(HgError),
30 Other(HgError),
31 }
31 }
32
32
33 impl From<NodeMapError> for RevlogError {
33 impl From<NodeMapError> for RevlogError {
34 fn from(error: NodeMapError) -> Self {
34 fn from(error: NodeMapError) -> Self {
35 match error {
35 match error {
36 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
36 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
37 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
37 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
38 }
38 }
39 }
39 }
40 }
40 }
41
41
42 impl RevlogError {
42 impl RevlogError {
43 fn corrupted() -> Self {
43 fn corrupted() -> Self {
44 RevlogError::Other(HgError::corrupted("corrupted revlog"))
44 RevlogError::Other(HgError::corrupted("corrupted revlog"))
45 }
45 }
46 }
46 }
47
47
48 /// Read only implementation of revlog.
48 /// Read only implementation of revlog.
49 pub struct Revlog {
49 pub struct Revlog {
50 /// When index and data are not interleaved: bytes of the revlog index.
50 /// When index and data are not interleaved: bytes of the revlog index.
51 /// When index and data are interleaved: bytes of the revlog index and
51 /// When index and data are interleaved: bytes of the revlog index and
52 /// data.
52 /// data.
53 index: Index,
53 index: Index,
54 /// When index and data are not interleaved: bytes of the revlog data
54 /// When index and data are not interleaved: bytes of the revlog data
55 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
55 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
56 /// When present on disk: the persistent nodemap for this revlog
56 /// When present on disk: the persistent nodemap for this revlog
57 nodemap: Option<nodemap::NodeTree>,
57 nodemap: Option<nodemap::NodeTree>,
58 }
58 }
59
59
60 impl Revlog {
60 impl Revlog {
61 /// Open a revlog index file.
61 /// Open a revlog index file.
62 ///
62 ///
63 /// It will also open the associated data file if index and data are not
63 /// It will also open the associated data file if index and data are not
64 /// interleaved.
64 /// interleaved.
65 #[timed]
65 #[timed]
66 pub fn open(
66 pub fn open(
67 repo: &Repo,
67 repo: &Repo,
68 index_path: impl AsRef<Path>,
68 index_path: impl AsRef<Path>,
69 data_path: Option<&Path>,
69 data_path: Option<&Path>,
70 ) -> Result<Self, HgError> {
70 ) -> Result<Self, HgError> {
71 let index_path = index_path.as_ref();
71 let index_path = index_path.as_ref();
72 let index = {
72 let index = {
73 match repo.store_vfs().mmap_open_opt(&index_path)? {
73 match repo.store_vfs().mmap_open_opt(&index_path)? {
74 None => Index::new(Box::new(vec![])),
74 None => Index::new(Box::new(vec![])),
75 Some(index_mmap) => {
75 Some(index_mmap) => {
76 let index = Index::new(Box::new(index_mmap))?;
76 let index = Index::new(Box::new(index_mmap))?;
77 Ok(index)
77 Ok(index)
78 }
78 }
79 }
79 }
80 }?;
80 }?;
81
81
82 let default_data_path = index_path.with_extension("d");
82 let default_data_path = index_path.with_extension("d");
83
83
84 // type annotation required
84 // type annotation required
85 // won't recognize Mmap as Deref<Target = [u8]>
85 // won't recognize Mmap as Deref<Target = [u8]>
86 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
86 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
87 if index.is_inline() {
87 if index.is_inline() {
88 None
88 None
89 } else {
89 } else {
90 let data_path = data_path.unwrap_or(&default_data_path);
90 let data_path = data_path.unwrap_or(&default_data_path);
91 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
91 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
92 Some(Box::new(data_mmap))
92 Some(Box::new(data_mmap))
93 };
93 };
94
94
95 let nodemap = if index.is_inline() {
95 let nodemap = if index.is_inline() {
96 None
96 None
97 } else {
97 } else {
98 NodeMapDocket::read_from_file(repo, index_path)?.map(
98 NodeMapDocket::read_from_file(repo, index_path)?.map(
99 |(docket, data)| {
99 |(docket, data)| {
100 nodemap::NodeTree::load_bytes(
100 nodemap::NodeTree::load_bytes(
101 Box::new(data),
101 Box::new(data),
102 docket.data_length,
102 docket.data_length,
103 )
103 )
104 },
104 },
105 )
105 )
106 };
106 };
107
107
108 Ok(Revlog {
108 Ok(Revlog {
109 index,
109 index,
110 data_bytes,
110 data_bytes,
111 nodemap,
111 nodemap,
112 })
112 })
113 }
113 }
114
114
115 /// Return number of entries of the `Revlog`.
115 /// Return number of entries of the `Revlog`.
116 pub fn len(&self) -> usize {
116 pub fn len(&self) -> usize {
117 self.index.len()
117 self.index.len()
118 }
118 }
119
119
120 /// Returns `true` if the `Revlog` has zero `entries`.
120 /// Returns `true` if the `Revlog` has zero `entries`.
121 pub fn is_empty(&self) -> bool {
121 pub fn is_empty(&self) -> bool {
122 self.index.is_empty()
122 self.index.is_empty()
123 }
123 }
124
124
125 /// Returns the node ID for the given revision number, if it exists in this
125 /// Returns the node ID for the given revision number, if it exists in this
126 /// revlog
126 /// revlog
127 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
127 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
128 if rev == NULL_REVISION {
128 if rev == NULL_REVISION {
129 return Some(&NULL_NODE);
129 return Some(&NULL_NODE);
130 }
130 }
131 Some(self.index.get_entry(rev)?.hash())
131 Some(self.index.get_entry(rev)?.hash())
132 }
132 }
133
133
134 /// Return the revision number for the given node ID, if it exists in this
134 /// Return the revision number for the given node ID, if it exists in this
135 /// revlog
135 /// revlog
136 #[timed]
136 #[timed]
137 pub fn rev_from_node(
137 pub fn rev_from_node(
138 &self,
138 &self,
139 node: NodePrefix,
139 node: NodePrefix,
140 ) -> Result<Revision, RevlogError> {
140 ) -> Result<Revision, RevlogError> {
141 if node.is_prefix_of(&NULL_NODE) {
141 if node.is_prefix_of(&NULL_NODE) {
142 return Ok(NULL_REVISION);
142 return Ok(NULL_REVISION);
143 }
143 }
144
144
145 if let Some(nodemap) = &self.nodemap {
145 if let Some(nodemap) = &self.nodemap {
146 return nodemap
146 return nodemap
147 .find_bin(&self.index, node)?
147 .find_bin(&self.index, node)?
148 .ok_or(RevlogError::InvalidRevision);
148 .ok_or(RevlogError::InvalidRevision);
149 }
149 }
150
150
151 // Fallback to linear scan when a persistent nodemap is not present.
151 // Fallback to linear scan when a persistent nodemap is not present.
152 // This happens when the persistent-nodemap experimental feature is not
152 // This happens when the persistent-nodemap experimental feature is not
153 // enabled, or for small revlogs.
153 // enabled, or for small revlogs.
154 //
154 //
155 // TODO: consider building a non-persistent nodemap in memory to
155 // TODO: consider building a non-persistent nodemap in memory to
156 // optimize these cases.
156 // optimize these cases.
157 let mut found_by_prefix = None;
157 let mut found_by_prefix = None;
158 for rev in (0..self.len() as Revision).rev() {
158 for rev in (0..self.len() as Revision).rev() {
159 let index_entry =
159 let index_entry =
160 self.index.get_entry(rev).ok_or(HgError::corrupted(
160 self.index.get_entry(rev).ok_or(HgError::corrupted(
161 "revlog references a revision not in the index",
161 "revlog references a revision not in the index",
162 ))?;
162 ))?;
163 if node == *index_entry.hash() {
163 if node == *index_entry.hash() {
164 return Ok(rev);
164 return Ok(rev);
165 }
165 }
166 if node.is_prefix_of(index_entry.hash()) {
166 if node.is_prefix_of(index_entry.hash()) {
167 if found_by_prefix.is_some() {
167 if found_by_prefix.is_some() {
168 return Err(RevlogError::AmbiguousPrefix);
168 return Err(RevlogError::AmbiguousPrefix);
169 }
169 }
170 found_by_prefix = Some(rev)
170 found_by_prefix = Some(rev)
171 }
171 }
172 }
172 }
173 found_by_prefix.ok_or(RevlogError::InvalidRevision)
173 found_by_prefix.ok_or(RevlogError::InvalidRevision)
174 }
174 }
175
175
176 /// Returns whether the given revision exists in this revlog.
176 /// Returns whether the given revision exists in this revlog.
177 pub fn has_rev(&self, rev: Revision) -> bool {
177 pub fn has_rev(&self, rev: Revision) -> bool {
178 self.index.get_entry(rev).is_some()
178 self.index.get_entry(rev).is_some()
179 }
179 }
180
180
181 /// Return the full data associated to a revision.
181 /// Return the full data associated to a revision.
182 ///
182 ///
183 /// All entries required to build the final data out of deltas will be
183 /// All entries required to build the final data out of deltas will be
184 /// retrieved as needed, and the deltas will be applied to the initial
184 /// retrieved as needed, and the deltas will be applied to the initial
185 /// snapshot to rebuild the final data.
185 /// snapshot to rebuild the final data.
186 #[timed]
186 #[timed]
187 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
187 pub fn get_rev_data(
188 &self,
189 rev: Revision,
190 ) -> Result<Cow<[u8]>, RevlogError> {
188 if rev == NULL_REVISION {
191 if rev == NULL_REVISION {
189 return Ok(vec![]);
192 return Ok(Cow::Borrowed(&[]));
190 };
193 };
191 // Todo return -> Cow
194 self.get_entry(rev)?.data()
192 let mut entry = self.get_entry(rev)?;
193 let mut delta_chain = vec![];
194
195 // The meaning of `base_rev_or_base_of_delta_chain` depends on
196 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
197 // `mercurial/revlogutils/constants.py` and the code in
198 // [_chaininfo] and in [index_deltachain].
199 let uses_generaldelta = self.index.uses_generaldelta();
200 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
201 let base_rev = if uses_generaldelta {
202 base_rev
203 } else {
204 entry.rev - 1
205 };
206 delta_chain.push(entry);
207 entry = self.get_entry_internal(base_rev)?;
208 }
209
210 // TODO do not look twice in the index
211 let index_entry = self
212 .index
213 .get_entry(rev)
214 .ok_or(RevlogError::InvalidRevision)?;
215
216 let data: Vec<u8> = if delta_chain.is_empty() {
217 entry.data_chunk()?.into()
218 } else {
219 Revlog::build_data_from_deltas(entry, &delta_chain)?
220 };
221
222 if self.check_hash(
223 index_entry.p1(),
224 index_entry.p2(),
225 index_entry.hash().as_bytes(),
226 &data,
227 ) {
228 Ok(data)
229 } else {
230 Err(RevlogError::corrupted())
231 }
232 }
195 }
233
196
234 /// Check the hash of some given data against the recorded hash.
197 /// Check the hash of some given data against the recorded hash.
235 pub fn check_hash(
198 pub fn check_hash(
236 &self,
199 &self,
237 p1: Revision,
200 p1: Revision,
238 p2: Revision,
201 p2: Revision,
239 expected: &[u8],
202 expected: &[u8],
240 data: &[u8],
203 data: &[u8],
241 ) -> bool {
204 ) -> bool {
242 let e1 = self.index.get_entry(p1);
205 let e1 = self.index.get_entry(p1);
243 let h1 = match e1 {
206 let h1 = match e1 {
244 Some(ref entry) => entry.hash(),
207 Some(ref entry) => entry.hash(),
245 None => &NULL_NODE,
208 None => &NULL_NODE,
246 };
209 };
247 let e2 = self.index.get_entry(p2);
210 let e2 = self.index.get_entry(p2);
248 let h2 = match e2 {
211 let h2 = match e2 {
249 Some(ref entry) => entry.hash(),
212 Some(ref entry) => entry.hash(),
250 None => &NULL_NODE,
213 None => &NULL_NODE,
251 };
214 };
252
215
253 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
216 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
254 }
217 }
255
218
256 /// Build the full data of a revision out of its snapshot
219 /// Build the full data of a revision out of its snapshot
257 /// and its deltas.
220 /// and its deltas.
258 #[timed]
221 #[timed]
259 fn build_data_from_deltas(
222 fn build_data_from_deltas(
260 snapshot: RevlogEntry,
223 snapshot: RevlogEntry,
261 deltas: &[RevlogEntry],
224 deltas: &[RevlogEntry],
262 ) -> Result<Vec<u8>, RevlogError> {
225 ) -> Result<Vec<u8>, RevlogError> {
263 let snapshot = snapshot.data_chunk()?;
226 let snapshot = snapshot.data_chunk()?;
264 let deltas = deltas
227 let deltas = deltas
265 .iter()
228 .iter()
266 .rev()
229 .rev()
267 .map(RevlogEntry::data_chunk)
230 .map(RevlogEntry::data_chunk)
268 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
231 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
269 let patches: Vec<_> =
232 let patches: Vec<_> =
270 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
233 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
271 let patch = patch::fold_patch_lists(&patches);
234 let patch = patch::fold_patch_lists(&patches);
272 Ok(patch.apply(&snapshot))
235 Ok(patch.apply(&snapshot))
273 }
236 }
274
237
275 /// Return the revlog data.
238 /// Return the revlog data.
276 fn data(&self) -> &[u8] {
239 fn data(&self) -> &[u8] {
277 match self.data_bytes {
240 match self.data_bytes {
278 Some(ref data_bytes) => &data_bytes,
241 Some(ref data_bytes) => &data_bytes,
279 None => panic!(
242 None => panic!(
280 "forgot to load the data or trying to access inline data"
243 "forgot to load the data or trying to access inline data"
281 ),
244 ),
282 }
245 }
283 }
246 }
284
247
285 /// Get an entry of the revlog.
248 /// Get an entry of the revlog.
286 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
249 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
287 let index_entry = self
250 let index_entry = self
288 .index
251 .index
289 .get_entry(rev)
252 .get_entry(rev)
290 .ok_or(RevlogError::InvalidRevision)?;
253 .ok_or(RevlogError::InvalidRevision)?;
291 let start = index_entry.offset();
254 let start = index_entry.offset();
292 let end = start + index_entry.compressed_len();
255 let end = start + index_entry.compressed_len();
293 let data = if self.index.is_inline() {
256 let data = if self.index.is_inline() {
294 self.index.data(start, end)
257 self.index.data(start, end)
295 } else {
258 } else {
296 &self.data()[start..end]
259 &self.data()[start..end]
297 };
260 };
298 let entry = RevlogEntry {
261 let entry = RevlogEntry {
262 revlog: self,
299 rev,
263 rev,
300 bytes: data,
264 bytes: data,
301 compressed_len: index_entry.compressed_len(),
265 compressed_len: index_entry.compressed_len(),
302 uncompressed_len: index_entry.uncompressed_len(),
266 uncompressed_len: index_entry.uncompressed_len(),
303 base_rev_or_base_of_delta_chain: if index_entry
267 base_rev_or_base_of_delta_chain: if index_entry
304 .base_revision_or_base_of_delta_chain()
268 .base_revision_or_base_of_delta_chain()
305 == rev
269 == rev
306 {
270 {
307 None
271 None
308 } else {
272 } else {
309 Some(index_entry.base_revision_or_base_of_delta_chain())
273 Some(index_entry.base_revision_or_base_of_delta_chain())
310 },
274 },
311 };
275 };
312 Ok(entry)
276 Ok(entry)
313 }
277 }
314
278
315 /// when resolving internal references within revlog, any errors
279 /// when resolving internal references within revlog, any errors
316 /// should be reported as corruption, instead of e.g. "invalid revision"
280 /// should be reported as corruption, instead of e.g. "invalid revision"
317 fn get_entry_internal(
281 fn get_entry_internal(
318 &self,
282 &self,
319 rev: Revision,
283 rev: Revision,
320 ) -> Result<RevlogEntry, RevlogError> {
284 ) -> Result<RevlogEntry, RevlogError> {
321 return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
285 return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
322 }
286 }
323 }
287 }
324
288
325 /// The revlog entry's bytes and the necessary information to extract
289 /// The revlog entry's bytes and the necessary information to extract
326 /// the entry's data.
290 /// the entry's data.
327 #[derive(Debug)]
291 #[derive(Clone)]
328 pub struct RevlogEntry<'a> {
292 pub struct RevlogEntry<'a> {
293 revlog: &'a Revlog,
329 rev: Revision,
294 rev: Revision,
330 bytes: &'a [u8],
295 bytes: &'a [u8],
331 compressed_len: usize,
296 compressed_len: usize,
332 uncompressed_len: usize,
297 uncompressed_len: usize,
333 base_rev_or_base_of_delta_chain: Option<Revision>,
298 base_rev_or_base_of_delta_chain: Option<Revision>,
334 }
299 }
335
300
336 impl<'a> RevlogEntry<'a> {
301 impl<'a> RevlogEntry<'a> {
337 pub fn revision(&self) -> Revision {
302 pub fn revision(&self) -> Revision {
338 self.rev
303 self.rev
339 }
304 }
340
305
306 /// The data for this entry, after resolving deltas if any.
307 pub fn data(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
308 let mut entry = self.clone();
309 let mut delta_chain = vec![];
310
311 // The meaning of `base_rev_or_base_of_delta_chain` depends on
312 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
313 // `mercurial/revlogutils/constants.py` and the code in
314 // [_chaininfo] and in [index_deltachain].
315 let uses_generaldelta = self.revlog.index.uses_generaldelta();
316 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
317 let base_rev = if uses_generaldelta {
318 base_rev
319 } else {
320 entry.rev - 1
321 };
322 delta_chain.push(entry);
323 entry = self.revlog.get_entry_internal(base_rev)?;
324 }
325
326 // TODO do not look twice in the index
327 let index_entry = self
328 .revlog
329 .index
330 .get_entry(self.rev)
331 .ok_or(RevlogError::InvalidRevision)?;
332
333 let data = if delta_chain.is_empty() {
334 entry.data_chunk()?
335 } else {
336 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
337 };
338
339 if self.revlog.check_hash(
340 index_entry.p1(),
341 index_entry.p2(),
342 index_entry.hash().as_bytes(),
343 &data,
344 ) {
345 Ok(data)
346 } else {
347 Err(RevlogError::corrupted())
348 }
349 }
350
341 /// Extract the data contained in the entry.
351 /// Extract the data contained in the entry.
342 /// This may be a delta. (See `is_delta`.)
352 /// This may be a delta. (See `is_delta`.)
343 fn data_chunk(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
353 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
344 if self.bytes.is_empty() {
354 if self.bytes.is_empty() {
345 return Ok(Cow::Borrowed(&[]));
355 return Ok(Cow::Borrowed(&[]));
346 }
356 }
347 match self.bytes[0] {
357 match self.bytes[0] {
348 // Revision data is the entirety of the entry, including this
358 // Revision data is the entirety of the entry, including this
349 // header.
359 // header.
350 b'\0' => Ok(Cow::Borrowed(self.bytes)),
360 b'\0' => Ok(Cow::Borrowed(self.bytes)),
351 // Raw revision data follows.
361 // Raw revision data follows.
352 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
362 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
353 // zlib (RFC 1950) data.
363 // zlib (RFC 1950) data.
354 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
364 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
355 // zstd data.
365 // zstd data.
356 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
366 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
357 // A proper new format should have had a repo/store requirement.
367 // A proper new format should have had a repo/store requirement.
358 _format_type => Err(RevlogError::corrupted()),
368 _format_type => Err(RevlogError::corrupted()),
359 }
369 }
360 }
370 }
361
371
362 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
372 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
363 let mut decoder = ZlibDecoder::new(self.bytes);
373 let mut decoder = ZlibDecoder::new(self.bytes);
364 if self.is_delta() {
374 if self.is_delta() {
365 let mut buf = Vec::with_capacity(self.compressed_len);
375 let mut buf = Vec::with_capacity(self.compressed_len);
366 decoder
376 decoder
367 .read_to_end(&mut buf)
377 .read_to_end(&mut buf)
368 .map_err(|_| RevlogError::corrupted())?;
378 .map_err(|_| RevlogError::corrupted())?;
369 Ok(buf)
379 Ok(buf)
370 } else {
380 } else {
371 let mut buf = vec![0; self.uncompressed_len];
381 let mut buf = vec![0; self.uncompressed_len];
372 decoder
382 decoder
373 .read_exact(&mut buf)
383 .read_exact(&mut buf)
374 .map_err(|_| RevlogError::corrupted())?;
384 .map_err(|_| RevlogError::corrupted())?;
375 Ok(buf)
385 Ok(buf)
376 }
386 }
377 }
387 }
378
388
379 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
389 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
380 if self.is_delta() {
390 if self.is_delta() {
381 let mut buf = Vec::with_capacity(self.compressed_len);
391 let mut buf = Vec::with_capacity(self.compressed_len);
382 zstd::stream::copy_decode(self.bytes, &mut buf)
392 zstd::stream::copy_decode(self.bytes, &mut buf)
383 .map_err(|_| RevlogError::corrupted())?;
393 .map_err(|_| RevlogError::corrupted())?;
384 Ok(buf)
394 Ok(buf)
385 } else {
395 } else {
386 let mut buf = vec![0; self.uncompressed_len];
396 let mut buf = vec![0; self.uncompressed_len];
387 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
397 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
388 .map_err(|_| RevlogError::corrupted())?;
398 .map_err(|_| RevlogError::corrupted())?;
389 if len != self.uncompressed_len {
399 if len != self.uncompressed_len {
390 Err(RevlogError::corrupted())
400 Err(RevlogError::corrupted())
391 } else {
401 } else {
392 Ok(buf)
402 Ok(buf)
393 }
403 }
394 }
404 }
395 }
405 }
396
406
397 /// Tell if the entry is a snapshot or a delta
407 /// Tell if the entry is a snapshot or a delta
398 /// (influences decompression).
408 /// (influences decompression).
399 fn is_delta(&self) -> bool {
409 fn is_delta(&self) -> bool {
400 self.base_rev_or_base_of_delta_chain.is_some()
410 self.base_rev_or_base_of_delta_chain.is_some()
401 }
411 }
402 }
412 }
403
413
404 /// Calculate the hash of a revision given its data and its parents.
414 /// Calculate the hash of a revision given its data and its parents.
405 fn hash(
415 fn hash(
406 data: &[u8],
416 data: &[u8],
407 p1_hash: &[u8],
417 p1_hash: &[u8],
408 p2_hash: &[u8],
418 p2_hash: &[u8],
409 ) -> [u8; NODE_BYTES_LENGTH] {
419 ) -> [u8; NODE_BYTES_LENGTH] {
410 let mut hasher = Sha1::new();
420 let mut hasher = Sha1::new();
411 let (a, b) = (p1_hash, p2_hash);
421 let (a, b) = (p1_hash, p2_hash);
412 if a > b {
422 if a > b {
413 hasher.update(b);
423 hasher.update(b);
414 hasher.update(a);
424 hasher.update(a);
415 } else {
425 } else {
416 hasher.update(a);
426 hasher.update(a);
417 hasher.update(b);
427 hasher.update(b);
418 }
428 }
419 hasher.update(data);
429 hasher.update(data);
420 *hasher.finalize().as_ref()
430 *hasher.finalize().as_ref()
421 }
431 }
General Comments 0
You need to be logged in to leave comments. Login now