##// END OF EJS Templates
hg-core: fix path encoding usage...
Antoine cezar -
r46397:c0277679 default draft
parent child Browse files
Show More
@@ -1,158 +1,169 b''
1 1 // list_tracked_files.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::convert::From;
9 use std::path::PathBuf;
9 use std::path::{Path, PathBuf};
10 10
11 11 use crate::revlog::changelog::Changelog;
12 12 use crate::revlog::manifest::{Manifest, ManifestEntry};
13 13 use crate::revlog::path_encode::path_encode;
14 14 use crate::revlog::revlog::Revlog;
15 15 use crate::revlog::revlog::RevlogError;
16 16 use crate::revlog::Revision;
17 use crate::utils::hg_path::HgPathBuf;
17 use crate::utils::files::get_path_from_bytes;
18 use crate::utils::hg_path::{HgPath, HgPathBuf};
18 19
19 20 const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
20 21
21 22 /// Kind of error encountered by `CatRev`
22 23 #[derive(Debug)]
23 24 pub enum CatRevErrorKind {
24 25 /// Error when reading a `revlog` file.
25 26 IoError(std::io::Error),
26 27 /// The revision has not been found.
27 28 InvalidRevision,
28 29 /// A `revlog` file is corrupted.
29 30 CorruptedRevlog,
30 31 /// The `revlog` format version is not supported.
31 32 UnsuportedRevlogVersion(u16),
32 33 /// The `revlog` data format is not supported.
33 34 UnknowRevlogDataFormat(u8),
34 35 }
35 36
36 37 /// A `CatRev` error
37 38 #[derive(Debug)]
38 39 pub struct CatRevError {
39 40 /// Kind of error encountered by `CatRev`
40 41 pub kind: CatRevErrorKind,
41 42 }
42 43
43 44 impl From<CatRevErrorKind> for CatRevError {
44 45 fn from(kind: CatRevErrorKind) -> Self {
45 46 CatRevError { kind }
46 47 }
47 48 }
48 49
49 50 impl From<RevlogError> for CatRevError {
50 51 fn from(err: RevlogError) -> Self {
51 52 match err {
52 53 RevlogError::IoError(err) => CatRevErrorKind::IoError(err),
53 54 RevlogError::UnsuportedVersion(version) => {
54 55 CatRevErrorKind::UnsuportedRevlogVersion(version)
55 56 }
56 57 RevlogError::InvalidRevision => CatRevErrorKind::InvalidRevision,
57 58 RevlogError::Corrupted => CatRevErrorKind::CorruptedRevlog,
58 59 RevlogError::UnknowDataFormat(format) => {
59 60 CatRevErrorKind::UnknowRevlogDataFormat(format)
60 61 }
61 62 }
62 63 .into()
63 64 }
64 65 }
65 66
66 67 /// Output the contents of the given files at a given revision.
67 68 pub struct CatRev<'a> {
68 69 root: &'a PathBuf,
69 70 /// The revision to cat the files from.
70 71 rev: &'a str,
71 72 /// The files to output.
72 73 files: &'a [HgPathBuf],
73 74 /// The changelog file
74 75 changelog: Changelog,
75 76 /// The manifest file
76 77 manifest: Manifest,
77 78 /// The manifest entry corresponding to the revision.
78 79 ///
79 80 /// Used to hold the owner of the returned references.
80 81 manifest_entry: Option<ManifestEntry>,
81 82 }
82 83
83 84 impl<'a> CatRev<'a> {
84 85 pub fn new(
85 86 root: &'a PathBuf,
86 87 rev: &'a str,
87 88 files: &'a [HgPathBuf],
88 89 ) -> Result<Self, CatRevError> {
89 90 let changelog = Changelog::open(&root)?;
90 91 let manifest = Manifest::open(&root)?;
91 92 let manifest_entry = None;
92 93
93 94 Ok(Self {
94 95 root,
95 96 rev,
96 97 files,
97 98 changelog,
98 99 manifest,
99 100 manifest_entry,
100 101 })
101 102 }
102 103
103 104 pub fn run(&mut self) -> Result<Vec<u8>, CatRevError> {
104 105 let changelog_entry = match self.rev.parse::<Revision>() {
105 106 Ok(rev) => self.changelog.get_rev(rev)?,
106 107 _ => {
107 108 let changelog_node = hex::decode(&self.rev)
108 109 .map_err(|_| CatRevErrorKind::InvalidRevision)?;
109 110 self.changelog.get_node(&changelog_node)?
110 111 }
111 112 };
112 113 let manifest_node = hex::decode(&changelog_entry.manifest_node()?)
113 114 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
114 115
115 116 self.manifest_entry = Some(self.manifest.get_node(&manifest_node)?);
116 117 if let Some(ref manifest_entry) = self.manifest_entry {
117 118 let mut bytes = vec![];
118 119
119 120 for (manifest_file, node_bytes) in
120 121 manifest_entry.files_with_nodes()
121 122 {
122 123 for cat_file in self.files.iter() {
123 124 if cat_file.as_bytes() == manifest_file.as_bytes() {
124 let encoded_bytes =
125 path_encode(manifest_file.as_bytes());
126 let revlog_index_string = format!(
127 ".hg/store/data/{}.i",
128 String::from_utf8_lossy(&encoded_bytes),
129 );
130 let revlog_index_path =
131 self.root.join(&revlog_index_string);
132 let file_log = Revlog::open(&revlog_index_path)?;
125 let index_path =
126 store_path(self.root, manifest_file, b".i");
127 let data_path =
128 store_path(self.root, manifest_file, b".d");
129
130 let file_log =
131 Revlog::open(&index_path, Some(&data_path))?;
133 132 let file_node = hex::decode(&node_bytes)
134 133 .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
135 134 let file_rev = file_log.get_node_rev(&file_node)?;
136 135 let data = file_log.get_rev_data(file_rev)?;
137 136 if data.starts_with(&METADATA_DELIMITER) {
138 137 let end_delimiter_position = data
139 138 [METADATA_DELIMITER.len()..]
140 139 .windows(METADATA_DELIMITER.len())
141 140 .position(|bytes| bytes == METADATA_DELIMITER);
142 141 if let Some(position) = end_delimiter_position {
143 142 let offset = METADATA_DELIMITER.len() * 2;
144 143 bytes.extend(data[position + offset..].iter());
145 144 }
146 145 } else {
147 146 bytes.extend(data);
148 147 }
149 148 }
150 149 }
151 150 }
152 151
153 152 Ok(bytes)
154 153 } else {
155 154 unreachable!("manifest_entry should have been stored");
156 155 }
157 156 }
158 157 }
158
159 fn store_path(root: &Path, hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
160 let encoded_bytes =
161 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
162 [
163 root,
164 &Path::new(".hg/store/"),
165 get_path_from_bytes(&encoded_bytes),
166 ]
167 .iter()
168 .collect()
169 }
@@ -1,110 +1,110 b''
1 1 // debugdata.rs
2 2 //
3 3 // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use super::find_root;
9 9 use crate::revlog::revlog::{Revlog, RevlogError};
10 10 use crate::revlog::Revision;
11 11
12 12 /// Kind of data to debug
13 13 #[derive(Debug, Copy, Clone)]
14 14 pub enum DebugDataKind {
15 15 Changelog,
16 16 Manifest,
17 17 }
18 18
19 19 /// Kind of error encountered by DebugData
20 20 #[derive(Debug)]
21 21 pub enum DebugDataErrorKind {
22 22 FindRootError(find_root::FindRootError),
23 23 /// Error when reading a `revlog` file.
24 24 IoError(std::io::Error),
25 25 /// The revision has not been found.
26 26 InvalidRevision,
27 27 /// A `revlog` file is corrupted.
28 28 CorruptedRevlog,
29 29 /// The `revlog` format version is not supported.
30 30 UnsuportedRevlogVersion(u16),
31 31 /// The `revlog` data format is not supported.
32 32 UnknowRevlogDataFormat(u8),
33 33 }
34 34
35 35 /// A DebugData error
36 36 #[derive(Debug)]
37 37 pub struct DebugDataError {
38 38 /// Kind of error encountered by DebugData
39 39 pub kind: DebugDataErrorKind,
40 40 }
41 41
42 42 impl From<DebugDataErrorKind> for DebugDataError {
43 43 fn from(kind: DebugDataErrorKind) -> Self {
44 44 DebugDataError { kind }
45 45 }
46 46 }
47 47
48 48 impl From<find_root::FindRootError> for DebugDataError {
49 49 fn from(err: find_root::FindRootError) -> Self {
50 50 let kind = DebugDataErrorKind::FindRootError(err);
51 51 DebugDataError { kind }
52 52 }
53 53 }
54 54
55 55 impl From<std::io::Error> for DebugDataError {
56 56 fn from(err: std::io::Error) -> Self {
57 57 let kind = DebugDataErrorKind::IoError(err);
58 58 DebugDataError { kind }
59 59 }
60 60 }
61 61
62 62 impl From<RevlogError> for DebugDataError {
63 63 fn from(err: RevlogError) -> Self {
64 64 match err {
65 65 RevlogError::IoError(err) => DebugDataErrorKind::IoError(err),
66 66 RevlogError::UnsuportedVersion(version) => {
67 67 DebugDataErrorKind::UnsuportedRevlogVersion(version)
68 68 }
69 69 RevlogError::InvalidRevision => {
70 70 DebugDataErrorKind::InvalidRevision
71 71 }
72 72 RevlogError::Corrupted => DebugDataErrorKind::CorruptedRevlog,
73 73 RevlogError::UnknowDataFormat(format) => {
74 74 DebugDataErrorKind::UnknowRevlogDataFormat(format)
75 75 }
76 76 }
77 77 .into()
78 78 }
79 79 }
80 80
81 81 /// Dump the contents data of a revision.
82 82 pub struct DebugData<'a> {
83 83 /// Revision or hash of the revision.
84 84 rev: &'a str,
85 85 /// Kind of data to debug.
86 86 kind: DebugDataKind,
87 87 }
88 88
89 89 impl<'a> DebugData<'a> {
90 90 pub fn new(rev: &'a str, kind: DebugDataKind) -> Self {
91 91 DebugData { rev, kind }
92 92 }
93 93
94 94 pub fn run(&mut self) -> Result<Vec<u8>, DebugDataError> {
95 95 let rev = self
96 96 .rev
97 97 .parse::<Revision>()
98 98 .or(Err(DebugDataErrorKind::InvalidRevision))?;
99 99
100 100 let root = find_root::FindRoot::new().run()?;
101 101 let index_file = match self.kind {
102 102 DebugDataKind::Changelog => root.join(".hg/store/00changelog.i"),
103 103 DebugDataKind::Manifest => root.join(".hg/store/00manifest.i"),
104 104 };
105 let revlog = Revlog::open(&index_file)?;
105 let revlog = Revlog::open(&index_file, None)?;
106 106 let data = revlog.get_rev_data(rev)?;
107 107
108 108 Ok(data)
109 109 }
110 110 }
@@ -1,58 +1,58 b''
1 1 use crate::revlog::revlog::{Revlog, RevlogError};
2 2 use crate::revlog::Revision;
3 3 use std::path::PathBuf;
4 4
5 5 /// A specialized `Revlog` to work with `changelog` data format.
6 6 pub struct Changelog {
7 7 /// The generic `revlog` format.
8 8 revlog: Revlog,
9 9 }
10 10
11 11 impl Changelog {
12 12 /// Open the `changelog` of a repository given by its root.
13 13 pub fn open(root: &PathBuf) -> Result<Self, RevlogError> {
14 14 let index_file = root.join(".hg/store/00changelog.i");
15 let revlog = Revlog::open(&index_file)?;
15 let revlog = Revlog::open(&index_file, None)?;
16 16 Ok(Self { revlog })
17 17 }
18 18
19 19 /// Return the `ChangelogEntry` of a given node id.
20 20 pub fn get_node(
21 21 &self,
22 22 node: &[u8],
23 23 ) -> Result<ChangelogEntry, RevlogError> {
24 24 let rev = self.revlog.get_node_rev(node)?;
25 25 self.get_rev(rev)
26 26 }
27 27
28 28 /// Return the `ChangelogEntry` of a given node revision.
29 29 pub fn get_rev(
30 30 &self,
31 31 rev: Revision,
32 32 ) -> Result<ChangelogEntry, RevlogError> {
33 33 let bytes = self.revlog.get_rev_data(rev)?;
34 34 Ok(ChangelogEntry { bytes })
35 35 }
36 36 }
37 37
38 38 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
39 39 #[derive(Debug)]
40 40 pub struct ChangelogEntry {
41 41 /// The data bytes of the `changelog` entry.
42 42 bytes: Vec<u8>,
43 43 }
44 44
45 45 impl ChangelogEntry {
46 46 /// Return an iterator over the lines of the entry.
47 47 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
48 48 self.bytes
49 49 .split(|b| b == &b'\n')
50 50 .filter(|line| !line.is_empty())
51 51 }
52 52
53 53 /// Return the node id of the `manifest` referenced by this `changelog`
54 54 /// entry.
55 55 pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
56 56 self.lines().next().ok_or(RevlogError::Corrupted)
57 57 }
58 58 }
@@ -1,73 +1,73 b''
1 1 use crate::revlog::revlog::{Revlog, RevlogError};
2 2 use crate::revlog::Revision;
3 3 use crate::utils::hg_path::HgPath;
4 4 use std::path::PathBuf;
5 5
6 6 /// A specialized `Revlog` to work with `manifest` data format.
7 7 pub struct Manifest {
8 8 /// The generic `revlog` format.
9 9 revlog: Revlog,
10 10 }
11 11
12 12 impl Manifest {
13 13 /// Open the `manifest` of a repository given by its root.
14 14 pub fn open(root: &PathBuf) -> Result<Self, RevlogError> {
15 15 let index_file = root.join(".hg/store/00manifest.i");
16 let revlog = Revlog::open(&index_file)?;
16 let revlog = Revlog::open(&index_file, None)?;
17 17 Ok(Self { revlog })
18 18 }
19 19
20 20 /// Return the `ManifestEntry` of a given node id.
21 21 pub fn get_node(&self, node: &[u8]) -> Result<ManifestEntry, RevlogError> {
22 22 let rev = self.revlog.get_node_rev(node)?;
23 23 self.get_rev(rev)
24 24 }
25 25
26 26 /// Return the `ManifestEntry` of a given node revision.
27 27 pub fn get_rev(
28 28 &self,
29 29 rev: Revision,
30 30 ) -> Result<ManifestEntry, RevlogError> {
31 31 let bytes = self.revlog.get_rev_data(rev)?;
32 32 Ok(ManifestEntry { bytes })
33 33 }
34 34 }
35 35
36 36 /// `Manifest` entry which knows how to interpret the `manifest` data bytes.
37 37 #[derive(Debug)]
38 38 pub struct ManifestEntry {
39 39 bytes: Vec<u8>,
40 40 }
41 41
42 42 impl ManifestEntry {
43 43 /// Return an iterator over the lines of the entry.
44 44 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
45 45 self.bytes
46 46 .split(|b| b == &b'\n')
47 47 .filter(|line| !line.is_empty())
48 48 }
49 49
50 50 /// Return an iterator over the files of the entry.
51 51 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
52 52 self.lines().filter(|line| !line.is_empty()).map(|line| {
53 53 let pos = line
54 54 .iter()
55 55 .position(|x| x == &b'\0')
56 56 .expect("manifest line should contain \\0");
57 57 HgPath::new(&line[..pos])
58 58 })
59 59 }
60 60
61 61 /// Return an iterator over the files of the entry and their node ids.
62 62 pub fn files_with_nodes(&self) -> impl Iterator<Item = (&HgPath, &[u8])> {
63 63 self.lines().filter(|line| !line.is_empty()).map(|line| {
64 64 let pos = line
65 65 .iter()
66 66 .position(|x| x == &b'\0')
67 67 .expect("manifest line should contain \\0");
68 68 let hash_start = pos + 1;
69 69 let hash_end = hash_start + 40;
70 70 (HgPath::new(&line[..pos]), &line[hash_start..hash_end])
71 71 })
72 72 }
73 73 }
@@ -1,331 +1,335 b''
1 1 use std::borrow::Cow;
2 2 use std::fs::File;
3 3 use std::io::Read;
4 4 use std::ops::Deref;
5 5 use std::path::Path;
6 6
7 7 use byteorder::{BigEndian, ByteOrder};
8 8 use crypto::digest::Digest;
9 9 use crypto::sha1::Sha1;
10 10 use flate2::read::ZlibDecoder;
11 11 use memmap::{Mmap, MmapOptions};
12 12 use micro_timer::timed;
13 13 use zstd;
14 14
15 15 use super::index::Index;
16 16 use super::node::{NODE_BYTES_LENGTH, NULL_NODE_ID};
17 17 use super::patch;
18 18 use crate::revlog::Revision;
19 19
20 20 pub enum RevlogError {
21 21 IoError(std::io::Error),
22 22 UnsuportedVersion(u16),
23 23 InvalidRevision,
24 24 Corrupted,
25 25 UnknowDataFormat(u8),
26 26 }
27 27
28 28 fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
29 29 let file = File::open(path)?;
30 30 let mmap = unsafe { MmapOptions::new().map(&file) }?;
31 31 Ok(mmap)
32 32 }
33 33
34 34 /// Read only implementation of revlog.
35 35 pub struct Revlog {
36 36 /// When index and data are not interleaved: bytes of the revlog index.
37 37 /// When index and data are interleaved: bytes of the revlog index and
38 38 /// data.
39 39 index: Index,
40 40 /// When index and data are not interleaved: bytes of the revlog data
41 41 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
42 42 }
43 43
44 44 impl Revlog {
45 45 /// Open a revlog index file.
46 46 ///
47 47 /// It will also open the associated data file if index and data are not
48 48 /// interleaved.
49 49 #[timed]
50 pub fn open(index_path: &Path) -> Result<Self, RevlogError> {
50 pub fn open(
51 index_path: &Path,
52 data_path: Option<&Path>,
53 ) -> Result<Self, RevlogError> {
51 54 let index_mmap =
52 55 mmap_open(&index_path).map_err(RevlogError::IoError)?;
53 56
54 57 let version = get_version(&index_mmap);
55 58 if version != 1 {
56 59 return Err(RevlogError::UnsuportedVersion(version));
57 60 }
58 61
59 62 let index = Index::new(Box::new(index_mmap))?;
60 63
61 // TODO load data only when needed //
64 let default_data_path = index_path.with_extension("d");
65
62 66 // type annotation required
63 67 // won't recognize Mmap as Deref<Target = [u8]>
64 68 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
65 69 if index.is_inline() {
66 70 None
67 71 } else {
68 let data_path = index_path.with_extension("d");
72 let data_path = data_path.unwrap_or(&default_data_path);
69 73 let data_mmap =
70 mmap_open(&data_path).map_err(RevlogError::IoError)?;
74 mmap_open(data_path).map_err(RevlogError::IoError)?;
71 75 Some(Box::new(data_mmap))
72 76 };
73 77
74 78 Ok(Revlog { index, data_bytes })
75 79 }
76 80
77 81 /// Return number of entries of the `Revlog`.
78 82 pub fn len(&self) -> usize {
79 83 self.index.len()
80 84 }
81 85
82 86 /// Returns `true` if the `Revlog` has zero `entries`.
83 87 pub fn is_empty(&self) -> bool {
84 88 self.index.is_empty()
85 89 }
86 90
87 91 /// Return the full data associated to a node.
88 92 #[timed]
89 93 pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> {
90 94 // This is brute force. But it is fast enough for now.
91 95 // Optimization will come later.
92 96 for rev in (0..self.len() as Revision).rev() {
93 97 let index_entry =
94 98 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
95 99 if node == index_entry.hash() {
96 100 return Ok(rev);
97 101 }
98 102 }
99 103 Err(RevlogError::InvalidRevision)
100 104 }
101 105
102 106 /// Return the full data associated to a revision.
103 107 ///
104 108 /// All entries required to build the final data out of deltas will be
105 109 /// retrieved as needed, and the deltas will be applied to the inital
106 110 /// snapshot to rebuild the final data.
107 111 #[timed]
108 112 pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
109 113 // Todo return -> Cow
110 114 let mut entry = self.get_entry(rev)?;
111 115 let mut delta_chain = vec![];
112 116 while let Some(base_rev) = entry.base_rev {
113 117 delta_chain.push(entry);
114 118 entry =
115 119 self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
116 120 }
117 121
118 122 // TODO do not look twice in the index
119 123 let index_entry = self
120 124 .index
121 125 .get_entry(rev)
122 126 .ok_or(RevlogError::InvalidRevision)?;
123 127
124 128 let data: Vec<u8> = if delta_chain.is_empty() {
125 129 entry.data()?.into()
126 130 } else {
127 131 Revlog::build_data_from_deltas(entry, &delta_chain)?
128 132 };
129 133
130 134 if self.check_hash(
131 135 index_entry.p1(),
132 136 index_entry.p2(),
133 137 index_entry.hash(),
134 138 &data,
135 139 ) {
136 140 Ok(data)
137 141 } else {
138 142 Err(RevlogError::Corrupted)
139 143 }
140 144 }
141 145
142 146 /// Check the hash of some given data against the recorded hash.
143 147 pub fn check_hash(
144 148 &self,
145 149 p1: Revision,
146 150 p2: Revision,
147 151 expected: &[u8],
148 152 data: &[u8],
149 153 ) -> bool {
150 154 let e1 = self.index.get_entry(p1);
151 155 let h1 = match e1 {
152 156 Some(ref entry) => entry.hash(),
153 157 None => &NULL_NODE_ID,
154 158 };
155 159 let e2 = self.index.get_entry(p2);
156 160 let h2 = match e2 {
157 161 Some(ref entry) => entry.hash(),
158 162 None => &NULL_NODE_ID,
159 163 };
160 164
161 165 hash(data, &h1, &h2).as_slice() == expected
162 166 }
163 167
164 168 /// Build the full data of a revision out its snapshot
165 169 /// and its deltas.
166 170 #[timed]
167 171 fn build_data_from_deltas(
168 172 snapshot: RevlogEntry,
169 173 deltas: &[RevlogEntry],
170 174 ) -> Result<Vec<u8>, RevlogError> {
171 175 let snapshot = snapshot.data()?;
172 176 let deltas = deltas
173 177 .iter()
174 178 .rev()
175 179 .map(RevlogEntry::data)
176 180 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
177 181 let patches: Vec<_> =
178 182 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
179 183 let patch = patch::fold_patch_lists(&patches);
180 184 Ok(patch.apply(&snapshot))
181 185 }
182 186
183 187 /// Return the revlog data.
184 188 fn data(&self) -> &[u8] {
185 189 match self.data_bytes {
186 190 Some(ref data_bytes) => &data_bytes,
187 191 None => panic!(
188 192 "forgot to load the data or trying to access inline data"
189 193 ),
190 194 }
191 195 }
192 196
193 197 /// Get an entry of the revlog.
194 198 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
195 199 let index_entry = self
196 200 .index
197 201 .get_entry(rev)
198 202 .ok_or(RevlogError::InvalidRevision)?;
199 203 let start = index_entry.offset();
200 204 let end = start + index_entry.compressed_len();
201 205 let data = if self.index.is_inline() {
202 206 self.index.data(start, end)
203 207 } else {
204 208 &self.data()[start..end]
205 209 };
206 210 let entry = RevlogEntry {
207 211 rev,
208 212 bytes: data,
209 213 compressed_len: index_entry.compressed_len(),
210 214 uncompressed_len: index_entry.uncompressed_len(),
211 215 base_rev: if index_entry.base_revision() == rev {
212 216 None
213 217 } else {
214 218 Some(index_entry.base_revision())
215 219 },
216 220 };
217 221 Ok(entry)
218 222 }
219 223 }
220 224
221 225 /// The revlog entry's bytes and the necessary informations to extract
222 226 /// the entry's data.
223 227 #[derive(Debug)]
224 228 pub struct RevlogEntry<'a> {
225 229 rev: Revision,
226 230 bytes: &'a [u8],
227 231 compressed_len: usize,
228 232 uncompressed_len: usize,
229 233 base_rev: Option<Revision>,
230 234 }
231 235
232 236 impl<'a> RevlogEntry<'a> {
233 237 /// Extract the data contained in the entry.
234 238 pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
235 239 if self.bytes.is_empty() {
236 240 return Ok(Cow::Borrowed(&[]));
237 241 }
238 242 match self.bytes[0] {
239 243 // Revision data is the entirety of the entry, including this
240 244 // header.
241 245 b'\0' => Ok(Cow::Borrowed(self.bytes)),
242 246 // Raw revision data follows.
243 247 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
244 248 // zlib (RFC 1950) data.
245 249 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
246 250 // zstd data.
247 251 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
248 252 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
249 253 }
250 254 }
251 255
252 256 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
253 257 let mut decoder = ZlibDecoder::new(self.bytes);
254 258 if self.is_delta() {
255 259 let mut buf = Vec::with_capacity(self.compressed_len);
256 260 decoder
257 261 .read_to_end(&mut buf)
258 262 .or(Err(RevlogError::Corrupted))?;
259 263 Ok(buf)
260 264 } else {
261 265 let mut buf = vec![0; self.uncompressed_len];
262 266 decoder
263 267 .read_exact(&mut buf)
264 268 .or(Err(RevlogError::Corrupted))?;
265 269 Ok(buf)
266 270 }
267 271 }
268 272
269 273 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
270 274 if self.is_delta() {
271 275 let mut buf = Vec::with_capacity(self.compressed_len);
272 276 zstd::stream::copy_decode(self.bytes, &mut buf)
273 277 .or(Err(RevlogError::Corrupted))?;
274 278 Ok(buf)
275 279 } else {
276 280 let mut buf = vec![0; self.uncompressed_len];
277 281 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
278 282 .or(Err(RevlogError::Corrupted))?;
279 283 if len != self.uncompressed_len {
280 284 Err(RevlogError::Corrupted)
281 285 } else {
282 286 Ok(buf)
283 287 }
284 288 }
285 289 }
286 290
287 291 /// Tell if the entry is a snapshot or a delta
288 292 /// (influences decompression).
289 293 fn is_delta(&self) -> bool {
290 294 self.base_rev.is_some()
291 295 }
292 296 }
293 297
294 298 /// Format version of the revlog.
295 299 pub fn get_version(index_bytes: &[u8]) -> u16 {
296 300 BigEndian::read_u16(&index_bytes[2..=3])
297 301 }
298 302
299 303 /// Calculate the hash of a revision given its data and its parents.
300 304 fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
301 305 let mut hasher = Sha1::new();
302 306 let (a, b) = (p1_hash, p2_hash);
303 307 if a > b {
304 308 hasher.input(b);
305 309 hasher.input(a);
306 310 } else {
307 311 hasher.input(a);
308 312 hasher.input(b);
309 313 }
310 314 hasher.input(data);
311 315 let mut hash = vec![0; NODE_BYTES_LENGTH];
312 316 hasher.result(&mut hash);
313 317 hash
314 318 }
315 319
316 320 #[cfg(test)]
317 321 mod tests {
318 322 use super::*;
319 323
320 324 use super::super::index::IndexEntryBuilder;
321 325
322 326 #[test]
323 327 fn version_test() {
324 328 let bytes = IndexEntryBuilder::new()
325 329 .is_first(true)
326 330 .with_version(1)
327 331 .build();
328 332
329 333 assert_eq!(get_version(&bytes), 1)
330 334 }
331 335 }
General Comments 0
You need to be logged in to leave comments. Login now