##// END OF EJS Templates
rust-changelog: introducing an intermediate `ChangelogEntry`...
Georges Racinet -
r51268:841b13e6 default
parent child Browse files
Show More
@@ -1,289 +1,327 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::Revision;
3 use crate::revlog::{Node, NodePrefix};
2 use crate::revlog::{Node, NodePrefix};
3 use crate::revlog::{Revision, NULL_REVISION};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 use crate::utils::hg_path::HgPath;
5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use itertools::Itertools;
7 use itertools::Itertools;
8 use std::ascii::escape_default;
8 use std::ascii::escape_default;
9 use std::borrow::Cow;
9 use std::borrow::Cow;
10 use std::fmt::{Debug, Formatter};
10 use std::fmt::{Debug, Formatter};
11
11
12 /// A specialized `Revlog` to work with changelog data format.
12 /// A specialized `Revlog` to work with changelog data format.
13 pub struct Changelog {
13 pub struct Changelog {
14 /// The generic `revlog` format.
14 /// The generic `revlog` format.
15 pub(crate) revlog: Revlog,
15 pub(crate) revlog: Revlog,
16 }
16 }
17
17
18 impl Changelog {
18 impl Changelog {
19 /// Open the `changelog` of a repository given by its root.
19 /// Open the `changelog` of a repository given by its root.
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 let revlog =
21 let revlog =
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 Ok(Self { revlog })
23 Ok(Self { revlog })
24 }
24 }
25
25
26 /// Return the `ChangelogRevisionData` for the given node ID.
26 /// Return the `ChangelogRevisionData` for the given node ID.
27 pub fn data_for_node(
27 pub fn data_for_node(
28 &self,
28 &self,
29 node: NodePrefix,
29 node: NodePrefix,
30 ) -> Result<ChangelogRevisionData, RevlogError> {
30 ) -> Result<ChangelogRevisionData, RevlogError> {
31 let rev = self.revlog.rev_from_node(node)?;
31 let rev = self.revlog.rev_from_node(node)?;
32 self.data_for_rev(rev)
32 self.data_for_rev(rev)
33 }
33 }
34
34
35 /// Return the `RevlogEntry` for the given revision number.
35 /// Return the [`ChangelogEntry`] for the given revision number.
36 pub fn entry_for_rev(
36 pub fn entry_for_rev(
37 &self,
37 &self,
38 rev: Revision,
38 rev: Revision,
39 ) -> Result<RevlogEntry, RevlogError> {
39 ) -> Result<ChangelogEntry, RevlogError> {
40 self.revlog.get_entry(rev)
40 let revlog_entry = self.revlog.get_entry(rev)?;
41 Ok(ChangelogEntry { revlog_entry })
41 }
42 }
42
43
43 /// Return the [`ChangelogRevisionData`] for the given revision number.
44 /// Return the [`ChangelogRevisionData`] for the given revision number.
45 ///
46 /// This is a useful shortcut in case the caller does not need the
47 /// generic revlog information (parents, hashes etc). Otherwise
48 /// consider taking a [`ChangelogEntry`] with
49 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
44 pub fn data_for_rev(
50 pub fn data_for_rev(
45 &self,
51 &self,
46 rev: Revision,
52 rev: Revision,
47 ) -> Result<ChangelogRevisionData, RevlogError> {
53 ) -> Result<ChangelogRevisionData, RevlogError> {
48 let bytes = self.revlog.get_rev_data(rev)?;
54 if rev == NULL_REVISION {
49 if bytes.is_empty() {
55 return Ok(ChangelogRevisionData::null());
50 Ok(ChangelogRevisionData::null())
51 } else {
52 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
53 RevlogError::Other(HgError::CorruptedRepository(format!(
54 "Invalid changelog data for revision {}: {:?}",
55 rev, err
56 )))
57 })?)
58 }
56 }
57 self.entry_for_rev(rev)?.data()
59 }
58 }
60
59
61 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
62 self.revlog.node_from_rev(rev)
61 self.revlog.node_from_rev(rev)
63 }
62 }
64
63
65 pub fn rev_from_node(
64 pub fn rev_from_node(
66 &self,
65 &self,
67 node: NodePrefix,
66 node: NodePrefix,
68 ) -> Result<Revision, RevlogError> {
67 ) -> Result<Revision, RevlogError> {
69 self.revlog.rev_from_node(node)
68 self.revlog.rev_from_node(node)
70 }
69 }
71 }
70 }
72
71
72 /// A specialized `RevlogEntry` for `changelog` data format
73 ///
74 /// This is a `RevlogEntry` with the added semantics that the associated
75 /// data should meet the requirements for `changelog`, materialized by
76 /// the fact that `data()` constructs a `ChangelogRevisionData`.
77 /// In case that promise would be broken, the `data` method returns an error.
78 #[derive(Clone)]
79 pub struct ChangelogEntry<'changelog> {
80 /// Same data, as a generic `RevlogEntry`.
81 pub(crate) revlog_entry: RevlogEntry<'changelog>,
82 }
83
84 impl<'changelog> ChangelogEntry<'changelog> {
85 pub fn data<'a>(
86 &'a self,
87 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
88 let bytes = self.revlog_entry.data()?;
89 if bytes.is_empty() {
90 Ok(ChangelogRevisionData::null())
91 } else {
92 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
93 RevlogError::Other(HgError::CorruptedRepository(format!(
94 "Invalid changelog data for revision {}: {:?}",
95 self.revlog_entry.revision(),
96 err
97 )))
98 })?)
99 }
100 }
101
102 /// Obtain a reference to the underlying `RevlogEntry`.
103 ///
104 /// This allows the caller to access the information that is common
105 /// to all revlog entries: revision number, node id, parent revisions etc.
106 pub fn as_revlog_entry(&self) -> &RevlogEntry {
107 &self.revlog_entry
108 }
109 }
110
73 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
111 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
74 #[derive(PartialEq)]
112 #[derive(PartialEq)]
75 pub struct ChangelogRevisionData<'changelog> {
113 pub struct ChangelogRevisionData<'changelog> {
76 /// The data bytes of the `changelog` entry.
114 /// The data bytes of the `changelog` entry.
77 bytes: Cow<'changelog, [u8]>,
115 bytes: Cow<'changelog, [u8]>,
78 /// The end offset for the hex manifest (not including the newline)
116 /// The end offset for the hex manifest (not including the newline)
79 manifest_end: usize,
117 manifest_end: usize,
80 /// The end offset for the user+email (not including the newline)
118 /// The end offset for the user+email (not including the newline)
81 user_end: usize,
119 user_end: usize,
82 /// The end offset for the timestamp+timezone+extras (not including the
120 /// The end offset for the timestamp+timezone+extras (not including the
83 /// newline)
121 /// newline)
84 timestamp_end: usize,
122 timestamp_end: usize,
85 /// The end offset for the file list (not including the newline)
123 /// The end offset for the file list (not including the newline)
86 files_end: usize,
124 files_end: usize,
87 }
125 }
88
126
89 impl<'changelog> ChangelogRevisionData<'changelog> {
127 impl<'changelog> ChangelogRevisionData<'changelog> {
90 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
128 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
91 let mut line_iter = bytes.split(|b| b == &b'\n');
129 let mut line_iter = bytes.split(|b| b == &b'\n');
92 let manifest_end = line_iter
130 let manifest_end = line_iter
93 .next()
131 .next()
94 .expect("Empty iterator from split()?")
132 .expect("Empty iterator from split()?")
95 .len();
133 .len();
96 let user_slice = line_iter.next().ok_or_else(|| {
134 let user_slice = line_iter.next().ok_or_else(|| {
97 HgError::corrupted("Changeset data truncated after manifest line")
135 HgError::corrupted("Changeset data truncated after manifest line")
98 })?;
136 })?;
99 let user_end = manifest_end + 1 + user_slice.len();
137 let user_end = manifest_end + 1 + user_slice.len();
100 let timestamp_slice = line_iter.next().ok_or_else(|| {
138 let timestamp_slice = line_iter.next().ok_or_else(|| {
101 HgError::corrupted("Changeset data truncated after user line")
139 HgError::corrupted("Changeset data truncated after user line")
102 })?;
140 })?;
103 let timestamp_end = user_end + 1 + timestamp_slice.len();
141 let timestamp_end = user_end + 1 + timestamp_slice.len();
104 let mut files_end = timestamp_end + 1;
142 let mut files_end = timestamp_end + 1;
105 loop {
143 loop {
106 let line = line_iter.next().ok_or_else(|| {
144 let line = line_iter.next().ok_or_else(|| {
107 HgError::corrupted("Changeset data truncated in files list")
145 HgError::corrupted("Changeset data truncated in files list")
108 })?;
146 })?;
109 if line.is_empty() {
147 if line.is_empty() {
110 if files_end == bytes.len() {
148 if files_end == bytes.len() {
111 // The list of files ended with a single newline (there
149 // The list of files ended with a single newline (there
112 // should be two)
150 // should be two)
113 return Err(HgError::corrupted(
151 return Err(HgError::corrupted(
114 "Changeset data truncated after files list",
152 "Changeset data truncated after files list",
115 ));
153 ));
116 }
154 }
117 files_end -= 1;
155 files_end -= 1;
118 break;
156 break;
119 }
157 }
120 files_end += line.len() + 1;
158 files_end += line.len() + 1;
121 }
159 }
122
160
123 Ok(Self {
161 Ok(Self {
124 bytes,
162 bytes,
125 manifest_end,
163 manifest_end,
126 user_end,
164 user_end,
127 timestamp_end,
165 timestamp_end,
128 files_end,
166 files_end,
129 })
167 })
130 }
168 }
131
169
132 fn null() -> Self {
170 fn null() -> Self {
133 Self::new(Cow::Borrowed(
171 Self::new(Cow::Borrowed(
134 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
172 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
135 ))
173 ))
136 .unwrap()
174 .unwrap()
137 }
175 }
138
176
139 /// Return an iterator over the lines of the entry.
177 /// Return an iterator over the lines of the entry.
140 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
178 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
141 self.bytes.split(|b| b == &b'\n')
179 self.bytes.split(|b| b == &b'\n')
142 }
180 }
143
181
144 /// Return the node id of the `manifest` referenced by this `changelog`
182 /// Return the node id of the `manifest` referenced by this `changelog`
145 /// entry.
183 /// entry.
146 pub fn manifest_node(&self) -> Result<Node, HgError> {
184 pub fn manifest_node(&self) -> Result<Node, HgError> {
147 let manifest_node_hex = &self.bytes[..self.manifest_end];
185 let manifest_node_hex = &self.bytes[..self.manifest_end];
148 Node::from_hex_for_repo(manifest_node_hex)
186 Node::from_hex_for_repo(manifest_node_hex)
149 }
187 }
150
188
151 /// The full user string (usually a name followed by an email enclosed in
189 /// The full user string (usually a name followed by an email enclosed in
152 /// angle brackets)
190 /// angle brackets)
153 pub fn user(&self) -> &[u8] {
191 pub fn user(&self) -> &[u8] {
154 &self.bytes[self.manifest_end + 1..self.user_end]
192 &self.bytes[self.manifest_end + 1..self.user_end]
155 }
193 }
156
194
157 /// The full timestamp line (timestamp in seconds, offset in seconds, and
195 /// The full timestamp line (timestamp in seconds, offset in seconds, and
158 /// possibly extras)
196 /// possibly extras)
159 // TODO: We should expose this in a more useful way
197 // TODO: We should expose this in a more useful way
160 pub fn timestamp_line(&self) -> &[u8] {
198 pub fn timestamp_line(&self) -> &[u8] {
161 &self.bytes[self.user_end + 1..self.timestamp_end]
199 &self.bytes[self.user_end + 1..self.timestamp_end]
162 }
200 }
163
201
164 /// The files changed in this revision.
202 /// The files changed in this revision.
165 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
203 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
166 self.bytes[self.timestamp_end + 1..self.files_end]
204 self.bytes[self.timestamp_end + 1..self.files_end]
167 .split(|b| b == &b'\n')
205 .split(|b| b == &b'\n')
168 .map(HgPath::new)
206 .map(HgPath::new)
169 }
207 }
170
208
171 /// The change description.
209 /// The change description.
172 pub fn description(&self) -> &[u8] {
210 pub fn description(&self) -> &[u8] {
173 &self.bytes[self.files_end + 2..]
211 &self.bytes[self.files_end + 2..]
174 }
212 }
175 }
213 }
176
214
177 impl Debug for ChangelogRevisionData<'_> {
215 impl Debug for ChangelogRevisionData<'_> {
178 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
216 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
179 f.debug_struct("ChangelogRevisionData")
217 f.debug_struct("ChangelogRevisionData")
180 .field("bytes", &debug_bytes(&self.bytes))
218 .field("bytes", &debug_bytes(&self.bytes))
181 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
219 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
182 .field(
220 .field(
183 "user",
221 "user",
184 &debug_bytes(
222 &debug_bytes(
185 &self.bytes[self.manifest_end + 1..self.user_end],
223 &self.bytes[self.manifest_end + 1..self.user_end],
186 ),
224 ),
187 )
225 )
188 .field(
226 .field(
189 "timestamp",
227 "timestamp",
190 &debug_bytes(
228 &debug_bytes(
191 &self.bytes[self.user_end + 1..self.timestamp_end],
229 &self.bytes[self.user_end + 1..self.timestamp_end],
192 ),
230 ),
193 )
231 )
194 .field(
232 .field(
195 "files",
233 "files",
196 &debug_bytes(
234 &debug_bytes(
197 &self.bytes[self.timestamp_end + 1..self.files_end],
235 &self.bytes[self.timestamp_end + 1..self.files_end],
198 ),
236 ),
199 )
237 )
200 .field(
238 .field(
201 "description",
239 "description",
202 &debug_bytes(&self.bytes[self.files_end + 2..]),
240 &debug_bytes(&self.bytes[self.files_end + 2..]),
203 )
241 )
204 .finish()
242 .finish()
205 }
243 }
206 }
244 }
207
245
208 fn debug_bytes(bytes: &[u8]) -> String {
246 fn debug_bytes(bytes: &[u8]) -> String {
209 String::from_utf8_lossy(
247 String::from_utf8_lossy(
210 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
248 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
211 )
249 )
212 .to_string()
250 .to_string()
213 }
251 }
214
252
215 #[cfg(test)]
253 #[cfg(test)]
216 mod tests {
254 mod tests {
217 use super::*;
255 use super::*;
218 use crate::vfs::Vfs;
256 use crate::vfs::Vfs;
219 use crate::NULL_REVISION;
257 use crate::NULL_REVISION;
220 use pretty_assertions::assert_eq;
258 use pretty_assertions::assert_eq;
221
259
222 #[test]
260 #[test]
223 fn test_create_changelogrevisiondata_invalid() {
261 fn test_create_changelogrevisiondata_invalid() {
224 // Completely empty
262 // Completely empty
225 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
263 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
226 // No newline after manifest
264 // No newline after manifest
227 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
265 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
228 // No newline after user
266 // No newline after user
229 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
267 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
230 // No newline after timestamp
268 // No newline after timestamp
231 assert!(
269 assert!(
232 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
270 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
233 );
271 );
234 // Missing newline after files
272 // Missing newline after files
235 assert!(ChangelogRevisionData::new(Cow::Borrowed(
273 assert!(ChangelogRevisionData::new(Cow::Borrowed(
236 b"abcd\n\n0 0\nfile1\nfile2"
274 b"abcd\n\n0 0\nfile1\nfile2"
237 ))
275 ))
238 .is_err(),);
276 .is_err(),);
239 // Only one newline after files
277 // Only one newline after files
240 assert!(ChangelogRevisionData::new(Cow::Borrowed(
278 assert!(ChangelogRevisionData::new(Cow::Borrowed(
241 b"abcd\n\n0 0\nfile1\nfile2\n"
279 b"abcd\n\n0 0\nfile1\nfile2\n"
242 ))
280 ))
243 .is_err(),);
281 .is_err(),);
244 }
282 }
245
283
246 #[test]
284 #[test]
247 fn test_create_changelogrevisiondata() {
285 fn test_create_changelogrevisiondata() {
248 let data = ChangelogRevisionData::new(Cow::Borrowed(
286 let data = ChangelogRevisionData::new(Cow::Borrowed(
249 b"0123456789abcdef0123456789abcdef01234567
287 b"0123456789abcdef0123456789abcdef01234567
250 Some One <someone@example.com>
288 Some One <someone@example.com>
251 0 0
289 0 0
252 file1
290 file1
253 file2
291 file2
254
292
255 some
293 some
256 commit
294 commit
257 message",
295 message",
258 ))
296 ))
259 .unwrap();
297 .unwrap();
260 assert_eq!(
298 assert_eq!(
261 data.manifest_node().unwrap(),
299 data.manifest_node().unwrap(),
262 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
300 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
263 .unwrap()
301 .unwrap()
264 );
302 );
265 assert_eq!(data.user(), b"Some One <someone@example.com>");
303 assert_eq!(data.user(), b"Some One <someone@example.com>");
266 assert_eq!(data.timestamp_line(), b"0 0");
304 assert_eq!(data.timestamp_line(), b"0 0");
267 assert_eq!(
305 assert_eq!(
268 data.files().collect_vec(),
306 data.files().collect_vec(),
269 vec![HgPath::new("file1"), HgPath::new("file2")]
307 vec![HgPath::new("file1"), HgPath::new("file2")]
270 );
308 );
271 assert_eq!(data.description(), b"some\ncommit\nmessage");
309 assert_eq!(data.description(), b"some\ncommit\nmessage");
272 }
310 }
273
311
274 #[test]
312 #[test]
275 fn test_data_from_rev_null() -> Result<(), RevlogError> {
313 fn test_data_from_rev_null() -> Result<(), RevlogError> {
276 // an empty revlog will be enough for this case
314 // an empty revlog will be enough for this case
277 let temp = tempfile::tempdir().unwrap();
315 let temp = tempfile::tempdir().unwrap();
278 let vfs = Vfs { base: temp.path() };
316 let vfs = Vfs { base: temp.path() };
279 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
317 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
280 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
318 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
281
319
282 let changelog = Changelog { revlog };
320 let changelog = Changelog { revlog };
283 assert_eq!(
321 assert_eq!(
284 changelog.data_for_rev(NULL_REVISION)?,
322 changelog.data_for_rev(NULL_REVISION)?,
285 ChangelogRevisionData::null()
323 ChangelogRevisionData::null()
286 );
324 );
287 Ok(())
325 Ok(())
288 }
326 }
289 }
327 }
General Comments 0
You need to be logged in to leave comments. Login now