##// END OF EJS Templates
rust-changelog: introduce ChangelogEntry parent entries accessors...
Georges Racinet -
r51271:071a6c1d default
parent child Browse files
Show More
@@ -1,327 +1,341
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::{Node, NodePrefix};
2 use crate::revlog::{Node, NodePrefix};
3 use crate::revlog::{Revision, NULL_REVISION};
3 use crate::revlog::{Revision, NULL_REVISION};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 use crate::utils::hg_path::HgPath;
5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use itertools::Itertools;
7 use itertools::Itertools;
8 use std::ascii::escape_default;
8 use std::ascii::escape_default;
9 use std::borrow::Cow;
9 use std::borrow::Cow;
10 use std::fmt::{Debug, Formatter};
10 use std::fmt::{Debug, Formatter};
11
11
12 /// A specialized `Revlog` to work with changelog data format.
12 /// A specialized `Revlog` to work with changelog data format.
13 pub struct Changelog {
13 pub struct Changelog {
14 /// The generic `revlog` format.
14 /// The generic `revlog` format.
15 pub(crate) revlog: Revlog,
15 pub(crate) revlog: Revlog,
16 }
16 }
17
17
18 impl Changelog {
18 impl Changelog {
19 /// Open the `changelog` of a repository given by its root.
19 /// Open the `changelog` of a repository given by its root.
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 let revlog =
21 let revlog =
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 Ok(Self { revlog })
23 Ok(Self { revlog })
24 }
24 }
25
25
26 /// Return the `ChangelogRevisionData` for the given node ID.
26 /// Return the `ChangelogRevisionData` for the given node ID.
27 pub fn data_for_node(
27 pub fn data_for_node(
28 &self,
28 &self,
29 node: NodePrefix,
29 node: NodePrefix,
30 ) -> Result<ChangelogRevisionData, RevlogError> {
30 ) -> Result<ChangelogRevisionData, RevlogError> {
31 let rev = self.revlog.rev_from_node(node)?;
31 let rev = self.revlog.rev_from_node(node)?;
32 self.data_for_rev(rev)
32 self.data_for_rev(rev)
33 }
33 }
34
34
35 /// Return the [`ChangelogEntry`] for the given revision number.
35 /// Return the [`ChangelogEntry`] for the given revision number.
36 pub fn entry_for_rev(
36 pub fn entry_for_rev(
37 &self,
37 &self,
38 rev: Revision,
38 rev: Revision,
39 ) -> Result<ChangelogEntry, RevlogError> {
39 ) -> Result<ChangelogEntry, RevlogError> {
40 let revlog_entry = self.revlog.get_entry(rev)?;
40 let revlog_entry = self.revlog.get_entry(rev)?;
41 Ok(ChangelogEntry { revlog_entry })
41 Ok(ChangelogEntry { revlog_entry })
42 }
42 }
43
43
44 /// Return the [`ChangelogRevisionData`] for the given revision number.
44 /// Return the [`ChangelogRevisionData`] for the given revision number.
45 ///
45 ///
46 /// This is a useful shortcut in case the caller does not need the
46 /// This is a useful shortcut in case the caller does not need the
47 /// generic revlog information (parents, hashes etc). Otherwise
47 /// generic revlog information (parents, hashes etc). Otherwise
48 /// consider taking a [`ChangelogEntry`] with
48 /// consider taking a [`ChangelogEntry`] with
49 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
49 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
50 pub fn data_for_rev(
50 pub fn data_for_rev(
51 &self,
51 &self,
52 rev: Revision,
52 rev: Revision,
53 ) -> Result<ChangelogRevisionData, RevlogError> {
53 ) -> Result<ChangelogRevisionData, RevlogError> {
54 if rev == NULL_REVISION {
54 if rev == NULL_REVISION {
55 return Ok(ChangelogRevisionData::null());
55 return Ok(ChangelogRevisionData::null());
56 }
56 }
57 self.entry_for_rev(rev)?.data()
57 self.entry_for_rev(rev)?.data()
58 }
58 }
59
59
60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
61 self.revlog.node_from_rev(rev)
61 self.revlog.node_from_rev(rev)
62 }
62 }
63
63
64 pub fn rev_from_node(
64 pub fn rev_from_node(
65 &self,
65 &self,
66 node: NodePrefix,
66 node: NodePrefix,
67 ) -> Result<Revision, RevlogError> {
67 ) -> Result<Revision, RevlogError> {
68 self.revlog.rev_from_node(node)
68 self.revlog.rev_from_node(node)
69 }
69 }
70 }
70 }
71
71
72 /// A specialized `RevlogEntry` for `changelog` data format
72 /// A specialized `RevlogEntry` for `changelog` data format
73 ///
73 ///
74 /// This is a `RevlogEntry` with the added semantics that the associated
74 /// This is a `RevlogEntry` with the added semantics that the associated
75 /// data should meet the requirements for `changelog`, materialized by
75 /// data should meet the requirements for `changelog`, materialized by
76 /// the fact that `data()` constructs a `ChangelogRevisionData`.
76 /// the fact that `data()` constructs a `ChangelogRevisionData`.
77 /// In case that promise would be broken, the `data` method returns an error.
77 /// In case that promise would be broken, the `data` method returns an error.
78 #[derive(Clone)]
78 #[derive(Clone)]
79 pub struct ChangelogEntry<'changelog> {
79 pub struct ChangelogEntry<'changelog> {
80 /// Same data, as a generic `RevlogEntry`.
80 /// Same data, as a generic `RevlogEntry`.
81 pub(crate) revlog_entry: RevlogEntry<'changelog>,
81 pub(crate) revlog_entry: RevlogEntry<'changelog>,
82 }
82 }
83
83
84 impl<'changelog> ChangelogEntry<'changelog> {
84 impl<'changelog> ChangelogEntry<'changelog> {
85 pub fn data<'a>(
85 pub fn data<'a>(
86 &'a self,
86 &'a self,
87 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
87 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
88 let bytes = self.revlog_entry.data()?;
88 let bytes = self.revlog_entry.data()?;
89 if bytes.is_empty() {
89 if bytes.is_empty() {
90 Ok(ChangelogRevisionData::null())
90 Ok(ChangelogRevisionData::null())
91 } else {
91 } else {
92 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
92 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
93 RevlogError::Other(HgError::CorruptedRepository(format!(
93 RevlogError::Other(HgError::CorruptedRepository(format!(
94 "Invalid changelog data for revision {}: {:?}",
94 "Invalid changelog data for revision {}: {:?}",
95 self.revlog_entry.revision(),
95 self.revlog_entry.revision(),
96 err
96 err
97 )))
97 )))
98 })?)
98 })?)
99 }
99 }
100 }
100 }
101
101
102 /// Obtain a reference to the underlying `RevlogEntry`.
102 /// Obtain a reference to the underlying `RevlogEntry`.
103 ///
103 ///
104 /// This allows the caller to access the information that is common
104 /// This allows the caller to access the information that is common
105 /// to all revlog entries: revision number, node id, parent revisions etc.
105 /// to all revlog entries: revision number, node id, parent revisions etc.
106 pub fn as_revlog_entry(&self) -> &RevlogEntry {
106 pub fn as_revlog_entry(&self) -> &RevlogEntry {
107 &self.revlog_entry
107 &self.revlog_entry
108 }
108 }
109
110 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
111 Ok(self
112 .revlog_entry
113 .p1_entry()?
114 .map(|revlog_entry| Self { revlog_entry }))
115 }
116
117 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
118 Ok(self
119 .revlog_entry
120 .p2_entry()?
121 .map(|revlog_entry| Self { revlog_entry }))
122 }
109 }
123 }
110
124
111 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
125 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
112 #[derive(PartialEq)]
126 #[derive(PartialEq)]
113 pub struct ChangelogRevisionData<'changelog> {
127 pub struct ChangelogRevisionData<'changelog> {
114 /// The data bytes of the `changelog` entry.
128 /// The data bytes of the `changelog` entry.
115 bytes: Cow<'changelog, [u8]>,
129 bytes: Cow<'changelog, [u8]>,
116 /// The end offset for the hex manifest (not including the newline)
130 /// The end offset for the hex manifest (not including the newline)
117 manifest_end: usize,
131 manifest_end: usize,
118 /// The end offset for the user+email (not including the newline)
132 /// The end offset for the user+email (not including the newline)
119 user_end: usize,
133 user_end: usize,
120 /// The end offset for the timestamp+timezone+extras (not including the
134 /// The end offset for the timestamp+timezone+extras (not including the
121 /// newline)
135 /// newline)
122 timestamp_end: usize,
136 timestamp_end: usize,
123 /// The end offset for the file list (not including the newline)
137 /// The end offset for the file list (not including the newline)
124 files_end: usize,
138 files_end: usize,
125 }
139 }
126
140
127 impl<'changelog> ChangelogRevisionData<'changelog> {
141 impl<'changelog> ChangelogRevisionData<'changelog> {
128 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
142 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
129 let mut line_iter = bytes.split(|b| b == &b'\n');
143 let mut line_iter = bytes.split(|b| b == &b'\n');
130 let manifest_end = line_iter
144 let manifest_end = line_iter
131 .next()
145 .next()
132 .expect("Empty iterator from split()?")
146 .expect("Empty iterator from split()?")
133 .len();
147 .len();
134 let user_slice = line_iter.next().ok_or_else(|| {
148 let user_slice = line_iter.next().ok_or_else(|| {
135 HgError::corrupted("Changeset data truncated after manifest line")
149 HgError::corrupted("Changeset data truncated after manifest line")
136 })?;
150 })?;
137 let user_end = manifest_end + 1 + user_slice.len();
151 let user_end = manifest_end + 1 + user_slice.len();
138 let timestamp_slice = line_iter.next().ok_or_else(|| {
152 let timestamp_slice = line_iter.next().ok_or_else(|| {
139 HgError::corrupted("Changeset data truncated after user line")
153 HgError::corrupted("Changeset data truncated after user line")
140 })?;
154 })?;
141 let timestamp_end = user_end + 1 + timestamp_slice.len();
155 let timestamp_end = user_end + 1 + timestamp_slice.len();
142 let mut files_end = timestamp_end + 1;
156 let mut files_end = timestamp_end + 1;
143 loop {
157 loop {
144 let line = line_iter.next().ok_or_else(|| {
158 let line = line_iter.next().ok_or_else(|| {
145 HgError::corrupted("Changeset data truncated in files list")
159 HgError::corrupted("Changeset data truncated in files list")
146 })?;
160 })?;
147 if line.is_empty() {
161 if line.is_empty() {
148 if files_end == bytes.len() {
162 if files_end == bytes.len() {
149 // The list of files ended with a single newline (there
163 // The list of files ended with a single newline (there
150 // should be two)
164 // should be two)
151 return Err(HgError::corrupted(
165 return Err(HgError::corrupted(
152 "Changeset data truncated after files list",
166 "Changeset data truncated after files list",
153 ));
167 ));
154 }
168 }
155 files_end -= 1;
169 files_end -= 1;
156 break;
170 break;
157 }
171 }
158 files_end += line.len() + 1;
172 files_end += line.len() + 1;
159 }
173 }
160
174
161 Ok(Self {
175 Ok(Self {
162 bytes,
176 bytes,
163 manifest_end,
177 manifest_end,
164 user_end,
178 user_end,
165 timestamp_end,
179 timestamp_end,
166 files_end,
180 files_end,
167 })
181 })
168 }
182 }
169
183
170 fn null() -> Self {
184 fn null() -> Self {
171 Self::new(Cow::Borrowed(
185 Self::new(Cow::Borrowed(
172 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
186 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
173 ))
187 ))
174 .unwrap()
188 .unwrap()
175 }
189 }
176
190
177 /// Return an iterator over the lines of the entry.
191 /// Return an iterator over the lines of the entry.
178 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
192 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
179 self.bytes.split(|b| b == &b'\n')
193 self.bytes.split(|b| b == &b'\n')
180 }
194 }
181
195
182 /// Return the node id of the `manifest` referenced by this `changelog`
196 /// Return the node id of the `manifest` referenced by this `changelog`
183 /// entry.
197 /// entry.
184 pub fn manifest_node(&self) -> Result<Node, HgError> {
198 pub fn manifest_node(&self) -> Result<Node, HgError> {
185 let manifest_node_hex = &self.bytes[..self.manifest_end];
199 let manifest_node_hex = &self.bytes[..self.manifest_end];
186 Node::from_hex_for_repo(manifest_node_hex)
200 Node::from_hex_for_repo(manifest_node_hex)
187 }
201 }
188
202
189 /// The full user string (usually a name followed by an email enclosed in
203 /// The full user string (usually a name followed by an email enclosed in
190 /// angle brackets)
204 /// angle brackets)
191 pub fn user(&self) -> &[u8] {
205 pub fn user(&self) -> &[u8] {
192 &self.bytes[self.manifest_end + 1..self.user_end]
206 &self.bytes[self.manifest_end + 1..self.user_end]
193 }
207 }
194
208
195 /// The full timestamp line (timestamp in seconds, offset in seconds, and
209 /// The full timestamp line (timestamp in seconds, offset in seconds, and
196 /// possibly extras)
210 /// possibly extras)
197 // TODO: We should expose this in a more useful way
211 // TODO: We should expose this in a more useful way
198 pub fn timestamp_line(&self) -> &[u8] {
212 pub fn timestamp_line(&self) -> &[u8] {
199 &self.bytes[self.user_end + 1..self.timestamp_end]
213 &self.bytes[self.user_end + 1..self.timestamp_end]
200 }
214 }
201
215
202 /// The files changed in this revision.
216 /// The files changed in this revision.
203 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
217 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
204 self.bytes[self.timestamp_end + 1..self.files_end]
218 self.bytes[self.timestamp_end + 1..self.files_end]
205 .split(|b| b == &b'\n')
219 .split(|b| b == &b'\n')
206 .map(HgPath::new)
220 .map(HgPath::new)
207 }
221 }
208
222
209 /// The change description.
223 /// The change description.
210 pub fn description(&self) -> &[u8] {
224 pub fn description(&self) -> &[u8] {
211 &self.bytes[self.files_end + 2..]
225 &self.bytes[self.files_end + 2..]
212 }
226 }
213 }
227 }
214
228
215 impl Debug for ChangelogRevisionData<'_> {
229 impl Debug for ChangelogRevisionData<'_> {
216 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
230 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
217 f.debug_struct("ChangelogRevisionData")
231 f.debug_struct("ChangelogRevisionData")
218 .field("bytes", &debug_bytes(&self.bytes))
232 .field("bytes", &debug_bytes(&self.bytes))
219 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
233 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
220 .field(
234 .field(
221 "user",
235 "user",
222 &debug_bytes(
236 &debug_bytes(
223 &self.bytes[self.manifest_end + 1..self.user_end],
237 &self.bytes[self.manifest_end + 1..self.user_end],
224 ),
238 ),
225 )
239 )
226 .field(
240 .field(
227 "timestamp",
241 "timestamp",
228 &debug_bytes(
242 &debug_bytes(
229 &self.bytes[self.user_end + 1..self.timestamp_end],
243 &self.bytes[self.user_end + 1..self.timestamp_end],
230 ),
244 ),
231 )
245 )
232 .field(
246 .field(
233 "files",
247 "files",
234 &debug_bytes(
248 &debug_bytes(
235 &self.bytes[self.timestamp_end + 1..self.files_end],
249 &self.bytes[self.timestamp_end + 1..self.files_end],
236 ),
250 ),
237 )
251 )
238 .field(
252 .field(
239 "description",
253 "description",
240 &debug_bytes(&self.bytes[self.files_end + 2..]),
254 &debug_bytes(&self.bytes[self.files_end + 2..]),
241 )
255 )
242 .finish()
256 .finish()
243 }
257 }
244 }
258 }
245
259
/// Render `bytes` as printable ASCII text for debug output.
///
/// Each byte goes through [`std::ascii::escape_default`]: printable ASCII is
/// kept as is, tab, CR, LF, quotes and backslash get a backslash escape, and
/// every other byte becomes `\xNN`.
fn debug_bytes(bytes: &[u8]) -> String {
    // `escape_default` only ever yields ASCII bytes, so each one maps
    // directly to a `char`; no intermediate buffer or UTF-8 decoding needed.
    bytes
        .iter()
        .flat_map(|b| escape_default(*b))
        .map(char::from)
        .collect()
}
252
266
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::Vfs;
    use crate::NULL_REVISION;
    use pretty_assertions::assert_eq;

    /// Every truncated input must be rejected by the parser.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err());
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err());
    }

    /// A well-formed entry exposes each of its sections.
    #[test]
    fn test_create_changelogrevisiondata() {
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// `NULL_REVISION` resolves to the null data even on an empty revlog.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION)?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }
}
General Comments 0
You need to be logged in to leave comments. Login now