##// END OF EJS Templates
rust-changelog: introduce ChangelogEntry parent entries accessors...
Georges Racinet -
r51271:071a6c1d default
parent child Browse files
Show More
@@ -1,327 +1,341
1 1 use crate::errors::HgError;
2 2 use crate::revlog::{Node, NodePrefix};
3 3 use crate::revlog::{Revision, NULL_REVISION};
4 4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 5 use crate::utils::hg_path::HgPath;
6 6 use crate::vfs::Vfs;
7 7 use itertools::Itertools;
8 8 use std::ascii::escape_default;
9 9 use std::borrow::Cow;
10 10 use std::fmt::{Debug, Formatter};
11 11
12 12 /// A specialized `Revlog` to work with changelog data format.
13 13 pub struct Changelog {
14 14 /// The generic `revlog` format.
15 15 pub(crate) revlog: Revlog,
16 16 }
17 17
18 18 impl Changelog {
19 19 /// Open the `changelog` of a repository given by its root.
20 20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 21 let revlog =
22 22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 23 Ok(Self { revlog })
24 24 }
25 25
26 26 /// Return the `ChangelogRevisionData` for the given node ID.
27 27 pub fn data_for_node(
28 28 &self,
29 29 node: NodePrefix,
30 30 ) -> Result<ChangelogRevisionData, RevlogError> {
31 31 let rev = self.revlog.rev_from_node(node)?;
32 32 self.data_for_rev(rev)
33 33 }
34 34
35 35 /// Return the [`ChangelogEntry`] for the given revision number.
36 36 pub fn entry_for_rev(
37 37 &self,
38 38 rev: Revision,
39 39 ) -> Result<ChangelogEntry, RevlogError> {
40 40 let revlog_entry = self.revlog.get_entry(rev)?;
41 41 Ok(ChangelogEntry { revlog_entry })
42 42 }
43 43
44 44 /// Return the [`ChangelogRevisionData`] for the given revision number.
45 45 ///
46 46 /// This is a useful shortcut in case the caller does not need the
47 47 /// generic revlog information (parents, hashes etc). Otherwise
48 48 /// consider taking a [`ChangelogEntry`] with
49 49 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
50 50 pub fn data_for_rev(
51 51 &self,
52 52 rev: Revision,
53 53 ) -> Result<ChangelogRevisionData, RevlogError> {
54 54 if rev == NULL_REVISION {
55 55 return Ok(ChangelogRevisionData::null());
56 56 }
57 57 self.entry_for_rev(rev)?.data()
58 58 }
59 59
60 60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
61 61 self.revlog.node_from_rev(rev)
62 62 }
63 63
64 64 pub fn rev_from_node(
65 65 &self,
66 66 node: NodePrefix,
67 67 ) -> Result<Revision, RevlogError> {
68 68 self.revlog.rev_from_node(node)
69 69 }
70 70 }
71 71
72 72 /// A specialized `RevlogEntry` for `changelog` data format
73 73 ///
74 74 /// This is a `RevlogEntry` with the added semantics that the associated
75 75 /// data should meet the requirements for `changelog`, materialized by
76 76 /// the fact that `data()` constructs a `ChangelogRevisionData`.
77 77 /// In case that promise would be broken, the `data` method returns an error.
78 78 #[derive(Clone)]
79 79 pub struct ChangelogEntry<'changelog> {
80 80 /// Same data, as a generic `RevlogEntry`.
81 81 pub(crate) revlog_entry: RevlogEntry<'changelog>,
82 82 }
83 83
84 84 impl<'changelog> ChangelogEntry<'changelog> {
85 85 pub fn data<'a>(
86 86 &'a self,
87 87 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
88 88 let bytes = self.revlog_entry.data()?;
89 89 if bytes.is_empty() {
90 90 Ok(ChangelogRevisionData::null())
91 91 } else {
92 92 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
93 93 RevlogError::Other(HgError::CorruptedRepository(format!(
94 94 "Invalid changelog data for revision {}: {:?}",
95 95 self.revlog_entry.revision(),
96 96 err
97 97 )))
98 98 })?)
99 99 }
100 100 }
101 101
102 102 /// Obtain a reference to the underlying `RevlogEntry`.
103 103 ///
104 104 /// This allows the caller to access the information that is common
105 105 /// to all revlog entries: revision number, node id, parent revisions etc.
106 106 pub fn as_revlog_entry(&self) -> &RevlogEntry {
107 107 &self.revlog_entry
108 108 }
109
110 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
111 Ok(self
112 .revlog_entry
113 .p1_entry()?
114 .map(|revlog_entry| Self { revlog_entry }))
115 }
116
117 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
118 Ok(self
119 .revlog_entry
120 .p2_entry()?
121 .map(|revlog_entry| Self { revlog_entry }))
122 }
109 123 }
110 124
/// The data of a `changelog` entry, together with the offsets needed to
/// interpret its bytes section by section.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// The raw data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// Offset at which the hex manifest ends (newline excluded)
    manifest_end: usize,
    /// Offset at which the user+email ends (newline excluded)
    user_end: usize,
    /// Offset at which the timestamp+timezone+extras ends (newline
    /// excluded)
    timestamp_end: usize,
    /// Offset at which the file list ends (newline excluded)
    files_end: usize,
}
126 140
127 141 impl<'changelog> ChangelogRevisionData<'changelog> {
128 142 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
129 143 let mut line_iter = bytes.split(|b| b == &b'\n');
130 144 let manifest_end = line_iter
131 145 .next()
132 146 .expect("Empty iterator from split()?")
133 147 .len();
134 148 let user_slice = line_iter.next().ok_or_else(|| {
135 149 HgError::corrupted("Changeset data truncated after manifest line")
136 150 })?;
137 151 let user_end = manifest_end + 1 + user_slice.len();
138 152 let timestamp_slice = line_iter.next().ok_or_else(|| {
139 153 HgError::corrupted("Changeset data truncated after user line")
140 154 })?;
141 155 let timestamp_end = user_end + 1 + timestamp_slice.len();
142 156 let mut files_end = timestamp_end + 1;
143 157 loop {
144 158 let line = line_iter.next().ok_or_else(|| {
145 159 HgError::corrupted("Changeset data truncated in files list")
146 160 })?;
147 161 if line.is_empty() {
148 162 if files_end == bytes.len() {
149 163 // The list of files ended with a single newline (there
150 164 // should be two)
151 165 return Err(HgError::corrupted(
152 166 "Changeset data truncated after files list",
153 167 ));
154 168 }
155 169 files_end -= 1;
156 170 break;
157 171 }
158 172 files_end += line.len() + 1;
159 173 }
160 174
161 175 Ok(Self {
162 176 bytes,
163 177 manifest_end,
164 178 user_end,
165 179 timestamp_end,
166 180 files_end,
167 181 })
168 182 }
169 183
170 184 fn null() -> Self {
171 185 Self::new(Cow::Borrowed(
172 186 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
173 187 ))
174 188 .unwrap()
175 189 }
176 190
177 191 /// Return an iterator over the lines of the entry.
178 192 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
179 193 self.bytes.split(|b| b == &b'\n')
180 194 }
181 195
182 196 /// Return the node id of the `manifest` referenced by this `changelog`
183 197 /// entry.
184 198 pub fn manifest_node(&self) -> Result<Node, HgError> {
185 199 let manifest_node_hex = &self.bytes[..self.manifest_end];
186 200 Node::from_hex_for_repo(manifest_node_hex)
187 201 }
188 202
189 203 /// The full user string (usually a name followed by an email enclosed in
190 204 /// angle brackets)
191 205 pub fn user(&self) -> &[u8] {
192 206 &self.bytes[self.manifest_end + 1..self.user_end]
193 207 }
194 208
195 209 /// The full timestamp line (timestamp in seconds, offset in seconds, and
196 210 /// possibly extras)
197 211 // TODO: We should expose this in a more useful way
198 212 pub fn timestamp_line(&self) -> &[u8] {
199 213 &self.bytes[self.user_end + 1..self.timestamp_end]
200 214 }
201 215
202 216 /// The files changed in this revision.
203 217 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
204 218 self.bytes[self.timestamp_end + 1..self.files_end]
205 219 .split(|b| b == &b'\n')
206 220 .map(HgPath::new)
207 221 }
208 222
209 223 /// The change description.
210 224 pub fn description(&self) -> &[u8] {
211 225 &self.bytes[self.files_end + 2..]
212 226 }
213 227 }
214 228
215 229 impl Debug for ChangelogRevisionData<'_> {
216 230 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
217 231 f.debug_struct("ChangelogRevisionData")
218 232 .field("bytes", &debug_bytes(&self.bytes))
219 233 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
220 234 .field(
221 235 "user",
222 236 &debug_bytes(
223 237 &self.bytes[self.manifest_end + 1..self.user_end],
224 238 ),
225 239 )
226 240 .field(
227 241 "timestamp",
228 242 &debug_bytes(
229 243 &self.bytes[self.user_end + 1..self.timestamp_end],
230 244 ),
231 245 )
232 246 .field(
233 247 "files",
234 248 &debug_bytes(
235 249 &self.bytes[self.timestamp_end + 1..self.files_end],
236 250 ),
237 251 )
238 252 .field(
239 253 "description",
240 254 &debug_bytes(&self.bytes[self.files_end + 2..]),
241 255 )
242 256 .finish()
243 257 }
244 258 }
245 259
/// Render arbitrary bytes as a printable `String` for debugging purposes.
///
/// Each byte goes through [`escape_default`]. The escaped output consists
/// of ASCII bytes only, so every one of them maps directly to a `char` and
/// the `String` can be built without an intermediate buffer.
fn debug_bytes(bytes: &[u8]) -> String {
    bytes
        .iter()
        .flat_map(|b| escape_default(*b))
        .map(char::from)
        .collect()
}
252 266
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::Vfs;
    use crate::NULL_REVISION;
    use pretty_assertions::assert_eq;

    /// Every kind of truncated input must be rejected by the parser.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err(),);
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err(),);
    }

    /// A well-formed entry exposes each of its sections through the
    /// accessors.
    #[test]
    fn test_create_changelogrevisiondata() {
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// Asking for the null revision yields the null data, even on an empty
    /// revlog.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION)?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }
}
General Comments 0
You need to be logged in to leave comments. Login now