##// END OF EJS Templates
rust-changelog: don't panic on empty file lists
Arun Kulshreshtha -
r52256:d626e5e7 stable
parent child Browse files
Show More
@@ -1,359 +1,374 b''
1 1 use crate::errors::HgError;
2 2 use crate::revlog::Revision;
3 3 use crate::revlog::{Node, NodePrefix};
4 4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 5 use crate::utils::hg_path::HgPath;
6 6 use crate::vfs::Vfs;
7 7 use crate::{Graph, GraphError, UncheckedRevision};
8 use itertools::Itertools;
8 use itertools::{Either, Itertools};
9 9 use std::ascii::escape_default;
10 10 use std::borrow::Cow;
11 11 use std::fmt::{Debug, Formatter};
12 use std::iter;
12 13
13 14 /// A specialized `Revlog` to work with changelog data format.
14 15 pub struct Changelog {
15 16 /// The generic `revlog` format.
16 17 pub(crate) revlog: Revlog,
17 18 }
18 19
19 20 impl Changelog {
20 21 /// Open the `changelog` of a repository given by its root.
21 22 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
22 23 let revlog =
23 24 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
24 25 Ok(Self { revlog })
25 26 }
26 27
27 28 /// Return the `ChangelogRevisionData` for the given node ID.
28 29 pub fn data_for_node(
29 30 &self,
30 31 node: NodePrefix,
31 32 ) -> Result<ChangelogRevisionData, RevlogError> {
32 33 let rev = self.revlog.rev_from_node(node)?;
33 34 self.entry_for_checked_rev(rev)?.data()
34 35 }
35 36
36 37 /// Return the [`ChangelogEntry`] for the given revision number.
37 38 pub fn entry_for_rev(
38 39 &self,
39 40 rev: UncheckedRevision,
40 41 ) -> Result<ChangelogEntry, RevlogError> {
41 42 let revlog_entry = self.revlog.get_entry(rev)?;
42 43 Ok(ChangelogEntry { revlog_entry })
43 44 }
44 45
45 46 /// Same as [`Self::entry_for_rev`] for checked revisions.
46 47 fn entry_for_checked_rev(
47 48 &self,
48 49 rev: Revision,
49 50 ) -> Result<ChangelogEntry, RevlogError> {
50 51 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
51 52 Ok(ChangelogEntry { revlog_entry })
52 53 }
53 54
54 55 /// Return the [`ChangelogRevisionData`] for the given revision number.
55 56 ///
56 57 /// This is a useful shortcut in case the caller does not need the
57 58 /// generic revlog information (parents, hashes etc). Otherwise
58 59 /// consider taking a [`ChangelogEntry`] with
59 60 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
60 61 pub fn data_for_rev(
61 62 &self,
62 63 rev: UncheckedRevision,
63 64 ) -> Result<ChangelogRevisionData, RevlogError> {
64 65 self.entry_for_rev(rev)?.data()
65 66 }
66 67
67 68 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
68 69 self.revlog.node_from_rev(rev)
69 70 }
70 71
71 72 pub fn rev_from_node(
72 73 &self,
73 74 node: NodePrefix,
74 75 ) -> Result<Revision, RevlogError> {
75 76 self.revlog.rev_from_node(node)
76 77 }
77 78 }
78 79
79 80 impl Graph for Changelog {
80 81 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
81 82 self.revlog.parents(rev)
82 83 }
83 84 }
84 85
85 86 /// A specialized `RevlogEntry` for `changelog` data format
86 87 ///
87 88 /// This is a `RevlogEntry` with the added semantics that the associated
88 89 /// data should meet the requirements for `changelog`, materialized by
89 90 /// the fact that `data()` constructs a `ChangelogRevisionData`.
90 91 /// In case that promise would be broken, the `data` method returns an error.
91 92 #[derive(Clone)]
92 93 pub struct ChangelogEntry<'changelog> {
93 94 /// Same data, as a generic `RevlogEntry`.
94 95 pub(crate) revlog_entry: RevlogEntry<'changelog>,
95 96 }
96 97
97 98 impl<'changelog> ChangelogEntry<'changelog> {
98 99 pub fn data<'a>(
99 100 &'a self,
100 101 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
101 102 let bytes = self.revlog_entry.data()?;
102 103 if bytes.is_empty() {
103 104 Ok(ChangelogRevisionData::null())
104 105 } else {
105 106 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
106 107 RevlogError::Other(HgError::CorruptedRepository(format!(
107 108 "Invalid changelog data for revision {}: {:?}",
108 109 self.revlog_entry.revision(),
109 110 err
110 111 )))
111 112 })?)
112 113 }
113 114 }
114 115
115 116 /// Obtain a reference to the underlying `RevlogEntry`.
116 117 ///
117 118 /// This allows the caller to access the information that is common
118 119 /// to all revlog entries: revision number, node id, parent revisions etc.
119 120 pub fn as_revlog_entry(&self) -> &RevlogEntry {
120 121 &self.revlog_entry
121 122 }
122 123
123 124 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
124 125 Ok(self
125 126 .revlog_entry
126 127 .p1_entry()?
127 128 .map(|revlog_entry| Self { revlog_entry }))
128 129 }
129 130
130 131 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 132 Ok(self
132 133 .revlog_entry
133 134 .p2_entry()?
134 135 .map(|revlog_entry| Self { revlog_entry }))
135 136 }
136 137 }
137 138
/// The bytes of one `changelog` entry, together with the offsets needed to
/// slice them into manifest, user, timestamp, file list and description.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// Raw bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// Offset where the hex manifest ends (its newline is excluded)
    manifest_end: usize,
    /// Offset where the user+email line ends (its newline is excluded)
    user_end: usize,
    /// Offset where the timestamp+timezone+extras line ends (its newline
    /// is excluded)
    timestamp_end: usize,
    /// Offset where the file list ends (its newline is excluded)
    files_end: usize,
}
153 154
154 155 impl<'changelog> ChangelogRevisionData<'changelog> {
155 156 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
156 157 let mut line_iter = bytes.split(|b| b == &b'\n');
157 158 let manifest_end = line_iter
158 159 .next()
159 160 .expect("Empty iterator from split()?")
160 161 .len();
161 162 let user_slice = line_iter.next().ok_or_else(|| {
162 163 HgError::corrupted("Changeset data truncated after manifest line")
163 164 })?;
164 165 let user_end = manifest_end + 1 + user_slice.len();
165 166 let timestamp_slice = line_iter.next().ok_or_else(|| {
166 167 HgError::corrupted("Changeset data truncated after user line")
167 168 })?;
168 169 let timestamp_end = user_end + 1 + timestamp_slice.len();
169 170 let mut files_end = timestamp_end + 1;
170 171 loop {
171 172 let line = line_iter.next().ok_or_else(|| {
172 173 HgError::corrupted("Changeset data truncated in files list")
173 174 })?;
174 175 if line.is_empty() {
175 176 if files_end == bytes.len() {
176 177 // The list of files ended with a single newline (there
177 178 // should be two)
178 179 return Err(HgError::corrupted(
179 180 "Changeset data truncated after files list",
180 181 ));
181 182 }
182 183 files_end -= 1;
183 184 break;
184 185 }
185 186 files_end += line.len() + 1;
186 187 }
187 188
188 189 Ok(Self {
189 190 bytes,
190 191 manifest_end,
191 192 user_end,
192 193 timestamp_end,
193 194 files_end,
194 195 })
195 196 }
196 197
197 198 fn null() -> Self {
198 199 Self::new(Cow::Borrowed(
199 200 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
200 201 ))
201 202 .unwrap()
202 203 }
203 204
204 205 /// Return an iterator over the lines of the entry.
205 206 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
206 207 self.bytes.split(|b| b == &b'\n')
207 208 }
208 209
209 210 /// Return the node id of the `manifest` referenced by this `changelog`
210 211 /// entry.
211 212 pub fn manifest_node(&self) -> Result<Node, HgError> {
212 213 let manifest_node_hex = &self.bytes[..self.manifest_end];
213 214 Node::from_hex_for_repo(manifest_node_hex)
214 215 }
215 216
216 217 /// The full user string (usually a name followed by an email enclosed in
217 218 /// angle brackets)
218 219 pub fn user(&self) -> &[u8] {
219 220 &self.bytes[self.manifest_end + 1..self.user_end]
220 221 }
221 222
222 223 /// The full timestamp line (timestamp in seconds, offset in seconds, and
223 224 /// possibly extras)
224 225 // TODO: We should expose this in a more useful way
225 226 pub fn timestamp_line(&self) -> &[u8] {
226 227 &self.bytes[self.user_end + 1..self.timestamp_end]
227 228 }
228 229
229 230 /// The files changed in this revision.
230 231 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
232 if self.timestamp_end == self.files_end {
233 Either::Left(iter::empty())
234 } else {
235 Either::Right(
231 236 self.bytes[self.timestamp_end + 1..self.files_end]
232 237 .split(|b| b == &b'\n')
233 .map(HgPath::new)
238 .map(HgPath::new),
239 )
240 }
234 241 }
235 242
236 243 /// The change description.
237 244 pub fn description(&self) -> &[u8] {
238 245 &self.bytes[self.files_end + 2..]
239 246 }
240 247 }
241 248
242 249 impl Debug for ChangelogRevisionData<'_> {
243 250 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
244 251 f.debug_struct("ChangelogRevisionData")
245 252 .field("bytes", &debug_bytes(&self.bytes))
246 253 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
247 254 .field(
248 255 "user",
249 256 &debug_bytes(
250 257 &self.bytes[self.manifest_end + 1..self.user_end],
251 258 ),
252 259 )
253 260 .field(
254 261 "timestamp",
255 262 &debug_bytes(
256 263 &self.bytes[self.user_end + 1..self.timestamp_end],
257 264 ),
258 265 )
259 266 .field(
260 267 "files",
261 268 &debug_bytes(
262 269 &self.bytes[self.timestamp_end + 1..self.files_end],
263 270 ),
264 271 )
265 272 .field(
266 273 "description",
267 274 &debug_bytes(&self.bytes[self.files_end + 2..]),
268 275 )
269 276 .finish()
270 277 }
271 278 }
272 279
273 280 fn debug_bytes(bytes: &[u8]) -> String {
274 281 String::from_utf8_lossy(
275 282 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
276 283 )
277 284 .to_string()
278 285 }
279 286
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::Vfs;
    use crate::NULL_REVISION;
    use pretty_assertions::assert_eq;

    /// Every truncated form of a changelog entry must be rejected.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err());
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err());
    }

    /// A well-formed entry is split into its sections correctly.
    #[test]
    fn test_create_changelogrevisiondata() {
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567\n\
              Some One <someone@example.com>\n\
              0 0\n\
              file1\n\
              file2\n\
              \n\
              some\n\
              commit\n\
              message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// The null revision resolves to the null revision data.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION.into())?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }

    /// Listing the files of an entry without any must not panic.
    #[test]
    fn test_empty_files_list() {
        assert!(ChangelogRevisionData::null()
            .files()
            .collect_vec()
            .is_empty());
    }
}
General Comments 0
You need to be logged in to leave comments. Login now