rust-changelog: start parsing changeset data...
Martin von Zweigbergk -
r49938:95da3e99 default
parent child Browse files
Show More
@@ -3,6 +3,10 b' use crate::repo::Repo;'
3 use crate::revlog::revlog::{Revlog, RevlogError};
3 use crate::revlog::revlog::{Revlog, RevlogError};
4 use crate::revlog::Revision;
4 use crate::revlog::Revision;
5 use crate::revlog::{Node, NodePrefix};
5 use crate::revlog::{Node, NodePrefix};
6 use crate::utils::hg_path::HgPath;
7 use itertools::Itertools;
8 use std::ascii::escape_default;
9 use std::fmt::{Debug, Formatter};
6
10
7 /// A specialized `Revlog` to work with `changelog` data format.
11 /// A specialized `Revlog` to work with `changelog` data format.
8 pub struct Changelog {
12 pub struct Changelog {
@@ -35,7 +39,12 b' impl Changelog {'
35 if bytes.is_empty() {
39 if bytes.is_empty() {
36 Ok(ChangelogRevisionData::null())
40 Ok(ChangelogRevisionData::null())
37 } else {
41 } else {
38 Ok(ChangelogRevisionData::new(bytes))
42 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
43 RevlogError::Other(HgError::CorruptedRepository(format!(
44 "Invalid changelog data for revision {}: {:?}",
45 rev, err
46 )))
47 })?)
39 }
48 }
40 }
49 }
41
50
@@ -45,21 +54,69 b' impl Changelog {'
45 }
54 }
46
55
47 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
56 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
48 #[derive(Debug)]
57 #[derive(PartialEq)]
49 pub struct ChangelogRevisionData {
58 pub struct ChangelogRevisionData {
50 /// The data bytes of the `changelog` entry.
59 /// The data bytes of the `changelog` entry.
51 bytes: Vec<u8>,
60 bytes: Vec<u8>,
61 /// The end offset for the hex manifest (not including the newline)
62 manifest_end: usize,
63 /// The end offset for the user+email (not including the newline)
64 user_end: usize,
65 /// The end offset for the timestamp+timezone+extras (not including the
66 /// newline)
67 timestamp_end: usize,
68 /// The end offset for the file list (not including the newline); equals `timestamp_end` when the file list is empty
69 files_end: usize,
52 }
70 }
53
71
54 impl ChangelogRevisionData {
72 impl ChangelogRevisionData {
55 fn new(bytes: Vec<u8>) -> Self {
73 fn new(bytes: Vec<u8>) -> Result<Self, HgError> {
56 Self { bytes }
74 let mut line_iter = bytes.split(|b| b == &b'\n');
75 let manifest_end = line_iter
76 .next()
77 .expect("Empty iterator from split()?")
78 .len();
79 let user_slice = line_iter.next().ok_or_else(|| {
80 HgError::corrupted("Changeset data truncated after manifest line")
81 })?;
82 let user_end = manifest_end + 1 + user_slice.len();
83 let timestamp_slice = line_iter.next().ok_or_else(|| {
84 HgError::corrupted("Changeset data truncated after user line")
85 })?;
86 let timestamp_end = user_end + 1 + timestamp_slice.len();
87 let mut files_end = timestamp_end + 1;
88 loop {
89 let line = line_iter.next().ok_or_else(|| {
90 HgError::corrupted("Changeset data truncated in files list")
91 })?;
92 if line.is_empty() {
93 if files_end == bytes.len() {
94 // The list of files ended with a single newline (there
95 // should be two)
96 return Err(HgError::corrupted(
97 "Changeset data truncated after files list",
98 ));
99 }
100 files_end -= 1;
101 break;
102 }
103 files_end += line.len() + 1;
104 }
105
106 Ok(Self {
107 bytes,
108 manifest_end,
109 user_end,
110 timestamp_end,
111 files_end,
112 })
57 }
113 }
58
114
59 fn null() -> Self {
115 fn null() -> Self {
60 Self::new(
116 Self::new(
61 b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
117 b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
62 )
118 )
119 .unwrap()
63 }
120 }
64
121
65 /// Return an iterator over the lines of the entry.
122 /// Return an iterator over the lines of the entry.
@@ -70,8 +127,128 b' impl ChangelogRevisionData {'
70 /// Return the node id of the `manifest` referenced by this `changelog`
127 /// Return the node id of the `manifest` referenced by this `changelog`
71 /// entry.
128 /// entry.
72 pub fn manifest_node(&self) -> Result<Node, HgError> {
129 pub fn manifest_node(&self) -> Result<Node, HgError> {
73 let manifest_node_hex =
130 let manifest_node_hex = &self.bytes[..self.manifest_end];
74 self.lines().next().expect("Empty iterator from split()?");
75 Node::from_hex_for_repo(manifest_node_hex)
131 Node::from_hex_for_repo(manifest_node_hex)
76 }
132 }
133
134 /// The full user string (usually a name followed by an email enclosed in
135 /// angle brackets)
136 pub fn user(&self) -> &[u8] {
137 &self.bytes[self.manifest_end + 1..self.user_end]
138 }
139
140 /// The full timestamp line (timestamp in seconds, offset in seconds, and
141 /// possibly extras)
142 // TODO: We should expose this in a more useful way
143 pub fn timestamp_line(&self) -> &[u8] {
144 &self.bytes[self.user_end + 1..self.timestamp_end]
145 }
146
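147 /// The files changed in this revision. NOTE(review): when the file list is empty (as in the `null()` data), parsing leaves `files_end == timestamp_end`, so the range `timestamp_end + 1..files_end` below is inverted and slicing panics.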
148 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
149 self.bytes[self.timestamp_end + 1..self.files_end]
150 .split(|b| b == &b'\n')
151 .map(|path| HgPath::new(path))
152 }
153
154 /// The change description.
155 pub fn description(&self) -> &[u8] {
156 &self.bytes[self.files_end + 2..]
157 }
77 }
158 }
159
160 impl Debug for ChangelogRevisionData {
161 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
162 f.debug_struct("ChangelogRevisionData")
163 .field("bytes", &debug_bytes(&self.bytes))
164 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
165 .field(
166 "user",
167 &debug_bytes(
168 &self.bytes[self.manifest_end + 1..self.user_end],
169 ),
170 )
171 .field(
172 "timestamp",
173 &debug_bytes(
174 &self.bytes[self.user_end + 1..self.timestamp_end],
175 ),
176 )
177 .field(
178 "files",
179 &debug_bytes(
180 &self.bytes[self.timestamp_end + 1..self.files_end],
181 ),
182 )
183 .field(
184 "description",
185 &debug_bytes(&self.bytes[self.files_end + 2..]),
186 )
187 .finish()
188 }
189 }
190
191 fn debug_bytes(bytes: &[u8]) -> String {
192 String::from_utf8_lossy(
193 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
194 )
195 .to_string()
196 }
197
198 #[cfg(test)]
199 mod tests {
200 use super::*;
201 use itertools::Itertools;
202 use pretty_assertions::assert_eq;
203
204 #[test]
205 fn test_create_changelogrevisiondata_invalid() {
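206 // Completely empty (NOTE(review): the input below is b"abcd", identical to the next case; b"" was presumably intended)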
207 assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
208 // No newline after manifest
209 assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
210 // No newline after user
211 assert!(ChangelogRevisionData::new(b"abcd\n".to_vec()).is_err());
212 // No newline after timestamp
213 assert!(ChangelogRevisionData::new(b"abcd\n\n0 0".to_vec()).is_err());
214 // Missing newline after files
215 assert!(ChangelogRevisionData::new(
216 b"abcd\n\n0 0\nfile1\nfile2".to_vec()
217 )
218 .is_err(),);
219 // Only one newline after files
220 assert!(ChangelogRevisionData::new(
221 b"abcd\n\n0 0\nfile1\nfile2\n".to_vec()
222 )
223 .is_err(),);
224 }
225
226 #[test]
227 fn test_create_changelogrevisiondata() {
228 let data = ChangelogRevisionData::new(
229 b"0123456789abcdef0123456789abcdef01234567
230 Some One <someone@example.com>
231 0 0
232 file1
233 file2
234
235 some
236 commit
237 message"
238 .to_vec(),
239 )
240 .unwrap();
241 assert_eq!(
242 data.manifest_node().unwrap(),
243 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
244 .unwrap()
245 );
246 assert_eq!(data.user(), b"Some One <someone@example.com>");
247 assert_eq!(data.timestamp_line(), b"0 0");
248 assert_eq!(
249 data.files().collect_vec(),
250 vec![HgPath::new("file1"), HgPath::new("file2")]
251 );
252 assert_eq!(data.description(), b"some\ncommit\nmessage");
253 }
254 }
General Comments 0
You need to be logged in to leave comments. Login now