Show More
@@ -3,6 +3,10 b' use crate::repo::Repo;' | |||||
3 | use crate::revlog::revlog::{Revlog, RevlogError}; |
|
3 | use crate::revlog::revlog::{Revlog, RevlogError}; | |
4 | use crate::revlog::Revision; |
|
4 | use crate::revlog::Revision; | |
5 | use crate::revlog::{Node, NodePrefix}; |
|
5 | use crate::revlog::{Node, NodePrefix}; | |
|
6 | use crate::utils::hg_path::HgPath; | |||
|
7 | use itertools::Itertools; | |||
|
8 | use std::ascii::escape_default; | |||
|
9 | use std::fmt::{Debug, Formatter}; | |||
6 |
|
10 | |||
7 | /// A specialized `Revlog` to work with `changelog` data format. |
|
11 | /// A specialized `Revlog` to work with `changelog` data format. | |
8 | pub struct Changelog { |
|
12 | pub struct Changelog { | |
@@ -35,7 +39,12 b' impl Changelog {' | |||||
35 | if bytes.is_empty() { |
|
39 | if bytes.is_empty() { | |
36 | Ok(ChangelogRevisionData::null()) |
|
40 | Ok(ChangelogRevisionData::null()) | |
37 | } else { |
|
41 | } else { | |
38 |
Ok(ChangelogRevisionData::new(bytes) |
|
42 | Ok(ChangelogRevisionData::new(bytes).map_err(|err| { | |
|
43 | RevlogError::Other(HgError::CorruptedRepository(format!( | |||
|
44 | "Invalid changelog data for revision {}: {:?}", | |||
|
45 | rev, err | |||
|
46 | ))) | |||
|
47 | })?) | |||
39 | } |
|
48 | } | |
40 | } |
|
49 | } | |
41 |
|
50 | |||
@@ -45,21 +54,69 b' impl Changelog {' | |||||
45 | } |
|
54 | } | |
46 |
|
55 | |||
47 | /// `Changelog` entry which knows how to interpret the `changelog` data bytes. |
|
56 | /// `Changelog` entry which knows how to interpret the `changelog` data bytes. | |
48 |
#[derive( |
|
57 | #[derive(PartialEq)] | |
49 | pub struct ChangelogRevisionData { |
|
58 | pub struct ChangelogRevisionData { | |
50 | /// The data bytes of the `changelog` entry. |
|
59 | /// The data bytes of the `changelog` entry. | |
51 | bytes: Vec<u8>, |
|
60 | bytes: Vec<u8>, | |
|
61 | /// The end offset for the hex manifest (not including the newline) | |||
|
62 | manifest_end: usize, | |||
|
63 | /// The end offset for the user+email (not including the newline) | |||
|
64 | user_end: usize, | |||
|
65 | /// The end offset for the timestamp+timezone+extras (not including the | |||
|
66 | /// newline) | |||
|
67 | timestamp_end: usize, | |||
|
68 | /// The end offset for the file list (not including the newline) | |||
|
69 | files_end: usize, | |||
52 | } |
|
70 | } | |
53 |
|
71 | |||
54 | impl ChangelogRevisionData { |
|
72 | impl ChangelogRevisionData { | |
55 | fn new(bytes: Vec<u8>) -> Self { |
|
73 | fn new(bytes: Vec<u8>) -> Result<Self, HgError> { | |
56 | Self { bytes } |
|
74 | let mut line_iter = bytes.split(|b| b == &b'\n'); | |
|
75 | let manifest_end = line_iter | |||
|
76 | .next() | |||
|
77 | .expect("Empty iterator from split()?") | |||
|
78 | .len(); | |||
|
79 | let user_slice = line_iter.next().ok_or_else(|| { | |||
|
80 | HgError::corrupted("Changeset data truncated after manifest line") | |||
|
81 | })?; | |||
|
82 | let user_end = manifest_end + 1 + user_slice.len(); | |||
|
83 | let timestamp_slice = line_iter.next().ok_or_else(|| { | |||
|
84 | HgError::corrupted("Changeset data truncated after user line") | |||
|
85 | })?; | |||
|
86 | let timestamp_end = user_end + 1 + timestamp_slice.len(); | |||
|
87 | let mut files_end = timestamp_end + 1; | |||
|
88 | loop { | |||
|
89 | let line = line_iter.next().ok_or_else(|| { | |||
|
90 | HgError::corrupted("Changeset data truncated in files list") | |||
|
91 | })?; | |||
|
92 | if line.is_empty() { | |||
|
93 | if files_end == bytes.len() { | |||
|
94 | // The list of files ended with a single newline (there | |||
|
95 | // should be two) | |||
|
96 | return Err(HgError::corrupted( | |||
|
97 | "Changeset data truncated after files list", | |||
|
98 | )); | |||
|
99 | } | |||
|
100 | files_end -= 1; | |||
|
101 | break; | |||
|
102 | } | |||
|
103 | files_end += line.len() + 1; | |||
|
104 | } | |||
|
105 | ||||
|
106 | Ok(Self { | |||
|
107 | bytes, | |||
|
108 | manifest_end, | |||
|
109 | user_end, | |||
|
110 | timestamp_end, | |||
|
111 | files_end, | |||
|
112 | }) | |||
57 | } |
|
113 | } | |
58 |
|
114 | |||
59 | fn null() -> Self { |
|
115 | fn null() -> Self { | |
60 | Self::new( |
|
116 | Self::new( | |
61 | b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(), |
|
117 | b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(), | |
62 | ) |
|
118 | ) | |
|
119 | .unwrap() | |||
63 | } |
|
120 | } | |
64 |
|
121 | |||
65 | /// Return an iterator over the lines of the entry. |
|
122 | /// Return an iterator over the lines of the entry. | |
@@ -70,8 +127,128 b' impl ChangelogRevisionData {' | |||||
70 | /// Return the node id of the `manifest` referenced by this `changelog` |
|
127 | /// Return the node id of the `manifest` referenced by this `changelog` | |
71 | /// entry. |
|
128 | /// entry. | |
72 | pub fn manifest_node(&self) -> Result<Node, HgError> { |
|
129 | pub fn manifest_node(&self) -> Result<Node, HgError> { | |
73 | let manifest_node_hex = |
|
130 | let manifest_node_hex = &self.bytes[..self.manifest_end]; | |
74 | self.lines().next().expect("Empty iterator from split()?"); |
|
|||
75 | Node::from_hex_for_repo(manifest_node_hex) |
|
131 | Node::from_hex_for_repo(manifest_node_hex) | |
76 | } |
|
132 | } | |
|
133 | ||||
|
134 | /// The full user string (usually a name followed by an email enclosed in | |||
|
135 | /// angle brackets) | |||
|
136 | pub fn user(&self) -> &[u8] { | |||
|
137 | &self.bytes[self.manifest_end + 1..self.user_end] | |||
|
138 | } | |||
|
139 | ||||
|
140 | /// The full timestamp line (timestamp in seconds, offset in seconds, and | |||
|
141 | /// possibly extras) | |||
|
142 | // TODO: We should expose this in a more useful way | |||
|
143 | pub fn timestamp_line(&self) -> &[u8] { | |||
|
144 | &self.bytes[self.user_end + 1..self.timestamp_end] | |||
|
145 | } | |||
|
146 | ||||
|
147 | /// The files changed in this revision. | |||
|
148 | pub fn files(&self) -> impl Iterator<Item = &HgPath> { | |||
|
149 | self.bytes[self.timestamp_end + 1..self.files_end] | |||
|
150 | .split(|b| b == &b'\n') | |||
|
151 | .map(|path| HgPath::new(path)) | |||
|
152 | } | |||
|
153 | ||||
|
154 | /// The change description. | |||
|
155 | pub fn description(&self) -> &[u8] { | |||
|
156 | &self.bytes[self.files_end + 2..] | |||
|
157 | } | |||
77 | } |
|
158 | } | |
|
159 | ||||
|
160 | impl Debug for ChangelogRevisionData { | |||
|
161 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | |||
|
162 | f.debug_struct("ChangelogRevisionData") | |||
|
163 | .field("bytes", &debug_bytes(&self.bytes)) | |||
|
164 | .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) | |||
|
165 | .field( | |||
|
166 | "user", | |||
|
167 | &debug_bytes( | |||
|
168 | &self.bytes[self.manifest_end + 1..self.user_end], | |||
|
169 | ), | |||
|
170 | ) | |||
|
171 | .field( | |||
|
172 | "timestamp", | |||
|
173 | &debug_bytes( | |||
|
174 | &self.bytes[self.user_end + 1..self.timestamp_end], | |||
|
175 | ), | |||
|
176 | ) | |||
|
177 | .field( | |||
|
178 | "files", | |||
|
179 | &debug_bytes( | |||
|
180 | &self.bytes[self.timestamp_end + 1..self.files_end], | |||
|
181 | ), | |||
|
182 | ) | |||
|
183 | .field( | |||
|
184 | "description", | |||
|
185 | &debug_bytes(&self.bytes[self.files_end + 2..]), | |||
|
186 | ) | |||
|
187 | .finish() | |||
|
188 | } | |||
|
189 | } | |||
|
190 | ||||
|
/// Render arbitrary bytes as a printable ASCII string for debug output.
///
/// Each byte goes through `std::ascii::escape_default`: printable ASCII is
/// kept as-is, `\t`, `\r`, `\n`, quotes and backslashes get a backslash
/// escape, and everything else becomes `\xNN`.
fn debug_bytes(bytes: &[u8]) -> String {
    // `escape_default` only ever yields ASCII bytes, so each one maps
    // directly to a `char`; no intermediate buffer or lossy UTF-8 decoding
    // is needed.
    bytes
        .iter()
        .flat_map(|b| escape_default(*b))
        .map(char::from)
        .collect()
}
|
197 | ||||
|
198 | #[cfg(test)] | |||
|
199 | mod tests { | |||
|
200 | use super::*; | |||
|
201 | use itertools::Itertools; | |||
|
202 | use pretty_assertions::assert_eq; | |||
|
203 | ||||
|
/// Every truncated form of a changelog entry must be rejected by
/// `ChangelogRevisionData::new` rather than accepted or causing a panic.
#[test]
fn test_create_changelogrevisiondata_invalid() {
    // Completely empty
    assert!(ChangelogRevisionData::new(b"".to_vec()).is_err());
    // No newline after manifest
    assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
    // No newline after user
    assert!(ChangelogRevisionData::new(b"abcd\n".to_vec()).is_err());
    // No newline after timestamp
    assert!(ChangelogRevisionData::new(b"abcd\n\n0 0".to_vec()).is_err());
    // Missing newline after files
    assert!(ChangelogRevisionData::new(
        b"abcd\n\n0 0\nfile1\nfile2".to_vec()
    )
    .is_err());
    // Only one newline after files
    assert!(ChangelogRevisionData::new(
        b"abcd\n\n0 0\nfile1\nfile2\n".to_vec()
    )
    .is_err());
}
|
225 | ||||
|
226 | #[test] | |||
|
227 | fn test_create_changelogrevisiondata() { | |||
|
228 | let data = ChangelogRevisionData::new( | |||
|
229 | b"0123456789abcdef0123456789abcdef01234567 | |||
|
230 | Some One <someone@example.com> | |||
|
231 | 0 0 | |||
|
232 | file1 | |||
|
233 | file2 | |||
|
234 | ||||
|
235 | some | |||
|
236 | commit | |||
|
237 | message" | |||
|
238 | .to_vec(), | |||
|
239 | ) | |||
|
240 | .unwrap(); | |||
|
241 | assert_eq!( | |||
|
242 | data.manifest_node().unwrap(), | |||
|
243 | Node::from_hex("0123456789abcdef0123456789abcdef01234567") | |||
|
244 | .unwrap() | |||
|
245 | ); | |||
|
246 | assert_eq!(data.user(), b"Some One <someone@example.com>"); | |||
|
247 | assert_eq!(data.timestamp_line(), b"0 0"); | |||
|
248 | assert_eq!( | |||
|
249 | data.files().collect_vec(), | |||
|
250 | vec![HgPath::new("file1"), HgPath::new("file2")] | |||
|
251 | ); | |||
|
252 | assert_eq!(data.description(), b"some\ncommit\nmessage"); | |||
|
253 | } | |||
|
254 | } |
General Comments 0
You need to be logged in to leave comments.
Login now