##// END OF EJS Templates
changelog: avoid copying changeset data into `ChangelogRevisionData`...
Martin von Zweigbergk -
r49987:07ec9f4f default
parent child Browse files
Show More
@@ -1,269 +1,271 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::revlog::{Revlog, RevlogEntry, RevlogError};
2 use crate::revlog::revlog::{Revlog, RevlogEntry, RevlogError};
3 use crate::revlog::Revision;
3 use crate::revlog::Revision;
4 use crate::revlog::{Node, NodePrefix};
4 use crate::revlog::{Node, NodePrefix};
5 use crate::utils::hg_path::HgPath;
5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use itertools::Itertools;
7 use itertools::Itertools;
8 use std::ascii::escape_default;
8 use std::ascii::escape_default;
9 use std::borrow::Cow;
9 use std::fmt::{Debug, Formatter};
10 use std::fmt::{Debug, Formatter};
10
11
11 /// A specialized `Revlog` to work with `changelog` data format.
12 /// A specialized `Revlog` to work with `changelog` data format.
12 pub struct Changelog {
13 pub struct Changelog {
13 /// The generic `revlog` format.
14 /// The generic `revlog` format.
14 pub(crate) revlog: Revlog,
15 pub(crate) revlog: Revlog,
15 }
16 }
16
17
17 impl Changelog {
18 impl Changelog {
18 /// Open the `changelog` of a repository given by its root.
19 /// Open the `changelog` of a repository given by its root.
19 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
20 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
20 let revlog =
21 let revlog =
21 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
22 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
22 Ok(Self { revlog })
23 Ok(Self { revlog })
23 }
24 }
24
25
25 /// Return the `ChangelogRevisionData` for the given node ID.
26 /// Return the `ChangelogRevisionData` for the given node ID.
26 pub fn data_for_node(
27 pub fn data_for_node(
27 &self,
28 &self,
28 node: NodePrefix,
29 node: NodePrefix,
29 ) -> Result<ChangelogRevisionData, RevlogError> {
30 ) -> Result<ChangelogRevisionData, RevlogError> {
30 let rev = self.revlog.rev_from_node(node)?;
31 let rev = self.revlog.rev_from_node(node)?;
31 self.data_for_rev(rev)
32 self.data_for_rev(rev)
32 }
33 }
33
34
34 /// Return the `RevlogEntry` of the given revision number.
35 /// Return the `RevlogEntry` of the given revision number.
35 pub fn entry_for_rev(
36 pub fn entry_for_rev(
36 &self,
37 &self,
37 rev: Revision,
38 rev: Revision,
38 ) -> Result<RevlogEntry, RevlogError> {
39 ) -> Result<RevlogEntry, RevlogError> {
39 self.revlog.get_entry(rev)
40 self.revlog.get_entry(rev)
40 }
41 }
41
42
42 /// Return the `ChangelogRevisionData` of the given revision number.
43 /// Return the `ChangelogRevisionData` of the given revision number.
43 pub fn data_for_rev(
44 pub fn data_for_rev(
44 &self,
45 &self,
45 rev: Revision,
46 rev: Revision,
46 ) -> Result<ChangelogRevisionData, RevlogError> {
47 ) -> Result<ChangelogRevisionData, RevlogError> {
47 let bytes = self.revlog.get_rev_data(rev)?.into_owned();
48 let bytes = self.revlog.get_rev_data(rev)?;
48 if bytes.is_empty() {
49 if bytes.is_empty() {
49 Ok(ChangelogRevisionData::null())
50 Ok(ChangelogRevisionData::null())
50 } else {
51 } else {
51 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
52 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
52 RevlogError::Other(HgError::CorruptedRepository(format!(
53 RevlogError::Other(HgError::CorruptedRepository(format!(
53 "Invalid changelog data for revision {}: {:?}",
54 "Invalid changelog data for revision {}: {:?}",
54 rev, err
55 rev, err
55 )))
56 )))
56 })?)
57 })?)
57 }
58 }
58 }
59 }
59
60
60 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
61 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
61 self.revlog.node_from_rev(rev)
62 self.revlog.node_from_rev(rev)
62 }
63 }
63
64
64 pub fn rev_from_node(
65 pub fn rev_from_node(
65 &self,
66 &self,
66 node: NodePrefix,
67 node: NodePrefix,
67 ) -> Result<Revision, RevlogError> {
68 ) -> Result<Revision, RevlogError> {
68 self.revlog.rev_from_node(node)
69 self.revlog.rev_from_node(node)
69 }
70 }
70 }
71 }
71
72
72 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
73 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
73 #[derive(PartialEq)]
74 #[derive(PartialEq)]
74 pub struct ChangelogRevisionData {
75 pub struct ChangelogRevisionData<'changelog> {
75 /// The data bytes of the `changelog` entry.
76 /// The data bytes of the `changelog` entry.
76 bytes: Vec<u8>,
77 bytes: Cow<'changelog, [u8]>,
77 /// The end offset for the hex manifest (not including the newline)
78 /// The end offset for the hex manifest (not including the newline)
78 manifest_end: usize,
79 manifest_end: usize,
79 /// The end offset for the user+email (not including the newline)
80 /// The end offset for the user+email (not including the newline)
80 user_end: usize,
81 user_end: usize,
81 /// The end offset for the timestamp+timezone+extras (not including the
82 /// The end offset for the timestamp+timezone+extras (not including the
82 /// newline)
83 /// newline)
83 timestamp_end: usize,
84 timestamp_end: usize,
84 /// The end offset for the file list (not including the newline)
85 /// The end offset for the file list (not including the newline)
85 files_end: usize,
86 files_end: usize,
86 }
87 }
87
88
88 impl ChangelogRevisionData {
89 impl<'changelog> ChangelogRevisionData<'changelog> {
89 fn new(bytes: Vec<u8>) -> Result<Self, HgError> {
90 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
90 let mut line_iter = bytes.split(|b| b == &b'\n');
91 let mut line_iter = bytes.split(|b| b == &b'\n');
91 let manifest_end = line_iter
92 let manifest_end = line_iter
92 .next()
93 .next()
93 .expect("Empty iterator from split()?")
94 .expect("Empty iterator from split()?")
94 .len();
95 .len();
95 let user_slice = line_iter.next().ok_or_else(|| {
96 let user_slice = line_iter.next().ok_or_else(|| {
96 HgError::corrupted("Changeset data truncated after manifest line")
97 HgError::corrupted("Changeset data truncated after manifest line")
97 })?;
98 })?;
98 let user_end = manifest_end + 1 + user_slice.len();
99 let user_end = manifest_end + 1 + user_slice.len();
99 let timestamp_slice = line_iter.next().ok_or_else(|| {
100 let timestamp_slice = line_iter.next().ok_or_else(|| {
100 HgError::corrupted("Changeset data truncated after user line")
101 HgError::corrupted("Changeset data truncated after user line")
101 })?;
102 })?;
102 let timestamp_end = user_end + 1 + timestamp_slice.len();
103 let timestamp_end = user_end + 1 + timestamp_slice.len();
103 let mut files_end = timestamp_end + 1;
104 let mut files_end = timestamp_end + 1;
104 loop {
105 loop {
105 let line = line_iter.next().ok_or_else(|| {
106 let line = line_iter.next().ok_or_else(|| {
106 HgError::corrupted("Changeset data truncated in files list")
107 HgError::corrupted("Changeset data truncated in files list")
107 })?;
108 })?;
108 if line.is_empty() {
109 if line.is_empty() {
109 if files_end == bytes.len() {
110 if files_end == bytes.len() {
110 // The list of files ended with a single newline (there
111 // The list of files ended with a single newline (there
111 // should be two)
112 // should be two)
112 return Err(HgError::corrupted(
113 return Err(HgError::corrupted(
113 "Changeset data truncated after files list",
114 "Changeset data truncated after files list",
114 ));
115 ));
115 }
116 }
116 files_end -= 1;
117 files_end -= 1;
117 break;
118 break;
118 }
119 }
119 files_end += line.len() + 1;
120 files_end += line.len() + 1;
120 }
121 }
121
122
122 Ok(Self {
123 Ok(Self {
123 bytes,
124 bytes,
124 manifest_end,
125 manifest_end,
125 user_end,
126 user_end,
126 timestamp_end,
127 timestamp_end,
127 files_end,
128 files_end,
128 })
129 })
129 }
130 }
130
131
131 fn null() -> Self {
132 fn null() -> Self {
132 Self::new(
133 Self::new(Cow::Borrowed(
133 b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
134 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
134 )
135 ))
135 .unwrap()
136 .unwrap()
136 }
137 }
137
138
138 /// Return an iterator over the lines of the entry.
139 /// Return an iterator over the lines of the entry.
139 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
140 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
140 self.bytes.split(|b| b == &b'\n')
141 self.bytes.split(|b| b == &b'\n')
141 }
142 }
142
143
143 /// Return the node id of the `manifest` referenced by this `changelog`
144 /// Return the node id of the `manifest` referenced by this `changelog`
144 /// entry.
145 /// entry.
145 pub fn manifest_node(&self) -> Result<Node, HgError> {
146 pub fn manifest_node(&self) -> Result<Node, HgError> {
146 let manifest_node_hex = &self.bytes[..self.manifest_end];
147 let manifest_node_hex = &self.bytes[..self.manifest_end];
147 Node::from_hex_for_repo(manifest_node_hex)
148 Node::from_hex_for_repo(manifest_node_hex)
148 }
149 }
149
150
150 /// The full user string (usually a name followed by an email enclosed in
151 /// The full user string (usually a name followed by an email enclosed in
151 /// angle brackets)
152 /// angle brackets)
152 pub fn user(&self) -> &[u8] {
153 pub fn user(&self) -> &[u8] {
153 &self.bytes[self.manifest_end + 1..self.user_end]
154 &self.bytes[self.manifest_end + 1..self.user_end]
154 }
155 }
155
156
156 /// The full timestamp line (timestamp in seconds, offset in seconds, and
157 /// The full timestamp line (timestamp in seconds, offset in seconds, and
157 /// possibly extras)
158 /// possibly extras)
158 // TODO: We should expose this in a more useful way
159 // TODO: We should expose this in a more useful way
159 pub fn timestamp_line(&self) -> &[u8] {
160 pub fn timestamp_line(&self) -> &[u8] {
160 &self.bytes[self.user_end + 1..self.timestamp_end]
161 &self.bytes[self.user_end + 1..self.timestamp_end]
161 }
162 }
162
163
163 /// The files changed in this revision.
164 /// The files changed in this revision.
164 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
165 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
165 self.bytes[self.timestamp_end + 1..self.files_end]
166 self.bytes[self.timestamp_end + 1..self.files_end]
166 .split(|b| b == &b'\n')
167 .split(|b| b == &b'\n')
167 .map(|path| HgPath::new(path))
168 .map(|path| HgPath::new(path))
168 }
169 }
169
170
170 /// The change description.
171 /// The change description.
171 pub fn description(&self) -> &[u8] {
172 pub fn description(&self) -> &[u8] {
172 &self.bytes[self.files_end + 2..]
173 &self.bytes[self.files_end + 2..]
173 }
174 }
174 }
175 }
175
176
176 impl Debug for ChangelogRevisionData {
177 impl Debug for ChangelogRevisionData<'_> {
177 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
178 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
178 f.debug_struct("ChangelogRevisionData")
179 f.debug_struct("ChangelogRevisionData")
179 .field("bytes", &debug_bytes(&self.bytes))
180 .field("bytes", &debug_bytes(&self.bytes))
180 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
181 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
181 .field(
182 .field(
182 "user",
183 "user",
183 &debug_bytes(
184 &debug_bytes(
184 &self.bytes[self.manifest_end + 1..self.user_end],
185 &self.bytes[self.manifest_end + 1..self.user_end],
185 ),
186 ),
186 )
187 )
187 .field(
188 .field(
188 "timestamp",
189 "timestamp",
189 &debug_bytes(
190 &debug_bytes(
190 &self.bytes[self.user_end + 1..self.timestamp_end],
191 &self.bytes[self.user_end + 1..self.timestamp_end],
191 ),
192 ),
192 )
193 )
193 .field(
194 .field(
194 "files",
195 "files",
195 &debug_bytes(
196 &debug_bytes(
196 &self.bytes[self.timestamp_end + 1..self.files_end],
197 &self.bytes[self.timestamp_end + 1..self.files_end],
197 ),
198 ),
198 )
199 )
199 .field(
200 .field(
200 "description",
201 "description",
201 &debug_bytes(&self.bytes[self.files_end + 2..]),
202 &debug_bytes(&self.bytes[self.files_end + 2..]),
202 )
203 )
203 .finish()
204 .finish()
204 }
205 }
205 }
206 }
206
207
207 fn debug_bytes(bytes: &[u8]) -> String {
208 fn debug_bytes(bytes: &[u8]) -> String {
208 String::from_utf8_lossy(
209 String::from_utf8_lossy(
209 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
210 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
210 )
211 )
211 .to_string()
212 .to_string()
212 }
213 }
213
214
214 #[cfg(test)]
215 #[cfg(test)]
215 mod tests {
216 mod tests {
216 use super::*;
217 use super::*;
217 use pretty_assertions::assert_eq;
218 use pretty_assertions::assert_eq;
218
219
219 #[test]
220 #[test]
220 fn test_create_changelogrevisiondata_invalid() {
221 fn test_create_changelogrevisiondata_invalid() {
221 // Completely empty
222 // Completely empty
222 assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
223 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
223 // No newline after manifest
224 // No newline after manifest
224 assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
225 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
225 // No newline after user
226 // No newline after user
226 assert!(ChangelogRevisionData::new(b"abcd\n".to_vec()).is_err());
227 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
227 // No newline after timestamp
228 // No newline after timestamp
228 assert!(ChangelogRevisionData::new(b"abcd\n\n0 0".to_vec()).is_err());
229 assert!(
230 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
231 );
229 // Missing newline after files
232 // Missing newline after files
230 assert!(ChangelogRevisionData::new(
233 assert!(ChangelogRevisionData::new(Cow::Borrowed(
231 b"abcd\n\n0 0\nfile1\nfile2".to_vec()
234 b"abcd\n\n0 0\nfile1\nfile2"
232 )
235 ))
233 .is_err(),);
236 .is_err(),);
234 // Only one newline after files
237 // Only one newline after files
235 assert!(ChangelogRevisionData::new(
238 assert!(ChangelogRevisionData::new(Cow::Borrowed(
236 b"abcd\n\n0 0\nfile1\nfile2\n".to_vec()
239 b"abcd\n\n0 0\nfile1\nfile2\n"
237 )
240 ))
238 .is_err(),);
241 .is_err(),);
239 }
242 }
240
243
241 #[test]
244 #[test]
242 fn test_create_changelogrevisiondata() {
245 fn test_create_changelogrevisiondata() {
243 let data = ChangelogRevisionData::new(
246 let data = ChangelogRevisionData::new(Cow::Borrowed(
244 b"0123456789abcdef0123456789abcdef01234567
247 b"0123456789abcdef0123456789abcdef01234567
245 Some One <someone@example.com>
248 Some One <someone@example.com>
246 0 0
249 0 0
247 file1
250 file1
248 file2
251 file2
249
252
250 some
253 some
251 commit
254 commit
252 message"
255 message",
253 .to_vec(),
256 ))
254 )
255 .unwrap();
257 .unwrap();
256 assert_eq!(
258 assert_eq!(
257 data.manifest_node().unwrap(),
259 data.manifest_node().unwrap(),
258 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
260 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
259 .unwrap()
261 .unwrap()
260 );
262 );
261 assert_eq!(data.user(), b"Some One <someone@example.com>");
263 assert_eq!(data.user(), b"Some One <someone@example.com>");
262 assert_eq!(data.timestamp_line(), b"0 0");
264 assert_eq!(data.timestamp_line(), b"0 0");
263 assert_eq!(
265 assert_eq!(
264 data.files().collect_vec(),
266 data.files().collect_vec(),
265 vec![HgPath::new("file1"), HgPath::new("file2")]
267 vec![HgPath::new("file1"), HgPath::new("file2")]
266 );
268 );
267 assert_eq!(data.description(), b"some\ncommit\nmessage");
269 assert_eq!(data.description(), b"some\ncommit\nmessage");
268 }
270 }
269 }
271 }
General Comments 0
You need to be logged in to leave comments. Login now