##// END OF EJS Templates
rust-changelog: accessing the index...
Georges Racinet -
r52618:bbe59cc5 default
parent child Browse files
Show More
@@ -1,744 +1,749 b''
1 use std::ascii::escape_default;
1 use std::ascii::escape_default;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::collections::BTreeMap;
3 use std::collections::BTreeMap;
4 use std::fmt::{Debug, Formatter};
4 use std::fmt::{Debug, Formatter};
5 use std::{iter, str};
5 use std::{iter, str};
6
6
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
8 use itertools::{Either, Itertools};
8 use itertools::{Either, Itertools};
9
9
10 use crate::errors::HgError;
10 use crate::errors::HgError;
11 use crate::revlog::Index;
11 use crate::revlog::Revision;
12 use crate::revlog::Revision;
12 use crate::revlog::{Node, NodePrefix};
13 use crate::revlog::{Node, NodePrefix};
13 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
15 use crate::vfs::Vfs;
16 use crate::vfs::Vfs;
16 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
17 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
17
18
18 /// A specialized `Revlog` to work with changelog data format.
19 /// A specialized `Revlog` to work with changelog data format.
19 pub struct Changelog {
20 pub struct Changelog {
20 /// The generic `revlog` format.
21 /// The generic `revlog` format.
21 pub(crate) revlog: Revlog,
22 pub(crate) revlog: Revlog,
22 }
23 }
23
24
24 impl Changelog {
25 impl Changelog {
25 /// Open the `changelog` of a repository given by its root.
26 /// Open the `changelog` of a repository given by its root.
26 pub fn open(
27 pub fn open(
27 store_vfs: &Vfs,
28 store_vfs: &Vfs,
28 options: RevlogOpenOptions,
29 options: RevlogOpenOptions,
29 ) -> Result<Self, HgError> {
30 ) -> Result<Self, HgError> {
30 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
31 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
31 Ok(Self { revlog })
32 Ok(Self { revlog })
32 }
33 }
33
34
34 /// Return the `ChangelogRevisionData` for the given node ID.
35 /// Return the `ChangelogRevisionData` for the given node ID.
35 pub fn data_for_node(
36 pub fn data_for_node(
36 &self,
37 &self,
37 node: NodePrefix,
38 node: NodePrefix,
38 ) -> Result<ChangelogRevisionData, RevlogError> {
39 ) -> Result<ChangelogRevisionData, RevlogError> {
39 let rev = self.revlog.rev_from_node(node)?;
40 let rev = self.revlog.rev_from_node(node)?;
40 self.entry_for_checked_rev(rev)?.data()
41 self.entry_for_checked_rev(rev)?.data()
41 }
42 }
42
43
43 /// Return the [`ChangelogEntry`] for the given revision number.
44 /// Return the [`ChangelogEntry`] for the given revision number.
44 pub fn entry_for_rev(
45 pub fn entry_for_rev(
45 &self,
46 &self,
46 rev: UncheckedRevision,
47 rev: UncheckedRevision,
47 ) -> Result<ChangelogEntry, RevlogError> {
48 ) -> Result<ChangelogEntry, RevlogError> {
48 let revlog_entry = self.revlog.get_entry(rev)?;
49 let revlog_entry = self.revlog.get_entry(rev)?;
49 Ok(ChangelogEntry { revlog_entry })
50 Ok(ChangelogEntry { revlog_entry })
50 }
51 }
51
52
52 /// Same as [`Self::entry_for_rev`] for checked revisions.
53 /// Same as [`Self::entry_for_rev`] for checked revisions.
53 fn entry_for_checked_rev(
54 fn entry_for_checked_rev(
54 &self,
55 &self,
55 rev: Revision,
56 rev: Revision,
56 ) -> Result<ChangelogEntry, RevlogError> {
57 ) -> Result<ChangelogEntry, RevlogError> {
57 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
58 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
58 Ok(ChangelogEntry { revlog_entry })
59 Ok(ChangelogEntry { revlog_entry })
59 }
60 }
60
61
61 /// Return the [`ChangelogRevisionData`] for the given revision number.
62 /// Return the [`ChangelogRevisionData`] for the given revision number.
62 ///
63 ///
63 /// This is a useful shortcut in case the caller does not need the
64 /// This is a useful shortcut in case the caller does not need the
64 /// generic revlog information (parents, hashes etc). Otherwise
65 /// generic revlog information (parents, hashes etc). Otherwise
65 /// consider taking a [`ChangelogEntry`] with
66 /// consider taking a [`ChangelogEntry`] with
66 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
67 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
67 pub fn data_for_rev(
68 pub fn data_for_rev(
68 &self,
69 &self,
69 rev: UncheckedRevision,
70 rev: UncheckedRevision,
70 ) -> Result<ChangelogRevisionData, RevlogError> {
71 ) -> Result<ChangelogRevisionData, RevlogError> {
71 self.entry_for_rev(rev)?.data()
72 self.entry_for_rev(rev)?.data()
72 }
73 }
73
74
74 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
75 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
75 self.revlog.node_from_rev(rev)
76 self.revlog.node_from_rev(rev)
76 }
77 }
77
78
78 pub fn rev_from_node(
79 pub fn rev_from_node(
79 &self,
80 &self,
80 node: NodePrefix,
81 node: NodePrefix,
81 ) -> Result<Revision, RevlogError> {
82 ) -> Result<Revision, RevlogError> {
82 self.revlog.rev_from_node(node)
83 self.revlog.rev_from_node(node)
83 }
84 }
85
86 pub fn get_index(&self) -> &Index {
87 &self.revlog.index
88 }
84 }
89 }
85
90
86 impl Graph for Changelog {
91 impl Graph for Changelog {
87 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
92 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
88 self.revlog.parents(rev)
93 self.revlog.parents(rev)
89 }
94 }
90 }
95 }
91
96
92 /// A specialized `RevlogEntry` for `changelog` data format
97 /// A specialized `RevlogEntry` for `changelog` data format
93 ///
98 ///
94 /// This is a `RevlogEntry` with the added semantics that the associated
99 /// This is a `RevlogEntry` with the added semantics that the associated
95 /// data should meet the requirements for `changelog`, materialized by
100 /// data should meet the requirements for `changelog`, materialized by
96 /// the fact that `data()` constructs a `ChangelogRevisionData`.
101 /// the fact that `data()` constructs a `ChangelogRevisionData`.
97 /// In case that promise would be broken, the `data` method returns an error.
102 /// In case that promise would be broken, the `data` method returns an error.
98 #[derive(Clone)]
103 #[derive(Clone)]
99 pub struct ChangelogEntry<'changelog> {
104 pub struct ChangelogEntry<'changelog> {
100 /// Same data, as a generic `RevlogEntry`.
105 /// Same data, as a generic `RevlogEntry`.
101 pub(crate) revlog_entry: RevlogEntry<'changelog>,
106 pub(crate) revlog_entry: RevlogEntry<'changelog>,
102 }
107 }
103
108
104 impl<'changelog> ChangelogEntry<'changelog> {
109 impl<'changelog> ChangelogEntry<'changelog> {
105 pub fn data<'a>(
110 pub fn data<'a>(
106 &'a self,
111 &'a self,
107 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
112 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
108 let bytes = self.revlog_entry.data()?;
113 let bytes = self.revlog_entry.data()?;
109 if bytes.is_empty() {
114 if bytes.is_empty() {
110 Ok(ChangelogRevisionData::null())
115 Ok(ChangelogRevisionData::null())
111 } else {
116 } else {
112 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
117 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
113 RevlogError::Other(HgError::CorruptedRepository(format!(
118 RevlogError::Other(HgError::CorruptedRepository(format!(
114 "Invalid changelog data for revision {}: {:?}",
119 "Invalid changelog data for revision {}: {:?}",
115 self.revlog_entry.revision(),
120 self.revlog_entry.revision(),
116 err
121 err
117 )))
122 )))
118 })?)
123 })?)
119 }
124 }
120 }
125 }
121
126
122 /// Obtain a reference to the underlying `RevlogEntry`.
127 /// Obtain a reference to the underlying `RevlogEntry`.
123 ///
128 ///
124 /// This allows the caller to access the information that is common
129 /// This allows the caller to access the information that is common
125 /// to all revlog entries: revision number, node id, parent revisions etc.
130 /// to all revlog entries: revision number, node id, parent revisions etc.
126 pub fn as_revlog_entry(&self) -> &RevlogEntry {
131 pub fn as_revlog_entry(&self) -> &RevlogEntry {
127 &self.revlog_entry
132 &self.revlog_entry
128 }
133 }
129
134
130 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
135 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 Ok(self
136 Ok(self
132 .revlog_entry
137 .revlog_entry
133 .p1_entry()?
138 .p1_entry()?
134 .map(|revlog_entry| Self { revlog_entry }))
139 .map(|revlog_entry| Self { revlog_entry }))
135 }
140 }
136
141
137 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
142 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
138 Ok(self
143 Ok(self
139 .revlog_entry
144 .revlog_entry
140 .p2_entry()?
145 .p2_entry()?
141 .map(|revlog_entry| Self { revlog_entry }))
146 .map(|revlog_entry| Self { revlog_entry }))
142 }
147 }
143 }
148 }
144
149
/// The data bytes of a `changelog` entry, together with the offsets
/// needed to slice out each of its parts.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// The raw data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// Offset at which the hex manifest ends (newline excluded)
    manifest_end: usize,
    /// Offset at which the user+email ends (newline excluded)
    user_end: usize,
    /// Offset at which the timestamp+timezone+extras line ends (newline
    /// excluded)
    timestamp_end: usize,
    /// Offset at which the file list ends (newline excluded)
    files_end: usize,
}
160
165
161 impl<'changelog> ChangelogRevisionData<'changelog> {
166 impl<'changelog> ChangelogRevisionData<'changelog> {
162 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
167 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
163 let mut line_iter = bytes.split(|b| b == &b'\n');
168 let mut line_iter = bytes.split(|b| b == &b'\n');
164 let manifest_end = line_iter
169 let manifest_end = line_iter
165 .next()
170 .next()
166 .expect("Empty iterator from split()?")
171 .expect("Empty iterator from split()?")
167 .len();
172 .len();
168 let user_slice = line_iter.next().ok_or_else(|| {
173 let user_slice = line_iter.next().ok_or_else(|| {
169 HgError::corrupted("Changeset data truncated after manifest line")
174 HgError::corrupted("Changeset data truncated after manifest line")
170 })?;
175 })?;
171 let user_end = manifest_end + 1 + user_slice.len();
176 let user_end = manifest_end + 1 + user_slice.len();
172 let timestamp_slice = line_iter.next().ok_or_else(|| {
177 let timestamp_slice = line_iter.next().ok_or_else(|| {
173 HgError::corrupted("Changeset data truncated after user line")
178 HgError::corrupted("Changeset data truncated after user line")
174 })?;
179 })?;
175 let timestamp_end = user_end + 1 + timestamp_slice.len();
180 let timestamp_end = user_end + 1 + timestamp_slice.len();
176 let mut files_end = timestamp_end + 1;
181 let mut files_end = timestamp_end + 1;
177 loop {
182 loop {
178 let line = line_iter.next().ok_or_else(|| {
183 let line = line_iter.next().ok_or_else(|| {
179 HgError::corrupted("Changeset data truncated in files list")
184 HgError::corrupted("Changeset data truncated in files list")
180 })?;
185 })?;
181 if line.is_empty() {
186 if line.is_empty() {
182 if files_end == bytes.len() {
187 if files_end == bytes.len() {
183 // The list of files ended with a single newline (there
188 // The list of files ended with a single newline (there
184 // should be two)
189 // should be two)
185 return Err(HgError::corrupted(
190 return Err(HgError::corrupted(
186 "Changeset data truncated after files list",
191 "Changeset data truncated after files list",
187 ));
192 ));
188 }
193 }
189 files_end -= 1;
194 files_end -= 1;
190 break;
195 break;
191 }
196 }
192 files_end += line.len() + 1;
197 files_end += line.len() + 1;
193 }
198 }
194
199
195 Ok(Self {
200 Ok(Self {
196 bytes,
201 bytes,
197 manifest_end,
202 manifest_end,
198 user_end,
203 user_end,
199 timestamp_end,
204 timestamp_end,
200 files_end,
205 files_end,
201 })
206 })
202 }
207 }
203
208
204 fn null() -> Self {
209 fn null() -> Self {
205 Self::new(Cow::Borrowed(
210 Self::new(Cow::Borrowed(
206 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
211 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
207 ))
212 ))
208 .unwrap()
213 .unwrap()
209 }
214 }
210
215
211 /// Return an iterator over the lines of the entry.
216 /// Return an iterator over the lines of the entry.
212 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
217 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
213 self.bytes.split(|b| b == &b'\n')
218 self.bytes.split(|b| b == &b'\n')
214 }
219 }
215
220
216 /// Return the node id of the `manifest` referenced by this `changelog`
221 /// Return the node id of the `manifest` referenced by this `changelog`
217 /// entry.
222 /// entry.
218 pub fn manifest_node(&self) -> Result<Node, HgError> {
223 pub fn manifest_node(&self) -> Result<Node, HgError> {
219 let manifest_node_hex = &self.bytes[..self.manifest_end];
224 let manifest_node_hex = &self.bytes[..self.manifest_end];
220 Node::from_hex_for_repo(manifest_node_hex)
225 Node::from_hex_for_repo(manifest_node_hex)
221 }
226 }
222
227
223 /// The full user string (usually a name followed by an email enclosed in
228 /// The full user string (usually a name followed by an email enclosed in
224 /// angle brackets)
229 /// angle brackets)
225 pub fn user(&self) -> &[u8] {
230 pub fn user(&self) -> &[u8] {
226 &self.bytes[self.manifest_end + 1..self.user_end]
231 &self.bytes[self.manifest_end + 1..self.user_end]
227 }
232 }
228
233
229 /// The full timestamp line (timestamp in seconds, offset in seconds, and
234 /// The full timestamp line (timestamp in seconds, offset in seconds, and
230 /// possibly extras)
235 /// possibly extras)
231 // TODO: We should expose this in a more useful way
236 // TODO: We should expose this in a more useful way
232 pub fn timestamp_line(&self) -> &[u8] {
237 pub fn timestamp_line(&self) -> &[u8] {
233 &self.bytes[self.user_end + 1..self.timestamp_end]
238 &self.bytes[self.user_end + 1..self.timestamp_end]
234 }
239 }
235
240
236 /// Parsed timestamp.
241 /// Parsed timestamp.
237 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
242 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
238 parse_timestamp(self.timestamp_line())
243 parse_timestamp(self.timestamp_line())
239 }
244 }
240
245
241 /// Optional commit extras.
246 /// Optional commit extras.
242 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
247 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
243 parse_timestamp_line_extra(self.timestamp_line())
248 parse_timestamp_line_extra(self.timestamp_line())
244 }
249 }
245
250
246 /// The files changed in this revision.
251 /// The files changed in this revision.
247 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
252 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
248 if self.timestamp_end == self.files_end {
253 if self.timestamp_end == self.files_end {
249 Either::Left(iter::empty())
254 Either::Left(iter::empty())
250 } else {
255 } else {
251 Either::Right(
256 Either::Right(
252 self.bytes[self.timestamp_end + 1..self.files_end]
257 self.bytes[self.timestamp_end + 1..self.files_end]
253 .split(|b| b == &b'\n')
258 .split(|b| b == &b'\n')
254 .map(HgPath::new),
259 .map(HgPath::new),
255 )
260 )
256 }
261 }
257 }
262 }
258
263
259 /// The change description.
264 /// The change description.
260 pub fn description(&self) -> &[u8] {
265 pub fn description(&self) -> &[u8] {
261 &self.bytes[self.files_end + 2..]
266 &self.bytes[self.files_end + 2..]
262 }
267 }
263 }
268 }
264
269
265 impl Debug for ChangelogRevisionData<'_> {
270 impl Debug for ChangelogRevisionData<'_> {
266 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
271 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
267 f.debug_struct("ChangelogRevisionData")
272 f.debug_struct("ChangelogRevisionData")
268 .field("bytes", &debug_bytes(&self.bytes))
273 .field("bytes", &debug_bytes(&self.bytes))
269 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
274 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
270 .field(
275 .field(
271 "user",
276 "user",
272 &debug_bytes(
277 &debug_bytes(
273 &self.bytes[self.manifest_end + 1..self.user_end],
278 &self.bytes[self.manifest_end + 1..self.user_end],
274 ),
279 ),
275 )
280 )
276 .field(
281 .field(
277 "timestamp",
282 "timestamp",
278 &debug_bytes(
283 &debug_bytes(
279 &self.bytes[self.user_end + 1..self.timestamp_end],
284 &self.bytes[self.user_end + 1..self.timestamp_end],
280 ),
285 ),
281 )
286 )
282 .field(
287 .field(
283 "files",
288 "files",
284 &debug_bytes(
289 &debug_bytes(
285 &self.bytes[self.timestamp_end + 1..self.files_end],
290 &self.bytes[self.timestamp_end + 1..self.files_end],
286 ),
291 ),
287 )
292 )
288 .field(
293 .field(
289 "description",
294 "description",
290 &debug_bytes(&self.bytes[self.files_end + 2..]),
295 &debug_bytes(&self.bytes[self.files_end + 2..]),
291 )
296 )
292 .finish()
297 .finish()
293 }
298 }
294 }
299 }
295
300
/// Render `bytes` as a `String` in which every byte has gone through
/// [`std::ascii::escape_default`].
fn debug_bytes(bytes: &[u8]) -> String {
    let escaped: Vec<u8> =
        bytes.iter().copied().flat_map(escape_default).collect();
    String::from_utf8_lossy(&escaped).into_owned()
}
302
307
303 /// Parse the raw bytes of the timestamp line from a changelog entry.
308 /// Parse the raw bytes of the timestamp line from a changelog entry.
304 ///
309 ///
305 /// According to the documentation in `hg help dates` and the
310 /// According to the documentation in `hg help dates` and the
306 /// implementation in `changelog.py`, the format of the timestamp line
311 /// implementation in `changelog.py`, the format of the timestamp line
307 /// is `time tz extra\n` where:
312 /// is `time tz extra\n` where:
308 ///
313 ///
309 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
314 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
310 /// as seconds since the UNIX epoch.
315 /// as seconds since the UNIX epoch.
311 ///
316 ///
312 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
317 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
313 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
318 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
314 /// opposite of the sign in ISO 8601 timestamps).
319 /// opposite of the sign in ISO 8601 timestamps).
315 ///
320 ///
316 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
321 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
317 /// and value in each pair separated by an ASCII colon. Keys are limited to
322 /// and value in each pair separated by an ASCII colon. Keys are limited to
318 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
323 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
319 /// arbitrary bytes.
324 /// arbitrary bytes.
320 fn parse_timestamp(
325 fn parse_timestamp(
321 timestamp_line: &[u8],
326 timestamp_line: &[u8],
322 ) -> Result<DateTime<FixedOffset>, HgError> {
327 ) -> Result<DateTime<FixedOffset>, HgError> {
323 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
328 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
324
329
325 let timestamp_bytes = parts
330 let timestamp_bytes = parts
326 .next()
331 .next()
327 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
332 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
328 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
333 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
329 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
334 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
330 })?;
335 })?;
331 let timestamp_utc = timestamp_str
336 let timestamp_utc = timestamp_str
332 .parse()
337 .parse()
333 .map_err(|e| {
338 .map_err(|e| {
334 HgError::corrupted(format!("failed to parse timestamp: {e}"))
339 HgError::corrupted(format!("failed to parse timestamp: {e}"))
335 })
340 })
336 .and_then(|secs| {
341 .and_then(|secs| {
337 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
342 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
338 HgError::corrupted(format!(
343 HgError::corrupted(format!(
339 "integer timestamp out of valid range: {secs}"
344 "integer timestamp out of valid range: {secs}"
340 ))
345 ))
341 })
346 })
342 })
347 })
343 // Attempt to parse the timestamp as a float if we can't parse
348 // Attempt to parse the timestamp as a float if we can't parse
344 // it as an int. It doesn't seem like float timestamps are actually
349 // it as an int. It doesn't seem like float timestamps are actually
345 // used in practice, but the Python code supports them.
350 // used in practice, but the Python code supports them.
346 .or_else(|_| parse_float_timestamp(timestamp_str))?;
351 .or_else(|_| parse_float_timestamp(timestamp_str))?;
347
352
348 let timezone_bytes = parts
353 let timezone_bytes = parts
349 .next()
354 .next()
350 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
355 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
351 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
356 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
352 .map_err(|e| {
357 .map_err(|e| {
353 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
358 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
354 })?
359 })?
355 .parse()
360 .parse()
356 .map_err(|e| {
361 .map_err(|e| {
357 HgError::corrupted(format!("timezone is not an integer: {e}"))
362 HgError::corrupted(format!("timezone is not an integer: {e}"))
358 })?;
363 })?;
359 let timezone = FixedOffset::west_opt(timezone_secs)
364 let timezone = FixedOffset::west_opt(timezone_secs)
360 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
365 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
361
366
362 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
367 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
363 }
368 }
364
369
365 /// Attempt to parse the given string as floating-point timestamp, and
370 /// Attempt to parse the given string as floating-point timestamp, and
366 /// convert the result into a `chrono::NaiveDateTime`.
371 /// convert the result into a `chrono::NaiveDateTime`.
367 fn parse_float_timestamp(
372 fn parse_float_timestamp(
368 timestamp_str: &str,
373 timestamp_str: &str,
369 ) -> Result<NaiveDateTime, HgError> {
374 ) -> Result<NaiveDateTime, HgError> {
370 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
375 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
371 HgError::corrupted(format!("failed to parse timestamp: {e}"))
376 HgError::corrupted(format!("failed to parse timestamp: {e}"))
372 })?;
377 })?;
373
378
374 // To construct a `NaiveDateTime` we'll need to convert the float
379 // To construct a `NaiveDateTime` we'll need to convert the float
375 // into signed integer seconds and unsigned integer nanoseconds.
380 // into signed integer seconds and unsigned integer nanoseconds.
376 let mut secs = timestamp.trunc() as i64;
381 let mut secs = timestamp.trunc() as i64;
377 let mut subsecs = timestamp.fract();
382 let mut subsecs = timestamp.fract();
378
383
379 // If the timestamp is negative, we need to express the fractional
384 // If the timestamp is negative, we need to express the fractional
380 // component as positive nanoseconds since the previous second.
385 // component as positive nanoseconds since the previous second.
381 if timestamp < 0.0 {
386 if timestamp < 0.0 {
382 secs -= 1;
387 secs -= 1;
383 subsecs += 1.0;
388 subsecs += 1.0;
384 }
389 }
385
390
386 // This cast should be safe because the fractional component is
391 // This cast should be safe because the fractional component is
387 // by definition less than 1.0, so this value should not exceed
392 // by definition less than 1.0, so this value should not exceed
388 // 1 billion, which is representable as an f64 without loss of
393 // 1 billion, which is representable as an f64 without loss of
389 // precision and should fit into a u32 without overflowing.
394 // precision and should fit into a u32 without overflowing.
390 //
395 //
391 // (Any loss of precision in the fractional component will have
396 // (Any loss of precision in the fractional component will have
392 // already happened at the time of initial parsing; in general,
397 // already happened at the time of initial parsing; in general,
393 // f64s are insufficiently precise to provide nanosecond-level
398 // f64s are insufficiently precise to provide nanosecond-level
394 // precision with present-day timestamps.)
399 // precision with present-day timestamps.)
395 let nsecs = (subsecs * 1_000_000_000.0) as u32;
400 let nsecs = (subsecs * 1_000_000_000.0) as u32;
396
401
397 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
402 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
398 HgError::corrupted(format!(
403 HgError::corrupted(format!(
399 "float timestamp out of valid range: {timestamp}"
404 "float timestamp out of valid range: {timestamp}"
400 ))
405 ))
401 })
406 })
402 }
407 }
403
408
404 /// Decode changeset extra fields.
409 /// Decode changeset extra fields.
405 ///
410 ///
406 /// Extras are null-delimited key-value pairs where the key consists of ASCII
411 /// Extras are null-delimited key-value pairs where the key consists of ASCII
407 /// alphanumeric characters plus hyphens and underscores, and the value can
412 /// alphanumeric characters plus hyphens and underscores, and the value can
408 /// contain arbitrary bytes.
413 /// contain arbitrary bytes.
409 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
414 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
410 extra
415 extra
411 .split(|c| *c == b'\0')
416 .split(|c| *c == b'\0')
412 .map(|pair| {
417 .map(|pair| {
413 let pair = unescape_extra(pair);
418 let pair = unescape_extra(pair);
414 let mut iter = pair.splitn(2, |c| *c == b':');
419 let mut iter = pair.splitn(2, |c| *c == b':');
415
420
416 let key_bytes =
421 let key_bytes =
417 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
422 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
418 HgError::corrupted("empty key in changeset extras")
423 HgError::corrupted("empty key in changeset extras")
419 })?;
424 })?;
420
425
421 let key = str::from_utf8(key_bytes)
426 let key = str::from_utf8(key_bytes)
422 .ok()
427 .ok()
423 .filter(|k| {
428 .filter(|k| {
424 k.chars().all(|c| {
429 k.chars().all(|c| {
425 c.is_ascii_alphanumeric() || c == '_' || c == '-'
430 c.is_ascii_alphanumeric() || c == '_' || c == '-'
426 })
431 })
427 })
432 })
428 .ok_or_else(|| {
433 .ok_or_else(|| {
429 let key = String::from_utf8_lossy(key_bytes);
434 let key = String::from_utf8_lossy(key_bytes);
430 HgError::corrupted(format!(
435 HgError::corrupted(format!(
431 "invalid key in changeset extras: {key}",
436 "invalid key in changeset extras: {key}",
432 ))
437 ))
433 })?
438 })?
434 .to_string();
439 .to_string();
435
440
436 let value = iter.next().map(Into::into).ok_or_else(|| {
441 let value = iter.next().map(Into::into).ok_or_else(|| {
437 HgError::corrupted(format!(
442 HgError::corrupted(format!(
438 "missing value for changeset extra: {key}"
443 "missing value for changeset extra: {key}"
439 ))
444 ))
440 })?;
445 })?;
441
446
442 Ok((key, value))
447 Ok((key, value))
443 })
448 })
444 .collect()
449 .collect()
445 }
450 }
446
451
447 /// Parse the extra fields from a changeset's timestamp line.
452 /// Parse the extra fields from a changeset's timestamp line.
448 fn parse_timestamp_line_extra(
453 fn parse_timestamp_line_extra(
449 timestamp_line: &[u8],
454 timestamp_line: &[u8],
450 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
455 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
451 Ok(timestamp_line
456 Ok(timestamp_line
452 .splitn(3, |c| *c == b' ')
457 .splitn(3, |c| *c == b' ')
453 .nth(2)
458 .nth(2)
454 .map(decode_extra)
459 .map(decode_extra)
455 .transpose()?
460 .transpose()?
456 .unwrap_or_default())
461 .unwrap_or_default())
457 }
462 }
458
463
/// Decode Mercurial's escaping for changelog extras.
///
/// The `_string_escape` function in `changelog.py` only escapes 4 characters
/// (null, backslash, newline, and carriage return) so we only decode those.
///
/// The Python code also includes a workaround for decoding escaped nuls
/// that are followed by an ASCII octal digit, since Python's built-in
/// `string_escape` codec will interpret that as an escaped octal byte value.
/// That workaround is omitted here since we don't support decoding octal.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        rest = match rest {
            [] => break,
            [b'\\', b'0', tail @ ..] => {
                decoded.push(b'\0');
                tail
            }
            [b'\\', b'\\', tail @ ..] => {
                decoded.push(b'\\');
                tail
            }
            [b'\\', b'n', tail @ ..] => {
                decoded.push(b'\n');
                tail
            }
            [b'\\', b'r', tail @ ..] => {
                decoded.push(b'\r');
                tail
            }
            // Should never occur in theory: backslashes in the original
            // input are escaped with another backslash, so no escape
            // sequence other than the 4 above is expected. Such a
            // sequence is copied through unchanged.
            [b'\\', other, tail @ ..] => {
                decoded.extend_from_slice(&[b'\\', *other]);
                tail
            }
            // Any ordinary byte, as well as a lone trailing backslash.
            [byte, tail @ ..] => {
                decoded.push(*byte);
                tail
            }
        };
    }

    decoded
}
494
499
495 #[cfg(test)]
500 #[cfg(test)]
496 mod tests {
501 mod tests {
497 use super::*;
502 use super::*;
498 use crate::vfs::Vfs;
503 use crate::vfs::Vfs;
499 use crate::NULL_REVISION;
504 use crate::NULL_REVISION;
500 use pretty_assertions::assert_eq;
505 use pretty_assertions::assert_eq;
501
506
/// Truncated or malformed changeset data must be rejected by
/// `ChangelogRevisionData::new`.
#[test]
fn test_create_changelogrevisiondata_invalid() {
    // Completely empty
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
    // No newline after manifest
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
    // No newline after user
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
    // No newline after timestamp
    assert!(
        ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
    );
    // Missing newline after files
    assert!(ChangelogRevisionData::new(Cow::Borrowed(
        b"abcd\n\n0 0\nfile1\nfile2"
    ))
    .is_err());
    // Only one newline after files
    assert!(ChangelogRevisionData::new(Cow::Borrowed(
        b"abcd\n\n0 0\nfile1\nfile2\n"
    ))
    .is_err());
}
525
530
/// A well-formed changeset is split into manifest node, user, timestamp
/// line, file list and description.
#[test]
fn test_create_changelogrevisiondata() {
    // Same bytes as a multi-line literal; the line continuations only
    // strip the source indentation, not the escaped newlines.
    let raw: &[u8] = b"0123456789abcdef0123456789abcdef01234567\n\
        Some One <someone@example.com>\n\
        0 0\n\
        file1\n\
        file2\n\
        \n\
        some\n\
        commit\n\
        message";
    let data = ChangelogRevisionData::new(Cow::Borrowed(raw)).unwrap();

    let expected_manifest =
        Node::from_hex("0123456789abcdef0123456789abcdef01234567").unwrap();
    assert_eq!(data.manifest_node().unwrap(), expected_manifest);

    assert_eq!(data.user(), b"Some One <someone@example.com>");
    assert_eq!(data.timestamp_line(), b"0 0");

    let expected_files = vec![HgPath::new("file1"), HgPath::new("file2")];
    assert_eq!(data.files().collect_vec(), expected_files);

    assert_eq!(data.description(), b"some\ncommit\nmessage");
}
553
558
/// The null revision resolves to the null changeset data, both directly
/// and through the intermediate entry object.
#[test]
fn test_data_from_rev_null() -> Result<(), RevlogError> {
    // An empty index file is all this case needs.
    let tmp_dir = tempfile::tempdir().unwrap();
    std::fs::write(tmp_dir.path().join("foo.i"), b"").unwrap();
    let vfs = Vfs {
        base: tmp_dir.path(),
    };
    let changelog = Changelog {
        revlog: Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
            .unwrap(),
    };

    // Direct lookup of the revision data.
    let direct = changelog.data_for_rev(NULL_REVISION.into())?;
    assert_eq!(direct, ChangelogRevisionData::null());

    // Lookup that goes through the entry first.
    let entry = changelog.entry_for_rev(NULL_REVISION.into())?;
    assert_eq!(entry.data()?, ChangelogRevisionData::null());

    Ok(())
}
576
581
/// The null changeset lists no files.
#[test]
fn test_empty_files_list() {
    let null_data = ChangelogRevisionData::null();
    assert!(null_data.files().next().is_none());
}
584
589
/// The four supported escapes (`\0`, `\\`, `\n`, `\r`) decode to their
/// raw byte values.
#[test]
fn test_unescape_basic() {
    let decoded = unescape_extra(br"AAA\0BBB\\CCC\nDDD\rEEE");
    let wanted: &[u8] = b"AAA\0BBB\\CCC\nDDD\rEEE";
    assert_eq!(wanted, decoded.as_slice());
}
593
598
/// Any escape sequence other than the four supported ones is passed
/// through unaltered.
#[test]
fn test_unescape_unsupported_sequence() {
    // Inclusive range: byte 0xFF must be exercised as well.
    for c in 0u8..=255 {
        // These four are the supported escapes, covered by another test.
        if matches!(c, b'0' | b'\\' | b'n' | b'r') {
            continue;
        }
        let sequence = [b'\\', c];
        let unescaped = unescape_extra(&sequence);
        assert_eq!(&sequence[..], &unescaped[..]);
    }
}
608
613
/// A backslash at the very end of the input is preserved.
#[test]
fn test_unescape_trailing_backslash() {
    let input: &[u8] = b"hi\\";
    let decoded = unescape_extra(input);
    assert_eq!(input, decoded.as_slice());
}
616
621
/// An escaped NUL followed by octal digits decodes to a NUL byte plus
/// the digits themselves, not to an octal byte value.
#[test]
fn test_unescape_nul_followed_by_octal() {
    let decoded = unescape_extra(br"\012");
    let wanted = [b'\0', b'1', b'2'];
    assert_eq!(&wanted[..], decoded.as_slice());
}
625
630
/// Floating-point timestamps are rendered as the expected date-times.
#[test]
fn test_parse_float_timestamp() {
    // (raw input, expected `to_string()` rendering)
    let table = [
        // Zero is the UNIX epoch.
        ("0.0", "1970-01-01 00:00:00"),
        // Negative zero behaves exactly like positive zero.
        ("-0.0", "1970-01-01 00:00:00"),
        // Without a fractional part the value behaves like an integer
        // (as long as it fits within f64 precision).
        ("1115154970.0", "2005-05-03 21:16:10"),
        // Arbitrary fractional values lose some precision...
        ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
        // ...but values exactly representable as f64 parse losslessly.
        ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
        // A negative fractional value counts back from the epoch.
        ("-1.333", "1969-12-31 23:59:58.667"),
    ];

    for &(raw, rendered) in table.iter() {
        let parsed = parse_float_timestamp(raw).unwrap();
        assert_eq!(parsed.to_string(), rendered);
    }
}
650
655
/// Test helper: escape NUL, backslash, newline and carriage return bytes
/// as two-byte backslash sequences; every other byte is copied through.
fn escape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut escaped = Vec::with_capacity(bytes.len());

    for &byte in bytes {
        match byte {
            b'\0' => escaped.extend_from_slice(b"\\0"),
            b'\\' => escaped.extend_from_slice(b"\\\\"),
            b'\n' => escaped.extend_from_slice(b"\\n"),
            b'\r' => escaped.extend_from_slice(b"\\r"),
            plain => escaped.push(plain),
        }
    }

    escaped
}
669
674
670 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
675 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
671 where
676 where
672 K: AsRef<[u8]>,
677 K: AsRef<[u8]>,
673 V: AsRef<[u8]>,
678 V: AsRef<[u8]>,
674 {
679 {
675 let extras = pairs.into_iter().map(|(k, v)| {
680 let extras = pairs.into_iter().map(|(k, v)| {
676 escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
681 escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
677 });
682 });
678 // Use fully-qualified syntax to avoid a future naming conflict with
683 // Use fully-qualified syntax to avoid a future naming conflict with
679 // the standard library: https://github.com/rust-lang/rust/issues/79524
684 // the standard library: https://github.com/rust-lang/rust/issues/79524
680 Itertools::intersperse(extras, b"\0".to_vec()).concat()
685 Itertools::intersperse(extras, b"\0".to_vec()).concat()
681 }
686 }
682
687
/// Encoding then decoding a set of extras yields the original map.
#[test]
fn test_decode_extra() {
    let mut extra: BTreeMap<String, Vec<u8>> = BTreeMap::new();
    extra.insert(String::from("branch"), b"default".to_vec());
    extra.insert(String::from("key-with-hyphens"), b"value1".to_vec());
    extra.insert(String::from("key_with_underscores"), b"value2".to_vec());
    extra.insert(String::from("empty-value"), b"".to_vec());
    // Every possible byte value, to exercise the escaping.
    extra.insert(String::from("binary-value"), (0u8..=255).collect());

    let round_tripped = decode_extra(&encode_extra(&extra)).unwrap();

    assert_eq!(extra, round_tripped);
}
700
705
/// Malformed extras must fail to decode.
#[test]
fn test_corrupt_extra() {
    // (corrupt input, short description used in the failure message)
    let corrupt_inputs = [
        (&b""[..], "empty input"),
        (&b"\0"[..], "unexpected null byte"),
        (&b":empty-key"[..], "empty key"),
        (&b"\0leading-null:"[..], "leading null"),
        (&b"trailing-null:\0"[..], "trailing null"),
        (&b"missing-value"[..], "missing value"),
        (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
        (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
    ];

    corrupt_inputs.iter().for_each(|&(raw, reason)| {
        let outcome = decode_extra(raw);
        assert!(
            outcome.is_err(),
            "corrupt extra should have failed to parse: {}",
            reason
        );
    });
}
722
727
/// A full timestamp line (seconds, offset, encoded extras) yields both
/// the expected date-time and the original extras.
#[test]
fn test_parse_timestamp_line() {
    let extra: BTreeMap<String, Vec<u8>> = BTreeMap::from([
        (String::from("branch"), b"default".to_vec()),
        (String::from("key-with-hyphens"), b"value1".to_vec()),
        (String::from("key_with_underscores"), b"value2".to_vec()),
        (String::from("empty-value"), b"".to_vec()),
        (String::from("binary-value"), (0u8..=255).collect()),
    ]);

    // "<seconds> <offset> <extras>"
    let encoded_extra = encode_extra(&extra);
    let line: Vec<u8> =
        [&b"1115154970 28800 "[..], &encoded_extra[..]].concat();

    let timestamp = parse_timestamp(&line).unwrap();
    assert_eq!(timestamp.to_rfc3339().as_str(), "2005-05-03T13:16:10-08:00");

    let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
    assert_eq!(extra, parsed_extra);
}
744 }
749 }
General Comments 0
You need to be logged in to leave comments. Login now