##// END OF EJS Templates
rust: fix the deprecation warning in NaiveDateTime::from_timestamp...
Arseniy Alekseyev -
r52810:1d698282 default
parent child Browse files
Show More
@@ -1,764 +1,767
1 use std::ascii::escape_default;
1 use std::ascii::escape_default;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::collections::BTreeMap;
3 use std::collections::BTreeMap;
4 use std::fmt::{Debug, Formatter};
4 use std::fmt::{Debug, Formatter};
5 use std::{iter, str};
5 use std::{iter, str};
6
6
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
7 use chrono::{DateTime, FixedOffset, Utc};
8 use itertools::{Either, Itertools};
8 use itertools::{Either, Itertools};
9
9
10 use crate::errors::HgError;
10 use crate::errors::HgError;
11 use crate::revlog::Index;
11 use crate::revlog::Index;
12 use crate::revlog::Revision;
12 use crate::revlog::Revision;
13 use crate::revlog::{Node, NodePrefix};
13 use crate::revlog::{Node, NodePrefix};
14 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::vfs::VfsImpl;
16 use crate::vfs::VfsImpl;
17 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
17 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
18
18
19 /// A specialized `Revlog` to work with changelog data format.
19 /// A specialized `Revlog` to work with changelog data format.
20 pub struct Changelog {
20 pub struct Changelog {
21 /// The generic `revlog` format.
21 /// The generic `revlog` format.
22 pub(crate) revlog: Revlog,
22 pub(crate) revlog: Revlog,
23 }
23 }
24
24
25 impl Changelog {
25 impl Changelog {
26 /// Open the `changelog` of a repository given by its root.
26 /// Open the `changelog` of a repository given by its root.
27 pub fn open(
27 pub fn open(
28 store_vfs: &VfsImpl,
28 store_vfs: &VfsImpl,
29 options: RevlogOpenOptions,
29 options: RevlogOpenOptions,
30 ) -> Result<Self, HgError> {
30 ) -> Result<Self, HgError> {
31 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
31 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
32 Ok(Self { revlog })
32 Ok(Self { revlog })
33 }
33 }
34
34
35 /// Return the `ChangelogRevisionData` for the given node ID.
35 /// Return the `ChangelogRevisionData` for the given node ID.
36 pub fn data_for_node(
36 pub fn data_for_node(
37 &self,
37 &self,
38 node: NodePrefix,
38 node: NodePrefix,
39 ) -> Result<ChangelogRevisionData, RevlogError> {
39 ) -> Result<ChangelogRevisionData, RevlogError> {
40 let rev = self.revlog.rev_from_node(node)?;
40 let rev = self.revlog.rev_from_node(node)?;
41 self.entry_for_checked_rev(rev)?.data()
41 self.entry_for_checked_rev(rev)?.data()
42 }
42 }
43
43
44 /// Return the [`ChangelogEntry`] for the given revision number.
44 /// Return the [`ChangelogEntry`] for the given revision number.
45 pub fn entry_for_rev(
45 pub fn entry_for_rev(
46 &self,
46 &self,
47 rev: UncheckedRevision,
47 rev: UncheckedRevision,
48 ) -> Result<ChangelogEntry, RevlogError> {
48 ) -> Result<ChangelogEntry, RevlogError> {
49 let revlog_entry = self.revlog.get_entry(rev)?;
49 let revlog_entry = self.revlog.get_entry(rev)?;
50 Ok(ChangelogEntry { revlog_entry })
50 Ok(ChangelogEntry { revlog_entry })
51 }
51 }
52
52
53 /// Same as [`Self::entry_for_rev`] for checked revisions.
53 /// Same as [`Self::entry_for_rev`] for checked revisions.
54 fn entry_for_checked_rev(
54 fn entry_for_checked_rev(
55 &self,
55 &self,
56 rev: Revision,
56 rev: Revision,
57 ) -> Result<ChangelogEntry, RevlogError> {
57 ) -> Result<ChangelogEntry, RevlogError> {
58 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
58 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
59 Ok(ChangelogEntry { revlog_entry })
59 Ok(ChangelogEntry { revlog_entry })
60 }
60 }
61
61
62 /// Return the [`ChangelogRevisionData`] for the given revision number.
62 /// Return the [`ChangelogRevisionData`] for the given revision number.
63 ///
63 ///
64 /// This is a useful shortcut in case the caller does not need the
64 /// This is a useful shortcut in case the caller does not need the
65 /// generic revlog information (parents, hashes etc). Otherwise
65 /// generic revlog information (parents, hashes etc). Otherwise
66 /// consider taking a [`ChangelogEntry`] with
66 /// consider taking a [`ChangelogEntry`] with
67 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
67 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
68 pub fn data_for_rev(
68 pub fn data_for_rev(
69 &self,
69 &self,
70 rev: UncheckedRevision,
70 rev: UncheckedRevision,
71 ) -> Result<ChangelogRevisionData, RevlogError> {
71 ) -> Result<ChangelogRevisionData, RevlogError> {
72 self.entry_for_rev(rev)?.data()
72 self.entry_for_rev(rev)?.data()
73 }
73 }
74
74
75 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
75 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
76 self.revlog.node_from_rev(rev)
76 self.revlog.node_from_rev(rev)
77 }
77 }
78
78
79 pub fn rev_from_node(
79 pub fn rev_from_node(
80 &self,
80 &self,
81 node: NodePrefix,
81 node: NodePrefix,
82 ) -> Result<Revision, RevlogError> {
82 ) -> Result<Revision, RevlogError> {
83 self.revlog.rev_from_node(node)
83 self.revlog.rev_from_node(node)
84 }
84 }
85
85
86 pub fn get_index(&self) -> &Index {
86 pub fn get_index(&self) -> &Index {
87 &self.revlog.index
87 &self.revlog.index
88 }
88 }
89 }
89 }
90
90
91 impl Graph for Changelog {
91 impl Graph for Changelog {
92 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
92 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
93 self.revlog.parents(rev)
93 self.revlog.parents(rev)
94 }
94 }
95 }
95 }
96
96
97 /// A specialized `RevlogEntry` for `changelog` data format
97 /// A specialized `RevlogEntry` for `changelog` data format
98 ///
98 ///
99 /// This is a `RevlogEntry` with the added semantics that the associated
99 /// This is a `RevlogEntry` with the added semantics that the associated
100 /// data should meet the requirements for `changelog`, materialized by
100 /// data should meet the requirements for `changelog`, materialized by
101 /// the fact that `data()` constructs a `ChangelogRevisionData`.
101 /// the fact that `data()` constructs a `ChangelogRevisionData`.
102 /// In case that promise would be broken, the `data` method returns an error.
102 /// In case that promise would be broken, the `data` method returns an error.
103 #[derive(Clone)]
103 #[derive(Clone)]
104 pub struct ChangelogEntry<'changelog> {
104 pub struct ChangelogEntry<'changelog> {
105 /// Same data, as a generic `RevlogEntry`.
105 /// Same data, as a generic `RevlogEntry`.
106 pub(crate) revlog_entry: RevlogEntry<'changelog>,
106 pub(crate) revlog_entry: RevlogEntry<'changelog>,
107 }
107 }
108
108
109 impl<'changelog> ChangelogEntry<'changelog> {
109 impl<'changelog> ChangelogEntry<'changelog> {
110 pub fn data<'a>(
110 pub fn data<'a>(
111 &'a self,
111 &'a self,
112 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
112 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
113 let bytes = self.revlog_entry.data()?;
113 let bytes = self.revlog_entry.data()?;
114 if bytes.is_empty() {
114 if bytes.is_empty() {
115 Ok(ChangelogRevisionData::null())
115 Ok(ChangelogRevisionData::null())
116 } else {
116 } else {
117 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
117 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
118 RevlogError::Other(HgError::CorruptedRepository(format!(
118 RevlogError::Other(HgError::CorruptedRepository(format!(
119 "Invalid changelog data for revision {}: {:?}",
119 "Invalid changelog data for revision {}: {:?}",
120 self.revlog_entry.revision(),
120 self.revlog_entry.revision(),
121 err
121 err
122 )))
122 )))
123 })?)
123 })?)
124 }
124 }
125 }
125 }
126
126
127 /// Obtain a reference to the underlying `RevlogEntry`.
127 /// Obtain a reference to the underlying `RevlogEntry`.
128 ///
128 ///
129 /// This allows the caller to access the information that is common
129 /// This allows the caller to access the information that is common
130 /// to all revlog entries: revision number, node id, parent revisions etc.
130 /// to all revlog entries: revision number, node id, parent revisions etc.
131 pub fn as_revlog_entry(&self) -> &RevlogEntry {
131 pub fn as_revlog_entry(&self) -> &RevlogEntry {
132 &self.revlog_entry
132 &self.revlog_entry
133 }
133 }
134
134
135 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
135 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
136 Ok(self
136 Ok(self
137 .revlog_entry
137 .revlog_entry
138 .p1_entry()?
138 .p1_entry()?
139 .map(|revlog_entry| Self { revlog_entry }))
139 .map(|revlog_entry| Self { revlog_entry }))
140 }
140 }
141
141
142 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
142 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
143 Ok(self
143 Ok(self
144 .revlog_entry
144 .revlog_entry
145 .p2_entry()?
145 .p2_entry()?
146 .map(|revlog_entry| Self { revlog_entry }))
146 .map(|revlog_entry| Self { revlog_entry }))
147 }
147 }
148 }
148 }
149
149
150 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
150 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
151 #[derive(PartialEq)]
151 #[derive(PartialEq)]
152 pub struct ChangelogRevisionData<'changelog> {
152 pub struct ChangelogRevisionData<'changelog> {
153 /// The data bytes of the `changelog` entry.
153 /// The data bytes of the `changelog` entry.
154 bytes: Cow<'changelog, [u8]>,
154 bytes: Cow<'changelog, [u8]>,
155 /// The end offset for the hex manifest (not including the newline)
155 /// The end offset for the hex manifest (not including the newline)
156 manifest_end: usize,
156 manifest_end: usize,
157 /// The end offset for the user+email (not including the newline)
157 /// The end offset for the user+email (not including the newline)
158 user_end: usize,
158 user_end: usize,
159 /// The end offset for the timestamp+timezone+extras (not including the
159 /// The end offset for the timestamp+timezone+extras (not including the
160 /// newline)
160 /// newline)
161 timestamp_end: usize,
161 timestamp_end: usize,
162 /// The end offset for the file list (not including the newline)
162 /// The end offset for the file list (not including the newline)
163 files_end: usize,
163 files_end: usize,
164 }
164 }
165
165
166 impl<'changelog> ChangelogRevisionData<'changelog> {
166 impl<'changelog> ChangelogRevisionData<'changelog> {
167 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
167 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
168 let mut line_iter = bytes.split(|b| b == &b'\n');
168 let mut line_iter = bytes.split(|b| b == &b'\n');
169 let manifest_end = line_iter
169 let manifest_end = line_iter
170 .next()
170 .next()
171 .expect("Empty iterator from split()?")
171 .expect("Empty iterator from split()?")
172 .len();
172 .len();
173 let user_slice = line_iter.next().ok_or_else(|| {
173 let user_slice = line_iter.next().ok_or_else(|| {
174 HgError::corrupted("Changeset data truncated after manifest line")
174 HgError::corrupted("Changeset data truncated after manifest line")
175 })?;
175 })?;
176 let user_end = manifest_end + 1 + user_slice.len();
176 let user_end = manifest_end + 1 + user_slice.len();
177 let timestamp_slice = line_iter.next().ok_or_else(|| {
177 let timestamp_slice = line_iter.next().ok_or_else(|| {
178 HgError::corrupted("Changeset data truncated after user line")
178 HgError::corrupted("Changeset data truncated after user line")
179 })?;
179 })?;
180 let timestamp_end = user_end + 1 + timestamp_slice.len();
180 let timestamp_end = user_end + 1 + timestamp_slice.len();
181 let mut files_end = timestamp_end + 1;
181 let mut files_end = timestamp_end + 1;
182 loop {
182 loop {
183 let line = line_iter.next().ok_or_else(|| {
183 let line = line_iter.next().ok_or_else(|| {
184 HgError::corrupted("Changeset data truncated in files list")
184 HgError::corrupted("Changeset data truncated in files list")
185 })?;
185 })?;
186 if line.is_empty() {
186 if line.is_empty() {
187 if files_end == bytes.len() {
187 if files_end == bytes.len() {
188 // The list of files ended with a single newline (there
188 // The list of files ended with a single newline (there
189 // should be two)
189 // should be two)
190 return Err(HgError::corrupted(
190 return Err(HgError::corrupted(
191 "Changeset data truncated after files list",
191 "Changeset data truncated after files list",
192 ));
192 ));
193 }
193 }
194 files_end -= 1;
194 files_end -= 1;
195 break;
195 break;
196 }
196 }
197 files_end += line.len() + 1;
197 files_end += line.len() + 1;
198 }
198 }
199
199
200 Ok(Self {
200 Ok(Self {
201 bytes,
201 bytes,
202 manifest_end,
202 manifest_end,
203 user_end,
203 user_end,
204 timestamp_end,
204 timestamp_end,
205 files_end,
205 files_end,
206 })
206 })
207 }
207 }
208
208
209 fn null() -> Self {
209 fn null() -> Self {
210 Self::new(Cow::Borrowed(
210 Self::new(Cow::Borrowed(
211 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
211 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
212 ))
212 ))
213 .unwrap()
213 .unwrap()
214 }
214 }
215
215
216 /// Return an iterator over the lines of the entry.
216 /// Return an iterator over the lines of the entry.
217 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
217 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
218 self.bytes.split(|b| b == &b'\n')
218 self.bytes.split(|b| b == &b'\n')
219 }
219 }
220
220
221 /// Return the node id of the `manifest` referenced by this `changelog`
221 /// Return the node id of the `manifest` referenced by this `changelog`
222 /// entry.
222 /// entry.
223 pub fn manifest_node(&self) -> Result<Node, HgError> {
223 pub fn manifest_node(&self) -> Result<Node, HgError> {
224 let manifest_node_hex = &self.bytes[..self.manifest_end];
224 let manifest_node_hex = &self.bytes[..self.manifest_end];
225 Node::from_hex_for_repo(manifest_node_hex)
225 Node::from_hex_for_repo(manifest_node_hex)
226 }
226 }
227
227
228 /// The full user string (usually a name followed by an email enclosed in
228 /// The full user string (usually a name followed by an email enclosed in
229 /// angle brackets)
229 /// angle brackets)
230 pub fn user(&self) -> &[u8] {
230 pub fn user(&self) -> &[u8] {
231 &self.bytes[self.manifest_end + 1..self.user_end]
231 &self.bytes[self.manifest_end + 1..self.user_end]
232 }
232 }
233
233
234 /// The full timestamp line (timestamp in seconds, offset in seconds, and
234 /// The full timestamp line (timestamp in seconds, offset in seconds, and
235 /// possibly extras)
235 /// possibly extras)
236 // TODO: We should expose this in a more useful way
236 // TODO: We should expose this in a more useful way
237 pub fn timestamp_line(&self) -> &[u8] {
237 pub fn timestamp_line(&self) -> &[u8] {
238 &self.bytes[self.user_end + 1..self.timestamp_end]
238 &self.bytes[self.user_end + 1..self.timestamp_end]
239 }
239 }
240
240
241 /// Parsed timestamp.
241 /// Parsed timestamp.
242 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
242 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
243 parse_timestamp(self.timestamp_line())
243 parse_timestamp(self.timestamp_line())
244 }
244 }
245
245
246 /// Optional commit extras.
246 /// Optional commit extras.
247 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
247 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
248 parse_timestamp_line_extra(self.timestamp_line())
248 parse_timestamp_line_extra(self.timestamp_line())
249 }
249 }
250
250
251 /// The files changed in this revision.
251 /// The files changed in this revision.
252 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
252 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
253 if self.timestamp_end == self.files_end {
253 if self.timestamp_end == self.files_end {
254 Either::Left(iter::empty())
254 Either::Left(iter::empty())
255 } else {
255 } else {
256 Either::Right(
256 Either::Right(
257 self.bytes[self.timestamp_end + 1..self.files_end]
257 self.bytes[self.timestamp_end + 1..self.files_end]
258 .split(|b| b == &b'\n')
258 .split(|b| b == &b'\n')
259 .map(HgPath::new),
259 .map(HgPath::new),
260 )
260 )
261 }
261 }
262 }
262 }
263
263
264 /// The change description.
264 /// The change description.
265 pub fn description(&self) -> &[u8] {
265 pub fn description(&self) -> &[u8] {
266 &self.bytes[self.files_end + 2..]
266 &self.bytes[self.files_end + 2..]
267 }
267 }
268 }
268 }
269
269
270 impl Debug for ChangelogRevisionData<'_> {
270 impl Debug for ChangelogRevisionData<'_> {
271 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
271 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
272 f.debug_struct("ChangelogRevisionData")
272 f.debug_struct("ChangelogRevisionData")
273 .field("bytes", &debug_bytes(&self.bytes))
273 .field("bytes", &debug_bytes(&self.bytes))
274 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
274 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
275 .field(
275 .field(
276 "user",
276 "user",
277 &debug_bytes(
277 &debug_bytes(
278 &self.bytes[self.manifest_end + 1..self.user_end],
278 &self.bytes[self.manifest_end + 1..self.user_end],
279 ),
279 ),
280 )
280 )
281 .field(
281 .field(
282 "timestamp",
282 "timestamp",
283 &debug_bytes(
283 &debug_bytes(
284 &self.bytes[self.user_end + 1..self.timestamp_end],
284 &self.bytes[self.user_end + 1..self.timestamp_end],
285 ),
285 ),
286 )
286 )
287 .field(
287 .field(
288 "files",
288 "files",
289 &debug_bytes(
289 &debug_bytes(
290 &self.bytes[self.timestamp_end + 1..self.files_end],
290 &self.bytes[self.timestamp_end + 1..self.files_end],
291 ),
291 ),
292 )
292 )
293 .field(
293 .field(
294 "description",
294 "description",
295 &debug_bytes(&self.bytes[self.files_end + 2..]),
295 &debug_bytes(&self.bytes[self.files_end + 2..]),
296 )
296 )
297 .finish()
297 .finish()
298 }
298 }
299 }
299 }
300
300
301 fn debug_bytes(bytes: &[u8]) -> String {
301 fn debug_bytes(bytes: &[u8]) -> String {
302 String::from_utf8_lossy(
302 String::from_utf8_lossy(
303 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
303 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
304 )
304 )
305 .to_string()
305 .to_string()
306 }
306 }
307
307
308 /// Parse the raw bytes of the timestamp line from a changelog entry.
308 /// Parse the raw bytes of the timestamp line from a changelog entry.
309 ///
309 ///
310 /// According to the documentation in `hg help dates` and the
310 /// According to the documentation in `hg help dates` and the
311 /// implementation in `changelog.py`, the format of the timestamp line
311 /// implementation in `changelog.py`, the format of the timestamp line
312 /// is `time tz extra\n` where:
312 /// is `time tz extra\n` where:
313 ///
313 ///
314 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
314 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
315 /// as seconds since the UNIX epoch.
315 /// as seconds since the UNIX epoch.
316 ///
316 ///
317 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
317 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
318 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
318 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
319 /// opposite of the sign in ISO 8601 timestamps).
319 /// opposite of the sign in ISO 8601 timestamps).
320 ///
320 ///
321 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
321 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
322 /// and value in each pair separated by an ASCII colon. Keys are limited to
322 /// and value in each pair separated by an ASCII colon. Keys are limited to
323 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
323 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
324 /// arbitrary bytes.
324 /// arbitrary bytes.
325 fn parse_timestamp(
325 fn parse_timestamp(
326 timestamp_line: &[u8],
326 timestamp_line: &[u8],
327 ) -> Result<DateTime<FixedOffset>, HgError> {
327 ) -> Result<DateTime<FixedOffset>, HgError> {
328 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
328 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
329
329
330 let timestamp_bytes = parts
330 let timestamp_bytes = parts
331 .next()
331 .next()
332 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
332 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
333 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
333 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
334 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
334 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
335 })?;
335 })?;
336 let timestamp_utc = timestamp_str
336 let timestamp_utc = timestamp_str
337 .parse()
337 .parse()
338 .map_err(|e| {
338 .map_err(|e| {
339 HgError::corrupted(format!("failed to parse timestamp: {e}"))
339 HgError::corrupted(format!("failed to parse timestamp: {e}"))
340 })
340 })
341 .and_then(|secs| {
341 .and_then(|secs| {
342 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
342 DateTime::from_timestamp(secs, 0).ok_or_else(|| {
343 HgError::corrupted(format!(
343 HgError::corrupted(format!(
344 "integer timestamp out of valid range: {secs}"
344 "integer timestamp out of valid range: {secs}"
345 ))
345 ))
346 })
346 })
347 })
347 })
348 // Attempt to parse the timestamp as a float if we can't parse
348 // Attempt to parse the timestamp as a float if we can't parse
349 // it as an int. It doesn't seem like float timestamps are actually
349 // it as an int. It doesn't seem like float timestamps are actually
350 // used in practice, but the Python code supports them.
350 // used in practice, but the Python code supports them.
351 .or_else(|_| parse_float_timestamp(timestamp_str))?;
351 .or_else(|_| parse_float_timestamp(timestamp_str))?;
352
352
353 let timezone_bytes = parts
353 let timezone_bytes = parts
354 .next()
354 .next()
355 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
355 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
356 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
356 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
357 .map_err(|e| {
357 .map_err(|e| {
358 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
358 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
359 })?
359 })?
360 .parse()
360 .parse()
361 .map_err(|e| {
361 .map_err(|e| {
362 HgError::corrupted(format!("timezone is not an integer: {e}"))
362 HgError::corrupted(format!("timezone is not an integer: {e}"))
363 })?;
363 })?;
364 let timezone = FixedOffset::west_opt(timezone_secs)
364 let timezone = FixedOffset::west_opt(timezone_secs)
365 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
365 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
366
366
367 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
367 Ok(DateTime::from_naive_utc_and_offset(
368 timestamp_utc.naive_utc(),
369 timezone,
370 ))
368 }
371 }
369
372
370 /// Attempt to parse the given string as floating-point timestamp, and
373 /// Attempt to parse the given string as floating-point timestamp, and
371 /// convert the result into a `chrono::NaiveDateTime`.
374 /// convert the result into a `chrono::DateTime<Utc>`.
372 fn parse_float_timestamp(
375 fn parse_float_timestamp(
373 timestamp_str: &str,
376 timestamp_str: &str,
374 ) -> Result<NaiveDateTime, HgError> {
377 ) -> Result<DateTime<Utc>, HgError> {
375 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
378 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
376 HgError::corrupted(format!("failed to parse timestamp: {e}"))
379 HgError::corrupted(format!("failed to parse timestamp: {e}"))
377 })?;
380 })?;
378
381
379 // To construct a `NaiveDateTime` we'll need to convert the float
382 // To construct a `DateTime<Utc>` we'll need to convert the float
380 // into signed integer seconds and unsigned integer nanoseconds.
383 // into signed integer seconds and unsigned integer nanoseconds.
381 let mut secs = timestamp.trunc() as i64;
384 let mut secs = timestamp.trunc() as i64;
382 let mut subsecs = timestamp.fract();
385 let mut subsecs = timestamp.fract();
383
386
384 // If the timestamp is negative, we need to express the fractional
387 // If the timestamp is negative, we need to express the fractional
385 // component as positive nanoseconds since the previous second.
388 // component as positive nanoseconds since the previous second.
386 if timestamp < 0.0 {
389 if timestamp < 0.0 {
387 secs -= 1;
390 secs -= 1;
388 subsecs += 1.0;
391 subsecs += 1.0;
389 }
392 }
390
393
391 // This cast should be safe because the fractional component is
394 // This cast should be safe because the fractional component is
392 // by definition less than 1.0, so this value should not exceed
395 // by definition less than 1.0, so this value should not exceed
393 // 1 billion, which is representable as an f64 without loss of
396 // 1 billion, which is representable as an f64 without loss of
394 // precision and should fit into a u32 without overflowing.
397 // precision and should fit into a u32 without overflowing.
395 //
398 //
396 // (Any loss of precision in the fractional component will have
399 // (Any loss of precision in the fractional component will have
397 // already happened at the time of initial parsing; in general,
400 // already happened at the time of initial parsing; in general,
398 // f64s are insufficiently precise to provide nanosecond-level
401 // f64s are insufficiently precise to provide nanosecond-level
399 // precision with present-day timestamps.)
402 // precision with present-day timestamps.)
400 let nsecs = (subsecs * 1_000_000_000.0) as u32;
403 let nsecs = (subsecs * 1_000_000_000.0) as u32;
401
404
402 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
405 DateTime::from_timestamp(secs, nsecs).ok_or_else(|| {
403 HgError::corrupted(format!(
406 HgError::corrupted(format!(
404 "float timestamp out of valid range: {timestamp}"
407 "float timestamp out of valid range: {timestamp}"
405 ))
408 ))
406 })
409 })
407 }
410 }
408
411
409 /// Decode changeset extra fields.
412 /// Decode changeset extra fields.
410 ///
413 ///
411 /// Extras are null-delimited key-value pairs where the key consists of ASCII
414 /// Extras are null-delimited key-value pairs where the key consists of ASCII
412 /// alphanumeric characters plus hyphens and underscores, and the value can
415 /// alphanumeric characters plus hyphens and underscores, and the value can
413 /// contain arbitrary bytes.
416 /// contain arbitrary bytes.
414 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
417 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
415 extra
418 extra
416 .split(|c| *c == b'\0')
419 .split(|c| *c == b'\0')
417 .map(|pair| {
420 .map(|pair| {
418 let pair = unescape_extra(pair);
421 let pair = unescape_extra(pair);
419 let mut iter = pair.splitn(2, |c| *c == b':');
422 let mut iter = pair.splitn(2, |c| *c == b':');
420
423
421 let key_bytes =
424 let key_bytes =
422 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
425 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
423 HgError::corrupted("empty key in changeset extras")
426 HgError::corrupted("empty key in changeset extras")
424 })?;
427 })?;
425
428
426 let key = str::from_utf8(key_bytes)
429 let key = str::from_utf8(key_bytes)
427 .ok()
430 .ok()
428 .filter(|k| {
431 .filter(|k| {
429 k.chars().all(|c| {
432 k.chars().all(|c| {
430 c.is_ascii_alphanumeric() || c == '_' || c == '-'
433 c.is_ascii_alphanumeric() || c == '_' || c == '-'
431 })
434 })
432 })
435 })
433 .ok_or_else(|| {
436 .ok_or_else(|| {
434 let key = String::from_utf8_lossy(key_bytes);
437 let key = String::from_utf8_lossy(key_bytes);
435 HgError::corrupted(format!(
438 HgError::corrupted(format!(
436 "invalid key in changeset extras: {key}",
439 "invalid key in changeset extras: {key}",
437 ))
440 ))
438 })?
441 })?
439 .to_string();
442 .to_string();
440
443
441 let value = iter.next().map(Into::into).ok_or_else(|| {
444 let value = iter.next().map(Into::into).ok_or_else(|| {
442 HgError::corrupted(format!(
445 HgError::corrupted(format!(
443 "missing value for changeset extra: {key}"
446 "missing value for changeset extra: {key}"
444 ))
447 ))
445 })?;
448 })?;
446
449
447 Ok((key, value))
450 Ok((key, value))
448 })
451 })
449 .collect()
452 .collect()
450 }
453 }
451
454
452 /// Parse the extra fields from a changeset's timestamp line.
455 /// Parse the extra fields from a changeset's timestamp line.
453 fn parse_timestamp_line_extra(
456 fn parse_timestamp_line_extra(
454 timestamp_line: &[u8],
457 timestamp_line: &[u8],
455 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
458 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
456 Ok(timestamp_line
459 Ok(timestamp_line
457 .splitn(3, |c| *c == b' ')
460 .splitn(3, |c| *c == b' ')
458 .nth(2)
461 .nth(2)
459 .map(decode_extra)
462 .map(decode_extra)
460 .transpose()?
463 .transpose()?
461 .unwrap_or_default())
464 .unwrap_or_default())
462 }
465 }
463
466
464 /// Decode Mercurial's escaping for changelog extras.
467 /// Decode Mercurial's escaping for changelog extras.
465 ///
468 ///
466 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
469 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
467 /// (null, backslash, newline, and carriage return) so we only decode those.
470 /// (null, backslash, newline, and carriage return) so we only decode those.
468 ///
471 ///
469 /// The Python code also includes a workaround for decoding escaped nuls
472 /// The Python code also includes a workaround for decoding escaped nuls
470 /// that are followed by an ASCII octal digit, since Python's built-in
473 /// that are followed by an ASCII octal digit, since Python's built-in
471 /// `string_escape` codec will interpret that as an escaped octal byte value.
474 /// `string_escape` codec will interpret that as an escaped octal byte value.
472 /// That workaround is omitted here since we don't support decoding octal.
475 /// That workaround is omitted here since we don't support decoding octal.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    // The decoded form is never longer than the escaped form.
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut remaining = bytes;

    while let Some((&byte, after_byte)) = remaining.split_first() {
        remaining = after_byte;

        // Anything that is not a backslash is copied through verbatim.
        if byte != b'\\' {
            decoded.push(byte);
            continue;
        }

        // `byte` is a backslash: inspect the byte it introduces, if any.
        match remaining.split_first() {
            Some((&escaped, after_escape)) => {
                remaining = after_escape;
                match escaped {
                    b'0' => decoded.push(b'\0'),
                    b'\\' => decoded.push(b'\\'),
                    b'n' => decoded.push(b'\n'),
                    b'r' => decoded.push(b'\r'),
                    // Not expected in well-formed input, since a literal
                    // backslash is itself escaped by the encoder. Unknown
                    // sequences are passed through unchanged rather than
                    // rejected.
                    other => decoded.extend_from_slice(&[b'\\', other]),
                }
            }
            // A lone backslash at the very end is kept as-is.
            None => decoded.push(b'\\'),
        }
    }

    decoded
}
499
502
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::VfsImpl;
    use crate::{
        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
        NULL_REVISION,
    };
    use pretty_assertions::assert_eq;

    /// Extras fixture shared by the encode/decode round-trip tests.
    ///
    /// It covers plain keys, keys with hyphens and underscores, an empty
    /// value, and a value containing every possible byte (which exercises
    /// all four escape sequences).
    fn sample_extra() -> BTreeMap<String, Vec<u8>> {
        [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>()
    }

    /// Truncated changeset data must be rejected at every truncation point.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        // NOTE(review): this input is identical to the next case and is not
        // actually empty; presumably `b""` was intended. Confirm that
        // `ChangelogRevisionData::new` rejects empty input before changing.
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err());
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err());
    }

    /// A well-formed changeset is split into its individual fields.
    #[test]
    fn test_create_changelogrevisiondata() {
        // The literal below is whitespace-sensitive: its continuation lines
        // must stay at column 0 so no indentation leaks into the fields.
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// The null revision resolves to `ChangelogRevisionData::null()`, both
    /// directly and through the intermediate entry object.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = VfsImpl {
            base: temp.path().to_owned(),
        };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        std::fs::write(temp.path().join("foo.d"), b"").unwrap();
        let revlog = Revlog::open(
            &vfs,
            "foo.i",
            None,
            RevlogOpenOptions::new(
                false,
                RevlogDataConfig::default(),
                RevlogDeltaConfig::default(),
                RevlogFeatureConfig::default(),
            ),
        )
        .unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION.into())?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }

    /// The null changeset lists no files.
    #[test]
    fn test_empty_files_list() {
        assert!(ChangelogRevisionData::null()
            .files()
            .collect_vec()
            .is_empty());
    }

    #[test]
    fn test_unescape_basic() {
        // '\0', '\\', '\n', and '\r' are correctly unescaped.
        let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
        let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_unsupported_sequence() {
        // Other escape sequences are left unaltered. The range is inclusive
        // so that byte 0xFF is covered as well.
        for c in 0u8..=255 {
            if matches!(c, b'0' | b'\\' | b'n' | b'r') {
                continue;
            }
            let expected = [b'\\', c];
            let unescaped = unescape_extra(&expected);
            assert_eq!(&expected[..], &unescaped[..]);
        }
    }

    #[test]
    fn test_unescape_trailing_backslash() {
        // Trailing backslashes are OK.
        let expected = br"hi\";
        let unescaped = unescape_extra(expected);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_nul_followed_by_octal() {
        // Escaped NUL chars followed by octal digits are decoded correctly.
        let expected = b"\x0012";
        let escaped = br"\012";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_parse_float_timestamp() {
        let test_cases = [
            // Zero should map to the UNIX epoch.
            ("0.0", "1970-01-01 00:00:00 UTC"),
            // Negative zero should be the same as positive zero.
            ("-0.0", "1970-01-01 00:00:00 UTC"),
            // Values without fractional components should work like integers.
            // (Assuming the timestamp is within the limits of f64 precision.)
            ("1115154970.0", "2005-05-03 21:16:10 UTC"),
            // We expect some loss of precision in the fractional component
            // when parsing arbitrary floating-point values.
            ("1115154970.123456789", "2005-05-03 21:16:10.123456716 UTC"),
            // But representable f64 values should parse losslessly.
            ("1115154970.123456716", "2005-05-03 21:16:10.123456716 UTC"),
            // Negative fractional components are subtracted from the epoch.
            ("-1.333", "1969-12-31 23:59:58.667 UTC"),
        ];

        for (input, expected) in test_cases {
            let res = parse_float_timestamp(input).unwrap().to_string();
            assert_eq!(res, expected);
        }
    }

    /// Test-only encoder mirroring `unescape_extra`: escapes exactly the
    /// four bytes that function decodes (NUL, backslash, newline, carriage
    /// return) and copies every other byte through unchanged.
    fn escape_extra(bytes: &[u8]) -> Vec<u8> {
        let mut output = Vec::with_capacity(bytes.len());

        for c in bytes.iter().copied() {
            match c {
                b'\0' => output.extend_from_slice(b"\\0"),
                b'\\' => output.extend_from_slice(b"\\\\"),
                b'\n' => output.extend_from_slice(b"\\n"),
                b'\r' => output.extend_from_slice(b"\\r"),
                _ => output.push(c),
            }
        }

        output
    }

    /// Test-only encoder for an extras map: each pair is rendered as
    /// `key:value`, escaped with `escape_extra`, and the escaped pairs are
    /// joined with a single NUL byte.
    fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
    where
        K: AsRef<[u8]>,
        V: AsRef<[u8]>,
    {
        let extras = pairs.into_iter().map(|(k, v)| {
            escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
        });
        // Use fully-qualified syntax to avoid a future naming conflict with
        // the standard library: https://github.com/rust-lang/rust/issues/79524
        Itertools::intersperse(extras, b"\0".to_vec()).concat()
    }

    /// Encoding then decoding the fixture yields the original map.
    #[test]
    fn test_decode_extra() {
        let extra = sample_extra();

        let encoded = encode_extra(&extra);
        let decoded = decode_extra(&encoded).unwrap();

        assert_eq!(extra, decoded);
    }

    /// Malformed extras must be rejected rather than silently accepted.
    #[test]
    fn test_corrupt_extra() {
        let test_cases = [
            (&b""[..], "empty input"),
            (&b"\0"[..], "unexpected null byte"),
            (&b":empty-key"[..], "empty key"),
            (&b"\0leading-null:"[..], "leading null"),
            (&b"trailing-null:\0"[..], "trailing null"),
            (&b"missing-value"[..], "missing value"),
            (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
            (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
        ];

        for (extra, msg) in test_cases {
            assert!(
                decode_extra(extra).is_err(),
                "corrupt extra should have failed to parse: {}",
                msg
            );
        }
    }

    /// A full timestamp line (`<seconds> <offset> <extras>`) yields both the
    /// timestamp and the extras.
    #[test]
    fn test_parse_timestamp_line() {
        let extra = sample_extra();

        let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
        line.extend_from_slice(&encode_extra(&extra));

        let timestamp = parse_timestamp(&line).unwrap();
        assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

        let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
        assert_eq!(extra, parsed_extra);
    }
}
General Comments 0
You need to be logged in to leave comments. Login now