##// END OF EJS Templates
rust-changelog: switch away from deprecated APIs for datetime use...
Raphaël Gomès -
r53061:6b7ffa3f default
parent child Browse files
Show More
@@ -1,756 +1,753
1 use std::ascii::escape_default;
1 use std::ascii::escape_default;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::collections::BTreeMap;
3 use std::collections::BTreeMap;
4 use std::fmt::{Debug, Formatter};
4 use std::fmt::{Debug, Formatter};
5 use std::{iter, str};
5 use std::{iter, str};
6
6
7 use chrono::{DateTime, FixedOffset, Utc};
7 use chrono::{DateTime, FixedOffset, Utc};
8 use itertools::{Either, Itertools};
8 use itertools::{Either, Itertools};
9
9
10 use crate::errors::HgError;
10 use crate::errors::HgError;
11 use crate::revlog::Index;
11 use crate::revlog::Index;
12 use crate::revlog::Revision;
12 use crate::revlog::Revision;
13 use crate::revlog::{Node, NodePrefix};
13 use crate::revlog::{Node, NodePrefix};
14 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::vfs::VfsImpl;
16 use crate::vfs::VfsImpl;
17 use crate::{Graph, GraphError, UncheckedRevision};
17 use crate::{Graph, GraphError, UncheckedRevision};
18
18
19 use super::options::RevlogOpenOptions;
19 use super::options::RevlogOpenOptions;
20
20
21 /// A specialized `Revlog` to work with changelog data format.
21 /// A specialized `Revlog` to work with changelog data format.
22 pub struct Changelog {
22 pub struct Changelog {
23 /// The generic `revlog` format.
23 /// The generic `revlog` format.
24 pub(crate) revlog: Revlog,
24 pub(crate) revlog: Revlog,
25 }
25 }
26
26
27 impl Changelog {
27 impl Changelog {
28 /// Open the `changelog` of a repository given by its root.
28 /// Open the `changelog` of a repository given by its root.
29 pub fn open(
29 pub fn open(
30 store_vfs: &VfsImpl,
30 store_vfs: &VfsImpl,
31 options: RevlogOpenOptions,
31 options: RevlogOpenOptions,
32 ) -> Result<Self, HgError> {
32 ) -> Result<Self, HgError> {
33 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
33 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
34 Ok(Self { revlog })
34 Ok(Self { revlog })
35 }
35 }
36
36
37 /// Return the `ChangelogRevisionData` for the given node ID.
37 /// Return the `ChangelogRevisionData` for the given node ID.
38 pub fn data_for_node(
38 pub fn data_for_node(
39 &self,
39 &self,
40 node: NodePrefix,
40 node: NodePrefix,
41 ) -> Result<ChangelogRevisionData, RevlogError> {
41 ) -> Result<ChangelogRevisionData, RevlogError> {
42 let rev = self.revlog.rev_from_node(node)?;
42 let rev = self.revlog.rev_from_node(node)?;
43 self.entry_for_checked_rev(rev)?.data()
43 self.entry_for_checked_rev(rev)?.data()
44 }
44 }
45
45
46 /// Return the [`ChangelogEntry`] for the given revision number.
46 /// Return the [`ChangelogEntry`] for the given revision number.
47 pub fn entry_for_rev(
47 pub fn entry_for_rev(
48 &self,
48 &self,
49 rev: UncheckedRevision,
49 rev: UncheckedRevision,
50 ) -> Result<ChangelogEntry, RevlogError> {
50 ) -> Result<ChangelogEntry, RevlogError> {
51 let revlog_entry = self.revlog.get_entry(rev)?;
51 let revlog_entry = self.revlog.get_entry(rev)?;
52 Ok(ChangelogEntry { revlog_entry })
52 Ok(ChangelogEntry { revlog_entry })
53 }
53 }
54
54
55 /// Same as [`Self::entry_for_rev`] for checked revisions.
55 /// Same as [`Self::entry_for_rev`] for checked revisions.
56 fn entry_for_checked_rev(
56 fn entry_for_checked_rev(
57 &self,
57 &self,
58 rev: Revision,
58 rev: Revision,
59 ) -> Result<ChangelogEntry, RevlogError> {
59 ) -> Result<ChangelogEntry, RevlogError> {
60 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
60 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
61 Ok(ChangelogEntry { revlog_entry })
61 Ok(ChangelogEntry { revlog_entry })
62 }
62 }
63
63
64 /// Return the [`ChangelogRevisionData`] for the given revision number.
64 /// Return the [`ChangelogRevisionData`] for the given revision number.
65 ///
65 ///
66 /// This is a useful shortcut in case the caller does not need the
66 /// This is a useful shortcut in case the caller does not need the
67 /// generic revlog information (parents, hashes etc). Otherwise
67 /// generic revlog information (parents, hashes etc). Otherwise
68 /// consider taking a [`ChangelogEntry`] with
68 /// consider taking a [`ChangelogEntry`] with
69 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
69 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
70 pub fn data_for_rev(
70 pub fn data_for_rev(
71 &self,
71 &self,
72 rev: UncheckedRevision,
72 rev: UncheckedRevision,
73 ) -> Result<ChangelogRevisionData, RevlogError> {
73 ) -> Result<ChangelogRevisionData, RevlogError> {
74 self.entry_for_rev(rev)?.data()
74 self.entry_for_rev(rev)?.data()
75 }
75 }
76
76
77 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
77 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
78 self.revlog.node_from_rev(rev)
78 self.revlog.node_from_rev(rev)
79 }
79 }
80
80
81 pub fn rev_from_node(
81 pub fn rev_from_node(
82 &self,
82 &self,
83 node: NodePrefix,
83 node: NodePrefix,
84 ) -> Result<Revision, RevlogError> {
84 ) -> Result<Revision, RevlogError> {
85 self.revlog.rev_from_node(node)
85 self.revlog.rev_from_node(node)
86 }
86 }
87
87
88 pub fn get_index(&self) -> &Index {
88 pub fn get_index(&self) -> &Index {
89 self.revlog.index()
89 self.revlog.index()
90 }
90 }
91 }
91 }
92
92
93 impl Graph for Changelog {
93 impl Graph for Changelog {
94 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
94 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
95 self.revlog.parents(rev)
95 self.revlog.parents(rev)
96 }
96 }
97 }
97 }
98
98
99 /// A specialized `RevlogEntry` for `changelog` data format
99 /// A specialized `RevlogEntry` for `changelog` data format
100 ///
100 ///
101 /// This is a `RevlogEntry` with the added semantics that the associated
101 /// This is a `RevlogEntry` with the added semantics that the associated
102 /// data should meet the requirements for `changelog`, materialized by
102 /// data should meet the requirements for `changelog`, materialized by
103 /// the fact that `data()` constructs a `ChangelogRevisionData`.
103 /// the fact that `data()` constructs a `ChangelogRevisionData`.
104 /// In case that promise would be broken, the `data` method returns an error.
104 /// In case that promise would be broken, the `data` method returns an error.
105 #[derive(Clone)]
105 #[derive(Clone)]
106 pub struct ChangelogEntry<'changelog> {
106 pub struct ChangelogEntry<'changelog> {
107 /// Same data, as a generic `RevlogEntry`.
107 /// Same data, as a generic `RevlogEntry`.
108 pub(crate) revlog_entry: RevlogEntry<'changelog>,
108 pub(crate) revlog_entry: RevlogEntry<'changelog>,
109 }
109 }
110
110
111 impl<'changelog> ChangelogEntry<'changelog> {
111 impl<'changelog> ChangelogEntry<'changelog> {
112 pub fn data<'a>(
112 pub fn data<'a>(
113 &'a self,
113 &'a self,
114 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
114 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
115 let bytes = self.revlog_entry.data()?;
115 let bytes = self.revlog_entry.data()?;
116 if bytes.is_empty() {
116 if bytes.is_empty() {
117 Ok(ChangelogRevisionData::null())
117 Ok(ChangelogRevisionData::null())
118 } else {
118 } else {
119 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
119 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
120 RevlogError::Other(HgError::CorruptedRepository(format!(
120 RevlogError::Other(HgError::CorruptedRepository(format!(
121 "Invalid changelog data for revision {}: {:?}",
121 "Invalid changelog data for revision {}: {:?}",
122 self.revlog_entry.revision(),
122 self.revlog_entry.revision(),
123 err
123 err
124 )))
124 )))
125 })?)
125 })?)
126 }
126 }
127 }
127 }
128
128
129 /// Obtain a reference to the underlying `RevlogEntry`.
129 /// Obtain a reference to the underlying `RevlogEntry`.
130 ///
130 ///
131 /// This allows the caller to access the information that is common
131 /// This allows the caller to access the information that is common
132 /// to all revlog entries: revision number, node id, parent revisions etc.
132 /// to all revlog entries: revision number, node id, parent revisions etc.
133 pub fn as_revlog_entry(&self) -> &RevlogEntry {
133 pub fn as_revlog_entry(&self) -> &RevlogEntry {
134 &self.revlog_entry
134 &self.revlog_entry
135 }
135 }
136
136
137 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
137 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
138 Ok(self
138 Ok(self
139 .revlog_entry
139 .revlog_entry
140 .p1_entry()?
140 .p1_entry()?
141 .map(|revlog_entry| Self { revlog_entry }))
141 .map(|revlog_entry| Self { revlog_entry }))
142 }
142 }
143
143
144 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
144 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
145 Ok(self
145 Ok(self
146 .revlog_entry
146 .revlog_entry
147 .p2_entry()?
147 .p2_entry()?
148 .map(|revlog_entry| Self { revlog_entry }))
148 .map(|revlog_entry| Self { revlog_entry }))
149 }
149 }
150 }
150 }
151
151
152 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
152 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
153 #[derive(PartialEq)]
153 #[derive(PartialEq)]
154 pub struct ChangelogRevisionData<'changelog> {
154 pub struct ChangelogRevisionData<'changelog> {
155 /// The data bytes of the `changelog` entry.
155 /// The data bytes of the `changelog` entry.
156 bytes: Cow<'changelog, [u8]>,
156 bytes: Cow<'changelog, [u8]>,
157 /// The end offset for the hex manifest (not including the newline)
157 /// The end offset for the hex manifest (not including the newline)
158 manifest_end: usize,
158 manifest_end: usize,
159 /// The end offset for the user+email (not including the newline)
159 /// The end offset for the user+email (not including the newline)
160 user_end: usize,
160 user_end: usize,
161 /// The end offset for the timestamp+timezone+extras (not including the
161 /// The end offset for the timestamp+timezone+extras (not including the
162 /// newline)
162 /// newline)
163 timestamp_end: usize,
163 timestamp_end: usize,
164 /// The end offset for the file list (not including the newline)
164 /// The end offset for the file list (not including the newline)
165 files_end: usize,
165 files_end: usize,
166 }
166 }
167
167
168 impl<'changelog> ChangelogRevisionData<'changelog> {
168 impl<'changelog> ChangelogRevisionData<'changelog> {
169 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
169 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
170 let mut line_iter = bytes.split(|b| b == &b'\n');
170 let mut line_iter = bytes.split(|b| b == &b'\n');
171 let manifest_end = line_iter
171 let manifest_end = line_iter
172 .next()
172 .next()
173 .expect("Empty iterator from split()?")
173 .expect("Empty iterator from split()?")
174 .len();
174 .len();
175 let user_slice = line_iter.next().ok_or_else(|| {
175 let user_slice = line_iter.next().ok_or_else(|| {
176 HgError::corrupted("Changeset data truncated after manifest line")
176 HgError::corrupted("Changeset data truncated after manifest line")
177 })?;
177 })?;
178 let user_end = manifest_end + 1 + user_slice.len();
178 let user_end = manifest_end + 1 + user_slice.len();
179 let timestamp_slice = line_iter.next().ok_or_else(|| {
179 let timestamp_slice = line_iter.next().ok_or_else(|| {
180 HgError::corrupted("Changeset data truncated after user line")
180 HgError::corrupted("Changeset data truncated after user line")
181 })?;
181 })?;
182 let timestamp_end = user_end + 1 + timestamp_slice.len();
182 let timestamp_end = user_end + 1 + timestamp_slice.len();
183 let mut files_end = timestamp_end + 1;
183 let mut files_end = timestamp_end + 1;
184 loop {
184 loop {
185 let line = line_iter.next().ok_or_else(|| {
185 let line = line_iter.next().ok_or_else(|| {
186 HgError::corrupted("Changeset data truncated in files list")
186 HgError::corrupted("Changeset data truncated in files list")
187 })?;
187 })?;
188 if line.is_empty() {
188 if line.is_empty() {
189 if files_end == bytes.len() {
189 if files_end == bytes.len() {
190 // The list of files ended with a single newline (there
190 // The list of files ended with a single newline (there
191 // should be two)
191 // should be two)
192 return Err(HgError::corrupted(
192 return Err(HgError::corrupted(
193 "Changeset data truncated after files list",
193 "Changeset data truncated after files list",
194 ));
194 ));
195 }
195 }
196 files_end -= 1;
196 files_end -= 1;
197 break;
197 break;
198 }
198 }
199 files_end += line.len() + 1;
199 files_end += line.len() + 1;
200 }
200 }
201
201
202 Ok(Self {
202 Ok(Self {
203 bytes,
203 bytes,
204 manifest_end,
204 manifest_end,
205 user_end,
205 user_end,
206 timestamp_end,
206 timestamp_end,
207 files_end,
207 files_end,
208 })
208 })
209 }
209 }
210
210
211 fn null() -> Self {
211 fn null() -> Self {
212 Self::new(Cow::Borrowed(
212 Self::new(Cow::Borrowed(
213 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
213 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
214 ))
214 ))
215 .unwrap()
215 .unwrap()
216 }
216 }
217
217
218 /// Return an iterator over the lines of the entry.
218 /// Return an iterator over the lines of the entry.
219 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
219 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
220 self.bytes.split(|b| b == &b'\n')
220 self.bytes.split(|b| b == &b'\n')
221 }
221 }
222
222
223 /// Return the node id of the `manifest` referenced by this `changelog`
223 /// Return the node id of the `manifest` referenced by this `changelog`
224 /// entry.
224 /// entry.
225 pub fn manifest_node(&self) -> Result<Node, HgError> {
225 pub fn manifest_node(&self) -> Result<Node, HgError> {
226 let manifest_node_hex = &self.bytes[..self.manifest_end];
226 let manifest_node_hex = &self.bytes[..self.manifest_end];
227 Node::from_hex_for_repo(manifest_node_hex)
227 Node::from_hex_for_repo(manifest_node_hex)
228 }
228 }
229
229
230 /// The full user string (usually a name followed by an email enclosed in
230 /// The full user string (usually a name followed by an email enclosed in
231 /// angle brackets)
231 /// angle brackets)
232 pub fn user(&self) -> &[u8] {
232 pub fn user(&self) -> &[u8] {
233 &self.bytes[self.manifest_end + 1..self.user_end]
233 &self.bytes[self.manifest_end + 1..self.user_end]
234 }
234 }
235
235
236 /// The full timestamp line (timestamp in seconds, offset in seconds, and
236 /// The full timestamp line (timestamp in seconds, offset in seconds, and
237 /// possibly extras)
237 /// possibly extras)
238 // TODO: We should expose this in a more useful way
238 // TODO: We should expose this in a more useful way
239 pub fn timestamp_line(&self) -> &[u8] {
239 pub fn timestamp_line(&self) -> &[u8] {
240 &self.bytes[self.user_end + 1..self.timestamp_end]
240 &self.bytes[self.user_end + 1..self.timestamp_end]
241 }
241 }
242
242
243 /// Parsed timestamp.
243 /// Parsed timestamp.
244 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
244 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
245 parse_timestamp(self.timestamp_line())
245 parse_timestamp(self.timestamp_line())
246 }
246 }
247
247
248 /// Optional commit extras.
248 /// Optional commit extras.
249 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
249 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
250 parse_timestamp_line_extra(self.timestamp_line())
250 parse_timestamp_line_extra(self.timestamp_line())
251 }
251 }
252
252
253 /// The files changed in this revision.
253 /// The files changed in this revision.
254 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
254 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
255 if self.timestamp_end == self.files_end {
255 if self.timestamp_end == self.files_end {
256 Either::Left(iter::empty())
256 Either::Left(iter::empty())
257 } else {
257 } else {
258 Either::Right(
258 Either::Right(
259 self.bytes[self.timestamp_end + 1..self.files_end]
259 self.bytes[self.timestamp_end + 1..self.files_end]
260 .split(|b| b == &b'\n')
260 .split(|b| b == &b'\n')
261 .map(HgPath::new),
261 .map(HgPath::new),
262 )
262 )
263 }
263 }
264 }
264 }
265
265
266 /// The change description.
266 /// The change description.
267 pub fn description(&self) -> &[u8] {
267 pub fn description(&self) -> &[u8] {
268 &self.bytes[self.files_end + 2..]
268 &self.bytes[self.files_end + 2..]
269 }
269 }
270 }
270 }
271
271
272 impl Debug for ChangelogRevisionData<'_> {
272 impl Debug for ChangelogRevisionData<'_> {
273 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
273 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
274 f.debug_struct("ChangelogRevisionData")
274 f.debug_struct("ChangelogRevisionData")
275 .field("bytes", &debug_bytes(&self.bytes))
275 .field("bytes", &debug_bytes(&self.bytes))
276 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
276 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
277 .field(
277 .field(
278 "user",
278 "user",
279 &debug_bytes(
279 &debug_bytes(
280 &self.bytes[self.manifest_end + 1..self.user_end],
280 &self.bytes[self.manifest_end + 1..self.user_end],
281 ),
281 ),
282 )
282 )
283 .field(
283 .field(
284 "timestamp",
284 "timestamp",
285 &debug_bytes(
285 &debug_bytes(
286 &self.bytes[self.user_end + 1..self.timestamp_end],
286 &self.bytes[self.user_end + 1..self.timestamp_end],
287 ),
287 ),
288 )
288 )
289 .field(
289 .field(
290 "files",
290 "files",
291 &debug_bytes(
291 &debug_bytes(
292 &self.bytes[self.timestamp_end + 1..self.files_end],
292 &self.bytes[self.timestamp_end + 1..self.files_end],
293 ),
293 ),
294 )
294 )
295 .field(
295 .field(
296 "description",
296 "description",
297 &debug_bytes(&self.bytes[self.files_end + 2..]),
297 &debug_bytes(&self.bytes[self.files_end + 2..]),
298 )
298 )
299 .finish()
299 .finish()
300 }
300 }
301 }
301 }
302
302
303 fn debug_bytes(bytes: &[u8]) -> String {
303 fn debug_bytes(bytes: &[u8]) -> String {
304 String::from_utf8_lossy(
304 String::from_utf8_lossy(
305 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
305 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
306 )
306 )
307 .to_string()
307 .to_string()
308 }
308 }
309
309
310 /// Parse the raw bytes of the timestamp line from a changelog entry.
310 /// Parse the raw bytes of the timestamp line from a changelog entry.
311 ///
311 ///
312 /// According to the documentation in `hg help dates` and the
312 /// According to the documentation in `hg help dates` and the
313 /// implementation in `changelog.py`, the format of the timestamp line
313 /// implementation in `changelog.py`, the format of the timestamp line
314 /// is `time tz extra\n` where:
314 /// is `time tz extra\n` where:
315 ///
315 ///
316 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
316 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
317 /// as seconds since the UNIX epoch.
317 /// as seconds since the UNIX epoch.
318 ///
318 ///
319 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
319 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
320 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
320 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
321 /// opposite of the sign in ISO 8601 timestamps).
321 /// opposite of the sign in ISO 8601 timestamps).
322 ///
322 ///
323 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
323 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
324 /// and value in each pair separated by an ASCII colon. Keys are limited to
324 /// and value in each pair separated by an ASCII colon. Keys are limited to
325 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
325 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
326 /// arbitrary bytes.
326 /// arbitrary bytes.
327 fn parse_timestamp(
327 fn parse_timestamp(
328 timestamp_line: &[u8],
328 timestamp_line: &[u8],
329 ) -> Result<DateTime<FixedOffset>, HgError> {
329 ) -> Result<DateTime<FixedOffset>, HgError> {
330 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
330 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
331
331
332 let timestamp_bytes = parts
332 let timestamp_bytes = parts
333 .next()
333 .next()
334 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
334 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
335 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
335 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
336 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
336 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
337 })?;
337 })?;
338 let timestamp_utc = timestamp_str
338 let timestamp_utc = timestamp_str
339 .parse()
339 .parse()
340 .map_err(|e| {
340 .map_err(|e| {
341 HgError::corrupted(format!("failed to parse timestamp: {e}"))
341 HgError::corrupted(format!("failed to parse timestamp: {e}"))
342 })
342 })
343 .and_then(|secs| {
343 .and_then(|secs| {
344 DateTime::from_timestamp(secs, 0).ok_or_else(|| {
344 DateTime::from_timestamp(secs, 0).ok_or_else(|| {
345 HgError::corrupted(format!(
345 HgError::corrupted(format!(
346 "integer timestamp out of valid range: {secs}"
346 "integer timestamp out of valid range: {secs}"
347 ))
347 ))
348 })
348 })
349 })
349 })
350 // Attempt to parse the timestamp as a float if we can't parse
350 // Attempt to parse the timestamp as a float if we can't parse
351 // it as an int. It doesn't seem like float timestamps are actually
351 // it as an int. It doesn't seem like float timestamps are actually
352 // used in practice, but the Python code supports them.
352 // used in practice, but the Python code supports them.
353 .or_else(|_| parse_float_timestamp(timestamp_str))?;
353 .or_else(|_| parse_float_timestamp(timestamp_str))?;
354
354
355 let timezone_bytes = parts
355 let timezone_bytes = parts
356 .next()
356 .next()
357 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
357 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
358 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
358 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
359 .map_err(|e| {
359 .map_err(|e| {
360 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
360 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
361 })?
361 })?
362 .parse()
362 .parse()
363 .map_err(|e| {
363 .map_err(|e| {
364 HgError::corrupted(format!("timezone is not an integer: {e}"))
364 HgError::corrupted(format!("timezone is not an integer: {e}"))
365 })?;
365 })?;
366 let timezone = FixedOffset::west_opt(timezone_secs)
366 let timezone = FixedOffset::west_opt(timezone_secs)
367 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
367 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
368
368
369 Ok(DateTime::from_naive_utc_and_offset(
369 Ok(timestamp_utc.with_timezone(&timezone))
370 timestamp_utc.naive_utc(),
371 timezone,
372 ))
373 }
370 }
374
371
375 /// Attempt to parse the given string as a floating-point timestamp, and
372 /// Attempt to parse the given string as a floating-point timestamp, and
376 /// convert the result into a `chrono::DateTime<Utc>`.
373 /// convert the result into a `chrono::DateTime<Utc>`.
377 fn parse_float_timestamp(
374 fn parse_float_timestamp(
378 timestamp_str: &str,
375 timestamp_str: &str,
379 ) -> Result<DateTime<Utc>, HgError> {
376 ) -> Result<DateTime<Utc>, HgError> {
380 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
377 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
381 HgError::corrupted(format!("failed to parse timestamp: {e}"))
378 HgError::corrupted(format!("failed to parse timestamp: {e}"))
382 })?;
379 })?;
383
380
384 // To construct the `DateTime` we'll need to convert the float
381 // To construct the `DateTime` we'll need to convert the float
385 // into signed integer seconds and unsigned integer nanoseconds.
382 // into signed integer seconds and unsigned integer nanoseconds.
386 let mut secs = timestamp.trunc() as i64;
383 let mut secs = timestamp.trunc() as i64;
387 let mut subsecs = timestamp.fract();
384 let mut subsecs = timestamp.fract();
388
385
389 // If the timestamp is negative, we need to express the fractional
386 // If the timestamp is negative, we need to express the fractional
390 // component as positive nanoseconds since the previous second.
387 // component as positive nanoseconds since the previous second.
391 if timestamp < 0.0 {
388 if timestamp < 0.0 {
392 secs -= 1;
389 secs -= 1;
393 subsecs += 1.0;
390 subsecs += 1.0;
394 }
391 }
395
392
396 // This cast should be safe because the fractional component is
393 // This cast should be safe because the fractional component is
397 // by definition less than 1.0, so this value should not exceed
394 // by definition less than 1.0, so this value should not exceed
398 // 1 billion, which is representable as an f64 without loss of
395 // 1 billion, which is representable as an f64 without loss of
399 // precision and should fit into a u32 without overflowing.
396 // precision and should fit into a u32 without overflowing.
400 //
397 //
401 // (Any loss of precision in the fractional component will have
398 // (Any loss of precision in the fractional component will have
402 // already happened at the time of initial parsing; in general,
399 // already happened at the time of initial parsing; in general,
403 // f64s are insufficiently precise to provide nanosecond-level
400 // f64s are insufficiently precise to provide nanosecond-level
404 // precision with present-day timestamps.)
401 // precision with present-day timestamps.)
405 let nsecs = (subsecs * 1_000_000_000.0) as u32;
402 let nsecs = (subsecs * 1_000_000_000.0) as u32;
406
403
407 DateTime::from_timestamp(secs, nsecs).ok_or_else(|| {
404 DateTime::from_timestamp(secs, nsecs).ok_or_else(|| {
408 HgError::corrupted(format!(
405 HgError::corrupted(format!(
409 "float timestamp out of valid range: {timestamp}"
406 "float timestamp out of valid range: {timestamp}"
410 ))
407 ))
411 })
408 })
412 }
409 }
413
410
414 /// Decode changeset extra fields.
411 /// Decode changeset extra fields.
415 ///
412 ///
416 /// Extras are null-delimited key-value pairs where the key consists of ASCII
413 /// Extras are null-delimited key-value pairs where the key consists of ASCII
417 /// alphanumeric characters plus hyphens and underscores, and the value can
414 /// alphanumeric characters plus hyphens and underscores, and the value can
418 /// contain arbitrary bytes.
415 /// contain arbitrary bytes.
419 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
416 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
420 extra
417 extra
421 .split(|c| *c == b'\0')
418 .split(|c| *c == b'\0')
422 .map(|pair| {
419 .map(|pair| {
423 let pair = unescape_extra(pair);
420 let pair = unescape_extra(pair);
424 let mut iter = pair.splitn(2, |c| *c == b':');
421 let mut iter = pair.splitn(2, |c| *c == b':');
425
422
426 let key_bytes =
423 let key_bytes =
427 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
424 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
428 HgError::corrupted("empty key in changeset extras")
425 HgError::corrupted("empty key in changeset extras")
429 })?;
426 })?;
430
427
431 let key = str::from_utf8(key_bytes)
428 let key = str::from_utf8(key_bytes)
432 .ok()
429 .ok()
433 .filter(|k| {
430 .filter(|k| {
434 k.chars().all(|c| {
431 k.chars().all(|c| {
435 c.is_ascii_alphanumeric() || c == '_' || c == '-'
432 c.is_ascii_alphanumeric() || c == '_' || c == '-'
436 })
433 })
437 })
434 })
438 .ok_or_else(|| {
435 .ok_or_else(|| {
439 let key = String::from_utf8_lossy(key_bytes);
436 let key = String::from_utf8_lossy(key_bytes);
440 HgError::corrupted(format!(
437 HgError::corrupted(format!(
441 "invalid key in changeset extras: {key}",
438 "invalid key in changeset extras: {key}",
442 ))
439 ))
443 })?
440 })?
444 .to_string();
441 .to_string();
445
442
446 let value = iter.next().map(Into::into).ok_or_else(|| {
443 let value = iter.next().map(Into::into).ok_or_else(|| {
447 HgError::corrupted(format!(
444 HgError::corrupted(format!(
448 "missing value for changeset extra: {key}"
445 "missing value for changeset extra: {key}"
449 ))
446 ))
450 })?;
447 })?;
451
448
452 Ok((key, value))
449 Ok((key, value))
453 })
450 })
454 .collect()
451 .collect()
455 }
452 }
456
453
457 /// Parse the extra fields from a changeset's timestamp line.
454 /// Parse the extra fields from a changeset's timestamp line.
458 fn parse_timestamp_line_extra(
455 fn parse_timestamp_line_extra(
459 timestamp_line: &[u8],
456 timestamp_line: &[u8],
460 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
457 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
461 Ok(timestamp_line
458 Ok(timestamp_line
462 .splitn(3, |c| *c == b' ')
459 .splitn(3, |c| *c == b' ')
463 .nth(2)
460 .nth(2)
464 .map(decode_extra)
461 .map(decode_extra)
465 .transpose()?
462 .transpose()?
466 .unwrap_or_default())
463 .unwrap_or_default())
467 }
464 }
468
465
469 /// Decode Mercurial's escaping for changelog extras.
466 /// Decode Mercurial's escaping for changelog extras.
470 ///
467 ///
471 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
468 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
472 /// (null, backslash, newline, and carriage return) so we only decode those.
469 /// (null, backslash, newline, and carriage return) so we only decode those.
473 ///
470 ///
474 /// The Python code also includes a workaround for decoding escaped nuls
471 /// The Python code also includes a workaround for decoding escaped nuls
475 /// that are followed by an ASCII octal digit, since Python's built-in
472 /// that are followed by an ASCII octal digit, since Python's built-in
476 /// `string_escape` codec will interpret that as an escaped octal byte value.
473 /// `string_escape` codec will interpret that as an escaped octal byte value.
477 /// That workaround is omitted here since we don't support decoding octal.
474 /// That workaround is omitted here since we don't support decoding octal.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    // Decodes the four escape sequences `\0`, `\\`, `\n` and `\r` back into
    // the bytes they stand for; every other byte is copied through as-is.
    // The result can never be longer than the input, so reserving
    // `bytes.len()` up front avoids any reallocation.
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut pos = 0;

    while pos < bytes.len() {
        let byte = bytes[pos];
        pos += 1;

        if byte != b'\\' {
            decoded.push(byte);
            continue;
        }

        // A backslash introduces an escape: inspect the byte after it.
        match bytes.get(pos) {
            Some(&code) => {
                // The escape code is consumed together with the backslash.
                pos += 1;
                match code {
                    b'0' => decoded.push(b'\0'),
                    b'\\' => decoded.push(b'\\'),
                    b'n' => decoded.push(b'\n'),
                    b'r' => decoded.push(b'\r'),
                    // This case should never occur in theory because any
                    // backslashes in the original input should have been
                    // escaped with another backslash, so it should not be
                    // possible to observe an escape sequence other than
                    // the 4 above. Keep such a sequence verbatim.
                    other => decoded.extend_from_slice(&[b'\\', other]),
                }
            }
            // A lone backslash at the very end of the input is kept as-is.
            None => decoded.push(b'\\'),
        }
    }

    decoded
}
504
501
505 #[cfg(test)]
502 #[cfg(test)]
506 mod tests {
503 mod tests {
507 use super::*;
504 use super::*;
508 use crate::vfs::VfsImpl;
505 use crate::vfs::VfsImpl;
509 use crate::NULL_REVISION;
506 use crate::NULL_REVISION;
510 use pretty_assertions::assert_eq;
507 use pretty_assertions::assert_eq;
511
508
/// Truncated changelog entries must be rejected by
/// `ChangelogRevisionData::new`.
#[test]
fn test_create_changelogrevisiondata_invalid() {
    // Completely empty
    // NOTE(review): this case passes `b"abcd"`, exactly like the next one,
    // so an actually empty buffer is not exercised here -- confirm whether
    // `b""` was intended.
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
    // No newline after manifest
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
    // No newline after user
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
    // No newline after timestamp
    assert!(
        ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
    );
    // Missing newline after files
    assert!(ChangelogRevisionData::new(Cow::Borrowed(
        b"abcd\n\n0 0\nfile1\nfile2"
    ))
    .is_err());
    // Only one newline after files
    assert!(ChangelogRevisionData::new(Cow::Borrowed(
        b"abcd\n\n0 0\nfile1\nfile2\n"
    ))
    .is_err());
}
535
532
/// A well-formed entry exposes its manifest node, user, timestamp line,
/// file list and description through the accessors.
#[test]
fn test_create_changelogrevisiondata() {
    // The entry is spelled with explicit `\n` escapes and line
    // continuations so that its exact bytes do not depend on how the
    // source file is indented.
    let data = ChangelogRevisionData::new(Cow::Borrowed(
        b"0123456789abcdef0123456789abcdef01234567\n\
          Some One <someone@example.com>\n\
          0 0\n\
          file1\n\
          file2\n\
          \n\
          some\n\
          commit\n\
          message",
    ))
    .unwrap();
    assert_eq!(
        data.manifest_node().unwrap(),
        Node::from_hex("0123456789abcdef0123456789abcdef01234567")
            .unwrap()
    );
    assert_eq!(data.user(), b"Some One <someone@example.com>");
    assert_eq!(data.timestamp_line(), b"0 0");
    assert_eq!(
        data.files().collect_vec(),
        vec![HgPath::new("file1"), HgPath::new("file2")]
    );
    assert_eq!(data.description(), b"some\ncommit\nmessage");
}
563
560
/// Looking up `NULL_REVISION` yields `ChangelogRevisionData::null()`, both
/// directly and through the intermediate entry object.
#[test]
fn test_data_from_rev_null() -> Result<(), RevlogError> {
    // an empty revlog will be enough for this case
    let temp = tempfile::tempdir().unwrap();
    let vfs = VfsImpl {
        base: temp.path().to_owned(),
    };
    std::fs::write(temp.path().join("foo.i"), b"").unwrap();
    let revlog =
        Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
            .unwrap();

    let changelog = Changelog { revlog };
    assert_eq!(
        changelog.data_for_rev(NULL_REVISION.into())?,
        ChangelogRevisionData::null()
    );
    // same with the intermediate entry object
    assert_eq!(
        changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
        ChangelogRevisionData::null()
    );
    Ok(())
}
588
585
/// The null revision data lists no files.
#[test]
fn test_empty_files_list() {
    assert!(ChangelogRevisionData::null()
        .files()
        .collect_vec()
        .is_empty());
}
596
593
/// The four supported escape sequences are decoded by `unescape_extra`.
#[test]
fn test_unescape_basic() {
    // '\0', '\\', '\n', and '\r' are correctly unescaped.
    let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
    let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
    let unescaped = unescape_extra(escaped);
    assert_eq!(&expected[..], &unescaped[..]);
}
605
602
/// A backslash followed by any byte other than `0`, `\`, `n` or `r` is not
/// an escape sequence and must come back from `unescape_extra` unchanged.
#[test]
fn test_unescape_unsupported_sequence() {
    // Other escape sequences are left unaltered.
    // The range is inclusive so that byte 0xFF is exercised as well; an
    // exclusive `0u8..255` would silently skip it.
    for c in 0u8..=255 {
        if matches!(c, b'0' | b'\\' | b'n' | b'r') {
            continue;
        }
        let expected = [b'\\', c];
        let unescaped = unescape_extra(&expected);
        assert_eq!(&expected[..], &unescaped[..]);
    }
}
620
617
/// A backslash that ends the input has nothing to escape and is kept.
#[test]
fn test_unescape_trailing_backslash() {
    // Trailing backslashes are OK.
    let expected = br"hi\";
    let unescaped = unescape_extra(expected);
    assert_eq!(&expected[..], &unescaped[..]);
}
628
625
/// `\0` followed by digits decodes to a NUL byte plus the literal digits,
/// not to an octal byte value.
#[test]
fn test_unescape_nul_followed_by_octal() {
    // Escaped NUL chars followed by octal digits are decoded correctly.
    let expected = b"\x0012";
    let escaped = br"\012";
    let unescaped = unescape_extra(escaped);
    assert_eq!(&expected[..], &unescaped[..]);
}
637
634
/// Table-driven check of `parse_float_timestamp`: each input string is
/// parsed and its `to_string()` rendering compared with the expected text.
#[test]
fn test_parse_float_timestamp() {
    let test_cases = [
        // Zero should map to the UNIX epoch.
        ("0.0", "1970-01-01 00:00:00 UTC"),
        // Negative zero should be the same as positive zero.
        ("-0.0", "1970-01-01 00:00:00 UTC"),
        // Values without fractional components should work like integers.
        // (Assuming the timestamp is within the limits of f64 precision.)
        ("1115154970.0", "2005-05-03 21:16:10 UTC"),
        // We expect some loss of precision in the fractional component
        // when parsing arbitrary floating-point values.
        ("1115154970.123456789", "2005-05-03 21:16:10.123456716 UTC"),
        // But representable f64 values should parse losslessly.
        ("1115154970.123456716", "2005-05-03 21:16:10.123456716 UTC"),
        // Negative fractional components are subtracted from the epoch.
        ("-1.333", "1969-12-31 23:59:58.667 UTC"),
    ];

    for (input, expected) in test_cases {
        let res = parse_float_timestamp(input).unwrap().to_string();
        assert_eq!(res, expected);
    }
}
662
659
/// Test helper: the inverse of `unescape_extra`.
///
/// NUL, backslash, newline and carriage return are replaced by the
/// two-byte sequences `\0`, `\\`, `\n` and `\r`; every other byte is
/// copied through unchanged.
fn escape_extra(bytes: &[u8]) -> Vec<u8> {
    // The output is at least as long as the input.
    let mut output = Vec::with_capacity(bytes.len());

    for &byte in bytes {
        match byte {
            b'\0' => output.extend_from_slice(b"\\0"),
            b'\\' => output.extend_from_slice(b"\\\\"),
            b'\n' => output.extend_from_slice(b"\\n"),
            b'\r' => output.extend_from_slice(b"\\r"),
            other => output.push(other),
        }
    }

    output
}
681
678
682 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
679 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
683 where
680 where
684 K: AsRef<[u8]>,
681 K: AsRef<[u8]>,
685 V: AsRef<[u8]>,
682 V: AsRef<[u8]>,
686 {
683 {
687 let extras = pairs.into_iter().map(|(k, v)| {
684 let extras = pairs.into_iter().map(|(k, v)| {
688 escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
685 escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
689 });
686 });
690 // Use fully-qualified syntax to avoid a future naming conflict with
687 // Use fully-qualified syntax to avoid a future naming conflict with
691 // the standard library: https://github.com/rust-lang/rust/issues/79524
688 // the standard library: https://github.com/rust-lang/rust/issues/79524
692 Itertools::intersperse(extras, b"\0".to_vec()).concat()
689 Itertools::intersperse(extras, b"\0".to_vec()).concat()
693 }
690 }
694
691
/// Round trip: a map encoded with `encode_extra` is recovered unchanged by
/// `decode_extra`, including an empty value and a value holding every
/// possible byte.
#[test]
fn test_decode_extra() {
    let extra = [
        ("branch".into(), b"default".to_vec()),
        ("key-with-hyphens".into(), b"value1".to_vec()),
        ("key_with_underscores".into(), b"value2".to_vec()),
        ("empty-value".into(), b"".to_vec()),
        ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
    ]
    .into_iter()
    .collect::<BTreeMap<String, Vec<u8>>>();

    let encoded = encode_extra(&extra);
    let decoded = decode_extra(&encoded).unwrap();

    assert_eq!(extra, decoded);
}
712
709
/// Each malformed input below must make `decode_extra` return an error;
/// the label next to it is used in the assertion failure message.
#[test]
fn test_corrupt_extra() {
    let test_cases = [
        (&b""[..], "empty input"),
        (&b"\0"[..], "unexpected null byte"),
        (&b":empty-key"[..], "empty key"),
        (&b"\0leading-null:"[..], "leading null"),
        (&b"trailing-null:\0"[..], "trailing null"),
        (&b"missing-value"[..], "missing value"),
        (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
        (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
    ];

    for (extra, msg) in test_cases {
        assert!(
            decode_extra(extra).is_err(),
            "corrupt extra should have failed to parse: {}",
            msg
        );
    }
}
734
731
/// A full timestamp line (`<seconds> <offset> <encoded extra>`) yields both
/// the expected timestamp and the original extra map.
#[test]
fn test_parse_timestamp_line() {
    let extra = [
        ("branch".into(), b"default".to_vec()),
        ("key-with-hyphens".into(), b"value1".to_vec()),
        ("key_with_underscores".into(), b"value2".to_vec()),
        ("empty-value".into(), b"".to_vec()),
        ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
    ]
    .into_iter()
    .collect::<BTreeMap<String, Vec<u8>>>();

    // Timestamp and offset come first; the encoded extra follows the
    // second space.
    let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
    line.extend_from_slice(&encode_extra(&extra));

    let timestamp = parse_timestamp(&line).unwrap();
    assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

    let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
    assert_eq!(extra, parsed_extra);
}
756 }
753 }
General Comments 0
You need to be logged in to leave comments. Login now