##// END OF EJS Templates
hg-core: separate timestamp and extra methods
Arun Kulshreshtha -
r52286:6603a144 default
parent child Browse files
Show More
@@ -1,746 +1,744 b''
1 use std::ascii::escape_default;
1 use std::ascii::escape_default;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::collections::BTreeMap;
3 use std::collections::BTreeMap;
4 use std::fmt::{Debug, Formatter};
4 use std::fmt::{Debug, Formatter};
5 use std::{iter, str};
5 use std::{iter, str};
6
6
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
8 use itertools::{Either, Itertools};
8 use itertools::{Either, Itertools};
9
9
10 use crate::errors::HgError;
10 use crate::errors::HgError;
11 use crate::revlog::Revision;
11 use crate::revlog::Revision;
12 use crate::revlog::{Node, NodePrefix};
12 use crate::revlog::{Node, NodePrefix};
13 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
13 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 use crate::utils::hg_path::HgPath;
14 use crate::utils::hg_path::HgPath;
15 use crate::vfs::Vfs;
15 use crate::vfs::Vfs;
16 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
16 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
17
17
18 /// A specialized `Revlog` to work with changelog data format.
18 /// A specialized `Revlog` to work with changelog data format.
19 pub struct Changelog {
19 pub struct Changelog {
20 /// The generic `revlog` format.
20 /// The generic `revlog` format.
21 pub(crate) revlog: Revlog,
21 pub(crate) revlog: Revlog,
22 }
22 }
23
23
24 impl Changelog {
24 impl Changelog {
25 /// Open the `changelog` of a repository given by its root.
25 /// Open the `changelog` of a repository given by its root.
26 pub fn open(
26 pub fn open(
27 store_vfs: &Vfs,
27 store_vfs: &Vfs,
28 options: RevlogOpenOptions,
28 options: RevlogOpenOptions,
29 ) -> Result<Self, HgError> {
29 ) -> Result<Self, HgError> {
30 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
30 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
31 Ok(Self { revlog })
31 Ok(Self { revlog })
32 }
32 }
33
33
34 /// Return the `ChangelogRevisionData` for the given node ID.
34 /// Return the `ChangelogRevisionData` for the given node ID.
35 pub fn data_for_node(
35 pub fn data_for_node(
36 &self,
36 &self,
37 node: NodePrefix,
37 node: NodePrefix,
38 ) -> Result<ChangelogRevisionData, RevlogError> {
38 ) -> Result<ChangelogRevisionData, RevlogError> {
39 let rev = self.revlog.rev_from_node(node)?;
39 let rev = self.revlog.rev_from_node(node)?;
40 self.entry_for_checked_rev(rev)?.data()
40 self.entry_for_checked_rev(rev)?.data()
41 }
41 }
42
42
43 /// Return the [`ChangelogEntry`] for the given revision number.
43 /// Return the [`ChangelogEntry`] for the given revision number.
44 pub fn entry_for_rev(
44 pub fn entry_for_rev(
45 &self,
45 &self,
46 rev: UncheckedRevision,
46 rev: UncheckedRevision,
47 ) -> Result<ChangelogEntry, RevlogError> {
47 ) -> Result<ChangelogEntry, RevlogError> {
48 let revlog_entry = self.revlog.get_entry(rev)?;
48 let revlog_entry = self.revlog.get_entry(rev)?;
49 Ok(ChangelogEntry { revlog_entry })
49 Ok(ChangelogEntry { revlog_entry })
50 }
50 }
51
51
52 /// Same as [`Self::entry_for_rev`] for checked revisions.
52 /// Same as [`Self::entry_for_rev`] for checked revisions.
53 fn entry_for_checked_rev(
53 fn entry_for_checked_rev(
54 &self,
54 &self,
55 rev: Revision,
55 rev: Revision,
56 ) -> Result<ChangelogEntry, RevlogError> {
56 ) -> Result<ChangelogEntry, RevlogError> {
57 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
57 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
58 Ok(ChangelogEntry { revlog_entry })
58 Ok(ChangelogEntry { revlog_entry })
59 }
59 }
60
60
61 /// Return the [`ChangelogRevisionData`] for the given revision number.
61 /// Return the [`ChangelogRevisionData`] for the given revision number.
62 ///
62 ///
63 /// This is a useful shortcut in case the caller does not need the
63 /// This is a useful shortcut in case the caller does not need the
64 /// generic revlog information (parents, hashes etc). Otherwise
64 /// generic revlog information (parents, hashes etc). Otherwise
65 /// consider taking a [`ChangelogEntry`] with
65 /// consider taking a [`ChangelogEntry`] with
66 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
66 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
67 pub fn data_for_rev(
67 pub fn data_for_rev(
68 &self,
68 &self,
69 rev: UncheckedRevision,
69 rev: UncheckedRevision,
70 ) -> Result<ChangelogRevisionData, RevlogError> {
70 ) -> Result<ChangelogRevisionData, RevlogError> {
71 self.entry_for_rev(rev)?.data()
71 self.entry_for_rev(rev)?.data()
72 }
72 }
73
73
74 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
74 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
75 self.revlog.node_from_rev(rev)
75 self.revlog.node_from_rev(rev)
76 }
76 }
77
77
78 pub fn rev_from_node(
78 pub fn rev_from_node(
79 &self,
79 &self,
80 node: NodePrefix,
80 node: NodePrefix,
81 ) -> Result<Revision, RevlogError> {
81 ) -> Result<Revision, RevlogError> {
82 self.revlog.rev_from_node(node)
82 self.revlog.rev_from_node(node)
83 }
83 }
84 }
84 }
85
85
86 impl Graph for Changelog {
86 impl Graph for Changelog {
87 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
87 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
88 self.revlog.parents(rev)
88 self.revlog.parents(rev)
89 }
89 }
90 }
90 }
91
91
92 /// A specialized `RevlogEntry` for `changelog` data format
92 /// A specialized `RevlogEntry` for `changelog` data format
93 ///
93 ///
94 /// This is a `RevlogEntry` with the added semantics that the associated
94 /// This is a `RevlogEntry` with the added semantics that the associated
95 /// data should meet the requirements for `changelog`, materialized by
95 /// data should meet the requirements for `changelog`, materialized by
96 /// the fact that `data()` constructs a `ChangelogRevisionData`.
96 /// the fact that `data()` constructs a `ChangelogRevisionData`.
97 /// In case that promise would be broken, the `data` method returns an error.
97 /// In case that promise would be broken, the `data` method returns an error.
98 #[derive(Clone)]
98 #[derive(Clone)]
99 pub struct ChangelogEntry<'changelog> {
99 pub struct ChangelogEntry<'changelog> {
100 /// Same data, as a generic `RevlogEntry`.
100 /// Same data, as a generic `RevlogEntry`.
101 pub(crate) revlog_entry: RevlogEntry<'changelog>,
101 pub(crate) revlog_entry: RevlogEntry<'changelog>,
102 }
102 }
103
103
104 impl<'changelog> ChangelogEntry<'changelog> {
104 impl<'changelog> ChangelogEntry<'changelog> {
105 pub fn data<'a>(
105 pub fn data<'a>(
106 &'a self,
106 &'a self,
107 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
107 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
108 let bytes = self.revlog_entry.data()?;
108 let bytes = self.revlog_entry.data()?;
109 if bytes.is_empty() {
109 if bytes.is_empty() {
110 Ok(ChangelogRevisionData::null())
110 Ok(ChangelogRevisionData::null())
111 } else {
111 } else {
112 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
112 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
113 RevlogError::Other(HgError::CorruptedRepository(format!(
113 RevlogError::Other(HgError::CorruptedRepository(format!(
114 "Invalid changelog data for revision {}: {:?}",
114 "Invalid changelog data for revision {}: {:?}",
115 self.revlog_entry.revision(),
115 self.revlog_entry.revision(),
116 err
116 err
117 )))
117 )))
118 })?)
118 })?)
119 }
119 }
120 }
120 }
121
121
122 /// Obtain a reference to the underlying `RevlogEntry`.
122 /// Obtain a reference to the underlying `RevlogEntry`.
123 ///
123 ///
124 /// This allows the caller to access the information that is common
124 /// This allows the caller to access the information that is common
125 /// to all revlog entries: revision number, node id, parent revisions etc.
125 /// to all revlog entries: revision number, node id, parent revisions etc.
126 pub fn as_revlog_entry(&self) -> &RevlogEntry {
126 pub fn as_revlog_entry(&self) -> &RevlogEntry {
127 &self.revlog_entry
127 &self.revlog_entry
128 }
128 }
129
129
130 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
130 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 Ok(self
131 Ok(self
132 .revlog_entry
132 .revlog_entry
133 .p1_entry()?
133 .p1_entry()?
134 .map(|revlog_entry| Self { revlog_entry }))
134 .map(|revlog_entry| Self { revlog_entry }))
135 }
135 }
136
136
137 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
137 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
138 Ok(self
138 Ok(self
139 .revlog_entry
139 .revlog_entry
140 .p2_entry()?
140 .p2_entry()?
141 .map(|revlog_entry| Self { revlog_entry }))
141 .map(|revlog_entry| Self { revlog_entry }))
142 }
142 }
143 }
143 }
144
144
/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// The data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// The end offset for the hex manifest (not including the newline)
    manifest_end: usize,
    /// The end offset for the user+email (not including the newline)
    user_end: usize,
    /// The end offset for the timestamp+timezone+extras (not including the
    /// newline)
    timestamp_end: usize,
    /// The end offset for the file list (not including the newline)
    files_end: usize,
}
160
160
161 impl<'changelog> ChangelogRevisionData<'changelog> {
161 impl<'changelog> ChangelogRevisionData<'changelog> {
162 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
162 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
163 let mut line_iter = bytes.split(|b| b == &b'\n');
163 let mut line_iter = bytes.split(|b| b == &b'\n');
164 let manifest_end = line_iter
164 let manifest_end = line_iter
165 .next()
165 .next()
166 .expect("Empty iterator from split()?")
166 .expect("Empty iterator from split()?")
167 .len();
167 .len();
168 let user_slice = line_iter.next().ok_or_else(|| {
168 let user_slice = line_iter.next().ok_or_else(|| {
169 HgError::corrupted("Changeset data truncated after manifest line")
169 HgError::corrupted("Changeset data truncated after manifest line")
170 })?;
170 })?;
171 let user_end = manifest_end + 1 + user_slice.len();
171 let user_end = manifest_end + 1 + user_slice.len();
172 let timestamp_slice = line_iter.next().ok_or_else(|| {
172 let timestamp_slice = line_iter.next().ok_or_else(|| {
173 HgError::corrupted("Changeset data truncated after user line")
173 HgError::corrupted("Changeset data truncated after user line")
174 })?;
174 })?;
175 let timestamp_end = user_end + 1 + timestamp_slice.len();
175 let timestamp_end = user_end + 1 + timestamp_slice.len();
176 let mut files_end = timestamp_end + 1;
176 let mut files_end = timestamp_end + 1;
177 loop {
177 loop {
178 let line = line_iter.next().ok_or_else(|| {
178 let line = line_iter.next().ok_or_else(|| {
179 HgError::corrupted("Changeset data truncated in files list")
179 HgError::corrupted("Changeset data truncated in files list")
180 })?;
180 })?;
181 if line.is_empty() {
181 if line.is_empty() {
182 if files_end == bytes.len() {
182 if files_end == bytes.len() {
183 // The list of files ended with a single newline (there
183 // The list of files ended with a single newline (there
184 // should be two)
184 // should be two)
185 return Err(HgError::corrupted(
185 return Err(HgError::corrupted(
186 "Changeset data truncated after files list",
186 "Changeset data truncated after files list",
187 ));
187 ));
188 }
188 }
189 files_end -= 1;
189 files_end -= 1;
190 break;
190 break;
191 }
191 }
192 files_end += line.len() + 1;
192 files_end += line.len() + 1;
193 }
193 }
194
194
195 Ok(Self {
195 Ok(Self {
196 bytes,
196 bytes,
197 manifest_end,
197 manifest_end,
198 user_end,
198 user_end,
199 timestamp_end,
199 timestamp_end,
200 files_end,
200 files_end,
201 })
201 })
202 }
202 }
203
203
204 fn null() -> Self {
204 fn null() -> Self {
205 Self::new(Cow::Borrowed(
205 Self::new(Cow::Borrowed(
206 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
206 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
207 ))
207 ))
208 .unwrap()
208 .unwrap()
209 }
209 }
210
210
211 /// Return an iterator over the lines of the entry.
211 /// Return an iterator over the lines of the entry.
212 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
212 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
213 self.bytes.split(|b| b == &b'\n')
213 self.bytes.split(|b| b == &b'\n')
214 }
214 }
215
215
216 /// Return the node id of the `manifest` referenced by this `changelog`
216 /// Return the node id of the `manifest` referenced by this `changelog`
217 /// entry.
217 /// entry.
218 pub fn manifest_node(&self) -> Result<Node, HgError> {
218 pub fn manifest_node(&self) -> Result<Node, HgError> {
219 let manifest_node_hex = &self.bytes[..self.manifest_end];
219 let manifest_node_hex = &self.bytes[..self.manifest_end];
220 Node::from_hex_for_repo(manifest_node_hex)
220 Node::from_hex_for_repo(manifest_node_hex)
221 }
221 }
222
222
223 /// The full user string (usually a name followed by an email enclosed in
223 /// The full user string (usually a name followed by an email enclosed in
224 /// angle brackets)
224 /// angle brackets)
225 pub fn user(&self) -> &[u8] {
225 pub fn user(&self) -> &[u8] {
226 &self.bytes[self.manifest_end + 1..self.user_end]
226 &self.bytes[self.manifest_end + 1..self.user_end]
227 }
227 }
228
228
229 /// The full timestamp line (timestamp in seconds, offset in seconds, and
229 /// The full timestamp line (timestamp in seconds, offset in seconds, and
230 /// possibly extras)
230 /// possibly extras)
231 // TODO: We should expose this in a more useful way
231 // TODO: We should expose this in a more useful way
232 pub fn timestamp_line(&self) -> &[u8] {
232 pub fn timestamp_line(&self) -> &[u8] {
233 &self.bytes[self.user_end + 1..self.timestamp_end]
233 &self.bytes[self.user_end + 1..self.timestamp_end]
234 }
234 }
235
235
236 /// Parsed timestamp line, including optional extras.
236 /// Parsed timestamp.
237 pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> {
237 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
238 TimestampAndExtra::from_bytes(self.timestamp_line())
238 parse_timestamp(self.timestamp_line())
239 }
240
241 /// Optional commit extras.
242 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
243 parse_timestamp_line_extra(self.timestamp_line())
239 }
244 }
240
245
241 /// The files changed in this revision.
246 /// The files changed in this revision.
242 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
247 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
243 if self.timestamp_end == self.files_end {
248 if self.timestamp_end == self.files_end {
244 Either::Left(iter::empty())
249 Either::Left(iter::empty())
245 } else {
250 } else {
246 Either::Right(
251 Either::Right(
247 self.bytes[self.timestamp_end + 1..self.files_end]
252 self.bytes[self.timestamp_end + 1..self.files_end]
248 .split(|b| b == &b'\n')
253 .split(|b| b == &b'\n')
249 .map(HgPath::new),
254 .map(HgPath::new),
250 )
255 )
251 }
256 }
252 }
257 }
253
258
254 /// The change description.
259 /// The change description.
255 pub fn description(&self) -> &[u8] {
260 pub fn description(&self) -> &[u8] {
256 &self.bytes[self.files_end + 2..]
261 &self.bytes[self.files_end + 2..]
257 }
262 }
258 }
263 }
259
264
260 impl Debug for ChangelogRevisionData<'_> {
265 impl Debug for ChangelogRevisionData<'_> {
261 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
266 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
262 f.debug_struct("ChangelogRevisionData")
267 f.debug_struct("ChangelogRevisionData")
263 .field("bytes", &debug_bytes(&self.bytes))
268 .field("bytes", &debug_bytes(&self.bytes))
264 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
269 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
265 .field(
270 .field(
266 "user",
271 "user",
267 &debug_bytes(
272 &debug_bytes(
268 &self.bytes[self.manifest_end + 1..self.user_end],
273 &self.bytes[self.manifest_end + 1..self.user_end],
269 ),
274 ),
270 )
275 )
271 .field(
276 .field(
272 "timestamp",
277 "timestamp",
273 &debug_bytes(
278 &debug_bytes(
274 &self.bytes[self.user_end + 1..self.timestamp_end],
279 &self.bytes[self.user_end + 1..self.timestamp_end],
275 ),
280 ),
276 )
281 )
277 .field(
282 .field(
278 "files",
283 "files",
279 &debug_bytes(
284 &debug_bytes(
280 &self.bytes[self.timestamp_end + 1..self.files_end],
285 &self.bytes[self.timestamp_end + 1..self.files_end],
281 ),
286 ),
282 )
287 )
283 .field(
288 .field(
284 "description",
289 "description",
285 &debug_bytes(&self.bytes[self.files_end + 2..]),
290 &debug_bytes(&self.bytes[self.files_end + 2..]),
286 )
291 )
287 .finish()
292 .finish()
288 }
293 }
289 }
294 }
290
295
/// Render arbitrary bytes as a printable ASCII string for debug output.
///
/// Each byte goes through [`std::ascii::escape_default`], so non-printable
/// and non-ASCII bytes come out as escapes such as `\n` or `\xff`.
fn debug_bytes(bytes: &[u8]) -> String {
    // `escape_default` only yields ASCII bytes, so each one maps directly
    // to a `char` without a lossy UTF-8 pass or an intermediate `Vec`.
    bytes
        .iter()
        .flat_map(|b| escape_default(*b))
        .map(char::from)
        .collect()
}
297
302
298 /// Parsed timestamp line, including the timestamp and optional extras.
303 /// Parse the raw bytes of the timestamp line from a changelog entry.
299 #[derive(Clone, Debug)]
304 ///
300 pub struct TimestampAndExtra {
305 /// According to the documentation in `hg help dates` and the
301 pub timestamp: DateTime<FixedOffset>,
306 /// implementation in `changelog.py`, the format of the timestamp line
302 pub extra: BTreeMap<String, Vec<u8>>,
307 /// is `time tz extra\n` where:
303 }
308 ///
309 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
310 /// as seconds since the UNIX epoch.
311 ///
312 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
313 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
314 /// opposite of the sign in ISO 8601 timestamps).
315 ///
316 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
317 /// and value in each pair separated by an ASCII colon. Keys are limited to
318 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
319 /// arbitrary bytes.
320 fn parse_timestamp(
321 timestamp_line: &[u8],
322 ) -> Result<DateTime<FixedOffset>, HgError> {
323 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
304
324
305 impl TimestampAndExtra {
325 let timestamp_bytes = parts
306 /// Parse the raw bytes of the timestamp line from a changelog entry.
326 .next()
307 ///
327 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
308 /// According to the documentation in `hg help dates` and the
328 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
309 /// implementation in `changelog.py`, the format of the timestamp line
329 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
310 /// is `time tz extra\n` where:
330 })?;
311 ///
331 let timestamp_utc = timestamp_str
312 /// - `time` is an ASCII-encoded signed int or float denoting a UTC
332 .parse()
313 /// timestamp as seconds since the UNIX epoch.
333 .map_err(|e| {
314 ///
334 HgError::corrupted(format!("failed to parse timestamp: {e}"))
315 /// - `tz` is the timezone offset as an ASCII-encoded signed integer
335 })
316 /// denoting seconds WEST of UTC (so negative for timezones east of UTC,
336 .and_then(|secs| {
317 /// which is the opposite of the sign in ISO 8601 timestamps).
337 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
318 ///
338 HgError::corrupted(format!(
319 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the
339 "integer timestamp out of valid range: {secs}"
320 /// key and value in each pair separated by an ASCII colon. Keys are
340 ))
321 /// limited to ASCII letters, digits, hyphens, and underscores, whereas
322 /// values can be arbitrary bytes.
323 fn from_bytes(line: &[u8]) -> Result<Self, HgError> {
324 let mut parts = line.splitn(3, |c| *c == b' ');
325
326 let timestamp_bytes = parts
327 .next()
328 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
329 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
330 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
331 })?;
332 let timestamp_utc = timestamp_str
333 .parse()
334 .map_err(|e| {
335 HgError::corrupted(format!("failed to parse timestamp: {e}"))
336 })
341 })
337 .and_then(|secs| {
342 })
338 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
343 // Attempt to parse the timestamp as a float if we can't parse
339 HgError::corrupted(format!(
344 // it as an int. It doesn't seem like float timestamps are actually
340 "integer timestamp out of valid range: {secs}"
345 // used in practice, but the Python code supports them.
341 ))
346 .or_else(|_| parse_float_timestamp(timestamp_str))?;
342 })
343 })
344 // Attempt to parse the timestamp as a float if we can't parse
345 // it as an int. It doesn't seem like float timestamps are actually
346 // used in practice, but the Python code supports them.
347 .or_else(|_| parse_float_timestamp(timestamp_str))?;
348
347
349 let timezone_bytes = parts
348 let timezone_bytes = parts
350 .next()
349 .next()
351 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
350 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
352 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
351 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
353 .map_err(|e| {
352 .map_err(|e| {
354 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
353 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
355 })?
354 })?
356 .parse()
355 .parse()
357 .map_err(|e| {
356 .map_err(|e| {
358 HgError::corrupted(format!("timezone is not an integer: {e}"))
357 HgError::corrupted(format!("timezone is not an integer: {e}"))
359 })?;
358 })?;
360 let timezone =
359 let timezone = FixedOffset::west_opt(timezone_secs)
361 FixedOffset::west_opt(timezone_secs).ok_or_else(|| {
360 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
362 HgError::corrupted("timezone offset out of bounds")
363 })?;
364
361
365 let timestamp =
362 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
366 DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);
367 let extra = parts
368 .next()
369 .map(parse_extra)
370 .transpose()?
371 .unwrap_or_default();
372
373 Ok(Self { timestamp, extra })
374 }
375 }
363 }
376
364
377 /// Attempt to parse the given string as floating-point timestamp, and
365 /// Attempt to parse the given string as floating-point timestamp, and
378 /// convert the result into a `chrono::NaiveDateTime`.
366 /// convert the result into a `chrono::NaiveDateTime`.
379 fn parse_float_timestamp(
367 fn parse_float_timestamp(
380 timestamp_str: &str,
368 timestamp_str: &str,
381 ) -> Result<NaiveDateTime, HgError> {
369 ) -> Result<NaiveDateTime, HgError> {
382 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
370 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
383 HgError::corrupted(format!("failed to parse timestamp: {e}"))
371 HgError::corrupted(format!("failed to parse timestamp: {e}"))
384 })?;
372 })?;
385
373
386 // To construct a `NaiveDateTime` we'll need to convert the float
374 // To construct a `NaiveDateTime` we'll need to convert the float
387 // into signed integer seconds and unsigned integer nanoseconds.
375 // into signed integer seconds and unsigned integer nanoseconds.
388 let mut secs = timestamp.trunc() as i64;
376 let mut secs = timestamp.trunc() as i64;
389 let mut subsecs = timestamp.fract();
377 let mut subsecs = timestamp.fract();
390
378
391 // If the timestamp is negative, we need to express the fractional
379 // If the timestamp is negative, we need to express the fractional
392 // component as positive nanoseconds since the previous second.
380 // component as positive nanoseconds since the previous second.
393 if timestamp < 0.0 {
381 if timestamp < 0.0 {
394 secs -= 1;
382 secs -= 1;
395 subsecs += 1.0;
383 subsecs += 1.0;
396 }
384 }
397
385
398 // This cast should be safe because the fractional component is
386 // This cast should be safe because the fractional component is
399 // by definition less than 1.0, so this value should not exceed
387 // by definition less than 1.0, so this value should not exceed
400 // 1 billion, which is representable as an f64 without loss of
388 // 1 billion, which is representable as an f64 without loss of
401 // precision and should fit into a u32 without overflowing.
389 // precision and should fit into a u32 without overflowing.
402 //
390 //
403 // (Any loss of precision in the fractional component will have
391 // (Any loss of precision in the fractional component will have
404 // already happened at the time of initial parsing; in general,
392 // already happened at the time of initial parsing; in general,
405 // f64s are insufficiently precise to provide nanosecond-level
393 // f64s are insufficiently precise to provide nanosecond-level
406 // precision with present-day timestamps.)
394 // precision with present-day timestamps.)
407 let nsecs = (subsecs * 1_000_000_000.0) as u32;
395 let nsecs = (subsecs * 1_000_000_000.0) as u32;
408
396
409 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
397 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
410 HgError::corrupted(format!(
398 HgError::corrupted(format!(
411 "float timestamp out of valid range: {timestamp}"
399 "float timestamp out of valid range: {timestamp}"
412 ))
400 ))
413 })
401 })
414 }
402 }
415
403
416 /// Parse the "extra" fields from a changeset's timestamp line.
404 /// Decode changeset extra fields.
417 ///
405 ///
418 /// Extras are null-delimited key-value pairs where the key consists of ASCII
406 /// Extras are null-delimited key-value pairs where the key consists of ASCII
419 /// alphanumeric characters plus hyphens and underscores, and the value can
407 /// alphanumeric characters plus hyphens and underscores, and the value can
420 /// contain arbitrary bytes.
408 /// contain arbitrary bytes.
421 fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
409 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
422 extra
410 extra
423 .split(|c| *c == b'\0')
411 .split(|c| *c == b'\0')
424 .map(|pair| {
412 .map(|pair| {
425 let pair = unescape_extra(pair);
413 let pair = unescape_extra(pair);
426 let mut iter = pair.splitn(2, |c| *c == b':');
414 let mut iter = pair.splitn(2, |c| *c == b':');
427
415
428 let key_bytes =
416 let key_bytes =
429 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
417 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
430 HgError::corrupted("empty key in changeset extras")
418 HgError::corrupted("empty key in changeset extras")
431 })?;
419 })?;
432
420
433 let key = str::from_utf8(key_bytes)
421 let key = str::from_utf8(key_bytes)
434 .ok()
422 .ok()
435 .filter(|k| {
423 .filter(|k| {
436 k.chars().all(|c| {
424 k.chars().all(|c| {
437 c.is_ascii_alphanumeric() || c == '_' || c == '-'
425 c.is_ascii_alphanumeric() || c == '_' || c == '-'
438 })
426 })
439 })
427 })
440 .ok_or_else(|| {
428 .ok_or_else(|| {
441 let key = String::from_utf8_lossy(key_bytes);
429 let key = String::from_utf8_lossy(key_bytes);
442 HgError::corrupted(format!(
430 HgError::corrupted(format!(
443 "invalid key in changeset extras: {key}",
431 "invalid key in changeset extras: {key}",
444 ))
432 ))
445 })?
433 })?
446 .to_string();
434 .to_string();
447
435
448 let value = iter.next().map(Into::into).ok_or_else(|| {
436 let value = iter.next().map(Into::into).ok_or_else(|| {
449 HgError::corrupted(format!(
437 HgError::corrupted(format!(
450 "missing value for changeset extra: {key}"
438 "missing value for changeset extra: {key}"
451 ))
439 ))
452 })?;
440 })?;
453
441
454 Ok((key, value))
442 Ok((key, value))
455 })
443 })
456 .collect()
444 .collect()
457 }
445 }
458
446
447 /// Parse the extra fields from a changeset's timestamp line.
448 fn parse_timestamp_line_extra(
449 timestamp_line: &[u8],
450 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
451 Ok(timestamp_line
452 .splitn(3, |c| *c == b' ')
453 .nth(2)
454 .map(decode_extra)
455 .transpose()?
456 .unwrap_or_default())
457 }
458
/// Decode Mercurial's escaping for changelog extras.
///
/// The `_string_escape` function in `changelog.py` only escapes 4 characters
/// (null, backslash, newline, and carriage return) so we only decode those.
///
/// The Python code also includes a workaround for decoding escaped nuls
/// that are followed by an ASCII octal digit, since Python's built-in
/// `string_escape` codec will interpret that as an escaped octal byte value.
/// That workaround is omitted here since we don't support decoding octal.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut output = Vec::with_capacity(bytes.len());
    let mut input = bytes.iter().copied();

    // A `for` loop cannot be used here: decoding an escape pulls a second
    // byte from the same iterator.
    while let Some(byte) = input.next() {
        if byte != b'\\' {
            output.push(byte);
            continue;
        }

        match input.next() {
            Some(b'0') => output.push(b'\0'),
            Some(b'\\') => output.push(b'\\'),
            Some(b'n') => output.push(b'\n'),
            Some(b'r') => output.push(b'\r'),
            // The following cases should never occur in theory because any
            // backslashes in the original input should have been escaped
            // with another backslash, so it should not be possible to
            // observe an escape sequence other than the 4 above. They are
            // passed through unchanged.
            Some(other) => output.extend_from_slice(&[b'\\', other]),
            None => output.push(b'\\'),
        }
    }

    output
}
494
494
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::Vfs;
    use crate::NULL_REVISION;
    use pretty_assertions::assert_eq;

    /// Malformed changeset data must be rejected by
    /// `ChangelogRevisionData::new`.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        // NOTE(review): this input is the same `b"abcd"` as the next case,
        // not an empty slice; `b""` was presumably intended. Confirm how
        // `ChangelogRevisionData::new` treats empty input before changing.
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err());
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err());
    }

    /// A well-formed changeset exposes each of its fields through the
    /// corresponding accessor.
    #[test]
    fn test_create_changelogrevisiondata() {
        // The literal spans several source lines on purpose: its embedded
        // newlines are the field separators, so the continuation lines must
        // not be indented.
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// Looking up the null revision yields `ChangelogRevisionData::null()`,
    /// both directly and through the intermediate entry object.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                .unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION.into())?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }

    /// The null changeset lists no files.
    #[test]
    fn test_empty_files_list() {
        assert!(ChangelogRevisionData::null()
            .files()
            .collect_vec()
            .is_empty());
    }

    #[test]
    fn test_unescape_basic() {
        // '\0', '\\', '\n', and '\r' are correctly unescaped.
        let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
        let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_unsupported_sequence() {
        // Other escape sequences are left unaltered.
        //
        // The range is inclusive so that byte 0xFF is exercised as well; a
        // half-open `0u8..255` would silently leave it out.
        for c in u8::MIN..=u8::MAX {
            if matches!(c, b'0' | b'\\' | b'n' | b'r') {
                continue;
            }
            let expected = &[b'\\', c][..];
            let unescaped = unescape_extra(expected);
            assert_eq!(expected, &unescaped[..]);
        }
    }

    #[test]
    fn test_unescape_trailing_backslash() {
        // Trailing backslashes are OK.
        let expected = br"hi\";
        let unescaped = unescape_extra(expected);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_nul_followed_by_octal() {
        // Escaped NUL chars followed by octal digits are decoded correctly.
        // Rust byte strings have no octal escapes, so `b"\012"` is the three
        // bytes NUL, '1', '2'.
        let expected = b"\012";
        let escaped = br"\012";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_parse_float_timestamp() {
        let test_cases = [
            // Zero should map to the UNIX epoch.
            ("0.0", "1970-01-01 00:00:00"),
            // Negative zero should be the same as positive zero.
            ("-0.0", "1970-01-01 00:00:00"),
            // Values without fractional components should work like integers.
            // (Assuming the timestamp is within the limits of f64 precision.)
            ("1115154970.0", "2005-05-03 21:16:10"),
            // We expect some loss of precision in the fractional component
            // when parsing arbitrary floating-point values.
            ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
            // But representable f64 values should parse losslessly.
            ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
            // Negative fractional components are subtracted from the epoch.
            ("-1.333", "1969-12-31 23:59:58.667"),
        ];

        for (input, expected) in test_cases {
            let res = parse_float_timestamp(input).unwrap().to_string();
            assert_eq!(res, expected);
        }
    }

    /// Test-only inverse of `unescape_extra`: NUL, backslash, newline and
    /// carriage return become two-byte backslash sequences; every other byte
    /// is copied through unchanged.
    fn escape_extra(bytes: &[u8]) -> Vec<u8> {
        let mut output = Vec::with_capacity(bytes.len());

        for c in bytes.iter().copied() {
            match c {
                b'\0' => output.extend_from_slice(b"\\0"),
                b'\\' => output.extend_from_slice(b"\\\\"),
                b'\n' => output.extend_from_slice(b"\\n"),
                b'\r' => output.extend_from_slice(b"\\r"),
                _ => output.push(c),
            }
        }

        output
    }

    /// Test-only encoder: each pair is rendered as `key:value`, escaped with
    /// `escape_extra`, and the escaped pairs are joined with NUL bytes.
    fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
    where
        K: AsRef<[u8]>,
        V: AsRef<[u8]>,
    {
        let extras = pairs.into_iter().map(|(k, v)| {
            escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
        });
        // Use fully-qualified syntax to avoid a future naming conflict with
        // the standard library: https://github.com/rust-lang/rust/issues/79524
        Itertools::intersperse(extras, b"\0".to_vec()).concat()
    }

    /// Fixture shared by the round-trip tests below. The binary value
    /// contains every possible byte, so all four escaped characters are
    /// covered.
    fn sample_extra() -> BTreeMap<String, Vec<u8>> {
        [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>()
    }

    #[test]
    fn test_decode_extra() {
        let extra = sample_extra();

        let encoded = encode_extra(&extra);
        let decoded = decode_extra(&encoded).unwrap();

        assert_eq!(extra, decoded);
    }

    #[test]
    fn test_corrupt_extra() {
        let test_cases = [
            (&b""[..], "empty input"),
            (&b"\0"[..], "unexpected null byte"),
            (&b":empty-key"[..], "empty key"),
            (&b"\0leading-null:"[..], "leading null"),
            (&b"trailing-null:\0"[..], "trailing null"),
            (&b"missing-value"[..], "missing value"),
            (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
            (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
        ];

        for (extra, msg) in test_cases {
            assert!(
                decode_extra(&extra).is_err(),
                "corrupt extra should have failed to parse: {}",
                msg
            );
        }
    }

    #[test]
    fn test_parse_timestamp_line() {
        let extra = sample_extra();

        let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
        line.extend_from_slice(&encode_extra(&extra));

        // The timestamp and the extras are parsed separately from the same
        // line.
        let timestamp = parse_timestamp(&line).unwrap();
        assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

        let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
        assert_eq!(extra, parsed_extra);
    }
}
General Comments 0
You need to be logged in to leave comments. Login now