Show More
@@ -233,9 +233,14 b" impl<'changelog> ChangelogRevisionData<'" | |||
|
233 | 233 | &self.bytes[self.user_end + 1..self.timestamp_end] |
|
234 | 234 | } |
|
235 | 235 | |
|
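236 | /// Parsed timestamp line, including optional extras. | |
|
237 | pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> { | |
|
238 | TimestampAndExtra::from_bytes(self.timestamp_line()) | |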
|
236 | /// Parsed timestamp. | |
|
237 | pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> { | |
|
238 | parse_timestamp(self.timestamp_line()) | |
|
239 | } | |
|
240 | ||
|
241 | /// Optional commit extras. | |
|
242 | pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
|
243 | parse_timestamp_line_extra(self.timestamp_line()) | |
|
239 | 244 | } |
|
240 | 245 | |
|
241 | 246 | /// The files changed in this revision. |
@@ -295,83 +300,66 b' fn debug_bytes(bytes: &[u8]) -> String {' | |||
|
295 | 300 | .to_string() |
|
296 | 301 | } |
|
297 | 302 | |
|
298 | /// Parsed timestamp line, including the timestamp and optional extras. | |
|
299 | #[derive(Clone, Debug)] | |
|
300 | pub struct TimestampAndExtra { | |
|
301 | pub timestamp: DateTime<FixedOffset>, | |
|
302 | pub extra: BTreeMap<String, Vec<u8>>, | |
|
303 | } | |
|
303 | /// Parse the raw bytes of the timestamp line from a changelog entry. | |
|
304 | /// | |
|
305 | /// According to the documentation in `hg help dates` and the | |
|
306 | /// implementation in `changelog.py`, the format of the timestamp line | |
|
307 | /// is `time tz extra\n` where: | |
|
308 | /// | |
|
309 | /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp | |
|
310 | /// as seconds since the UNIX epoch. | |
|
311 | /// | |
|
312 | /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting | |
|
313 | /// seconds WEST of UTC (so negative for timezones east of UTC, which is the | |
|
314 | /// opposite of the sign in ISO 8601 timestamps). | |
|
315 | /// | |
|
316 | /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key | |
|
317 | /// and value in each pair separated by an ASCII colon. Keys are limited to | |
|
318 | /// ASCII letters, digits, hyphens, and underscores, whereas values can be | |
|
319 | /// arbitrary bytes. | |
|
320 | fn parse_timestamp( | |
|
321 | timestamp_line: &[u8], | |
|
322 | ) -> Result<DateTime<FixedOffset>, HgError> { | |
|
323 | let mut parts = timestamp_line.splitn(3, |c| *c == b' '); | |
|
304 | 324 | |
|
305 | impl TimestampAndExtra { | |
|
306 | /// Parse the raw bytes of the timestamp line from a changelog entry. | |
|
307 | /// | |
|
308 | /// According to the documentation in `hg help dates` and the | |
|
309 | /// implementation in `changelog.py`, the format of the timestamp line | |
|
310 | /// is `time tz extra\n` where: | |
|
311 | /// | |
|
312 | /// - `time` is an ASCII-encoded signed int or float denoting a UTC | |
|
313 | /// timestamp as seconds since the UNIX epoch. | |
|
314 | /// | |
|
315 | /// - `tz` is the timezone offset as an ASCII-encoded signed integer | |
|
316 | /// denoting seconds WEST of UTC (so negative for timezones east of UTC, | |
|
317 | /// which is the opposite of the sign in ISO 8601 timestamps). | |
|
318 | /// | |
|
319 | /// - `extra` is an optional set of NUL-delimited key-value pairs, with the | |
|
320 | /// key and value in each pair separated by an ASCII colon. Keys are | |
|
321 | /// limited to ASCII letters, digits, hyphens, and underscores, whereas | |
|
322 | /// values can be arbitrary bytes. | |
|
323 | fn from_bytes(line: &[u8]) -> Result<Self, HgError> { | |
|
324 | let mut parts = line.splitn(3, |c| *c == b' '); | |
|
325 | ||
|
326 | let timestamp_bytes = parts | |
|
327 | .next() | |
|
328 | .ok_or_else(|| HgError::corrupted("missing timestamp"))?; | |
|
329 | let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| { | |
|
330 | HgError::corrupted(format!("timestamp is not valid UTF-8: {e}")) | |
|
331 | })?; | |
|
332 | let timestamp_utc = timestamp_str | |
|
333 | .parse() | |
|
334 | .map_err(|e| { | |
|
335 | HgError::corrupted(format!("failed to parse timestamp: {e}")) | |
|
325 | let timestamp_bytes = parts | |
|
326 | .next() | |
|
327 | .ok_or_else(|| HgError::corrupted("missing timestamp"))?; | |
|
328 | let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| { | |
|
329 | HgError::corrupted(format!("timestamp is not valid UTF-8: {e}")) | |
|
330 | })?; | |
|
331 | let timestamp_utc = timestamp_str | |
|
332 | .parse() | |
|
333 | .map_err(|e| { | |
|
334 | HgError::corrupted(format!("failed to parse timestamp: {e}")) | |
|
335 | }) | |
|
336 | .and_then(|secs| { | |
|
337 | NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| { | |
|
338 | HgError::corrupted(format!( | |
|
339 | "integer timestamp out of valid range: {secs}" | |
|
340 | )) | |
|
336 | 341 | }) |
|
337 | .and_then(|secs| { | |
|
338 | NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| { | |
|
339 | HgError::corrupted(format!( | |
|
340 | "integer timestamp out of valid range: {secs}" | |
|
341 | )) | |
|
342 | }) | |
|
343 | }) | |
|
344 | // Attempt to parse the timestamp as a float if we can't parse | |
|
345 | // it as an int. It doesn't seem like float timestamps are actually | |
|
346 | // used in practice, but the Python code supports them. | |
|
347 | .or_else(|_| parse_float_timestamp(timestamp_str))?; | |
|
342 | }) | |
|
343 | // Attempt to parse the timestamp as a float if we can't parse | |
|
344 | // it as an int. It doesn't seem like float timestamps are actually | |
|
345 | // used in practice, but the Python code supports them. | |
|
346 | .or_else(|_| parse_float_timestamp(timestamp_str))?; | |
|
348 | 347 | |
|
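349 | let timezone_bytes = parts | |
|
350 | .next() | |
|
351 | .ok_or_else(|| HgError::corrupted("missing timezone"))?; | |
|
352 | let timezone_secs: i32 = str::from_utf8(timezone_bytes) | |
|
353 | .map_err(|e| { | |
|
354 | HgError::corrupted(format!("timezone is not valid UTF-8: {e}")) | |
|
355 | })? | |
|
356 | .parse() | |
|
357 | .map_err(|e| { | |
|
358 | HgError::corrupted(format!("timezone is not an integer: {e}")) | |
|
359 | })?; | |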
|
360 | let timezone = | |
|
361 | FixedOffset::west_opt(timezone_secs).ok_or_else(|| { | |
|
362 | HgError::corrupted("timezone offset out of bounds") | |
|
363 | })?; | |
|
348 | let timezone_bytes = parts | |
|
349 | .next() | |
|
350 | .ok_or_else(|| HgError::corrupted("missing timezone"))?; | |
|
351 | let timezone_secs: i32 = str::from_utf8(timezone_bytes) | |
|
352 | .map_err(|e| { | |
|
353 | HgError::corrupted(format!("timezone is not valid UTF-8: {e}")) | |
|
354 | })? | |
|
355 | .parse() | |
|
356 | .map_err(|e| { | |
|
357 | HgError::corrupted(format!("timezone is not an integer: {e}")) | |
|
358 | })?; | |
|
359 | let timezone = FixedOffset::west_opt(timezone_secs) | |
|
360 | .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?; | |
|
364 | 361 | |
|
365 | let timestamp = | |
|
366 | DateTime::from_naive_utc_and_offset(timestamp_utc, timezone); | |
|
367 | let extra = parts | |
|
368 | .next() | |
|
369 | .map(parse_extra) | |
|
370 | .transpose()? | |
|
371 | .unwrap_or_default(); | |
|
372 | ||
|
373 | Ok(Self { timestamp, extra }) | |
|
374 | } | |
|
362 | Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone)) | |
|
375 | 363 | } |
|
376 | 364 | |
|
377 | 365 | /// Attempt to parse the given string as floating-point timestamp, and |
@@ -413,12 +401,12 b' fn parse_float_timestamp(' | |||
|
413 | 401 | }) |
|
414 | 402 | } |
|
415 | 403 | |
|
416 | /// Parse the "extra" fields from a changeset's timestamp line. | |
|
404 | /// Decode changeset extra fields. | |
|
417 | 405 | /// |
|
418 | 406 | /// Extras are null-delimited key-value pairs where the key consists of ASCII |
|
419 | 407 | /// alphanumeric characters plus hyphens and underscores, and the value can |
|
420 | 408 | /// contain arbitrary bytes. |
|
421 | fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
|
409 | fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
|
422 | 410 | extra |
|
423 | 411 | .split(|c| *c == b'\0') |
|
424 | 412 | .map(|pair| { |
@@ -456,6 +444,18 b' fn parse_extra(extra: &[u8]) -> Result<B' | |||
|
456 | 444 | .collect() |
|
457 | 445 | } |
|
458 | 446 | |
|
447 | /// Parse the extra fields from a changeset's timestamp line. | |
|
448 | fn parse_timestamp_line_extra( | |
|
449 | timestamp_line: &[u8], | |
|
450 | ) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | |
|
451 | Ok(timestamp_line | |
|
452 | .splitn(3, |c| *c == b' ') | |
|
453 | .nth(2) | |
|
454 | .map(decode_extra) | |
|
455 | .transpose()? | |
|
456 | .unwrap_or_default()) | |
|
457 | } | |
|
458 | ||
|
459 | 459 | /// Decode Mercurial's escaping for changelog extras. |
|
460 | 460 | /// |
|
461 | 461 | /// The `_string_escape` function in `changelog.py` only escapes 4 characters |
@@ -681,7 +681,7 b' message",' | |||
|
681 | 681 | } |
|
682 | 682 | |
|
683 | 683 | #[test] |
|
684 | fn test_parse_extra() { | |
|
684 | fn test_decode_extra() { | |
|
685 | 685 | let extra = [ |
|
686 | 686 | ("branch".into(), b"default".to_vec()), |
|
687 | 687 | ("key-with-hyphens".into(), b"value1".to_vec()), |
@@ -693,9 +693,9 b' message",' | |||
|
693 | 693 | .collect::<BTreeMap<String, Vec<u8>>>(); |
|
694 | 694 | |
|
695 | 695 | let encoded = encode_extra(&extra); |
|
696 | let parsed = parse_extra(&encoded).unwrap(); | |
|
696 | let decoded = decode_extra(&encoded).unwrap(); | |
|
697 | 697 | |
|
698 | assert_eq!(extra, parsed); | |
|
698 | assert_eq!(extra, decoded); | |
|
699 | 699 | } |
|
700 | 700 | |
|
701 | 701 | #[test] |
@@ -713,7 +713,7 b' message",' | |||
|
713 | 713 | |
|
714 | 714 | for (extra, msg) in test_cases { |
|
715 | 715 | assert!( |
|
716 | parse_extra(&extra).is_err(), | |
|
716 | decode_extra(&extra).is_err(), | |
|
717 | 717 | "corrupt extra should have failed to parse: {}", |
|
718 | 718 | msg |
|
719 | 719 | ); |
@@ -735,12 +735,10 b' message",' | |||
|
735 | 735 | let mut line: Vec<u8> = b"1115154970 28800 ".to_vec(); |
|
736 | 736 | line.extend_from_slice(&encode_extra(&extra)); |
|
737 | 737 | |
|
738 | let parsed = TimestampAndExtra::from_bytes(&line).unwrap(); | |
|
738 | let timestamp = parse_timestamp(&line).unwrap(); | |
|
739 | assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00"); | |
|
739 | 740 | |
|
740 | assert_eq!( | |
|
741 | &parsed.timestamp.to_rfc3339(), | |
|
742 | "2005-05-03T13:16:10-08:00" | |
|
743 | ); | |
|
744 | assert_eq!(extra, parsed.extra); | |
|
741 | let parsed_extra = parse_timestamp_line_extra(&line).unwrap(); | |
|
742 | assert_eq!(extra, parsed_extra); | |
|
745 | 743 | } |
|
746 | 744 | } |
General Comments 0
You need to be logged in to leave comments.
Login now