##// END OF EJS Templates
hg-core: separate timestamp and extra methods
Arun Kulshreshtha -
r52286:6603a144 default
parent child Browse files
Show More
@@ -233,9 +233,14 b" impl<'changelog> ChangelogRevisionData<'"
233 233 &self.bytes[self.user_end + 1..self.timestamp_end]
234 234 }
235 235
236 /// Parsed timestamp line, including optional extras.
237 pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> {
238 TimestampAndExtra::from_bytes(self.timestamp_line())
236 /// Parsed timestamp.
237 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
238 parse_timestamp(self.timestamp_line())
239 }
240
241 /// Optional commit extras.
242 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
243 parse_timestamp_line_extra(self.timestamp_line())
239 244 }
240 245
241 246 /// The files changed in this revision.
@@ -295,83 +300,66 b' fn debug_bytes(bytes: &[u8]) -> String {'
295 300 .to_string()
296 301 }
297 302
298 /// Parsed timestamp line, including the timestamp and optional extras.
299 #[derive(Clone, Debug)]
300 pub struct TimestampAndExtra {
301 pub timestamp: DateTime<FixedOffset>,
302 pub extra: BTreeMap<String, Vec<u8>>,
303 }
303 /// Parse the timestamp from the raw bytes of a changelog entry's timestamp line.
304 ///
305 /// According to the documentation in `hg help dates` and the
306 /// implementation in `changelog.py`, the format of the timestamp line
307 /// is `time tz extra\n` where:
308 ///
309 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
310 /// as seconds since the UNIX epoch.
311 ///
312 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
313 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
314 /// opposite of the sign in ISO 8601 timestamps).
315 ///
316 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
317 /// and value in each pair separated by an ASCII colon. Keys are limited to
318 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
319 /// arbitrary bytes.
320 fn parse_timestamp(
321 timestamp_line: &[u8],
322 ) -> Result<DateTime<FixedOffset>, HgError> {
323 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
304 324
305 impl TimestampAndExtra {
306 /// Parse the raw bytes of the timestamp line from a changelog entry.
307 ///
308 /// According to the documentation in `hg help dates` and the
309 /// implementation in `changelog.py`, the format of the timestamp line
310 /// is `time tz extra\n` where:
311 ///
312 /// - `time` is an ASCII-encoded signed int or float denoting a UTC
313 /// timestamp as seconds since the UNIX epoch.
314 ///
315 /// - `tz` is the timezone offset as an ASCII-encoded signed integer
316 /// denoting seconds WEST of UTC (so negative for timezones east of UTC,
317 /// which is the opposite of the sign in ISO 8601 timestamps).
318 ///
319 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the
320 /// key and value in each pair separated by an ASCII colon. Keys are
321 /// limited to ASCII letters, digits, hyphens, and underscores, whereas
322 /// values can be arbitrary bytes.
323 fn from_bytes(line: &[u8]) -> Result<Self, HgError> {
324 let mut parts = line.splitn(3, |c| *c == b' ');
325
326 let timestamp_bytes = parts
327 .next()
328 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
329 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
330 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
331 })?;
332 let timestamp_utc = timestamp_str
333 .parse()
334 .map_err(|e| {
335 HgError::corrupted(format!("failed to parse timestamp: {e}"))
325 let timestamp_bytes = parts
326 .next()
327 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
328 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
329 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
330 })?;
331 let timestamp_utc = timestamp_str
332 .parse()
333 .map_err(|e| {
334 HgError::corrupted(format!("failed to parse timestamp: {e}"))
335 })
336 .and_then(|secs| {
337 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
338 HgError::corrupted(format!(
339 "integer timestamp out of valid range: {secs}"
340 ))
336 341 })
337 .and_then(|secs| {
338 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
339 HgError::corrupted(format!(
340 "integer timestamp out of valid range: {secs}"
341 ))
342 })
343 })
344 // Attempt to parse the timestamp as a float if we can't parse
345 // it as an int. It doesn't seem like float timestamps are actually
346 // used in practice, but the Python code supports them.
347 .or_else(|_| parse_float_timestamp(timestamp_str))?;
342 })
343 // Attempt to parse the timestamp as a float if we can't parse
344 // it as an int. It doesn't seem like float timestamps are actually
345 // used in practice, but the Python code supports them.
346 .or_else(|_| parse_float_timestamp(timestamp_str))?;
348 347
349 let timezone_bytes = parts
350 .next()
351 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
352 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
353 .map_err(|e| {
354 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
355 })?
356 .parse()
357 .map_err(|e| {
358 HgError::corrupted(format!("timezone is not an integer: {e}"))
359 })?;
360 let timezone =
361 FixedOffset::west_opt(timezone_secs).ok_or_else(|| {
362 HgError::corrupted("timezone offset out of bounds")
363 })?;
348 let timezone_bytes = parts
349 .next()
350 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
351 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
352 .map_err(|e| {
353 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
354 })?
355 .parse()
356 .map_err(|e| {
357 HgError::corrupted(format!("timezone is not an integer: {e}"))
358 })?;
359 let timezone = FixedOffset::west_opt(timezone_secs)
360 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
364 361
365 let timestamp =
366 DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);
367 let extra = parts
368 .next()
369 .map(parse_extra)
370 .transpose()?
371 .unwrap_or_default();
372
373 Ok(Self { timestamp, extra })
374 }
362 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
375 363 }
376 364
377 365 /// Attempt to parse the given string as a floating-point timestamp, and
@@ -413,12 +401,12 b' fn parse_float_timestamp('
413 401 })
414 402 }
415 403
416 /// Parse the "extra" fields from a changeset's timestamp line.
404 /// Decode changeset extra fields.
417 405 ///
418 406 /// Extras are NUL-delimited key-value pairs where the key consists of ASCII
419 407 /// alphanumeric characters plus hyphens and underscores, and the value can
420 408 /// contain arbitrary bytes.
421 fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
409 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
422 410 extra
423 411 .split(|c| *c == b'\0')
424 412 .map(|pair| {
@@ -456,6 +444,18 b' fn parse_extra(extra: &[u8]) -> Result<B'
456 444 .collect()
457 445 }
458 446
447 /// Parse the extra fields from a changeset's timestamp line.
448 fn parse_timestamp_line_extra(
449 timestamp_line: &[u8],
450 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
451 Ok(timestamp_line
452 .splitn(3, |c| *c == b' ')
453 .nth(2)
454 .map(decode_extra)
455 .transpose()?
456 .unwrap_or_default())
457 }
458
459 459 /// Decode Mercurial's escaping for changelog extras.
460 460 ///
461 461 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
@@ -681,7 +681,7 b' message",'
681 681 }
682 682
683 683 #[test]
684 fn test_parse_extra() {
684 fn test_decode_extra() {
685 685 let extra = [
686 686 ("branch".into(), b"default".to_vec()),
687 687 ("key-with-hyphens".into(), b"value1".to_vec()),
@@ -693,9 +693,9 b' message",'
693 693 .collect::<BTreeMap<String, Vec<u8>>>();
694 694
695 695 let encoded = encode_extra(&extra);
696 let parsed = parse_extra(&encoded).unwrap();
696 let decoded = decode_extra(&encoded).unwrap();
697 697
698 assert_eq!(extra, parsed);
698 assert_eq!(extra, decoded);
699 699 }
700 700
701 701 #[test]
@@ -713,7 +713,7 b' message",'
713 713
714 714 for (extra, msg) in test_cases {
715 715 assert!(
716 parse_extra(&extra).is_err(),
716 decode_extra(&extra).is_err(),
717 717 "corrupt extra should have failed to parse: {}",
718 718 msg
719 719 );
@@ -735,12 +735,10 b' message",'
735 735 let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
736 736 line.extend_from_slice(&encode_extra(&extra));
737 737
738 let parsed = TimestampAndExtra::from_bytes(&line).unwrap();
738 let timestamp = parse_timestamp(&line).unwrap();
739 assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");
739 740
740 assert_eq!(
741 &parsed.timestamp.to_rfc3339(),
742 "2005-05-03T13:16:10-08:00"
743 );
744 assert_eq!(extra, parsed.extra);
741 let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
742 assert_eq!(extra, parsed_extra);
745 743 }
746 744 }
General Comments 0
You need to be logged in to leave comments. Login now