upstream/mercurial-mirror Commit - r52286:6603a144

hg-core: separate timestamp and extra methods

Arun Kulshreshtha -

r52286:6603a144 default

parent child

rust/hg-core/src/revlog/changelog.rs

0 +86 -88

              use std::ascii::escape_default;
              use std::borrow::Cow;
              use std::collections::BTreeMap;
              use std::fmt::{Debug, Formatter};
              use std::{iter, str};
              use chrono::{DateTime, FixedOffset, NaiveDateTime};
              use itertools::{Either, Itertools};
              use crate::errors::HgError;
              use crate::revlog::Revision;
              use crate::revlog::{Node, NodePrefix};
              use crate::revlog::{Revlog, RevlogEntry, RevlogError};
              use crate::utils::hg_path::HgPath;
              use crate::vfs::Vfs;
              use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
              /// A specialized `Revlog` to work with changelog data format.
              pub struct Changelog {
                  /// The generic `revlog` format.
                  pub(crate) revlog: Revlog,
              }
              impl Changelog {
                  /// Open the `changelog` of a repository given by its root.
                  pub fn open(
                      store_vfs: &Vfs,
                      options: RevlogOpenOptions,
                  ) -> Result<Self, HgError> {
                      let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
                      Ok(Self { revlog })
                  }
                  /// Return the `ChangelogRevisionData` for the given node ID.
                  pub fn data_for_node(
                      &self,
                      node: NodePrefix,
                  ) -> Result<ChangelogRevisionData, RevlogError> {
                      let rev = self.revlog.rev_from_node(node)?;
                      self.entry_for_checked_rev(rev)?.data()
                  }
                  /// Return the [`ChangelogEntry`] for the given revision number.
                  pub fn entry_for_rev(
                      &self,
                      rev: UncheckedRevision,
                  ) -> Result<ChangelogEntry, RevlogError> {
                      let revlog_entry = self.revlog.get_entry(rev)?;
                      Ok(ChangelogEntry { revlog_entry })
                  }
                  /// Same as [`Self::entry_for_rev`] for checked revisions.
                  fn entry_for_checked_rev(
                      &self,
                      rev: Revision,
                  ) -> Result<ChangelogEntry, RevlogError> {
                      let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
                      Ok(ChangelogEntry { revlog_entry })
                  }
                  /// Return the [`ChangelogRevisionData`] for the given revision number.
                  ///
                  /// This is a useful shortcut in case the caller does not need the
                  /// generic revlog information (parents, hashes etc). Otherwise
                  /// consider taking a [`ChangelogEntry`] with
                  /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
                  pub fn data_for_rev(
                      &self,
                      rev: UncheckedRevision,
                  ) -> Result<ChangelogRevisionData, RevlogError> {
                      self.entry_for_rev(rev)?.data()
                  }
                  pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
                      self.revlog.node_from_rev(rev)
                  }
                  pub fn rev_from_node(
                      &self,
                      node: NodePrefix,
                  ) -> Result<Revision, RevlogError> {
                      self.revlog.rev_from_node(node)
                  }
              }
              impl Graph for Changelog {
                  fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                      self.revlog.parents(rev)
                  }
              }
              /// A specialized `RevlogEntry` for `changelog` data format
              ///
              /// This is a `RevlogEntry` with the added semantics that the associated
              /// data should meet the requirements for `changelog`, materialized by
              /// the fact that `data()` constructs a `ChangelogRevisionData`.
              /// In case that promise would be broken, the `data` method returns an error.
              #[derive(Clone)]
              pub struct ChangelogEntry<'changelog> {
                  /// Same data, as a generic `RevlogEntry`.
                  pub(crate) revlog_entry: RevlogEntry<'changelog>,
              }
              impl<'changelog> ChangelogEntry<'changelog> {
                  pub fn data<'a>(
                      &'a self,
                  ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
                      let bytes = self.revlog_entry.data()?;
                      if bytes.is_empty() {
                          Ok(ChangelogRevisionData::null())
                      } else {
                          Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
                              RevlogError::Other(HgError::CorruptedRepository(format!(
                                  "Invalid changelog data for revision {}: {:?}",
                                  self.revlog_entry.revision(),
                                  err
                              )))
                          })?)
                      }
                  }
                  /// Obtain a reference to the underlying `RevlogEntry`.
                  ///
                  /// This allows the caller to access the information that is common
                  /// to all revlog entries: revision number, node id, parent revisions etc.
                  pub fn as_revlog_entry(&self) -> &RevlogEntry {
                      &self.revlog_entry
                  }
                  pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
                      Ok(self
                          .revlog_entry
                          .p1_entry()?
                          .map(|revlog_entry| Self { revlog_entry }))
                  }
                  pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
                      Ok(self
                          .revlog_entry
                          .p2_entry()?
                          .map(|revlog_entry| Self { revlog_entry }))
                  }
              }
              /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
              #[derive(PartialEq)]
              pub struct ChangelogRevisionData<'changelog> {
                  /// The data bytes of the `changelog` entry.
                  bytes: Cow<'changelog, [u8]>,
                  /// The end offset for the hex manifest (not including the newline)
                  manifest_end: usize,
                  /// The end offset for the user+email (not including the newline)
                  user_end: usize,
                  /// The end offset for the timestamp+timezone+extras (not including the
                  /// newline)
                  timestamp_end: usize,
                  /// The end offset for the file list (not including the newline)
                  files_end: usize,
              }
              impl<'changelog> ChangelogRevisionData<'changelog> {
                  fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
                      let mut line_iter = bytes.split(|b| b == &b'\n');
                      let manifest_end = line_iter
                          .next()
                          .expect("Empty iterator from split()?")
                          .len();
                      let user_slice = line_iter.next().ok_or_else(|| {
                          HgError::corrupted("Changeset data truncated after manifest line")
                      })?;
                      let user_end = manifest_end + 1 + user_slice.len();
                      let timestamp_slice = line_iter.next().ok_or_else(|| {
                          HgError::corrupted("Changeset data truncated after user line")
                      })?;
                      let timestamp_end = user_end + 1 + timestamp_slice.len();
                      let mut files_end = timestamp_end + 1;
                      loop {
                          let line = line_iter.next().ok_or_else(|| {
                              HgError::corrupted("Changeset data truncated in files list")
                          })?;
                          if line.is_empty() {
                              if files_end == bytes.len() {
                                  // The list of files ended with a single newline (there
                                  // should be two)
                                  return Err(HgError::corrupted(
                                      "Changeset data truncated after files list",
                                  ));
                              }
                              files_end -= 1;
                              break;
                          }
                          files_end += line.len() + 1;
                      }
                      Ok(Self {
                          bytes,
                          manifest_end,
                          user_end,
                          timestamp_end,
                          files_end,
                      })
                  }
                  fn null() -> Self {
                      Self::new(Cow::Borrowed(
                          b"0000000000000000000000000000000000000000\n\n0 0\n\n",
                      ))
                      .unwrap()
                  }
                  /// Return an iterator over the lines of the entry.
                  pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
                      self.bytes.split(|b| b == &b'\n')
                  }
                  /// Return the node id of the `manifest` referenced by this `changelog`
                  /// entry.
                  pub fn manifest_node(&self) -> Result<Node, HgError> {
                      let manifest_node_hex = &self.bytes[..self.manifest_end];
                      Node::from_hex_for_repo(manifest_node_hex)
                  }
                  /// The full user string (usually a name followed by an email enclosed in
                  /// angle brackets)
                  pub fn user(&self) -> &[u8] {
                      &self.bytes[self.manifest_end + 1..self.user_end]
                  }
                  /// The full timestamp line (timestamp in seconds, offset in seconds, and
                  /// possibly extras)
                  // TODO: We should expose this in a more useful way
                  pub fn timestamp_line(&self) -> &[u8] {
                      &self.bytes[self.user_end + 1..self.timestamp_end]
                  }
-                 /// Parsed timestamp line, including optional extras.
-                 pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> {
-                     TimestampAndExtra::from_bytes(self.timestamp_line())
+                 /// Parsed timestamp.
+                 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
+                     parse_timestamp(self.timestamp_line())
+                 }
+                 /// Optional commit extras.
+                 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+                     parse_timestamp_line_extra(self.timestamp_line())
                  }
                  /// The files changed in this revision.
                  pub fn files(&self) -> impl Iterator<Item = &HgPath> {
                      if self.timestamp_end == self.files_end {
                          Either::Left(iter::empty())
                      } else {
                          Either::Right(
                              self.bytes[self.timestamp_end + 1..self.files_end]
                                  .split(|b| b == &b'\n')
                                  .map(HgPath::new),
                          )
                      }
                  }
                  /// The change description.
                  pub fn description(&self) -> &[u8] {
                      &self.bytes[self.files_end + 2..]
                  }
              }
              impl Debug for ChangelogRevisionData<'_> {
                  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
                      f.debug_struct("ChangelogRevisionData")
                          .field("bytes", &debug_bytes(&self.bytes))
                          .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
                          .field(
                              "user",
                              &debug_bytes(
                                  &self.bytes[self.manifest_end + 1..self.user_end],
                              ),
                          )
                          .field(
                              "timestamp",
                              &debug_bytes(
                                  &self.bytes[self.user_end + 1..self.timestamp_end],
                              ),
                          )
                          .field(
                              "files",
                              &debug_bytes(
                                  &self.bytes[self.timestamp_end + 1..self.files_end],
                              ),
                          )
                          .field(
                              "description",
                              &debug_bytes(&self.bytes[self.files_end + 2..]),
                          )
                          .finish()
                  }
              }
              fn debug_bytes(bytes: &[u8]) -> String {
                  String::from_utf8_lossy(
                      &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
                  )
                  .to_string()
              }
-             /// Parsed timestamp line, including the timestamp and optional extras.
-             #[derive(Clone, Debug)]
-             pub struct TimestampAndExtra {
-                 pub timestamp: DateTime<FixedOffset>,
-                 pub extra: BTreeMap<String, Vec<u8>>,
+             }
+             /// Parse the raw bytes of the timestamp line from a changelog entry.
+             ///
+             /// According to the documentation in `hg help dates` and the
+             /// implementation in `changelog.py`, the format of the timestamp line
+             /// is `time tz extra\n` where:
+             ///
+             /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
+             ///   as seconds since the UNIX epoch.
+             ///
+             /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
+             ///   seconds WEST of UTC (so negative for timezones east of UTC, which is the
+             ///   opposite of the sign in ISO 8601 timestamps).
+             ///
+             /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
+             ///   and value in each pair separated by an ASCII colon. Keys are limited to
+             ///   ASCII letters, digits, hyphens, and underscores, whereas values can be
+             ///   arbitrary bytes.
+             fn parse_timestamp(
+                 timestamp_line: &[u8],
+             ) -> Result<DateTime<FixedOffset>, HgError> {
+                 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
-             impl TimestampAndExtra {
-                 /// Parse the raw bytes of the timestamp line from a changelog entry.
-                 ///
-                 /// According to the documentation in `hg help dates` and the
-                 /// implementation in `changelog.py`, the format of the timestamp line
-                 /// is `time tz extra\n` where:
-                 ///
-                 /// - `time` is an ASCII-encoded signed int or float denoting a UTC
-                 ///   timestamp as seconds since the UNIX epoch.
-                 ///
-                 /// - `tz` is the timezone offset as an ASCII-encoded signed integer
-                 ///   denoting seconds WEST of UTC (so negative for timezones east of UTC,
-                 ///   which is the opposite of the sign in ISO 8601 timestamps).
-                 ///
-                 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the
-                 ///   key and value in each pair separated by an ASCII colon. Keys are
-                 ///   limited to ASCII letters, digits, hyphens, and underscores, whereas
-                 ///   values can be arbitrary bytes.
-                 fn from_bytes(line: &[u8]) -> Result<Self, HgError> {
-                     let mut parts = line.splitn(3, |c| *c == b' ');
-                     let timestamp_bytes = parts
-                         .next()
-                         .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
-                     let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
-                         HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
-                     })?;
-                     let timestamp_utc = timestamp_str
-                         .parse()
-                         .map_err(|e| {
-                             HgError::corrupted(format!("failed to parse timestamp: {e}"))
+                 let timestamp_bytes = parts
+                     .next()
+                     .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
+                 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
+                     HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
+                 })?;
+                 let timestamp_utc = timestamp_str
+                     .parse()
+                     .map_err(|e| {
+                         HgError::corrupted(format!("failed to parse timestamp: {e}"))
+                     })
+                     .and_then(|secs| {
+                         NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
+                             HgError::corrupted(format!(
+                                 "integer timestamp out of valid range: {secs}"
+                             ))
                          })
-                         .and_then(|secs| {
-                             NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
-                                 HgError::corrupted(format!(
-                                     "integer timestamp out of valid range: {secs}"
-                                 ))
-                             })
-                         })
-                         // Attempt to parse the timestamp as a float if we can't parse
-                         // it as an int. It doesn't seem like float timestamps are actually
-                         // used in practice, but the Python code supports them.
-                         .or_else(|_| parse_float_timestamp(timestamp_str))?;
+                     })
+                     // Attempt to parse the timestamp as a float if we can't parse
+                     // it as an int. It doesn't seem like float timestamps are actually
+                     // used in practice, but the Python code supports them.
+                     .or_else(|_| parse_float_timestamp(timestamp_str))?;
-                     let timezone_bytes = parts
-                         .next()
-                         .ok_or_else(|| HgError::corrupted("missing timezone"))?;
-                     let timezone_secs: i32 = str::from_utf8(timezone_bytes)
-                         .map_err(|e| {
-                             HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
-                         })?
-                         .parse()
-                         .map_err(|e| {
-                             HgError::corrupted(format!("timezone is not an integer: {e}"))
-                         })?;
-                     let timezone =
-                         FixedOffset::west_opt(timezone_secs).ok_or_else(|| {
-                             HgError::corrupted("timezone offset out of bounds")
-                         })?;
+                 let timezone_bytes = parts
+                     .next()
+                     .ok_or_else(|| HgError::corrupted("missing timezone"))?;
+                 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
+                     .map_err(|e| {
+                         HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
+                     })?
+                     .parse()
+                     .map_err(|e| {
+                         HgError::corrupted(format!("timezone is not an integer: {e}"))
+                     })?;
+                 let timezone = FixedOffset::west_opt(timezone_secs)
+                     .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
-                     let timestamp =
-                         DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);
-                     let extra = parts
-                         .next()
-                         .map(parse_extra)
-                         .transpose()?
-                         .unwrap_or_default();
-                     Ok(Self { timestamp, extra })
+                 }
+                 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
              }
              /// Attempt to parse the given string as floating-point timestamp, and
              /// convert the result into a `chrono::NaiveDateTime`.
              fn parse_float_timestamp(
                  timestamp_str: &str,
              ) -> Result<NaiveDateTime, HgError> {
                  let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
                      HgError::corrupted(format!("failed to parse timestamp: {e}"))
                  })?;
                  // To construct a `NaiveDateTime` we'll need to convert the float
                  // into signed integer seconds and unsigned integer nanoseconds.
                  let mut secs = timestamp.trunc() as i64;
                  let mut subsecs = timestamp.fract();
                  // If the timestamp is negative, we need to express the fractional
                  // component as positive nanoseconds since the previous second.
                  if timestamp < 0.0 {
                      secs -= 1;
                      subsecs += 1.0;
                  }
                  // This cast should be safe because the fractional component is
                  // by definition less than 1.0, so this value should not exceed
                  // 1 billion, which is representable as an f64 without loss of
                  // precision and should fit into a u32 without overflowing.
                  //
                  // (Any loss of precision in the fractional component will have
                  // already happened at the time of initial parsing; in general,
                  // f64s are insufficiently precise to provide nanosecond-level
                  // precision with present-day timestamps.)
                  let nsecs = (subsecs * 1_000_000_000.0) as u32;
                  NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
                      HgError::corrupted(format!(
                          "float timestamp out of valid range: {timestamp}"
                      ))
                  })
              }
-             /// Parse the "extra" fields from a changeset's timestamp line.
+             /// Decode changeset extra fields.
              ///
              /// Extras are null-delimited key-value pairs where the key consists of ASCII
              /// alphanumeric characters plus hyphens and underscores, and the value can
              /// contain arbitrary bytes.
-             fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+             fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
                  extra
                      .split(|c| *c == b'\0')
                      .map(|pair| {
                          let pair = unescape_extra(pair);
                          let mut iter = pair.splitn(2, |c| *c == b':');
                          let key_bytes =
                              iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
                                  HgError::corrupted("empty key in changeset extras")
                              })?;
                          let key = str::from_utf8(key_bytes)
                              .ok()
                              .filter(|k| {
                                  k.chars().all(|c| {
                                      c.is_ascii_alphanumeric() || c == '_' || c == '-'
                                  })
                              })
                              .ok_or_else(|| {
                                  let key = String::from_utf8_lossy(key_bytes);
                                  HgError::corrupted(format!(
                                      "invalid key in changeset extras: {key}",
                                  ))
                              })?
                              .to_string();
                          let value = iter.next().map(Into::into).ok_or_else(|| {
                              HgError::corrupted(format!(
                                  "missing value for changeset extra: {key}"
                              ))
                          })?;
                          Ok((key, value))
                      })
                      .collect()
              }
+             /// Parse the extra fields from a changeset's timestamp line.
+             fn parse_timestamp_line_extra(
+                 timestamp_line: &[u8],
+             ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+                 Ok(timestamp_line
+                     .splitn(3, |c| *c == b' ')
+                     .nth(2)
+                     .map(decode_extra)
+                     .transpose()?
+                     .unwrap_or_default())
+             }
              /// Decode Mercurial's escaping for changelog extras.
              ///
              /// The `_string_escape` function in `changelog.py` only escapes 4 characters
              /// (null, backslash, newline, and carriage return) so we only decode those.
              ///
              /// The Python code also includes a workaround for decoding escaped nuls
              /// that are followed by an ASCII octal digit, since Python's built-in
              /// `string_escape` codec will interpret that as an escaped octal byte value.
              /// That workaround is omitted here since we don't support decoding octal.
              fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
                  let mut output = Vec::with_capacity(bytes.len());
                  let mut input = bytes.iter().copied();
                  while let Some(c) = input.next() {
                      if c != b'\\' {
                          output.push(c);
                          continue;
                      }
                      match input.next() {
                          Some(b'0') => output.push(b'\0'),
                          Some(b'\\') => output.push(b'\\'),
                          Some(b'n') => output.push(b'\n'),
                          Some(b'r') => output.push(b'\r'),
                          // The following cases should never occur in theory because any
                          // backslashes in the original input should have been escaped
                          // with another backslash, so it should not be possible to
                          // observe an escape sequence other than the 4 above.
                          Some(c) => output.extend_from_slice(&[b'\\', c]),
                          None => output.push(b'\\'),
                      }
                  }
                  output
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  use crate::vfs::Vfs;
                  use crate::NULL_REVISION;
                  use pretty_assertions::assert_eq;
                  #[test]
                  fn test_create_changelogrevisiondata_invalid() {
                      // Completely empty
                      assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
                      // No newline after manifest
                      assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
                      // No newline after user
                      assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
                      // No newline after timestamp
                      assert!(
                          ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
                      );
                      // Missing newline after files
                      assert!(ChangelogRevisionData::new(Cow::Borrowed(
                          b"abcd\n\n0 0\nfile1\nfile2"
                      ))
                      .is_err(),);
                      // Only one newline after files
                      assert!(ChangelogRevisionData::new(Cow::Borrowed(
                          b"abcd\n\n0 0\nfile1\nfile2\n"
                      ))
                      .is_err(),);
                  }
                  #[test]
                  fn test_create_changelogrevisiondata() {
                      let data = ChangelogRevisionData::new(Cow::Borrowed(
                          b"0123456789abcdef0123456789abcdef01234567
              Some One <someone@example.com>
 0
              file1
              file2
              some
              commit
              message",
                      ))
                      .unwrap();
                      assert_eq!(
                          data.manifest_node().unwrap(),
                          Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                              .unwrap()
                      );
                      assert_eq!(data.user(), b"Some One <someone@example.com>");
                      assert_eq!(data.timestamp_line(), b"0 0");
                      assert_eq!(
                          data.files().collect_vec(),
                          vec![HgPath::new("file1"), HgPath::new("file2")]
                      );
                      assert_eq!(data.description(), b"some\ncommit\nmessage");
                  }
                  #[test]
                  fn test_data_from_rev_null() -> Result<(), RevlogError> {
                      // an empty revlog will be enough for this case
                      let temp = tempfile::tempdir().unwrap();
                      let vfs = Vfs { base: temp.path() };
                      std::fs::write(temp.path().join("foo.i"), b"").unwrap();
                      let revlog =
                          Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                              .unwrap();
                      let changelog = Changelog { revlog };
                      assert_eq!(
                          changelog.data_for_rev(NULL_REVISION.into())?,
                          ChangelogRevisionData::null()
                      );
                      // same with the intermediate entry object
                      assert_eq!(
                          changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
                          ChangelogRevisionData::null()
                      );
                      Ok(())
                  }
                  #[test]
                  fn test_empty_files_list() {
                      assert!(ChangelogRevisionData::null()
                          .files()
                          .collect_vec()
                          .is_empty());
                  }
                  #[test]
                  fn test_unescape_basic() {
                      // '\0', '\\', '\n', and '\r' are correctly unescaped.
                      let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
                      let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
                      let unescaped = unescape_extra(escaped);
                      assert_eq!(&expected[..], &unescaped[..]);
                  }
                  #[test]
                  fn test_unescape_unsupported_sequence() {
                      // Other escape sequences are left unaltered.
                      for c in 0u8..255 {
                          match c {
                              b'0' | b'\\' | b'n' | b'r' => continue,
                              c => {
                                  let expected = &[b'\\', c][..];
                                  let unescaped = unescape_extra(expected);
                                  assert_eq!(expected, &unescaped[..]);
                              }
                          }
                      }
                  }
                  #[test]
                  fn test_unescape_trailing_backslash() {
                      // Trailing backslashes are OK.
                      let expected = br"hi\";
                      let unescaped = unescape_extra(expected);
                      assert_eq!(&expected[..], &unescaped[..]);
                  }
                  #[test]
                  fn test_unescape_nul_followed_by_octal() {
                      // Escaped NUL chars followed by octal digits are decoded correctly.
                      let expected = b"\012";
                      let escaped = br"\012";
                      let unescaped = unescape_extra(escaped);
                      assert_eq!(&expected[..], &unescaped[..]);
                  }
                  #[test]
                  fn test_parse_float_timestamp() {
                      let test_cases = [
                          // Zero should map to the UNIX epoch.
                          ("0.0", "1970-01-01 00:00:00"),
                          // Negative zero should be the same as positive zero.
                          ("-0.0", "1970-01-01 00:00:00"),
                          // Values without fractional components should work like integers.
                          // (Assuming the timestamp is within the limits of f64 precision.)
                          ("1115154970.0", "2005-05-03 21:16:10"),
                          // We expect some loss of precision in the fractional component
                          // when parsing arbitrary floating-point values.
                          ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
                          // But representable f64 values should parse losslessly.
                          ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
                          // Negative fractional components are subtracted from the epoch.
                          ("-1.333", "1969-12-31 23:59:58.667"),
                      ];
                      for (input, expected) in test_cases {
                          let res = parse_float_timestamp(input).unwrap().to_string();
                          assert_eq!(res, expected);
                      }
                  }
                  fn escape_extra(bytes: &[u8]) -> Vec<u8> {
                      let mut output = Vec::with_capacity(bytes.len());
                      for c in bytes.iter().copied() {
                          output.extend_from_slice(match c {
                              b'\0' => &b"\\0"[..],
                              b'\\' => &b"\\\\"[..],
                              b'\n' => &b"\\n"[..],
                              b'\r' => &b"\\r"[..],
                              _ => {
                                  output.push(c);
                                  continue;
                              }
                          });
                      }
                      output
                  }
                  fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
                  where
                      K: AsRef<[u8]>,
                      V: AsRef<[u8]>,
                  {
                      let extras = pairs.into_iter().map(|(k, v)| {
                          escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
                      });
                      // Use fully-qualified syntax to avoid a future naming conflict with
                      // the standard library: https://github.com/rust-lang/rust/issues/79524
                      Itertools::intersperse(extras, b"\0".to_vec()).concat()
                  }
                  #[test]
-                 fn test_parse_extra() {
+                 fn test_decode_extra() {
                      let extra = [
                          ("branch".into(), b"default".to_vec()),
                          ("key-with-hyphens".into(), b"value1".to_vec()),
                          ("key_with_underscores".into(), b"value2".to_vec()),
                          ("empty-value".into(), b"".to_vec()),
                          ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
                      ]
                      .into_iter()
                      .collect::<BTreeMap<String, Vec<u8>>>();
                      let encoded = encode_extra(&extra);
-                     let parsed = parse_extra(&encoded).unwrap();
+                     let decoded = decode_extra(&encoded).unwrap();
-                     assert_eq!(extra, parsed);
+                     assert_eq!(extra, decoded);
                  }
                  #[test]
                  fn test_corrupt_extra() {
                      let test_cases = [
                          (&b""[..], "empty input"),
                          (&b"\0"[..], "unexpected null byte"),
                          (&b":empty-key"[..], "empty key"),
                          (&b"\0leading-null:"[..], "leading null"),
                          (&b"trailing-null:\0"[..], "trailing null"),
                          (&b"missing-value"[..], "missing value"),
                          (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
                          (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
                      ];
                      for (extra, msg) in test_cases {
                          assert!(
-                             parse_extra(&extra).is_err(),
+                             decode_extra(&extra).is_err(),
                              "corrupt extra should have failed to parse: {}",
                              msg
                          );
                      }
                  }
                  #[test]
                  fn test_parse_timestamp_line() {
                      let extra = [
                          ("branch".into(), b"default".to_vec()),
                          ("key-with-hyphens".into(), b"value1".to_vec()),
                          ("key_with_underscores".into(), b"value2".to_vec()),
                          ("empty-value".into(), b"".to_vec()),
                          ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
                      ]
                      .into_iter()
                      .collect::<BTreeMap<String, Vec<u8>>>();
                      let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
                      line.extend_from_slice(&encode_extra(&extra));
-                     let parsed = TimestampAndExtra::from_bytes(&line).unwrap();
+                     let timestamp = parse_timestamp(&line).unwrap();
+                     assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");
-                     assert_eq!(
-                         &parsed.timestamp.to_rfc3339(),
-                         "2005-05-03T13:16:10-08:00"
-                     );
-                     assert_eq!(extra, parsed.extra);
+                     let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
+                     assert_eq!(extra, parsed_extra);
                  }
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages