upstream/mercurial-mirror Files · rust/hg-core/src/revlog/changelog.rs

hg-core: implement timestamp line parsing

Arun Kulshreshtha - - Load All Authors

File last commit:

r52284:a96ed440 default


                r52284:a96ed440

default

Download file

             changelog.rs
        
                    746 lines
            
             | 25.3 KiB
            
                | application/rls-services+xml
            
             |
                RustLexer
            
             / rust / hg-core / src / revlog / changelog.rs
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      use std::ascii::escape_default;

      use std::borrow::Cow;

      use std::collections::BTreeMap;

      use std::fmt::{Debug, Formatter};

      use std::{iter, str};

      use chrono::{DateTime, FixedOffset, NaiveDateTime};

      use itertools::{Either, Itertools};

      use crate::errors::HgError;

      use crate::revlog::Revision;

      use crate::revlog::{Node, NodePrefix};

      use crate::revlog::{Revlog, RevlogEntry, RevlogError};

      use crate::utils::hg_path::HgPath;

      use crate::vfs::Vfs;

      use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};

      /// Changelog-flavoured wrapper around a generic [`Revlog`].
      ///
      /// Every operation is forwarded to the wrapped revlog; the methods that
      /// return entries or data wrap the result in the changelog-specific
      /// [`ChangelogEntry`] / [`ChangelogRevisionData`] types.
      pub struct Changelog {
          /// The wrapped generic revlog.
          pub(crate) revlog: Revlog,
      }

      impl Changelog {
          /// Open the changelog index file `00changelog.i`, handing `store_vfs`
          /// and `options` to `Revlog::open`.
          pub fn open(
              store_vfs: &Vfs,
              options: RevlogOpenOptions,
          ) -> Result<Self, HgError> {
              Ok(Self {
                  revlog: Revlog::open(
                      store_vfs,
                      "00changelog.i",
                      None,
                      options,
                  )?,
              })
          }

          /// Look up `node` in the revlog and return the parsed
          /// [`ChangelogRevisionData`] of the matching revision.
          pub fn data_for_node(
              &self,
              node: NodePrefix,
          ) -> Result<ChangelogRevisionData, RevlogError> {
              let checked_rev = self.revlog.rev_from_node(node)?;
              let entry = self.entry_for_checked_rev(checked_rev)?;
              entry.data()
          }

          /// Return the [`ChangelogEntry`] for the given (unchecked) revision
          /// number.
          pub fn entry_for_rev(
              &self,
              rev: UncheckedRevision,
          ) -> Result<ChangelogEntry, RevlogError> {
              Ok(ChangelogEntry {
                  revlog_entry: self.revlog.get_entry(rev)?,
              })
          }

          /// Variant of [`Self::entry_for_rev`] for an already checked
          /// [`Revision`].
          fn entry_for_checked_rev(
              &self,
              rev: Revision,
          ) -> Result<ChangelogEntry, RevlogError> {
              Ok(ChangelogEntry {
                  revlog_entry: self.revlog.get_entry_for_checked_rev(rev)?,
              })
          }

          /// Return the [`ChangelogRevisionData`] for the given revision number.
          ///
          /// Shortcut for callers that do not need the generic revlog
          /// information (parents, hashes etc). When that information is
          /// needed as well, fetch a [`ChangelogEntry`] with
          /// [entry_for_rev](`Self::entry_for_rev`) and work from there.
          pub fn data_for_rev(
              &self,
              rev: UncheckedRevision,
          ) -> Result<ChangelogRevisionData, RevlogError> {
              let entry = self.entry_for_rev(rev)?;
              entry.data()
          }

          /// Forward to the wrapped revlog's `node_from_rev`: the node for
          /// `rev`, or `None` when the revlog has nothing to return for it.
          pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
              let revlog = &self.revlog;
              revlog.node_from_rev(rev)
          }

          /// Forward to the wrapped revlog's `rev_from_node`: resolve a node
          /// prefix to a revision number.
          pub fn rev_from_node(
              &self,
              node: NodePrefix,
          ) -> Result<Revision, RevlogError> {
              let revlog = &self.revlog;
              revlog.rev_from_node(node)
          }
      }

      // Graph queries on the changelog are answered by the wrapped revlog.
      impl Graph for Changelog {

          /// Return the two parents of `rev`, exactly as reported by
          /// `self.revlog.parents`.
          fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {

              self.revlog.parents(rev)

          }

      }

      /// A `RevlogEntry` known to belong to the `changelog`.
      ///
      /// It carries the same data as the generic entry, with the extra
      /// expectation that the bytes follow the `changelog` layout. That
      /// expectation is what lets `data()` build a `ChangelogRevisionData`;
      /// when the bytes do not follow the layout, `data()` reports an error.
      #[derive(Clone)]
      pub struct ChangelogEntry<'changelog> {
          /// The same entry, seen as a generic `RevlogEntry`.
          pub(crate) revlog_entry: RevlogEntry<'changelog>,
      }

      impl<'changelog> ChangelogEntry<'changelog> {
          /// Read the bytes of this entry and parse them as changelog data.
          ///
          /// Empty bytes yield `ChangelogRevisionData::null()`. Bytes that fail
          /// to parse are reported as a corrupted-repository error that names
          /// the revision.
          pub fn data<'a>(
              &'a self,
          ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
              let raw = self.revlog_entry.data()?;
              if raw.is_empty() {
                  return Ok(ChangelogRevisionData::null());
              }
              match ChangelogRevisionData::new(raw) {
                  Ok(parsed) => Ok(parsed),
                  Err(err) => {
                      let message = format!(
                          "Invalid changelog data for revision {}: {:?}",
                          self.revlog_entry.revision(),
                          err
                      );
                      Err(RevlogError::Other(HgError::CorruptedRepository(
                          message,
                      )))
                  }
              }
          }

          /// Borrow the underlying `RevlogEntry`.
          ///
          /// Gives access to what every revlog entry has in common: revision
          /// number, node id, parent revisions etc.
          pub fn as_revlog_entry(&self) -> &RevlogEntry {
              let Self { revlog_entry } = self;
              revlog_entry
          }

          /// The first parent as a `ChangelogEntry`, or `None` when the
          /// underlying entry's `p1_entry()` returns none.
          pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
              let parent = self.revlog_entry.p1_entry()?;
              Ok(parent.map(|revlog_entry| Self { revlog_entry }))
          }

          /// The second parent as a `ChangelogEntry`, or `None` when the
          /// underlying entry's `p2_entry()` returns none.
          pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
              let parent = self.revlog_entry.p2_entry()?;
              Ok(parent.map(|revlog_entry| Self { revlog_entry }))
          }
      }

      /// The bytes of one `changelog` entry plus the offsets needed to slice
      /// them into manifest, user, timestamp line, files list and description.
      #[derive(PartialEq)]
      pub struct ChangelogRevisionData<'changelog> {
          /// Raw bytes of the `changelog` entry.
          bytes: Cow<'changelog, [u8]>,
          /// Where the hex manifest stops (its newline excluded).
          manifest_end: usize,
          /// Where the user+email line stops (its newline excluded).
          user_end: usize,
          /// Where the timestamp+timezone+extras line stops (its newline
          /// excluded).
          timestamp_end: usize,
          /// Where the file list stops (its newline excluded). Equal to
          /// `timestamp_end` when the list is empty.
          files_end: usize,
      }

      impl<'changelog> ChangelogRevisionData<'changelog> {
          /// Locate the section boundaries in `bytes`.
          ///
          /// Expected layout: manifest line, user line, timestamp line, zero or
          /// more file lines, one empty line, then the description. Any missing
          /// piece is reported as a corruption error.
          fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
              let mut lines = bytes.split(|&b| b == b'\n');

              let manifest_end = lines
                  .next()
                  .expect("Empty iterator from split()?")
                  .len();

              let user_end = match lines.next() {
                  Some(user) => manifest_end + 1 + user.len(),
                  None => {
                      return Err(HgError::corrupted(
                          "Changeset data truncated after manifest line",
                      ))
                  }
              };

              let timestamp_end = match lines.next() {
                  Some(timestamp) => user_end + 1 + timestamp.len(),
                  None => {
                      return Err(HgError::corrupted(
                          "Changeset data truncated after user line",
                      ))
                  }
              };

              // `cursor` is always the offset at which the line about to be
              // read starts.
              let mut cursor = timestamp_end + 1;
              let files_end = loop {
                  match lines.next() {
                      None => {
                          return Err(HgError::corrupted(
                              "Changeset data truncated in files list",
                          ))
                      }
                      Some(file) if !file.is_empty() => {
                          cursor += file.len() + 1;
                      }
                      // An empty line sitting at the very end of the data
                      // means the list of files ended with a single newline
                      // (there should be two).
                      Some(_) if cursor == bytes.len() => {
                          return Err(HgError::corrupted(
                              "Changeset data truncated after files list",
                          ))
                      }
                      // Step back over the newline that ended the previous
                      // line so the offset excludes it.
                      Some(_) => break cursor - 1,
                  }
              };

              Ok(Self {
                  bytes,
                  manifest_end,
                  user_end,
                  timestamp_end,
                  files_end,
              })
          }

          /// The data used for an entry with no bytes: an all-zero manifest
          /// node, empty user, `0 0` timestamp line, no files and an empty
          /// description.
          fn null() -> Self {
              const NULL_ENTRY: &[u8] =
                  b"0000000000000000000000000000000000000000\n\n0 0\n\n";
              Self::new(Cow::Borrowed(NULL_ENTRY)).unwrap()
          }

          /// Iterate over the newline-separated lines of the raw entry.
          pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
              self.bytes.split(|&b| b == b'\n')
          }

          /// Parse the first line of the entry as the node id of the
          /// `manifest` this `changelog` entry refers to.
          pub fn manifest_node(&self) -> Result<Node, HgError> {
              let hex = &self.bytes[..self.manifest_end];
              Node::from_hex_for_repo(hex)
          }

          /// The complete user line (usually a name followed by an email
          /// enclosed in angle brackets).
          pub fn user(&self) -> &[u8] {
              let start = self.manifest_end + 1;
              &self.bytes[start..self.user_end]
          }

          /// The complete timestamp line (timestamp in seconds, offset in
          /// seconds, and possibly extras).
          // TODO: We should expose this in a more useful way
          pub fn timestamp_line(&self) -> &[u8] {
              let start = self.user_end + 1;
              &self.bytes[start..self.timestamp_end]
          }

          /// The timestamp line parsed into a timestamp and its optional
          /// extras.
          pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> {
              let line = self.timestamp_line();
              TimestampAndExtra::from_bytes(line)
          }

          /// Iterate over the paths of the files changed in this revision.
          pub fn files(&self) -> impl Iterator<Item = &HgPath> {
              if self.files_end != self.timestamp_end {
                  let listing =
                      &self.bytes[self.timestamp_end + 1..self.files_end];
                  Either::Right(
                      listing.split(|&b| b == b'\n').map(HgPath::new),
                  )
              } else {
                  // Empty list: slicing would start past `files_end`.
                  Either::Left(iter::empty())
              }
          }

          /// The change description: everything after the empty line that
          /// follows the files list.
          pub fn description(&self) -> &[u8] {
              let start = self.files_end + 2;
              &self.bytes[start..]
          }
      }

      impl Debug for ChangelogRevisionData<'_> {
          /// Format the raw bytes and every section of the entry, each one
          /// rendered through `debug_bytes`.
          fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
              // With an empty files list `files_end == timestamp_end`, so the
              // range `timestamp_end + 1..files_end` would start after its end
              // and panic. Mirror the guard used by `files()`.
              let files: &[u8] = if self.timestamp_end == self.files_end {
                  &[]
              } else {
                  &self.bytes[self.timestamp_end + 1..self.files_end]
              };

              f.debug_struct("ChangelogRevisionData")
                  .field("bytes", &debug_bytes(&self.bytes))
                  .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
                  .field(
                      "user",
                      &debug_bytes(
                          &self.bytes[self.manifest_end + 1..self.user_end],
                      ),
                  )
                  .field(
                      "timestamp",
                      &debug_bytes(
                          &self.bytes[self.user_end + 1..self.timestamp_end],
                      ),
                  )
                  .field("files", &debug_bytes(files))
                  .field(
                      "description",
                      &debug_bytes(&self.bytes[self.files_end + 2..]),
                  )
                  .finish()
          }
      }

      /// Render `bytes` as a printable ASCII string for debug output.
      ///
      /// Each byte goes through `std::ascii::escape_default`, so printable
      /// ASCII is kept and everything else becomes a backslash escape such as
      /// `\n` or `\xff`.
      fn debug_bytes(bytes: &[u8]) -> String {
          // `escape_default` only ever yields ASCII bytes, so they can be
          // collected straight into a `String`; no intermediate buffer or
          // lossy UTF-8 decoding is needed.
          bytes
              .iter()
              .flat_map(|&b| escape_default(b))
              .map(char::from)
              .collect()
      }

      /// Parsed timestamp line, including the timestamp and optional extras.

      #[derive(Clone, Debug)]

      pub struct TimestampAndExtra {

          pub timestamp: DateTime<FixedOffset>,

          pub extra: BTreeMap<String, Vec<u8>>,

      }

      impl TimestampAndExtra {

          /// Parse the raw bytes of the timestamp line from a changelog entry.

          ///

          /// According to the documentation in `hg help dates` and the

          /// implementation in `changelog.py`, the format of the timestamp line

          /// is `time tz extra\n` where:

          ///

          /// - `time` is an ASCII-encoded signed int or float denoting a UTC

          ///   timestamp as seconds since the UNIX epoch.

          ///

          /// - `tz` is the timezone offset as an ASCII-encoded signed integer

          ///   denoting seconds WEST of UTC (so negative for timezones east of UTC,

          ///   which is the opposite of the sign in ISO 8601 timestamps).

          ///

          /// - `extra` is an optional set of NUL-delimited key-value pairs, with the

          ///   key and value in each pair separated by an ASCII colon. Keys are

          ///   limited to ASCII letters, digits, hyphens, and underscores, whereas

          ///   values can be arbitrary bytes.

          fn from_bytes(line: &[u8]) -> Result<Self, HgError> {

              let mut parts = line.splitn(3, |c| *c == b' ');

              let timestamp_bytes = parts

                  .next()

                  .ok_or_else(|| HgError::corrupted("missing timestamp"))?;

              let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {

                  HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))

              })?;

              let timestamp_utc = timestamp_str

                  .parse()

                  .map_err(|e| {

                      HgError::corrupted(format!("failed to parse timestamp: {e}"))

                  })

                  .and_then(|secs| {

                      NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {

                          HgError::corrupted(format!(

                              "integer timestamp out of valid range: {secs}"

                          ))

                      })

                  })

                  // Attempt to parse the timestamp as a float if we can't parse

                  // it as an int. It doesn't seem like float timestamps are actually

                  // used in practice, but the Python code supports them.

                  .or_else(|_| parse_float_timestamp(timestamp_str))?;

              let timezone_bytes = parts

                  .next()

                  .ok_or_else(|| HgError::corrupted("missing timezone"))?;

              let timezone_secs: i32 = str::from_utf8(timezone_bytes)

                  .map_err(|e| {

                      HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))

                  })?

                  .parse()

                  .map_err(|e| {

                      HgError::corrupted(format!("timezone is not an integer: {e}"))

                  })?;

              let timezone =

                  FixedOffset::west_opt(timezone_secs).ok_or_else(|| {

                      HgError::corrupted("timezone offset out of bounds")

                  })?;

              let timestamp =

                  DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);

              let extra = parts

                  .next()

                  .map(parse_extra)

                  .transpose()?

                  .unwrap_or_default();

              Ok(Self { timestamp, extra })

          }

      }

      /// Attempt to parse the given string as floating-point timestamp, and
      /// convert the result into a `chrono::NaiveDateTime`.
      ///
      /// The value is split into whole seconds and non-negative nanoseconds
      /// before being handed to `NaiveDateTime::from_timestamp_opt`.
      ///
      /// # Errors
      ///
      /// Returns an `HgError::corrupted` error when the string is not a valid
      /// float, when it parses to a non-finite value (`f64` parsing accepts
      /// spellings such as `nan` and `inf`), or when `from_timestamp_opt`
      /// rejects the resulting seconds/nanoseconds pair.
      fn parse_float_timestamp(
          timestamp_str: &str,
      ) -> Result<NaiveDateTime, HgError> {
          const NANOS_PER_SEC: u32 = 1_000_000_000;

          let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
              HgError::corrupted(format!("failed to parse timestamp: {e}"))
          })?;

          let out_of_range = || {
              HgError::corrupted(format!(
                  "float timestamp out of valid range: {timestamp}"
              ))
          };

          // NaN would otherwise be cast to 0 seconds / 0 nanoseconds and be
          // silently accepted as the UNIX epoch.
          if !timestamp.is_finite() {
              return Err(out_of_range());
          }

          // To construct a `NaiveDateTime` we'll need to convert the float
          // into signed integer seconds and unsigned integer nanoseconds.
          // The float-to-int cast saturates, so absurdly large magnitudes end
          // up at `i64::MIN`/`i64::MAX` and are rejected further down.
          let mut secs = timestamp.trunc() as i64;
          let mut subsecs = timestamp.fract();

          // If the fractional component is negative, express it as positive
          // nanoseconds since the previous second. The test is on the
          // fraction rather than on the whole value: a negative whole number
          // such as -1.0 has nothing to borrow, and testing the whole value
          // would turn it into "-2s + 1_000_000_000ns" (and would underflow
          // `secs` when the cast above saturated to `i64::MIN`).
          if subsecs < 0.0 {
              secs -= 1;
              subsecs += 1.0;
          }

          // `subsecs` is now within [0.0, 1.0], so the product fits in a u32.
          //
          // (Any loss of precision in the fractional component will have
          // already happened at the time of initial parsing; in general,
          // f64s are insufficiently precise to provide nanosecond-level
          // precision with present-day timestamps.)
          let mut nsecs = (subsecs * f64::from(NANOS_PER_SEC)) as u32;

          // Adding 1.0 to a tiny negative fraction can round up to exactly
          // 1.0; carry the full second instead of passing a nanosecond count
          // of one billion, which is not a plain sub-second value.
          if nsecs >= NANOS_PER_SEC {
              secs += 1;
              nsecs -= NANOS_PER_SEC;
          }

          NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(out_of_range)
      }

      /// Parse the "extra" fields from a changeset's timestamp line.
      ///
      /// The input is a sequence of NUL-delimited `key:value` pairs. Each pair
      /// is unescaped with `unescape_extra` and then split at its first colon.
      /// Keys must be non-empty and made of ASCII alphanumeric characters,
      /// hyphens and underscores; values may hold arbitrary bytes. Parsing
      /// stops at the first invalid pair and returns its error.
      fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
          let mut parsed = BTreeMap::new();

          for raw_pair in extra.split(|&b| b == b'\0') {
              let decoded = unescape_extra(raw_pair);
              let mut halves = decoded.splitn(2, |&b| b == b':');

              let key_bytes = match halves.next() {
                  Some(candidate) if !candidate.is_empty() => candidate,
                  _ => {
                      return Err(HgError::corrupted(
                          "empty key in changeset extras",
                      ))
                  }
              };

              let is_key_char =
                  |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '-';
              let key = match str::from_utf8(key_bytes) {
                  Ok(text) if text.chars().all(is_key_char) => text.to_string(),
                  _ => {
                      let key = String::from_utf8_lossy(key_bytes);
                      return Err(HgError::corrupted(format!(
                          "invalid key in changeset extras: {key}",
                      )));
                  }
              };

              let value = match halves.next() {
                  Some(raw_value) => raw_value.to_vec(),
                  None => {
                      return Err(HgError::corrupted(format!(
                          "missing value for changeset extra: {key}"
                      )))
                  }
              };

              // As with collecting pairs into a map, a repeated key keeps
              // the value seen last.
              parsed.insert(key, value);
          }

          Ok(parsed)
      }

      /// Decode Mercurial's escaping for changelog extras.
      ///
      /// The `_string_escape` function in `changelog.py` only escapes 4
      /// characters (null, backslash, newline, and carriage return), so those
      /// are the only sequences decoded here: `\0`, `\\`, `\n` and `\r`.
      ///
      /// The Python code also includes a workaround for decoding escaped nuls
      /// that are followed by an ASCII octal digit, since Python's built-in
      /// `string_escape` codec will interpret that as an escaped octal byte
      /// value. That workaround is omitted here since we don't support
      /// decoding octal.
      fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
          let mut decoded = Vec::with_capacity(bytes.len());
          let mut rest = bytes;

          while let Some((&first, tail)) = rest.split_first() {
              if first != b'\\' {
                  decoded.push(first);
                  rest = tail;
                  continue;
              }

              match tail.split_first() {
                  Some((&code, after)) => {
                      match code {
                          b'0' => decoded.push(b'\0'),
                          b'\\' => decoded.push(b'\\'),
                          b'n' => decoded.push(b'\n'),
                          b'r' => decoded.push(b'\r'),
                          // Should never occur in theory: any backslash in
                          // the original input is escaped with another
                          // backslash, so only the 4 sequences above can be
                          // observed. Pass anything else through untouched.
                          other => decoded.extend_from_slice(&[b'\\', other]),
                      }
                      rest = after;
                  }
                  // A lone backslash at the very end is kept as is.
                  None => {
                      decoded.push(b'\\');
                      rest = tail;
                  }
              }
          }

          decoded
      }

      #[cfg(test)]
      mod tests {
          use super::*;
          use crate::vfs::Vfs;
          use crate::NULL_REVISION;
          use pretty_assertions::assert_eq;

          /// Truncated or malformed entries must be rejected by
          /// `ChangelogRevisionData::new`.
          #[test]
          fn test_create_changelogrevisiondata_invalid() {
              // Completely empty
              assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
              // No newline after manifest
              assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
              // No newline after user
              assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
              // No newline after timestamp
              assert!(
                  ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
              );
              // Missing newline after files
              assert!(ChangelogRevisionData::new(Cow::Borrowed(
                  b"abcd\n\n0 0\nfile1\nfile2"
              ))
              .is_err(),);
              // Only one newline after files
              assert!(ChangelogRevisionData::new(Cow::Borrowed(
                  b"abcd\n\n0 0\nfile1\nfile2\n"
              ))
              .is_err(),);
          }

          /// A well-formed entry is split into its sections correctly.
          #[test]
          fn test_create_changelogrevisiondata() {
              // Newlines are written as escapes so that the fixture does not
              // depend on how this source file is indented.
              let data = ChangelogRevisionData::new(Cow::Borrowed(
                  b"0123456789abcdef0123456789abcdef01234567\n\
                    Some One <someone@example.com>\n\
                    0 0\n\
                    file1\n\
                    file2\n\
                    \n\
                    some\n\
                    commit\n\
                    message",
              ))
              .unwrap();
              assert_eq!(
                  data.manifest_node().unwrap(),
                  Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                      .unwrap()
              );
              assert_eq!(data.user(), b"Some One <someone@example.com>");
              assert_eq!(data.timestamp_line(), b"0 0");
              assert_eq!(
                  data.files().collect_vec(),
                  vec![HgPath::new("file1"), HgPath::new("file2")]
              );
              assert_eq!(data.description(), b"some\ncommit\nmessage");
          }

          /// The null revision yields `ChangelogRevisionData::null()`.
          #[test]
          fn test_data_from_rev_null() -> Result<(), RevlogError> {
              // an empty revlog will be enough for this case
              let temp = tempfile::tempdir().unwrap();
              let vfs = Vfs { base: temp.path() };
              std::fs::write(temp.path().join("foo.i"), b"").unwrap();
              let revlog =
                  Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                      .unwrap();

              let changelog = Changelog { revlog };
              assert_eq!(
                  changelog.data_for_rev(NULL_REVISION.into())?,
                  ChangelogRevisionData::null()
              );
              // same with the intermediate entry object
              assert_eq!(
                  changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
                  ChangelogRevisionData::null()
              );
              Ok(())
          }

          #[test]
          fn test_empty_files_list() {
              assert!(ChangelogRevisionData::null()
                  .files()
                  .collect_vec()
                  .is_empty());
          }

          #[test]
          fn test_unescape_basic() {
              // '\0', '\\', '\n', and '\r' are correctly unescaped.
              let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
              let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
              let unescaped = unescape_extra(escaped);
              assert_eq!(&expected[..], &unescaped[..]);
          }

          #[test]
          fn test_unescape_unsupported_sequence() {
              // Other escape sequences are left unaltered. The range is
              // inclusive so that byte 255 is exercised as well.
              for c in 0u8..=255 {
                  match c {
                      b'0' | b'\\' | b'n' | b'r' => continue,
                      c => {
                          let expected = &[b'\\', c][..];
                          let unescaped = unescape_extra(expected);
                          assert_eq!(expected, &unescaped[..]);
                      }
                  }
              }
          }

          #[test]
          fn test_unescape_trailing_backslash() {
              // Trailing backslashes are OK.
              let expected = br"hi\";
              let unescaped = unescape_extra(expected);
              assert_eq!(&expected[..], &unescaped[..]);
          }

          #[test]
          fn test_unescape_nul_followed_by_octal() {
              // Escaped NUL chars followed by octal digits are decoded correctly.
              let expected = b"\012";
              let escaped = br"\012";
              let unescaped = unescape_extra(escaped);
              assert_eq!(&expected[..], &unescaped[..]);
          }

          #[test]
          fn test_parse_float_timestamp() {
              let test_cases = [
                  // Zero should map to the UNIX epoch.
                  ("0.0", "1970-01-01 00:00:00"),
                  // Negative zero should be the same as positive zero.
                  ("-0.0", "1970-01-01 00:00:00"),
                  // Values without fractional components should work like integers.
                  // (Assuming the timestamp is within the limits of f64 precision.)
                  ("1115154970.0", "2005-05-03 21:16:10"),
                  // We expect some loss of precision in the fractional component
                  // when parsing arbitrary floating-point values.
                  ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
                  // But representable f64 values should parse losslessly.
                  ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
                  // Negative fractional components are subtracted from the epoch.
                  ("-1.333", "1969-12-31 23:59:58.667"),
              ];
              for (input, expected) in test_cases {
                  let res = parse_float_timestamp(input).unwrap().to_string();
                  assert_eq!(res, expected);
              }
          }

          /// Test-only inverse of `unescape_extra`: escape the 4 special
          /// bytes (NUL, backslash, newline, carriage return).
          fn escape_extra(bytes: &[u8]) -> Vec<u8> {
              let mut output = Vec::with_capacity(bytes.len());

              for c in bytes.iter().copied() {
                  match c {
                      b'\0' => output.extend_from_slice(b"\\0"),
                      b'\\' => output.extend_from_slice(b"\\\\"),
                      b'\n' => output.extend_from_slice(b"\\n"),
                      b'\r' => output.extend_from_slice(b"\\r"),
                      _ => output.push(c),
                  }
              }

              output
          }

          /// Test-only encoder: join escaped `key:value` pairs with NUL bytes.
          fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
          where
              K: AsRef<[u8]>,
              V: AsRef<[u8]>,
          {
              let extras = pairs.into_iter().map(|(k, v)| {
                  escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
              });
              // Use fully-qualified syntax to avoid a future naming conflict with
              // the standard library: https://github.com/rust-lang/rust/issues/79524
              Itertools::intersperse(extras, b"\0".to_vec()).concat()
          }

          #[test]
          fn test_parse_extra() {
              let extra = [
                  ("branch".into(), b"default".to_vec()),
                  ("key-with-hyphens".into(), b"value1".to_vec()),
                  ("key_with_underscores".into(), b"value2".to_vec()),
                  ("empty-value".into(), b"".to_vec()),
                  ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
              ]
              .into_iter()
              .collect::<BTreeMap<String, Vec<u8>>>();

              let encoded = encode_extra(&extra);
              let parsed = parse_extra(&encoded).unwrap();

              assert_eq!(extra, parsed);
          }

          #[test]
          fn test_corrupt_extra() {
              let test_cases = [
                  (&b""[..], "empty input"),
                  (&b"\0"[..], "unexpected null byte"),
                  (&b":empty-key"[..], "empty key"),
                  (&b"\0leading-null:"[..], "leading null"),
                  (&b"trailing-null:\0"[..], "trailing null"),
                  (&b"missing-value"[..], "missing value"),
                  (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
                  (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
              ];

              for (extra, msg) in test_cases {
                  assert!(
                      parse_extra(extra).is_err(),
                      "corrupt extra should have failed to parse: {}",
                      msg
                  );
              }
          }

          #[test]
          fn test_parse_timestamp_line() {
              let extra = [
                  ("branch".into(), b"default".to_vec()),
                  ("key-with-hyphens".into(), b"value1".to_vec()),
                  ("key_with_underscores".into(), b"value2".to_vec()),
                  ("empty-value".into(), b"".to_vec()),
                  ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
              ]
              .into_iter()
              .collect::<BTreeMap<String, Vec<u8>>>();

              let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
              line.extend_from_slice(&encode_extra(&extra));

              let parsed = TimestampAndExtra::from_bytes(&line).unwrap();

              assert_eq!(
                  &parsed.timestamp.to_rfc3339(),
                  "2005-05-03T13:16:10-08:00"
              );
              assert_eq!(extra, parsed.extra);
          }
      }

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				use std::ascii::escape_default;
				use std::borrow::Cow;
				use std::collections::BTreeMap;
				use std::fmt::{Debug, Formatter};
				use std::{iter, str};

				use chrono::{DateTime, FixedOffset, NaiveDateTime};
				use itertools::{Either, Itertools};

				use crate::errors::HgError;
				use crate::revlog::Revision;
				use crate::revlog::{Node, NodePrefix};
				use crate::revlog::{Revlog, RevlogEntry, RevlogError};
				use crate::utils::hg_path::HgPath;
				use crate::vfs::Vfs;
				use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};

				/// A specialized `Revlog` to work with changelog data format.
				///
				/// This type is a thin wrapper: its only state is the wrapped `Revlog`.
				pub struct Changelog {
				/// The generic `revlog` format.
				pub(crate) revlog: Revlog,
				}

				impl Changelog {
				    /// Open the changelog (`00changelog.i`) found under `store_vfs`.
				    ///
				    /// Any error reported while opening the underlying revlog is
				    /// propagated to the caller.
				    pub fn open(
				        store_vfs: &Vfs,
				        options: RevlogOpenOptions,
				    ) -> Result<Self, HgError> {
				        Ok(Self {
				            revlog: Revlog::open(
				                store_vfs,
				                "00changelog.i",
				                None,
				                options,
				            )?,
				        })
				    }

				    /// Look up `node` and return the parsed `ChangelogRevisionData` of
				    /// the matching revision.
				    pub fn data_for_node(
				        &self,
				        node: NodePrefix,
				    ) -> Result<ChangelogRevisionData, RevlogError> {
				        let checked_rev = self.revlog.rev_from_node(node)?;
				        let entry = self.entry_for_checked_rev(checked_rev)?;
				        entry.data()
				    }

				    /// Fetch the [`ChangelogEntry`] stored at revision number `rev`.
				    pub fn entry_for_rev(
				        &self,
				        rev: UncheckedRevision,
				    ) -> Result<ChangelogEntry, RevlogError> {
				        Ok(ChangelogEntry {
				            revlog_entry: self.revlog.get_entry(rev)?,
				        })
				    }

				    /// Variant of [`Self::entry_for_rev`] taking an already checked
				    /// revision.
				    fn entry_for_checked_rev(
				        &self,
				        rev: Revision,
				    ) -> Result<ChangelogEntry, RevlogError> {
				        Ok(ChangelogEntry {
				            revlog_entry: self.revlog.get_entry_for_checked_rev(rev)?,
				        })
				    }

				    /// Fetch the [`ChangelogRevisionData`] stored at revision number
				    /// `rev`.
				    ///
				    /// Convenient when the generic revlog information (parents, hashes
				    /// etc.) is not needed. When it is, obtain a [`ChangelogEntry`]
				    /// through [entry_for_rev](`Self::entry_for_rev`) and work from that.
				    pub fn data_for_rev(
				        &self,
				        rev: UncheckedRevision,
				    ) -> Result<ChangelogRevisionData, RevlogError> {
				        let entry = self.entry_for_rev(rev)?;
				        entry.data()
				    }

				    /// Node stored for `rev`, as reported by the underlying revlog.
				    pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
				        let revlog = &self.revlog;
				        revlog.node_from_rev(rev)
				    }

				    /// Revision matching `node`, as resolved by the underlying revlog.
				    pub fn rev_from_node(
				        &self,
				        node: NodePrefix,
				    ) -> Result<Revision, RevlogError> {
				        let revlog = &self.revlog;
				        revlog.rev_from_node(node)
				    }
				}

				// Graph access for the changelog: the request is forwarded unchanged to
				// the wrapped revlog.
				impl Graph for Changelog {
				/// Return the two parent revisions of `rev`, exactly as reported by
				/// `self.revlog.parents`.
				fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
				self.revlog.parents(rev)
				}
				}

				/// A specialized `RevlogEntry` for `changelog` data format
				///
				/// This is a `RevlogEntry` with the added semantics that the associated
				/// data should meet the requirements for `changelog`, materialized by
				/// the fact that `data()` constructs a `ChangelogRevisionData`.
				/// In case that promise would be broken, the `data` method returns an error.
				///
				/// The `'changelog` lifetime parameter is the one carried by the wrapped
				/// `RevlogEntry`.
				#[derive(Clone)]
				pub struct ChangelogEntry<'changelog> {
				/// Same data, as a generic `RevlogEntry`.
				pub(crate) revlog_entry: RevlogEntry<'changelog>,
				}

				impl<'changelog> ChangelogEntry<'changelog> {
				    /// Parse the data of this entry as changelog data.
				    ///
				    /// Empty data yields [`ChangelogRevisionData::null`]. Data that cannot
				    /// be parsed is reported as a corrupted-repository error naming the
				    /// revision.
				    pub fn data<'a>(
				        &'a self,
				    ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
				        let raw = self.revlog_entry.data()?;
				        if raw.is_empty() {
				            return Ok(ChangelogRevisionData::null());
				        }
				        match ChangelogRevisionData::new(raw) {
				            Ok(parsed) => Ok(parsed),
				            Err(err) => {
				                let message = format!(
				                    "Invalid changelog data for revision {}: {:?}",
				                    self.revlog_entry.revision(),
				                    err
				                );
				                Err(RevlogError::Other(HgError::CorruptedRepository(
				                    message,
				                )))
				            }
				        }
				    }

				    /// Borrow the wrapped `RevlogEntry`.
				    ///
				    /// Gives access to what every revlog entry has in common: revision
				    /// number, node id, parent revisions etc.
				    pub fn as_revlog_entry(&self) -> &RevlogEntry {
				        &self.revlog_entry
				    }

				    /// Entry of the first parent, if the underlying revlog entry has one.
				    pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
				        let parent = self.revlog_entry.p1_entry()?;
				        Ok(parent.map(|revlog_entry| Self { revlog_entry }))
				    }

				    /// Entry of the second parent, if the underlying revlog entry has one.
				    pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
				        let parent = self.revlog_entry.p2_entry()?;
				        Ok(parent.map(|revlog_entry| Self { revlog_entry }))
				    }
				}

				/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
				///
				/// The entry is a sequence of newline-terminated sections: manifest line,
				/// user line, timestamp line, then the file list, followed by an empty
				/// line and the description. The `*_end` fields are byte offsets into
				/// `bytes` marking where each of the first four sections stops.
				#[derive(PartialEq)]
				pub struct ChangelogRevisionData<'changelog> {
				/// The data bytes of the `changelog` entry.
				bytes: Cow<'changelog, [u8]>,
				/// The end offset for the hex manifest (not including the newline)
				manifest_end: usize,
				/// The end offset for the user+email (not including the newline)
				user_end: usize,
				/// The end offset for the timestamp+timezone+extras (not including the
				/// newline)
				timestamp_end: usize,
				/// The end offset for the file list (not including the newline)
				// NOTE: equals `timestamp_end` when the file list is empty.
				files_end: usize,
				}

				impl<'changelog> ChangelogRevisionData<'changelog> {
				    /// Index the sections of a raw changelog entry.
				    ///
				    /// Fails with a corruption error when the manifest, user or timestamp
				    /// line is missing, or when the file list is not terminated by an
				    /// empty line followed by at least one more newline-delimited section.
				    fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
				        let mut lines = bytes.split(|byte| *byte == b'\n');

				        // `split` always yields at least one (possibly empty) item.
				        let manifest_end =
				            lines.next().expect("Empty iterator from split()?").len();

				        let user_end = match lines.next() {
				            Some(user) => manifest_end + 1 + user.len(),
				            None => {
				                return Err(HgError::corrupted(
				                    "Changeset data truncated after manifest line",
				                ))
				            }
				        };

				        let timestamp_end = match lines.next() {
				            Some(timestamp) => user_end + 1 + timestamp.len(),
				            None => {
				                return Err(HgError::corrupted(
				                    "Changeset data truncated after user line",
				                ))
				            }
				        };

				        // `cursor` is the offset at which the line being examined starts.
				        let mut cursor = timestamp_end + 1;
				        let files_end = loop {
				            match lines.next() {
				                None => {
				                    return Err(HgError::corrupted(
				                        "Changeset data truncated in files list",
				                    ))
				                }
				                Some(line) if line.is_empty() => {
				                    if cursor == bytes.len() {
				                        // The list of files ended with a single newline
				                        // (there should be two)
				                        return Err(HgError::corrupted(
				                            "Changeset data truncated after files list",
				                        ));
				                    }
				                    // Step back over the newline ending the last file.
				                    break cursor - 1;
				                }
				                Some(line) => cursor += line.len() + 1,
				            }
				        };

				        Ok(Self {
				            bytes,
				            manifest_end,
				            user_end,
				            timestamp_end,
				            files_end,
				        })
				    }

				    /// Data used for an empty entry: an all-zero manifest, an empty user,
				    /// a `0 0` timestamp line, no files and an empty description.
				    fn null() -> Self {
				        let raw: &'static [u8] =
				            b"0000000000000000000000000000000000000000\n\n0 0\n\n";
				        Self::new(Cow::Borrowed(raw)).unwrap()
				    }

				    /// Iterate over the newline-separated lines of the whole entry.
				    pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
				        self.bytes.split(|byte| *byte == b'\n')
				    }

				    /// Node id of the `manifest` this `changelog` entry points at,
				    /// decoded from the hex digits of the first line.
				    pub fn manifest_node(&self) -> Result<Node, HgError> {
				        Node::from_hex_for_repo(&self.bytes[..self.manifest_end])
				    }

				    /// The complete user line (usually a name followed by an email in
				    /// angle brackets).
				    pub fn user(&self) -> &[u8] {
				        let start = self.manifest_end + 1;
				        &self.bytes[start..self.user_end]
				    }

				    /// The complete timestamp line (timestamp in seconds, offset in
				    /// seconds, and possibly extras).
				    // TODO: We should expose this in a more useful way
				    pub fn timestamp_line(&self) -> &[u8] {
				        let start = self.user_end + 1;
				        &self.bytes[start..self.timestamp_end]
				    }

				    /// The timestamp line in parsed form, optional extras included.
				    pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> {
				        let line = self.timestamp_line();
				        TimestampAndExtra::from_bytes(line)
				    }

				    /// Paths of the files changed in this revision.
				    pub fn files(&self) -> impl Iterator<Item = &HgPath> {
				        if self.timestamp_end != self.files_end {
				            let listing =
				                &self.bytes[self.timestamp_end + 1..self.files_end];
				            Either::Right(
				                listing.split(|byte| *byte == b'\n').map(HgPath::new),
				            )
				        } else {
				            // Equal offsets mean the file list is empty.
				            Either::Left(iter::empty())
				        }
				    }

				    /// The change description: everything after the empty line that
				    /// follows the file list.
				    pub fn description(&self) -> &[u8] {
				        let start = self.files_end + 2;
				        &self.bytes[start..]
				    }
				}

				impl Debug for ChangelogRevisionData<'_> {
				    /// Render every section of the entry with non-printable bytes escaped.
				    ///
				    /// Fields shown: the raw `bytes`, then `manifest`, `user`,
				    /// `timestamp`, `files` and `description`.
				    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
				        // With an empty file list `files_end == timestamp_end`, so the
				        // range `timestamp_end + 1..files_end` would start past its end
				        // and panic. Show an empty list instead, as `files()` does.
				        let files: &[u8] = if self.timestamp_end == self.files_end {
				            &[]
				        } else {
				            &self.bytes[self.timestamp_end + 1..self.files_end]
				        };

				        f.debug_struct("ChangelogRevisionData")
				            .field("bytes", &debug_bytes(&self.bytes))
				            .field(
				                "manifest",
				                &debug_bytes(&self.bytes[..self.manifest_end]),
				            )
				            .field(
				                "user",
				                &debug_bytes(
				                    &self.bytes[self.manifest_end + 1..self.user_end],
				                ),
				            )
				            .field(
				                "timestamp",
				                &debug_bytes(
				                    &self.bytes[self.user_end + 1..self.timestamp_end],
				                ),
				            )
				            .field("files", &debug_bytes(files))
				            .field(
				                "description",
				                &debug_bytes(&self.bytes[self.files_end + 2..]),
				            )
				            .finish()
				    }
				}

				/// Render `bytes` as a printable string, escaping each byte with
				/// `std::ascii::escape_default` (e.g. a newline becomes `\n`, byte 255
				/// becomes `\xff`).
				fn debug_bytes(bytes: &[u8]) -> String {
				    // `escape_default` only ever yields ASCII bytes, so each one maps
				    // directly to a `char`; no intermediate buffer or lossy UTF-8
				    // conversion is needed.
				    bytes
				        .iter()
				        .flat_map(|byte| escape_default(*byte))
				        .map(char::from)
				        .collect()
				}

				/// Parsed timestamp line, including the timestamp and optional extras.
				#[derive(Clone, Debug)]
				pub struct TimestampAndExtra {
				/// The commit time, carrying the timezone offset read from the line.
				pub timestamp: DateTime<FixedOffset>,
				/// The extras, keyed by name. Values are the raw bytes obtained after
				/// unescaping; the map is empty when the line has no extras section.
				pub extra: BTreeMap<String, Vec<u8>>,
				}

				impl TimestampAndExtra {
				/// Parse the raw bytes of the timestamp line from a changelog entry.
				///
				/// According to the documentation in `hg help dates` and the
				/// implementation in `changelog.py`, the format of the timestamp line
				/// is `time tz extra\n` where:
				///
				/// - `time` is an ASCII-encoded signed int or float denoting a UTC
				/// timestamp as seconds since the UNIX epoch.
				///
				/// - `tz` is the timezone offset as an ASCII-encoded signed integer
				/// denoting seconds WEST of UTC (so negative for timezones east of UTC,
				/// which is the opposite of the sign in ISO 8601 timestamps).
				///
				/// - `extra` is an optional set of NUL-delimited key-value pairs, with the
				/// key and value in each pair separated by an ASCII colon. Keys are
				/// limited to ASCII letters, digits, hyphens, and underscores, whereas
				/// values can be arbitrary bytes.
				fn from_bytes(line: &[u8]) -> Result<Self, HgError> {
				let mut parts = line.splitn(3, \|c\| *c == b' ');

				let timestamp_bytes = parts
				.next()
				.ok_or_else(\|\| HgError::corrupted("missing timestamp"))?;
				let timestamp_str = str::from_utf8(timestamp_bytes).map_err(\|e\| {
				HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
				})?;
				let timestamp_utc = timestamp_str
				.parse()
				.map_err(\|e\| {
				HgError::corrupted(format!("failed to parse timestamp: {e}"))
				})
				.and_then(\|secs\| {
				NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(\|\| {
				HgError::corrupted(format!(
				"integer timestamp out of valid range: {secs}"
				))
				})
				})
				// Attempt to parse the timestamp as a float if we can't parse
				// it as an int. It doesn't seem like float timestamps are actually
				// used in practice, but the Python code supports them.
				.or_else(\|_\| parse_float_timestamp(timestamp_str))?;

				let timezone_bytes = parts
				.next()
				.ok_or_else(\|\| HgError::corrupted("missing timezone"))?;
				let timezone_secs: i32 = str::from_utf8(timezone_bytes)
				.map_err(\|e\| {
				HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
				})?
				.parse()
				.map_err(\|e\| {
				HgError::corrupted(format!("timezone is not an integer: {e}"))
				})?;
				let timezone =
				FixedOffset::west_opt(timezone_secs).ok_or_else(\|\| {
				HgError::corrupted("timezone offset out of bounds")
				})?;

				let timestamp =
				DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);
				let extra = parts
				.next()
				.map(parse_extra)
				.transpose()?
				.unwrap_or_default();

				Ok(Self { timestamp, extra })
				}
				}

				/// Attempt to parse the given string as floating-point timestamp, and
				/// convert the result into a `chrono::NaiveDateTime`.
				///
				/// Returns a corruption error when the string is not a float, when it is
				/// not finite (`nan`, `inf`), or when the value is outside the range that
				/// `NaiveDateTime` can represent.
				fn parse_float_timestamp(
				    timestamp_str: &str,
				) -> Result<NaiveDateTime, HgError> {
				    let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
				        HgError::corrupted(format!("failed to parse timestamp: {e}"))
				    })?;

				    // `f64::from_str` accepts "nan" and "inf". A NaN would otherwise be
				    // cast to 0 and silently read as the UNIX epoch.
				    if !timestamp.is_finite() {
				        return Err(HgError::corrupted(format!(
				            "float timestamp out of valid range: {timestamp}"
				        )));
				    }

				    // To construct a `NaiveDateTime` we'll need to convert the float
				    // into signed integer seconds and unsigned integer nanoseconds.
				    // (The cast saturates for values beyond the `i64` range, which are
				    // then rejected by `from_timestamp_opt` below.)
				    let mut secs = timestamp.trunc() as i64;
				    let mut subsecs = timestamp.fract();

				    // If the fractional component is negative, we need to express it as
				    // positive nanoseconds since the previous second. Testing the
				    // fraction (not the whole value) leaves negative whole numbers such
				    // as -1.0 untouched, and never decrements a saturated `i64::MIN`,
				    // because huge values have no fractional component.
				    if subsecs < 0.0 {
				        secs -= 1;
				        subsecs += 1.0;
				    }

				    // The fractional component is below 1.0, so the product is at most
				    // 1 billion and fits into a u32. A tiny negative fraction can round
				    // up to exactly 1.0 in the addition above, hence the clamp to the
				    // largest valid nanosecond count.
				    //
				    // (Any loss of precision in the fractional component will have
				    // already happened at the time of initial parsing; in general,
				    // f64s are insufficiently precise to provide nanosecond-level
				    // precision with present-day timestamps.)
				    let nsecs = ((subsecs * 1_000_000_000.0) as u32).min(999_999_999);

				    NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
				        HgError::corrupted(format!(
				            "float timestamp out of valid range: {timestamp}"
				        ))
				    })
				}

				/// Parse the "extra" fields found on a changeset's timestamp line.
				///
				/// Extras are NUL-delimited `key:value` pairs. Each pair is unescaped
				/// first, then split at its first colon. A key must be non-empty and made
				/// of ASCII alphanumeric characters, hyphens and underscores; a value may
				/// hold arbitrary bytes. When a key appears more than once, the last
				/// occurrence is kept.
				fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
				    let mut parsed = BTreeMap::new();

				    for raw_pair in extra.split(|byte| *byte == b'\0') {
				        let pair = unescape_extra(raw_pair);

				        // Everything before the first colon is the key. Without a colon
				        // the whole pair is the key and there is no value.
				        let (key_bytes, value_bytes) =
				            match pair.iter().position(|byte| *byte == b':') {
				                Some(colon) => (&pair[..colon], Some(&pair[colon + 1..])),
				                None => (&pair[..], None),
				            };

				        if key_bytes.is_empty() {
				            return Err(HgError::corrupted(
				                "empty key in changeset extras",
				            ));
				        }

				        let key = match str::from_utf8(key_bytes) {
				            Ok(text)
				                if text.chars().all(|c| {
				                    c.is_ascii_alphanumeric() || c == '_' || c == '-'
				                }) =>
				            {
				                text.to_string()
				            }
				            _ => {
				                let key = String::from_utf8_lossy(key_bytes);
				                return Err(HgError::corrupted(format!(
				                    "invalid key in changeset extras: {key}",
				                )));
				            }
				        };

				        let value = match value_bytes {
				            Some(bytes) => bytes.to_vec(),
				            None => {
				                return Err(HgError::corrupted(format!(
				                    "missing value for changeset extra: {key}"
				                )))
				            }
				        };

				        parsed.insert(key, value);
				    }

				    Ok(parsed)
				}

				/// Undo Mercurial's escaping of changelog extras.
				///
				/// `_string_escape` in `changelog.py` escapes exactly four bytes (NUL,
				/// backslash, newline and carriage return), so those are the only
				/// sequences decoded here.
				///
				/// The Python side also carries a workaround for an escaped NUL followed
				/// by an ASCII octal digit, which Python's built-in `string_escape` codec
				/// would read as an octal byte value. Octal is never decoded here, so the
				/// workaround is not needed.
				fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
				    let mut decoded = Vec::with_capacity(bytes.len());
				    let mut rest = bytes;

				    loop {
				        rest = match rest {
				            [] => break,
				            [b'\\', b'0', tail @ ..] => {
				                decoded.push(b'\0');
				                tail
				            }
				            [b'\\', b'\\', tail @ ..] => {
				                decoded.push(b'\\');
				                tail
				            }
				            [b'\\', b'n', tail @ ..] => {
				                decoded.push(b'\n');
				                tail
				            }
				            [b'\\', b'r', tail @ ..] => {
				                decoded.push(b'\r');
				                tail
				            }
				            // The two arms below should be unreachable for well-formed
				            // input: every backslash in the original data is escaped with
				            // another backslash, so no other sequence can appear. Such
				            // input is copied through unchanged.
				            [b'\\', other, tail @ ..] => {
				                decoded.extend_from_slice(&[b'\\', *other]);
				                tail
				            }
				            [b'\\'] => {
				                decoded.push(b'\\');
				                break;
				            }
				            [plain, tail @ ..] => {
				                decoded.push(*plain);
				                tail
				            }
				        };
				    }

				    decoded
				}

				#[cfg(test)]
				mod tests {
				    use super::*;
				    use crate::vfs::Vfs;
				    use crate::NULL_REVISION;
				    use pretty_assertions::assert_eq;

				    /// Truncated entries must be rejected at every possible cut point.
				    #[test]
				    fn test_create_changelogrevisiondata_invalid() {
				        // Completely empty
				        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
				        // No newline after manifest
				        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
				        // No newline after user
				        assert!(
				            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err()
				        );
				        // No newline after timestamp
				        assert!(
				            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0"))
				                .is_err()
				        );
				        // Missing newline after files
				        assert!(ChangelogRevisionData::new(Cow::Borrowed(
				            b"abcd\n\n0 0\nfile1\nfile2"
				        ))
				        .is_err());
				        // Only one newline after files
				        assert!(ChangelogRevisionData::new(Cow::Borrowed(
				            b"abcd\n\n0 0\nfile1\nfile2\n"
				        ))
				        .is_err());
				    }

				    /// A well-formed entry exposes each of its sections.
				    #[test]
				    fn test_create_changelogrevisiondata() {
				        // Newlines are written as escapes so that the fixture does not
				        // depend on how this source file is indented.
				        let data = ChangelogRevisionData::new(Cow::Borrowed(
				            b"0123456789abcdef0123456789abcdef01234567\n\
				              Some One <someone@example.com>\n\
				              0 0\n\
				              file1\n\
				              file2\n\
				              \n\
				              some\n\
				              commit\n\
				              message",
				        ))
				        .unwrap();
				        assert_eq!(
				            data.manifest_node().unwrap(),
				            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
				                .unwrap()
				        );
				        assert_eq!(data.user(), b"Some One <someone@example.com>");
				        assert_eq!(data.timestamp_line(), b"0 0");
				        assert_eq!(
				            data.files().collect_vec(),
				            vec![HgPath::new("file1"), HgPath::new("file2")]
				        );
				        assert_eq!(data.description(), b"some\ncommit\nmessage");
				    }

				    /// The null revision yields the null changelog data.
				    #[test]
				    fn test_data_from_rev_null() -> Result<(), RevlogError> {
				        // an empty revlog will be enough for this case
				        let temp = tempfile::tempdir().unwrap();
				        let vfs = Vfs { base: temp.path() };
				        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
				        let revlog =
				            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
				                .unwrap();

				        let changelog = Changelog { revlog };
				        assert_eq!(
				            changelog.data_for_rev(NULL_REVISION.into())?,
				            ChangelogRevisionData::null()
				        );
				        // same with the intermediate entry object
				        assert_eq!(
				            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
				            ChangelogRevisionData::null()
				        );
				        Ok(())
				    }

				    #[test]
				    fn test_empty_files_list() {
				        assert!(ChangelogRevisionData::null()
				            .files()
				            .collect_vec()
				            .is_empty());
				    }

				    #[test]
				    fn test_unescape_basic() {
				        // '\0', '\\', '\n', and '\r' are correctly unescaped.
				        let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
				        let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
				        let unescaped = unescape_extra(escaped);
				        assert_eq!(&expected[..], &unescaped[..]);
				    }

				    #[test]
				    fn test_unescape_unsupported_sequence() {
				        // Other escape sequences are left unaltered. The range is
				        // inclusive so that byte 255 is exercised as well.
				        for c in 0u8..=255 {
				            match c {
				                b'0' | b'\\' | b'n' | b'r' => continue,
				                c => {
				                    let expected = &[b'\\', c][..];
				                    let unescaped = unescape_extra(expected);
				                    assert_eq!(expected, &unescaped[..]);
				                }
				            }
				        }
				    }

				    #[test]
				    fn test_unescape_trailing_backslash() {
				        // Trailing backslashes are OK.
				        let expected = br"hi\";
				        let unescaped = unescape_extra(expected);
				        assert_eq!(&expected[..], &unescaped[..]);
				    }

				    #[test]
				    fn test_unescape_nul_followed_by_octal() {
				        // Escaped NUL chars followed by octal digits are decoded correctly.
				        let expected = b"\012";
				        let escaped = br"\012";
				        let unescaped = unescape_extra(escaped);
				        assert_eq!(&expected[..], &unescaped[..]);
				    }

				    #[test]
				    fn test_parse_float_timestamp() {
				        let test_cases = [
				            // Zero should map to the UNIX epoch.
				            ("0.0", "1970-01-01 00:00:00"),
				            // Negative zero should be the same as positive zero.
				            ("-0.0", "1970-01-01 00:00:00"),
				            // Values without fractional components should work like
				            // integers. (Assuming the timestamp is within the limits of
				            // f64 precision.)
				            ("1115154970.0", "2005-05-03 21:16:10"),
				            // We expect some loss of precision in the fractional component
				            // when parsing arbitrary floating-point values.
				            ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
				            // But representable f64 values should parse losslessly.
				            ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
				            // Negative fractional components are subtracted from the epoch.
				            ("-1.333", "1969-12-31 23:59:58.667"),
				        ];

				        for (input, expected) in test_cases {
				            let res = parse_float_timestamp(input).unwrap().to_string();
				            assert_eq!(res, expected);
				        }
				    }

				    /// Test-side inverse of `unescape_extra`: escapes NUL, backslash,
				    /// newline and carriage return.
				    fn escape_extra(bytes: &[u8]) -> Vec<u8> {
				        let mut output = Vec::with_capacity(bytes.len());

				        for c in bytes.iter().copied() {
				            output.extend_from_slice(match c {
				                b'\0' => &b"\\0"[..],
				                b'\\' => &b"\\\\"[..],
				                b'\n' => &b"\\n"[..],
				                b'\r' => &b"\\r"[..],
				                _ => {
				                    output.push(c);
				                    continue;
				                }
				            });
				        }

				        output
				    }

				    /// Encode key-value pairs the way they appear on a timestamp line:
				    /// each `key:value` pair escaped, pairs joined with NUL bytes.
				    fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
				    where
				        K: AsRef<[u8]>,
				        V: AsRef<[u8]>,
				    {
				        let extras = pairs.into_iter().map(|(k, v)| {
				            escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
				        });
				        // Use fully-qualified syntax to avoid a future naming conflict with
				        // the standard library: https://github.com/rust-lang/rust/issues/79524
				        Itertools::intersperse(extras, b"\0".to_vec()).concat()
				    }

				    #[test]
				    fn test_parse_extra() {
				        let extra = [
				            ("branch".into(), b"default".to_vec()),
				            ("key-with-hyphens".into(), b"value1".to_vec()),
				            ("key_with_underscores".into(), b"value2".to_vec()),
				            ("empty-value".into(), b"".to_vec()),
				            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
				        ]
				        .into_iter()
				        .collect::<BTreeMap<String, Vec<u8>>>();

				        let encoded = encode_extra(&extra);
				        let parsed = parse_extra(&encoded).unwrap();

				        assert_eq!(extra, parsed);
				    }

				    #[test]
				    fn test_corrupt_extra() {
				        let test_cases = [
				            (&b""[..], "empty input"),
				            (&b"\0"[..], "unexpected null byte"),
				            (&b":empty-key"[..], "empty key"),
				            (&b"\0leading-null:"[..], "leading null"),
				            (&b"trailing-null:\0"[..], "trailing null"),
				            (&b"missing-value"[..], "missing value"),
				            (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
				            (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
				        ];

				        for (extra, msg) in test_cases {
				            assert!(
				                parse_extra(extra).is_err(),
				                "corrupt extra should have failed to parse: {}",
				                msg
				            );
				        }
				    }

				    #[test]
				    fn test_parse_timestamp_line() {
				        let extra = [
				            ("branch".into(), b"default".to_vec()),
				            ("key-with-hyphens".into(), b"value1".to_vec()),
				            ("key_with_underscores".into(), b"value2".to_vec()),
				            ("empty-value".into(), b"".to_vec()),
				            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
				        ]
				        .into_iter()
				        .collect::<BTreeMap<String, Vec<u8>>>();

				        let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
				        line.extend_from_slice(&encode_extra(&extra));

				        let parsed = TimestampAndExtra::from_bytes(&line).unwrap();

				        assert_eq!(
				            &parsed.timestamp.to_rfc3339(),
				            "2005-05-03T13:16:10-08:00"
				        );
				        assert_eq!(extra, parsed.extra);
				    }
				}