upstream/mercurial-mirror Files · rust/hg-core/src/revlog/changelog.rs

rust-index: drop offset_override...

rust-index: drop offset_override The inline `offsets` value diverges from the one on disk for added values, so the offset_override trick is not going to work well once we start having the full revlog logic in Rust. We remove it beforehand and align the Rust logic with the Python one (adjusting the segment offset at read time for inline revlogs).

Arun Kulshreshtha - - Load All Authors

File last commit:

r52286:6603a144 default


                r52338:d2858d97

default

Download file

             changelog.rs
        
                    744 lines
            
             | 25.1 KiB
            
                | application/rls-services+xml
            
             |
                RustLexer
            
             / rust / hg-core / src / revlog / changelog.rs
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
      use std::ascii::escape_default;

      use std::borrow::Cow;

      use std::collections::BTreeMap;

      use std::fmt::{Debug, Formatter};

      use std::{iter, str};

      use chrono::{DateTime, FixedOffset, NaiveDateTime};

      use itertools::{Either, Itertools};

        Simon Sapin
    
rust: use HgError in RevlogError and Vfs...

              r47172
            
      use crate::errors::HgError;

        Georges Racinet
    
rust-changelog: removed now useless early conditional for NULL_REVISION...

              r51640
            
      use crate::revlog::Revision;

        Simon Sapin
    
rhg: `cat` command: print error messages for missing files...

              r47478
            
      use crate::revlog::{Node, NodePrefix};

        Raphaël Gomès
    
rust-clippy: merge "revlog" module definition and struct implementation...

              r50832
            
      use crate::revlog::{Revlog, RevlogEntry, RevlogError};

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
      use crate::utils::hg_path::HgPath;

        Martin von Zweigbergk
    
rust-revlog: make `Changelog` and `ManifestLog` unaware of `Repo`...

              r49981
            
      use crate::vfs::Vfs;

        Raphaël Gomès
    
rust-revlog: teach the revlog opening code to read the repo options...

              r52084
            
      use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
        Georges Racinet
    
rust-changelog: made doc-comments more consistent...

              r51266
            
      /// A specialized `Revlog` to work with changelog data format.

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
      pub struct Changelog {

          /// The generic `revlog` format.

        Simon Sapin
    
rhg: centralize parsing of `--rev` CLI arguments...

              r47162
            
          pub(crate) revlog: Revlog,

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
      }

      impl Changelog {

          /// Open the `changelog` of a repository given by its root.

        Raphaël Gomès
    
rust-revlog: teach the revlog opening code to read the repo options...

              r52084
            
          pub fn open(

              store_vfs: &Vfs,

              options: RevlogOpenOptions,

          ) -> Result<Self, HgError> {

              let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
              Ok(Self { revlog })

          }

        Georges Racinet
    
rust-changelog: made doc-comments more consistent...

              r51266
            
          /// Return the `ChangelogRevisionData` for the given node ID.

        Simon Sapin
    
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev...

              r48783
            
          pub fn data_for_node(

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
              &self,

        Simon Sapin
    
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...

              r47160
            
              node: NodePrefix,

        Simon Sapin
    
rhg: Rename some revlog-related types and methods...

              r49372
            
          ) -> Result<ChangelogRevisionData, RevlogError> {

        Simon Sapin
    
rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`...

              r48782
            
              let rev = self.revlog.rev_from_node(node)?;

        Raphaël Gomès
    
rust: use the new `UncheckedRevision` everywhere applicable...

              r51870
            
              self.entry_for_checked_rev(rev)?.data()

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
          }

        Georges Racinet
    
rust-changelog: introducing an intermediate `ChangelogEntry`...

              r51268
            
          /// Return the [`ChangelogEntry`] for the given revision number.

        Martin von Zweigbergk
    
rust-revlog: add methods for getting parent revs and entries...

              r49939
            
          pub fn entry_for_rev(

              &self,

        Raphaël Gomès
    
rust: use the new `UncheckedRevision` everywhere applicable...

              r51870
            
              rev: UncheckedRevision,

          ) -> Result<ChangelogEntry, RevlogError> {

              let revlog_entry = self.revlog.get_entry(rev)?;

              Ok(ChangelogEntry { revlog_entry })

          }

          /// Same as [`Self::entry_for_rev`] for checked revisions.

          fn entry_for_checked_rev(

              &self,

        Martin von Zweigbergk
    
rust-revlog: add methods for getting parent revs and entries...

              r49939
            
              rev: Revision,

        Georges Racinet
    
rust-changelog: introducing an intermediate `ChangelogEntry`...

              r51268
            
          ) -> Result<ChangelogEntry, RevlogError> {

        Raphaël Gomès
    
rust: use the new `UncheckedRevision` everywhere applicable...

              r51870
            
              let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;

        Georges Racinet
    
rust-changelog: introducing an intermediate `ChangelogEntry`...

              r51268
            
              Ok(ChangelogEntry { revlog_entry })

        Martin von Zweigbergk
    
rust-revlog: add methods for getting parent revs and entries...

              r49939
            
          }

        Georges Racinet
    
rust-changelog: made doc-comments more consistent...

              r51266
            
          /// Return the [`ChangelogRevisionData`] for the given revision number.

        Georges Racinet
    
rust-changelog: introducing an intermediate `ChangelogEntry`...

              r51268
            
          ///

          /// This is a useful shortcut in case the caller does not need the

          /// generic revlog information (parents, hashes etc). Otherwise

          /// consider taking a [`ChangelogEntry`] with

          /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.

        Simon Sapin
    
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev...

              r48783
            
          pub fn data_for_rev(

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
              &self,

        Raphaël Gomès
    
rust: use the new `UncheckedRevision` everywhere applicable...

              r51870
            
              rev: UncheckedRevision,

        Simon Sapin
    
rhg: Rename some revlog-related types and methods...

              r49372
            
          ) -> Result<ChangelogRevisionData, RevlogError> {

        Georges Racinet
    
rust-changelog: introducing an intermediate `ChangelogEntry`...

              r51268
            
              self.entry_for_rev(rev)?.data()

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
          }

        Simon Sapin
    
rhg: `cat` command: print error messages for missing files...

              r47478
            
        Raphaël Gomès
    
rust: use the new `UncheckedRevision` everywhere applicable...

              r51870
            
          pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {

        Simon Sapin
    
rust: Make private the `index` field of the `Revlog` struct...

              r48781
            
              self.revlog.node_from_rev(rev)

        Simon Sapin
    
rhg: `cat` command: print error messages for missing files...

              r47478
            
          }

        Martin von Zweigbergk
    
rust-revlog: add methods for getting parent revs and entries...

              r49939
            
          /// Look up the revision for `node` by delegating to the underlying
          /// revlog's `rev_from_node`; any error it reports is returned as is.
          pub fn rev_from_node(

              &self,

              node: NodePrefix,

          ) -> Result<Revision, RevlogError> {

              self.revlog.rev_from_node(node)

          }

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
      }

        Raphaël Gomès
    
rust: implement the `Graph` trait for all revlogs...

              r51871
            
      /// Expose the changelog as a [`Graph`] by forwarding parent lookups to the
      /// underlying revlog.
      impl Graph for Changelog {

          /// Return the two parent revisions of `rev`, exactly as reported by
          /// the wrapped revlog's own `parents` method.
          fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {

              self.revlog.parents(rev)

          }

      }

        Georges Racinet
    
rust-changelog: introducing an intermediate `ChangelogEntry`...

              r51268
            
      /// A specialized `RevlogEntry` for the `changelog` data format.

      ///

      /// This is a `RevlogEntry` with the added semantics that the associated

      /// data should meet the requirements for `changelog`, materialized by

      /// the fact that `data()` constructs a `ChangelogRevisionData`.

      /// If the stored bytes do not meet those requirements, `data()` returns an
      /// error instead.

      #[derive(Clone)]

      pub struct ChangelogEntry<'changelog> {

          /// The same entry, viewed as a generic `RevlogEntry`.

          pub(crate) revlog_entry: RevlogEntry<'changelog>,

      }

      impl<'changelog> ChangelogEntry<'changelog> {

          /// Parse the raw bytes of this entry into a `ChangelogRevisionData`.
          ///
          /// Empty data is mapped to the placeholder built by
          /// `ChangelogRevisionData::null()`. Bytes that cannot be parsed are
          /// reported as a `CorruptedRepository` error naming the revision.
          pub fn data<'a>(
              &'a self,
          ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
              let raw = self.revlog_entry.data()?;
              if raw.is_empty() {
                  return Ok(ChangelogRevisionData::null());
              }
              match ChangelogRevisionData::new(raw) {
                  Ok(parsed) => Ok(parsed),
                  Err(err) => {
                      // Only build the message on the failure path.
                      let message = format!(
                          "Invalid changelog data for revision {}: {:?}",
                          self.revlog_entry.revision(),
                          err
                      );
                      Err(RevlogError::Other(HgError::CorruptedRepository(
                          message,
                      )))
                  }
              }
          }

          /// Obtain a reference to the underlying `RevlogEntry`.

          ///

          /// This allows the caller to access the information that is common

          /// to all revlog entries: revision number, node id, parent revisions etc.

          pub fn as_revlog_entry(&self) -> &RevlogEntry {

              &self.revlog_entry

          }

        Georges Racinet
    
rust-changelog: introduce ChangelogEntry parent entries accessors...

              r51271
            
          /// Wrap the entry returned by the underlying `RevlogEntry::p1_entry`
          /// (if there is one) as a `ChangelogEntry`.
          pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
              let parent = self.revlog_entry.p1_entry()?;
              Ok(parent.map(|revlog_entry| Self { revlog_entry }))
          }

          /// Wrap the entry returned by the underlying `RevlogEntry::p2_entry`
          /// (if there is one) as a `ChangelogEntry`.
          pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
              let parent = self.revlog_entry.p2_entry()?;
              Ok(parent.map(|revlog_entry| Self { revlog_entry }))
          }

        Georges Racinet
    
rust-changelog: introducing an intermediate `ChangelogEntry`...

              r51268
            
      }

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
      /// `Changelog` entry which knows how to interpret the `changelog` data bytes.

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
      #[derive(PartialEq)]

        Martin von Zweigbergk
    
changelog: avoid copying changeset data into `ChangesetRevisionData`...

              r49987
            
      pub struct ChangelogRevisionData<'changelog> {

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
          /// The data bytes of the `changelog` entry.

        Martin von Zweigbergk
    
changelog: avoid copying changeset data into `ChangesetRevisionData`...

              r49987
            
          bytes: Cow<'changelog, [u8]>,

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
          /// The end offset for the hex manifest (not including the newline)

          manifest_end: usize,

          /// The end offset for the user+email (not including the newline)

          user_end: usize,

          /// The end offset for the timestamp+timezone+extras (not including the

          /// newline)

          timestamp_end: usize,

          /// The end offset for the file list (not including the newline)

          files_end: usize,

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
      }

        Martin von Zweigbergk
    
changelog: avoid copying changeset data into `ChangesetRevisionData`...

              r49987
            
      impl<'changelog> ChangelogRevisionData<'changelog> {

          fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
              let mut line_iter = bytes.split(|b| b == &b'\n');

              let manifest_end = line_iter

                  .next()

                  .expect("Empty iterator from split()?")

                  .len();

              let user_slice = line_iter.next().ok_or_else(|| {

                  HgError::corrupted("Changeset data truncated after manifest line")

              })?;

              let user_end = manifest_end + 1 + user_slice.len();

              let timestamp_slice = line_iter.next().ok_or_else(|| {

                  HgError::corrupted("Changeset data truncated after user line")

              })?;

              let timestamp_end = user_end + 1 + timestamp_slice.len();

              let mut files_end = timestamp_end + 1;

              loop {

                  let line = line_iter.next().ok_or_else(|| {

                      HgError::corrupted("Changeset data truncated in files list")

                  })?;

                  if line.is_empty() {

                      if files_end == bytes.len() {

                          // The list of files ended with a single newline (there

                          // should be two)

                          return Err(HgError::corrupted(

                              "Changeset data truncated after files list",

                          ));

                      }

                      files_end -= 1;

                      break;

                  }

                  files_end += line.len() + 1;

              }

              Ok(Self {

                  bytes,

                  manifest_end,

                  user_end,

                  timestamp_end,

                  files_end,

              })

        Martin von Zweigbergk
    
rust-changelog: remove special parsing of empty changelog data for null rev...

              r49937
            
          }

          fn null() -> Self {

        Martin von Zweigbergk
    
changelog: avoid copying changeset data into `ChangesetRevisionData`...

              r49987
            
              Self::new(Cow::Borrowed(

                  b"0000000000000000000000000000000000000000\n\n0 0\n\n",

              ))

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
              .unwrap()

        Martin von Zweigbergk
    
rust-changelog: remove special parsing of empty changelog data for null rev...

              r49937
            
          }

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
          /// Return an iterator over the lines of the entry.

          pub fn lines(&self) -> impl Iterator<Item = &[u8]> {

        Martin von Zweigbergk
    
rust-changelog: don't skip empty lines when iterating over changeset lines...

              r49936
            
              self.bytes.split(|b| b == &b'\n')

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
          }

          /// Return the node id of the `manifest` referenced by this `changelog`

          /// entry.

        Simon Sapin
    
rhg: Reuse manifest when checking status of multiple ambiguous files...

              r48778
            
          pub fn manifest_node(&self) -> Result<Node, HgError> {

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
              let manifest_node_hex = &self.bytes[..self.manifest_end];

        Martin von Zweigbergk
    
rust-changelog: remove special parsing of empty changelog data for null rev...

              r49937
            
              Node::from_hex_for_repo(manifest_node_hex)

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
          }

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
          /// The raw user line, typically a name followed by an email address
          /// in angle brackets.
          pub fn user(&self) -> &[u8] {
              // Step over the newline that terminates the manifest line.
              let start = self.manifest_end + 1;
              &self.bytes[start..self.user_end]
          }

          /// The raw timestamp line: timestamp in seconds, offset in seconds,
          /// and possibly extras.
          // TODO: We should expose this in a more useful way
          pub fn timestamp_line(&self) -> &[u8] {
              // Step over the newline that terminates the user line.
              let start = self.user_end + 1;
              &self.bytes[start..self.timestamp_end]
          }

        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
          /// The commit date, parsed from the timestamp line by
          /// `parse_timestamp`; its errors are returned unchanged.
          pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
              let raw_line = self.timestamp_line();
              parse_timestamp(raw_line)
          }

          /// Optional commit extras.

          pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

              parse_timestamp_line_extra(self.timestamp_line())

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
          }

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
          /// The files changed in this revision.

          pub fn files(&self) -> impl Iterator<Item = &HgPath> {

        Arun Kulshreshtha
    
rust-changelog: don't panic on empty file lists

              r52256
            
              if self.timestamp_end == self.files_end {

                  Either::Left(iter::empty())

              } else {

                  Either::Right(

                      self.bytes[self.timestamp_end + 1..self.files_end]

                          .split(|b| b == &b'\n')

                          .map(HgPath::new),

                  )

              }

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
          }

          /// The change description: everything after the file list.
          pub fn description(&self) -> &[u8] {
              // Two newlines separate the file list from the description.
              let start = self.files_end + 2;
              &self.bytes[start..]
          }

        Antoine Cezar
    
hg-core: add `Changlog` a specialized `Revlog`...

              r46103
            
      }

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
        Martin von Zweigbergk
    
changelog: avoid copying changeset data into `ChangesetRevisionData`...

              r49987
            
      impl Debug for ChangelogRevisionData<'_> {

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
          /// Format the raw bytes and each parsed section (manifest, user,
          /// timestamp, files, description) with non-printable bytes escaped
          /// by `debug_bytes`.
          fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
              // A changeset without files has `files_end == timestamp_end`
              // (see `new` and `files`). Slicing `timestamp_end + 1..files_end`
              // would then start past its end and panic, which happens for the
              // data produced by `null()`. Use an empty slice in that case.
              let files: &[u8] = if self.timestamp_end == self.files_end {
                  &[]
              } else {
                  &self.bytes[self.timestamp_end + 1..self.files_end]
              };
              f.debug_struct("ChangelogRevisionData")
                  .field("bytes", &debug_bytes(&self.bytes))
                  .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
                  .field(
                      "user",
                      &debug_bytes(
                          &self.bytes[self.manifest_end + 1..self.user_end],
                      ),
                  )
                  .field(
                      "timestamp",
                      &debug_bytes(
                          &self.bytes[self.user_end + 1..self.timestamp_end],
                      ),
                  )
                  .field("files", &debug_bytes(files))
                  .field(
                      "description",
                      &debug_bytes(&self.bytes[self.files_end + 2..]),
                  )
                  .finish()
          }

      }

      /// Render arbitrary bytes as a printable string by passing every byte
      /// through `std::ascii::escape_default`.
      fn debug_bytes(bytes: &[u8]) -> String {
          let escaped = bytes
              .iter()
              .flat_map(|byte| escape_default(*byte))
              .collect_vec();
          String::from_utf8_lossy(&escaped).into_owned()
      }

        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
      /// Parse the raw bytes of the timestamp line from a changelog entry.

      ///

      /// According to the documentation in `hg help dates` and the

      /// implementation in `changelog.py`, the format of the timestamp line

      /// is `time tz extra\n` where:

      ///

      /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp

      ///   as seconds since the UNIX epoch.

      ///

      /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting

      ///   seconds WEST of UTC (so negative for timezones east of UTC, which is the

      ///   opposite of the sign in ISO 8601 timestamps).

      ///

      /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key

      ///   and value in each pair separated by an ASCII colon. Keys are limited to

      ///   ASCII letters, digits, hyphens, and underscores, whereas values can be

      ///   arbitrary bytes.

      fn parse_timestamp(

          timestamp_line: &[u8],

      ) -> Result<DateTime<FixedOffset>, HgError> {

          let mut parts = timestamp_line.splitn(3, |c| *c == b' ');

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
          let timestamp_bytes = parts

              .next()

              .ok_or_else(|| HgError::corrupted("missing timestamp"))?;

          let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {

              HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))

          })?;

          let timestamp_utc = timestamp_str

              .parse()

              .map_err(|e| {

                  HgError::corrupted(format!("failed to parse timestamp: {e}"))

              })

              .and_then(|secs| {

                  NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {

                      HgError::corrupted(format!(

                          "integer timestamp out of valid range: {secs}"

                      ))

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
                  })

        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
              })

              // Attempt to parse the timestamp as a float if we can't parse

              // it as an int. It doesn't seem like float timestamps are actually

              // used in practice, but the Python code supports them.

              .or_else(|_| parse_float_timestamp(timestamp_str))?;

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
          let timezone_bytes = parts

              .next()

              .ok_or_else(|| HgError::corrupted("missing timezone"))?;

          let timezone_secs: i32 = str::from_utf8(timezone_bytes)

              .map_err(|e| {

                  HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))

              })?

              .parse()

              .map_err(|e| {

                  HgError::corrupted(format!("timezone is not an integer: {e}"))

              })?;

          let timezone = FixedOffset::west_opt(timezone_secs)

              .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
          Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
      }

      /// Attempt to parse the given string as a floating-point timestamp, and
      /// convert the result into a `chrono::NaiveDateTime`.
      ///
      /// # Errors
      ///
      /// Returns a corruption error if the string is not a float, if it is NaN
      /// or infinite, or if it is outside the range `NaiveDateTime` can
      /// represent.
      fn parse_float_timestamp(
          timestamp_str: &str,
      ) -> Result<NaiveDateTime, HgError> {
          let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
              HgError::corrupted(format!("failed to parse timestamp: {e}"))
          })?;

          // `f64::from_str` accepts "nan" and "inf". A NaN would otherwise be
          // cast to 0 seconds and silently decode as the UNIX epoch.
          if !timestamp.is_finite() {
              return Err(HgError::corrupted(format!(
                  "float timestamp out of valid range: {timestamp}"
              )));
          }

          // To construct a `NaiveDateTime` we need signed integer seconds and
          // unsigned integer nanoseconds. Rounding the seconds towards negative
          // infinity keeps the fractional part non-negative, so a negative
          // timestamp is expressed as nanoseconds since the previous second
          // and a negative whole number such as `-5.0` is left untouched.
          let whole_secs = timestamp.floor();

          // Float-to-int `as` casts saturate, so values outside the `i64`
          // range are clamped here and then rejected by `from_timestamp_opt`
          // below. No integer arithmetic is done on the result, so it cannot
          // overflow.
          let secs = whole_secs as i64;

          // The fraction lies in `[0.0, 1.0]`. It can round up to exactly 1.0
          // for tiny negative inputs, so clamp the nanoseconds below one
          // second. chrono would otherwise reject the value or treat it as a
          // leap second.
          //
          // (Any loss of precision in the fractional component will have
          // already happened at the time of initial parsing; in general,
          // f64s are insufficiently precise to provide nanosecond-level
          // precision with present-day timestamps.)
          let subsecs = timestamp - whole_secs;
          let nsecs = ((subsecs * 1_000_000_000.0) as u32).min(999_999_999);

          NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
              HgError::corrupted(format!(
                  "float timestamp out of valid range: {timestamp}"
              ))
          })
      }

        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
      /// Decode changeset extra fields.

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
      ///

      /// Extras are null-delimited key-value pairs where the key consists of ASCII

      /// alphanumeric characters plus hyphens and underscores, and the value can

      /// contain arbitrary bytes.

        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
      fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
          extra

              .split(|c| *c == b'\0')

              .map(|pair| {

                  let pair = unescape_extra(pair);

                  let mut iter = pair.splitn(2, |c| *c == b':');

                  let key_bytes =

                      iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {

                          HgError::corrupted("empty key in changeset extras")

                      })?;

                  let key = str::from_utf8(key_bytes)

                      .ok()

                      .filter(|k| {

                          k.chars().all(|c| {

                              c.is_ascii_alphanumeric() || c == '_' || c == '-'

                          })

                      })

                      .ok_or_else(|| {

                          let key = String::from_utf8_lossy(key_bytes);

                          HgError::corrupted(format!(

                              "invalid key in changeset extras: {key}",

                          ))

                      })?

                      .to_string();

                  let value = iter.next().map(Into::into).ok_or_else(|| {

                      HgError::corrupted(format!(

                          "missing value for changeset extra: {key}"

                      ))

                  })?;

                  Ok((key, value))

              })

              .collect()

      }

        Arun Kulshreshtha
    
hg-core: separate timestamp and extra methods

              r52286
            
      /// Parse the extra fields from a changeset's timestamp line.

      fn parse_timestamp_line_extra(

          timestamp_line: &[u8],

      ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {

          Ok(timestamp_line

              .splitn(3, |c| *c == b' ')

              .nth(2)

              .map(decode_extra)

              .transpose()?

              .unwrap_or_default())

      }

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
      /// Undo the escaping Mercurial applies to changelog extras.
      ///
      /// `_string_escape` in `changelog.py` escapes exactly four bytes (NUL,
      /// backslash, newline and carriage return), so only `\0`, `\\`, `\n` and
      /// `\r` are decoded here. Any other backslash sequence, and a lone
      /// trailing backslash, is copied to the output unchanged.
      ///
      /// The Python implementation also carries a workaround for an escaped NUL
      /// followed by an ASCII octal digit, because Python's `string_escape`
      /// codec would read that as an octal escape. Octal escapes are not
      /// decoded here, so the workaround is not needed.
      fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
          let mut decoded = Vec::with_capacity(bytes.len());
          let mut rest = bytes;
          loop {
              rest = match rest {
                  [] => break,
                  [b'\\', b'0', tail @ ..] => {
                      decoded.push(b'\0');
                      tail
                  }
                  [b'\\', b'\\', tail @ ..] => {
                      decoded.push(b'\\');
                      tail
                  }
                  [b'\\', b'n', tail @ ..] => {
                      decoded.push(b'\n');
                      tail
                  }
                  [b'\\', b'r', tail @ ..] => {
                      decoded.push(b'\r');
                      tail
                  }
                  // The two arms below should never be reached in theory:
                  // every backslash in the original input is escaped with
                  // another backslash, so no other sequence can appear.
                  [b'\\', other, tail @ ..] => {
                      decoded.extend_from_slice(&[b'\\', *other]);
                      tail
                  }
                  // Only reached for a backslash that ends the input.
                  [b'\\', tail @ ..] => {
                      decoded.push(b'\\');
                      tail
                  }
                  [plain, tail @ ..] => {
                      decoded.push(*plain);
                      tail
                  }
              };
          }
          decoded
      }

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
      #[cfg(test)]

      mod tests {

          use super::*;

        Georges Racinet
    
rust-changelog: added a test for `NULL_REVISION` special case...

              r51267
            
          use crate::vfs::Vfs;

          use crate::NULL_REVISION;

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
          use pretty_assertions::assert_eq;

          /// Truncated changeset data must be rejected by
          /// `ChangelogRevisionData::new`.
          #[test]
          fn test_create_changelogrevisiondata_invalid() {
              let truncated = [
                  (&b""[..], "completely empty"),
                  (&b"abcd"[..], "no newline after manifest"),
                  (&b"abcd\n"[..], "no newline after user"),
                  (&b"abcd\n\n0 0"[..], "no newline after timestamp"),
                  (
                      &b"abcd\n\n0 0\nfile1\nfile2"[..],
                      "missing newline after files",
                  ),
                  (
                      &b"abcd\n\n0 0\nfile1\nfile2\n"[..],
                      "only one newline after files",
                  ),
              ];
              for (bytes, reason) in truncated {
                  assert!(
                      ChangelogRevisionData::new(Cow::Borrowed(bytes)).is_err(),
                      "truncated changeset data should have been rejected: {}",
                      reason
                  );
              }
          }

          /// A well-formed changeset is split into manifest node, user,
          /// timestamp line, file list and description.
          #[test]
          fn test_create_changelogrevisiondata() {
              // The fixture is whitespace-sensitive, so every newline is written
              // as an explicit escape; the trailing `\` only continues the
              // literal and contributes no bytes. Layout: manifest node, user,
              // timestamp line, one file per line, an empty separator line, then
              // the description.
              let data = ChangelogRevisionData::new(Cow::Borrowed(
                  b"0123456789abcdef0123456789abcdef01234567\n\
                    Some One <someone@example.com>\n\
                    0 0\n\
                    file1\n\
                    file2\n\
                    \n\
                    some\n\
                    commit\n\
                    message",
              ))
              .unwrap();
              assert_eq!(
                  data.manifest_node().unwrap(),
                  Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                      .unwrap()
              );
              assert_eq!(data.user(), b"Some One <someone@example.com>");
              assert_eq!(data.timestamp_line(), b"0 0");
              assert_eq!(
                  data.files().collect_vec(),
                  vec![HgPath::new("file1"), HgPath::new("file2")]
              );
              assert_eq!(data.description(), b"some\ncommit\nmessage");
          }

        Georges Racinet
    
rust-changelog: added a test for `NULL_REVISION` special case...

              r51267
            
          /// The null revision must resolve to `ChangelogRevisionData::null()`.
          #[test]
          fn test_data_from_rev_null() -> Result<(), RevlogError> {
              // An empty index file is all this case needs.
              let dir = tempfile::tempdir().unwrap();
              std::fs::write(dir.path().join("foo.i"), b"").unwrap();
              let vfs = Vfs { base: dir.path() };
              let opened =
                  Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new());
              let changelog = Changelog {
                  revlog: opened.unwrap(),
              };

              // Direct lookup of the revision data...
              let direct = changelog.data_for_rev(NULL_REVISION.into())?;
              assert_eq!(direct, ChangelogRevisionData::null());

              // ...and the lookup through the intermediate entry object.
              let entry = changelog.entry_for_rev(NULL_REVISION.into())?;
              assert_eq!(entry.data()?, ChangelogRevisionData::null());

              Ok(())
          }

        Arun Kulshreshtha
    
rust-changelog: don't panic on empty file lists

              r52256
            
          /// Listing the files of the null changeset data yields nothing.
          #[test]
          fn test_empty_files_list() {
              let null_data = ChangelogRevisionData::null();
              let files = null_data.files().collect_vec();
              assert!(files.is_empty());
          }

        Arun Kulshreshtha
    
hg-core: implement timestamp line parsing

              r52284
            
          /// Each supported escape (`\0`, `\\`, `\n`, `\r`) decodes to the single
          /// byte it stands for.
          #[test]
          fn test_unescape_basic() {
              let decoded = unescape_extra(br"AAA\0BBB\\CCC\nDDD\rEEE");
              let wanted = b"AAA\0BBB\\CCC\nDDD\rEEE";
              assert_eq!(&wanted[..], &decoded[..]);
          }

          /// A backslash followed by any byte other than `0`, `\`, `n` or `r` is
          /// passed through unaltered.
          #[test]
          fn test_unescape_unsupported_sequence() {
              // Inclusive range: byte 255 must be exercised as well.
              for c in u8::MIN..=u8::MAX {
                  if matches!(c, b'0' | b'\\' | b'n' | b'r') {
                      continue;
                  }
                  let expected = [b'\\', c];
                  let unescaped = unescape_extra(&expected);
                  assert_eq!(&expected[..], &unescaped[..]);
              }
          }

          /// A backslash at the very end of the input is kept as-is.
          #[test]
          fn test_unescape_trailing_backslash() {
              let input = br"hi\";
              let decoded = unescape_extra(input);
              assert_eq!(&input[..], &decoded[..]);
          }

          /// An escaped NUL followed by octal digits decodes to a NUL byte and
          /// the digits, not to a single octal-valued byte.
          #[test]
          fn test_unescape_nul_followed_by_octal() {
              let decoded = unescape_extra(br"\012");
              let wanted = b"\012";
              assert_eq!(&wanted[..], &decoded[..]);
          }

          /// Floating-point timestamps parse to the expected date-time rendering.
          #[test]
          fn test_parse_float_timestamp() {
              // Each case is (timestamp text, expected rendering).
              let cases = [
                  // Zero is the UNIX epoch.
                  ("0.0", "1970-01-01 00:00:00"),
                  // Negative zero behaves exactly like positive zero.
                  ("-0.0", "1970-01-01 00:00:00"),
                  // Without a fractional part the value acts like an integer
                  // (as long as it fits within f64 precision).
                  ("1115154970.0", "2005-05-03 21:16:10"),
                  // Arbitrary fractional digits lose some precision...
                  ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
                  // ...but a value representable as f64 round-trips exactly.
                  ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
                  // A negative fractional part is subtracted from the epoch.
                  ("-1.333", "1969-12-31 23:59:58.667"),
              ];
              for (text, rendered) in cases {
                  let parsed = parse_float_timestamp(text).unwrap();
                  assert_eq!(parsed.to_string(), rendered);
              }
          }

          /// Test helper: escape NUL, backslash, newline and carriage return as
          /// `\0`, `\\`, `\n` and `\r`; every other byte is copied unchanged.
          fn escape_extra(bytes: &[u8]) -> Vec<u8> {
              let mut escaped = Vec::with_capacity(bytes.len());
              for &byte in bytes {
                  match byte {
                      b'\0' => escaped.extend_from_slice(b"\\0"),
                      b'\\' => escaped.extend_from_slice(b"\\\\"),
                      b'\n' => escaped.extend_from_slice(b"\\n"),
                      b'\r' => escaped.extend_from_slice(b"\\r"),
                      plain => escaped.push(plain),
                  }
              }
              escaped
          }

          fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>

          where

              K: AsRef<[u8]>,

              V: AsRef<[u8]>,

          {

              let extras = pairs.into_iter().map(|(k, v)| {

                  escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())

              });

              // Use fully-qualified syntax to avoid a future naming conflict with

              // the standard library: https://github.com/rust-lang/rust/issues/79524

              Itertools::intersperse(extras, b"\0".to_vec()).concat()

          }

          /// Extras survive an `encode_extra` / `decode_extra` round trip.
          #[test]
          fn test_decode_extra() {
              let mut extra = BTreeMap::<String, Vec<u8>>::new();
              extra.insert("branch".into(), b"default".to_vec());
              extra.insert("key-with-hyphens".into(), b"value1".to_vec());
              extra.insert("key_with_underscores".into(), b"value2".to_vec());
              extra.insert("empty-value".into(), b"".to_vec());
              // Every possible byte value, to exercise the escaping.
              extra.insert("binary-value".into(), (0u8..=255).collect());

              let encoded = encode_extra(&extra);
              let decoded = decode_extra(&encoded).unwrap();

              assert_eq!(extra, decoded);
          }

          /// Malformed extras must make `decode_extra` return an error.
          #[test]
          fn test_corrupt_extra() {
              // Each case is (raw extras, label used in the failure message).
              let corrupt_inputs = [
                  (&b""[..], "empty input"),
                  (&b"\0"[..], "unexpected null byte"),
                  (&b":empty-key"[..], "empty key"),
                  (&b"\0leading-null:"[..], "leading null"),
                  (&b"trailing-null:\0"[..], "trailing null"),
                  (&b"missing-value"[..], "missing value"),
                  (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
                  (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
              ];
              for (raw, label) in corrupt_inputs {
                  let outcome = decode_extra(raw);
                  assert!(
                      outcome.is_err(),
                      "corrupt extra should have failed to parse: {}",
                      label
                  );
              }
          }

          /// A full timestamp line yields both the timestamp and the extras.
          #[test]
          fn test_parse_timestamp_line() {
              let mut extra = BTreeMap::<String, Vec<u8>>::new();
              extra.insert("branch".into(), b"default".to_vec());
              extra.insert("key-with-hyphens".into(), b"value1".to_vec());
              extra.insert("key_with_underscores".into(), b"value2".to_vec());
              extra.insert("empty-value".into(), b"".to_vec());
              // Every possible byte value, to exercise the escaping.
              extra.insert("binary-value".into(), (0u8..=255).collect());

              // Time and offset fields first, then the encoded extras.
              let encoded_extra = encode_extra(&extra);
              let line: Vec<u8> =
                  [&b"1115154970 28800 "[..], &encoded_extra[..]].concat();

              let timestamp = parse_timestamp(&line).unwrap();
              assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

              let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
              assert_eq!(extra, parsed_extra);
          }

        Martin von Zweigbergk
    
rust-changelog: start parsing changeset data...

              r49938
            
      }

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages