##// END OF EJS Templates
rust: update `clap` to the latest 4.x version...
rust: update `clap` to the latest 4.x version This brings in more up-to-date dependencies, some bug fixes (none of which are relevant yet), and slightly improved compile times.

File last commit:

r53187:a3fa37bd default
r53204:f7b28060 default
Show More
changelog.rs
753 lines | 25.3 KiB | application/rls-services+xml | RustLexer
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 use std::ascii::escape_default;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter};
use std::{iter, str};
Arseniy Alekseyev
rust: fix the deprecation warning in NaiveDateTime::from_timestamp...
r52810 use chrono::{DateTime, FixedOffset, Utc};
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 use itertools::{Either, Itertools};
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 use crate::errors::HgError;
Georges Racinet
rust-changelog: accessing the index...
r52618 use crate::revlog::Index;
Georges Racinet
rust-changelog: removed now useless early conditional for NULL_REVISION...
r51640 use crate::revlog::Revision;
Simon Sapin
rhg: `cat` command: print error messages for missing files...
r47478 use crate::revlog::{Node, NodePrefix};
Raphaël Gomès
rust-clippy: merge "revlog" module definition and struct implementation...
r50832 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 use crate::utils::hg_path::HgPath;
Raphaël Gomès
rust: add Vfs trait...
r52761 use crate::vfs::VfsImpl;
Raphaël Gomès
rust-revlog: introduce an `options` module...
r53053 use crate::{Graph, GraphError, UncheckedRevision};
use super::options::RevlogOpenOptions;
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103
Georges Racinet
rust-changelog: made doc-comments more consistent...
r51266 /// A specialized `Revlog` to work with changelog data format.
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 pub struct Changelog {
/// The generic `revlog` format.
Simon Sapin
rhg: centralize parsing of `--rev` CLI arguments...
r47162 pub(crate) revlog: Revlog,
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 }
impl Changelog {
/// Open the `changelog` of a repository given by its root.
Raphaël Gomès
rust-revlog: teach the revlog opening code to read the repo options...
r52084 pub fn open(
Raphaël Gomès
rust: add Vfs trait...
r52761 store_vfs: &VfsImpl,
Raphaël Gomès
rust-revlog: teach the revlog opening code to read the repo options...
r52084 options: RevlogOpenOptions,
) -> Result<Self, HgError> {
let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 Ok(Self { revlog })
}
Georges Racinet
rust-changelog: made doc-comments more consistent...
r51266 /// Return the `ChangelogRevisionData` for the given node ID.
Simon Sapin
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev...
r48783 pub fn data_for_node(
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 &self,
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 node: NodePrefix,
Simon Sapin
rhg: Rename some revlog-related types and methods...
r49372 ) -> Result<ChangelogRevisionData, RevlogError> {
Simon Sapin
rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`...
r48782 let rev = self.revlog.rev_from_node(node)?;
Raphaël Gomès
rust: normalize `_for_unchecked_rev` naming among revlogs and the index...
r53187 self.entry(rev)?.data()
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 }
Georges Racinet
rust-changelog: introducing an intermediate `ChangelogEntry`...
r51268 /// Return the [`ChangelogEntry`] for the given revision number.
Raphaël Gomès
rust: normalize `_for_unchecked_rev` naming among revlogs and the index...
r53187 pub fn entry_for_unchecked_rev(
Martin von Zweigbergk
rust-revlog: add methods for getting parent revs and entries...
r49939 &self,
Raphaël Gomès
rust: use the new `UncheckedRevision` everywhere applicable...
r51870 rev: UncheckedRevision,
) -> Result<ChangelogEntry, RevlogError> {
Raphaël Gomès
rust: normalize `_for_unchecked_rev` naming among revlogs and the index...
r53187 let revlog_entry = self.revlog.get_entry_for_unchecked_rev(rev)?;
Raphaël Gomès
rust: use the new `UncheckedRevision` everywhere applicable...
r51870 Ok(ChangelogEntry { revlog_entry })
}
Raphaël Gomès
rust: normalize `_for_unchecked_rev` naming among revlogs and the index...
r53187 /// Same as [`Self::entry_for_unchecked_rev`] for a checked revision
fn entry(&self, rev: Revision) -> Result<ChangelogEntry, RevlogError> {
let revlog_entry = self.revlog.get_entry(rev)?;
Georges Racinet
rust-changelog: introducing an intermediate `ChangelogEntry`...
r51268 Ok(ChangelogEntry { revlog_entry })
Martin von Zweigbergk
rust-revlog: add methods for getting parent revs and entries...
r49939 }
Georges Racinet
rust-changelog: made doc-comments more consistent...
r51266 /// Return the [`ChangelogRevisionData`] for the given revision number.
Georges Racinet
rust-changelog: introducing an intermediate `ChangelogEntry`...
r51268 ///
/// This is a useful shortcut in case the caller does not need the
/// generic revlog information (parents, hashes etc). Otherwise
/// consider taking a [`ChangelogEntry`] with
Raphaël Gomès
rust: normalize `_for_unchecked_rev` naming among revlogs and the index...
r53187 /// [`Self::entry_for_unchecked_rev`] and doing everything from there.
pub fn data_for_unchecked_rev(
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 &self,
Raphaël Gomès
rust: use the new `UncheckedRevision` everywhere applicable...
r51870 rev: UncheckedRevision,
Simon Sapin
rhg: Rename some revlog-related types and methods...
r49372 ) -> Result<ChangelogRevisionData, RevlogError> {
Raphaël Gomès
rust: normalize `_for_unchecked_rev` naming among revlogs and the index...
r53187 self.entry_for_unchecked_rev(rev)?.data()
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 }
Simon Sapin
rhg: `cat` command: print error messages for missing files...
r47478
Raphaël Gomès
rust: normalize `_for_unchecked_rev` naming among revlogs and the index...
r53187 pub fn node_from_unchecked_rev(
&self,
rev: UncheckedRevision,
) -> Option<&Node> {
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
r48781 self.revlog.node_from_rev(rev)
Simon Sapin
rhg: `cat` command: print error messages for missing files...
r47478 }
Martin von Zweigbergk
rust-revlog: add methods for getting parent revs and entries...
r49939
/// Return the revision number matching the given node prefix.
///
/// This is a direct delegation to the underlying revlog's
/// `rev_from_node`; any error it reports is returned unchanged.
pub fn rev_from_node(
&self,
node: NodePrefix,
) -> Result<Revision, RevlogError> {
self.revlog.rev_from_node(node)
}
Georges Racinet
rust-changelog: accessing the index...
r52618
/// Borrow the index of the underlying revlog.
pub fn get_index(&self) -> &Index {
    let revlog = &self.revlog;
    revlog.index()
}
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 }
Raphaël Gomès
rust: implement the `Graph` trait for all revlogs...
r51871 impl Graph for Changelog {
fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
self.revlog.parents(rev)
}
}
Georges Racinet
rust-changelog: introducing an intermediate `ChangelogEntry`...
r51268 /// A specialized `RevlogEntry` for `changelog` data format
///
/// This is a `RevlogEntry` with the added semantics that the associated
/// data should meet the requirements for `changelog`, materialized by
/// the fact that `data()` constructs a `ChangelogRevisionData`.
/// In case that promise would be broken, the `data` method returns an error.
#[derive(Clone)]
pub struct ChangelogEntry<'changelog> {
/// Same data, as a generic `RevlogEntry`.
pub(crate) revlog_entry: RevlogEntry<'changelog>,
}
impl<'changelog> ChangelogEntry<'changelog> {
/// Read this entry's data and parse it as changelog revision data.
///
/// Empty data is mapped to [`ChangelogRevisionData::null`].
///
/// # Errors
///
/// Fails if the revlog data cannot be read, or with a
/// `CorruptedRepository` error naming the revision if the data does not
/// parse as changelog data.
pub fn data<'a>(
    &'a self,
) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
    let raw = self.revlog_entry.data()?;
    if raw.is_empty() {
        return Ok(ChangelogRevisionData::null());
    }
    ChangelogRevisionData::new(raw).map_err(|err| {
        let message = format!(
            "Invalid changelog data for revision {}: {:?}",
            self.revlog_entry.revision(),
            err
        );
        RevlogError::Other(HgError::CorruptedRepository(message))
    })
}
/// Borrow the underlying generic `RevlogEntry`.
///
/// Use this to reach the information that is common to all revlog
/// entries (revision number, node id, parent revisions etc.) rather than
/// the changelog-specific payload returned by `data()`.
pub fn as_revlog_entry(&self) -> &RevlogEntry {
&self.revlog_entry
}
Georges Racinet
rust-changelog: introduce ChangelogEntry parent entries accessors...
r51271
/// Return the first parent as a `ChangelogEntry`, or `None` when the
/// underlying revlog entry reports no first parent entry.
pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
    let parent = self.revlog_entry.p1_entry()?;
    Ok(parent.map(|revlog_entry| Self { revlog_entry }))
}
/// Return the second parent as a `ChangelogEntry`, or `None` when the
/// underlying revlog entry reports no second parent entry.
pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
    let parent = self.revlog_entry.p2_entry()?;
    Ok(parent.map(|revlog_entry| Self { revlog_entry }))
}
Georges Racinet
rust-changelog: introducing an intermediate `ChangelogEntry`...
r51268 }
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
/// The data of one `changelog` revision, together with the offsets needed
/// to slice it into its manifest, user, timestamp, files and description
/// parts.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// The raw data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// End offset of the hex manifest line (newline excluded).
    manifest_end: usize,
    /// End offset of the user+email line (newline excluded).
    user_end: usize,
    /// End offset of the timestamp+timezone+extras line (newline
    /// excluded).
    timestamp_end: usize,
    /// End offset of the file list (newline excluded).
    files_end: usize,
}
Martin von Zweigbergk
changelog: avoid copying changeset data into `ChangesetRevisionData`...
r49987 impl<'changelog> ChangelogRevisionData<'changelog> {
fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 let mut line_iter = bytes.split(|b| b == &b'\n');
let manifest_end = line_iter
.next()
.expect("Empty iterator from split()?")
.len();
let user_slice = line_iter.next().ok_or_else(|| {
HgError::corrupted("Changeset data truncated after manifest line")
})?;
let user_end = manifest_end + 1 + user_slice.len();
let timestamp_slice = line_iter.next().ok_or_else(|| {
HgError::corrupted("Changeset data truncated after user line")
})?;
let timestamp_end = user_end + 1 + timestamp_slice.len();
let mut files_end = timestamp_end + 1;
loop {
let line = line_iter.next().ok_or_else(|| {
HgError::corrupted("Changeset data truncated in files list")
})?;
if line.is_empty() {
if files_end == bytes.len() {
// The list of files ended with a single newline (there
// should be two)
return Err(HgError::corrupted(
"Changeset data truncated after files list",
));
}
files_end -= 1;
break;
}
files_end += line.len() + 1;
}
Ok(Self {
bytes,
manifest_end,
user_end,
timestamp_end,
files_end,
})
Martin von Zweigbergk
rust-changelog: remove special parsing of empty changelog data for null rev...
r49937 }
fn null() -> Self {
Martin von Zweigbergk
changelog: avoid copying changeset data into `ChangesetRevisionData`...
r49987 Self::new(Cow::Borrowed(
b"0000000000000000000000000000000000000000\n\n0 0\n\n",
))
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 .unwrap()
Martin von Zweigbergk
rust-changelog: remove special parsing of empty changelog data for null rev...
r49937 }
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 /// Return an iterator over the lines of the entry.
pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
Martin von Zweigbergk
rust-changelog: don't skip empty lines when iterating over changeset lines...
r49936 self.bytes.split(|b| b == &b'\n')
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 }
/// Return the node id of the `manifest` referenced by this `changelog`
/// entry.
Simon Sapin
rhg: Reuse manifest when checking status of multiple ambiguous files...
r48778 pub fn manifest_node(&self) -> Result<Node, HgError> {
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 let manifest_node_hex = &self.bytes[..self.manifest_end];
Martin von Zweigbergk
rust-changelog: remove special parsing of empty changelog data for null rev...
r49937 Node::from_hex_for_repo(manifest_node_hex)
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 }
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938
/// The complete user line (usually a name followed by an email enclosed
/// in angle brackets).
pub fn user(&self) -> &[u8] {
    // Skip the newline that terminates the manifest line.
    let start = self.manifest_end + 1;
    &self.bytes[start..self.user_end]
}
/// The complete timestamp line (timestamp in seconds, offset in seconds,
/// and possibly extras).
// TODO: We should expose this in a more useful way
pub fn timestamp_line(&self) -> &[u8] {
    // Skip the newline that terminates the user line.
    let start = self.user_end + 1;
    &self.bytes[start..self.timestamp_end]
}
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 /// Parsed timestamp.
pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
parse_timestamp(self.timestamp_line())
}
/// Optional commit extras.
pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
parse_timestamp_line_extra(self.timestamp_line())
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 }
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 /// The files changed in this revision.
pub fn files(&self) -> impl Iterator<Item = &HgPath> {
Arun Kulshreshtha
rust-changelog: don't panic on empty file lists
r52256 if self.timestamp_end == self.files_end {
Either::Left(iter::empty())
} else {
Either::Right(
self.bytes[self.timestamp_end + 1..self.files_end]
.split(|b| b == &b'\n')
.map(HgPath::new),
)
}
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 }
/// The change description: everything after the empty line that follows
/// the file list.
pub fn description(&self) -> &[u8] {
    // `files_end` excludes the newline ending the file list, and one more
    // newline (the empty line) separates it from the description.
    let start = self.files_end + 2;
    &self.bytes[start..]
}
Antoine Cezar
hg-core: add `Changlog` a specialized `Revlog`...
r46103 }
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938
Martin von Zweigbergk
changelog: avoid copying changeset data into `ChangesetRevisionData`...
r49987 impl Debug for ChangelogRevisionData<'_> {
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ChangelogRevisionData")
.field("bytes", &debug_bytes(&self.bytes))
.field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
.field(
"user",
&debug_bytes(
&self.bytes[self.manifest_end + 1..self.user_end],
),
)
.field(
"timestamp",
&debug_bytes(
&self.bytes[self.user_end + 1..self.timestamp_end],
),
)
.field(
"files",
&debug_bytes(
&self.bytes[self.timestamp_end + 1..self.files_end],
),
)
.field(
"description",
&debug_bytes(&self.bytes[self.files_end + 2..]),
)
.finish()
}
}
/// Render `bytes` as a `String` in which every byte has gone through
/// `std::ascii::escape_default`.
fn debug_bytes(bytes: &[u8]) -> String {
    let escaped = bytes.iter().copied().flat_map(escape_default).collect_vec();
    String::from_utf8_lossy(&escaped).into_owned()
}
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 /// Parse the raw bytes of the timestamp line from a changelog entry.
///
/// According to the documentation in `hg help dates` and the
/// implementation in `changelog.py`, the format of the timestamp line
/// is `time tz extra\n` where:
///
/// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
/// as seconds since the UNIX epoch.
///
/// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
/// seconds WEST of UTC (so negative for timezones east of UTC, which is the
/// opposite of the sign in ISO 8601 timestamps).
///
/// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
/// and value in each pair separated by an ASCII colon. Keys are limited to
/// ASCII letters, digits, hyphens, and underscores, whereas values can be
/// arbitrary bytes.
fn parse_timestamp(
timestamp_line: &[u8],
) -> Result<DateTime<FixedOffset>, HgError> {
let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 let timestamp_bytes = parts
.next()
.ok_or_else(|| HgError::corrupted("missing timestamp"))?;
let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
})?;
let timestamp_utc = timestamp_str
.parse()
.map_err(|e| {
HgError::corrupted(format!("failed to parse timestamp: {e}"))
})
.and_then(|secs| {
Arseniy Alekseyev
rust: fix the deprecation warning in NaiveDateTime::from_timestamp...
r52810 DateTime::from_timestamp(secs, 0).ok_or_else(|| {
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 HgError::corrupted(format!(
"integer timestamp out of valid range: {secs}"
))
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 })
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 })
// Attempt to parse the timestamp as a float if we can't parse
// it as an int. It doesn't seem like float timestamps are actually
// used in practice, but the Python code supports them.
.or_else(|_| parse_float_timestamp(timestamp_str))?;
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 let timezone_bytes = parts
.next()
.ok_or_else(|| HgError::corrupted("missing timezone"))?;
let timezone_secs: i32 = str::from_utf8(timezone_bytes)
.map_err(|e| {
HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
})?
.parse()
.map_err(|e| {
HgError::corrupted(format!("timezone is not an integer: {e}"))
})?;
let timezone = FixedOffset::west_opt(timezone_secs)
.ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284
Raphaël Gomès
rust-changelog: switch away from deprecated APIs for datetime use...
r53061 Ok(timestamp_utc.with_timezone(&timezone))
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 }
/// Attempt to parse the given string as floating-point timestamp, and
/// convert the result into a `chrono::NaiveDateTime`.
fn parse_float_timestamp(
timestamp_str: &str,
Arseniy Alekseyev
rust: fix the deprecation warning in NaiveDateTime::from_timestamp...
r52810 ) -> Result<DateTime<Utc>, HgError> {
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
HgError::corrupted(format!("failed to parse timestamp: {e}"))
})?;
// To construct a `NaiveDateTime` we'll need to convert the float
// into signed integer seconds and unsigned integer nanoseconds.
let mut secs = timestamp.trunc() as i64;
let mut subsecs = timestamp.fract();
// If the timestamp is negative, we need to express the fractional
// component as positive nanoseconds since the previous second.
if timestamp < 0.0 {
secs -= 1;
subsecs += 1.0;
}
// This cast should be safe because the fractional component is
// by definition less than 1.0, so this value should not exceed
// 1 billion, which is representable as an f64 without loss of
// precision and should fit into a u32 without overflowing.
//
// (Any loss of precision in the fractional component will have
// already happened at the time of initial parsing; in general,
// f64s are insufficiently precise to provide nanosecond-level
// precision with present-day timestamps.)
let nsecs = (subsecs * 1_000_000_000.0) as u32;
Arseniy Alekseyev
rust: fix the deprecation warning in NaiveDateTime::from_timestamp...
r52810 DateTime::from_timestamp(secs, nsecs).ok_or_else(|| {
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 HgError::corrupted(format!(
"float timestamp out of valid range: {timestamp}"
))
})
}
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 /// Decode changeset extra fields.
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 ///
/// Extras are null-delimited key-value pairs where the key consists of ASCII
/// alphanumeric characters plus hyphens and underscores, and the value can
/// contain arbitrary bytes.
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 extra
.split(|c| *c == b'\0')
.map(|pair| {
let pair = unescape_extra(pair);
let mut iter = pair.splitn(2, |c| *c == b':');
let key_bytes =
iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
HgError::corrupted("empty key in changeset extras")
})?;
let key = str::from_utf8(key_bytes)
.ok()
.filter(|k| {
k.chars().all(|c| {
c.is_ascii_alphanumeric() || c == '_' || c == '-'
})
})
.ok_or_else(|| {
let key = String::from_utf8_lossy(key_bytes);
HgError::corrupted(format!(
"invalid key in changeset extras: {key}",
))
})?
.to_string();
let value = iter.next().map(Into::into).ok_or_else(|| {
HgError::corrupted(format!(
"missing value for changeset extra: {key}"
))
})?;
Ok((key, value))
})
.collect()
}
Arun Kulshreshtha
hg-core: separate timestamp and extra methods
r52286 /// Parse the extra fields from a changeset's timestamp line.
fn parse_timestamp_line_extra(
timestamp_line: &[u8],
) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
Ok(timestamp_line
.splitn(3, |c| *c == b' ')
.nth(2)
.map(decode_extra)
.transpose()?
.unwrap_or_default())
}
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284 /// Decode Mercurial's escaping for changelog extras.
///
/// The `_string_escape` function in `changelog.py` only escapes 4 characters
/// (null, backslash, newline, and carriage return) so we only decode those.
///
/// The Python code also includes a workaround for decoding escaped nuls
/// that are followed by an ASCII octal digit, since Python's built-in
/// `string_escape` codec will interpret that as an escaped octal byte value.
/// That workaround is omitted here since we don't support decoding octal.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut rest = bytes;
    while let [first, tail @ ..] = rest {
        rest = tail;
        if *first != b'\\' {
            decoded.push(*first);
            continue;
        }
        // `first` is a backslash: look at the byte it escapes, if any.
        match rest {
            [escaped, after @ ..] => {
                match *escaped {
                    b'0' => decoded.push(b'\0'),
                    b'\\' => decoded.push(b'\\'),
                    b'n' => decoded.push(b'\n'),
                    b'r' => decoded.push(b'\r'),
                    // Should never occur in theory: any backslash in the
                    // original input should have been escaped with another
                    // backslash, so only the 4 sequences above are
                    // expected. Anything else is kept as-is.
                    other => decoded.extend_from_slice(&[b'\\', other]),
                }
                rest = after;
            }
            // Lone trailing backslash: keep it.
            [] => decoded.push(b'\\'),
        }
    }
    decoded
}
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 #[cfg(test)]
mod tests {
use super::*;
Raphaël Gomès
rust: add Vfs trait...
r52761 use crate::vfs::VfsImpl;
Raphaël Gomès
rust-revlog: introduce an `options` module...
r53053 use crate::NULL_REVISION;
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 use pretty_assertions::assert_eq;
/// Every truncated form of changeset data must be rejected.
#[test]
fn test_create_changelogrevisiondata_invalid() {
    // Completely empty
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
    // No newline after manifest
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
    // No newline after user
    assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
    // No newline after timestamp
    assert!(
        ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
    );
    // Missing newline after files
    assert!(ChangelogRevisionData::new(Cow::Borrowed(
        b"abcd\n\n0 0\nfile1\nfile2"
    ))
    .is_err());
    // Only one newline after files
    assert!(ChangelogRevisionData::new(Cow::Borrowed(
        b"abcd\n\n0 0\nfile1\nfile2\n"
    ))
    .is_err());
}
/// A well-formed changeset is split into manifest, user, timestamp, files
/// and description.
#[test]
fn test_create_changelogrevisiondata() {
    // Every newline is spelled out (a trailing `\` only continues the
    // literal on the next source line), so the empty line that separates
    // the file list from the description cannot get lost.
    let data = ChangelogRevisionData::new(Cow::Borrowed(
        b"0123456789abcdef0123456789abcdef01234567\n\
          Some One <someone@example.com>\n\
          0 0\n\
          file1\n\
          file2\n\
          \n\
          some\n\
          commit\n\
          message",
    ))
    .unwrap();
    assert_eq!(
        data.manifest_node().unwrap(),
        Node::from_hex("0123456789abcdef0123456789abcdef01234567")
            .unwrap()
    );
    assert_eq!(data.user(), b"Some One <someone@example.com>");
    assert_eq!(data.timestamp_line(), b"0 0");
    assert_eq!(
        data.files().collect_vec(),
        vec![HgPath::new("file1"), HgPath::new("file2")]
    );
    assert_eq!(data.description(), b"some\ncommit\nmessage");
}
Georges Racinet
rust-changelog: added a test for `NULL_REVISION` special case...
r51267
/// The null revision yields `ChangelogRevisionData::null()` through both
/// access paths.
#[test]
fn test_data_from_rev_null() -> Result<(), RevlogError> {
    // an empty revlog will be enough for this case
    let dir = tempfile::tempdir().unwrap();
    let vfs = VfsImpl::new(dir.path().to_owned(), false);
    std::fs::write(dir.path().join("foo.i"), b"").unwrap();
    let changelog = Changelog {
        revlog: Revlog::open(
            &vfs,
            "foo.i",
            None,
            RevlogOpenOptions::default(),
        )
        .unwrap(),
    };

    let via_shortcut =
        changelog.data_for_unchecked_rev(NULL_REVISION.into())?;
    assert_eq!(via_shortcut, ChangelogRevisionData::null());

    // same with the intermediate entry object
    let via_entry = changelog
        .entry_for_unchecked_rev(NULL_REVISION.into())?
        .data()?;
    assert_eq!(via_entry, ChangelogRevisionData::null());

    Ok(())
}
Arun Kulshreshtha
rust-changelog: don't panic on empty file lists
r52256
/// Listing the files of the null revision data yields nothing (and does
/// not panic).
#[test]
fn test_empty_files_list() {
    let null_data = ChangelogRevisionData::null();
    let files = null_data.files().collect_vec();
    assert!(files.is_empty());
}
Arun Kulshreshtha
hg-core: implement timestamp line parsing
r52284
#[test]
fn test_unescape_basic() {
    // Each of the four supported escapes ('\0', '\\', '\n' and '\r')
    // decodes to the byte it stands for.
    let decoded = unescape_extra(br"AAA\0BBB\\CCC\nDDD\rEEE");
    assert_eq!(&b"AAA\0BBB\\CCC\nDDD\rEEE"[..], &decoded[..]);
}
#[test]
fn test_unescape_unsupported_sequence() {
    // Other escape sequences are left unaltered. The range is inclusive so
    // that byte 255 is exercised as well.
    for c in 0u8..=255 {
        if matches!(c, b'0' | b'\\' | b'n' | b'r') {
            continue;
        }
        let expected = &[b'\\', c][..];
        let unescaped = unescape_extra(expected);
        assert_eq!(expected, &unescaped[..]);
    }
}
#[test]
fn test_unescape_trailing_backslash() {
    // A lone backslash at the very end is kept as-is.
    let input = br"hi\";
    assert_eq!(&input[..], &unescape_extra(input)[..]);
}
#[test]
fn test_unescape_nul_followed_by_octal() {
    // An escaped NUL followed by octal digits decodes to a NUL byte
    // followed by those digits, unchanged.
    let decoded = unescape_extra(br"\012");
    assert_eq!(&b"\x0012"[..], &decoded[..]);
}
/// Float timestamps render as the expected UTC date-times.
#[test]
fn test_parse_float_timestamp() {
    let test_cases = [
        // Zero should map to the UNIX epoch.
        ("0.0", "1970-01-01 00:00:00 UTC"),
        // Negative zero should be the same as positive zero.
        ("-0.0", "1970-01-01 00:00:00 UTC"),
        // Values without fractional components should work like integers.
        // (Assuming the timestamp is within the limits of f64 precision.)
        ("1115154970.0", "2005-05-03 21:16:10 UTC"),
        // We expect some loss of precision in the fractional component
        // when parsing arbitrary floating-point values.
        ("1115154970.123456789", "2005-05-03 21:16:10.123456716 UTC"),
        // But representable f64 values should parse losslessly.
        ("1115154970.123456716", "2005-05-03 21:16:10.123456716 UTC"),
        // Negative fractional components are subtracted from the epoch.
        ("-1.333", "1969-12-31 23:59:58.667 UTC"),
    ];

    test_cases.into_iter().for_each(|(text, rendered)| {
        let parsed = parse_float_timestamp(text).unwrap();
        assert_eq!(parsed.to_string(), rendered);
    });
}
/// Test helper, inverse of `unescape_extra`: escape NUL, backslash,
/// newline and carriage return with a backslash sequence and copy every
/// other byte unchanged.
fn escape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut escaped = Vec::with_capacity(bytes.len());
    for &byte in bytes {
        match byte {
            b'\0' => escaped.extend_from_slice(b"\\0"),
            b'\\' => escaped.extend_from_slice(b"\\\\"),
            b'\n' => escaped.extend_from_slice(b"\\n"),
            b'\r' => escaped.extend_from_slice(b"\\r"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Test helper: encode key-value pairs as escaped `key:value` items
/// separated by NUL bytes.
fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
where
    K: AsRef<[u8]>,
    V: AsRef<[u8]>,
{
    let escaped_pairs = pairs.into_iter().map(|(key, value)| {
        let mut pair = key.as_ref().to_vec();
        pair.push(b':');
        pair.extend_from_slice(value.as_ref());
        escape_extra(&pair)
    });
    // Use fully-qualified syntax to avoid a future naming conflict with
    // the standard library: https://github.com/rust-lang/rust/issues/79524
    Itertools::intersperse(escaped_pairs, vec![b'\0']).concat()
}
/// Encoding then decoding a set of extras gives back the same map.
#[test]
fn test_decode_extra() {
    let extra: BTreeMap<String, Vec<u8>> = BTreeMap::from([
        ("branch".to_string(), b"default".to_vec()),
        ("key-with-hyphens".to_string(), b"value1".to_vec()),
        ("key_with_underscores".to_string(), b"value2".to_vec()),
        ("empty-value".to_string(), b"".to_vec()),
        ("binary-value".to_string(), (0u8..=255).collect::<Vec<_>>()),
    ]);

    let round_tripped = decode_extra(&encode_extra(&extra)).unwrap();

    assert_eq!(extra, round_tripped);
}
/// Malformed extras must be rejected by `decode_extra`.
#[test]
fn test_corrupt_extra() {
    let test_cases: [(&[u8], &str); 8] = [
        (b"", "empty input"),
        (b"\0", "unexpected null byte"),
        (b":empty-key", "empty key"),
        (b"\0leading-null:", "leading null"),
        (b"trailing-null:\0", "trailing null"),
        (b"missing-value", "missing value"),
        (b"$!@# non-alphanum-key:", "non-alphanumeric key"),
        (b"\xF0\x9F\xA6\x80 non-ascii-key:", "non-ASCII key"),
    ];

    for (extra, msg) in test_cases {
        let outcome = decode_extra(extra);
        assert!(
            outcome.is_err(),
            "corrupt extra should have failed to parse: {}",
            msg
        );
    }
}
/// A full timestamp line yields both the expected time and the expected
/// extras.
#[test]
fn test_parse_timestamp_line() {
    let extra: BTreeMap<String, Vec<u8>> = BTreeMap::from([
        ("branch".to_string(), b"default".to_vec()),
        ("key-with-hyphens".to_string(), b"value1".to_vec()),
        ("key_with_underscores".to_string(), b"value2".to_vec()),
        ("empty-value".to_string(), b"".to_vec()),
        ("binary-value".to_string(), (0u8..=255).collect::<Vec<_>>()),
    ]);

    let line = [&b"1115154970 28800 "[..], &encode_extra(&extra)[..]].concat();

    let timestamp = parse_timestamp(&line).unwrap();
    assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

    let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
    assert_eq!(extra, parsed_extra);
}
Martin von Zweigbergk
rust-changelog: start parsing changeset data...
r49938 }