changelog.rs
767 lines
| 25.6 KiB
| application/rls-services+xml
|
RustLexer
Arun Kulshreshtha
|
r52284 | use std::ascii::escape_default; | ||
use std::borrow::Cow; | ||||
use std::collections::BTreeMap; | ||||
use std::fmt::{Debug, Formatter}; | ||||
use std::{iter, str}; | ||||
Arseniy Alekseyev
|
r52810 | use chrono::{DateTime, FixedOffset, Utc}; | ||
Arun Kulshreshtha
|
r52284 | use itertools::{Either, Itertools}; | ||
Simon Sapin
|
r47172 | use crate::errors::HgError; | ||
Georges Racinet
|
r52618 | use crate::revlog::Index; | ||
Georges Racinet
|
r51640 | use crate::revlog::Revision; | ||
Simon Sapin
|
r47478 | use crate::revlog::{Node, NodePrefix}; | ||
Raphaël Gomès
|
r50832 | use crate::revlog::{Revlog, RevlogEntry, RevlogError}; | ||
Martin von Zweigbergk
|
r49938 | use crate::utils::hg_path::HgPath; | ||
Raphaël Gomès
|
r52761 | use crate::vfs::VfsImpl; | ||
Raphaël Gomès
|
r52084 | use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision}; | ||
Antoine Cezar
|
r46103 | |||
Georges Racinet
|
r51266 | /// A specialized `Revlog` to work with changelog data format. | ||
Antoine Cezar
|
r46103 | pub struct Changelog { | ||
/// The generic `revlog` format. | ||||
Simon Sapin
|
r47162 | pub(crate) revlog: Revlog, | ||
Antoine Cezar
|
r46103 | } | ||
impl Changelog { | ||||
/// Open the `changelog` of a repository given by its root. | ||||
Raphaël Gomès
|
r52084 | pub fn open( | ||
Raphaël Gomès
|
r52761 | store_vfs: &VfsImpl, | ||
Raphaël Gomès
|
r52084 | options: RevlogOpenOptions, | ||
) -> Result<Self, HgError> { | ||||
let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?; | ||||
Antoine Cezar
|
r46103 | Ok(Self { revlog }) | ||
} | ||||
Georges Racinet
|
r51266 | /// Return the `ChangelogRevisionData` for the given node ID. | ||
Simon Sapin
|
r48783 | pub fn data_for_node( | ||
Antoine Cezar
|
r46103 | &self, | ||
Simon Sapin
|
r47160 | node: NodePrefix, | ||
Simon Sapin
|
r49372 | ) -> Result<ChangelogRevisionData, RevlogError> { | ||
Simon Sapin
|
r48782 | let rev = self.revlog.rev_from_node(node)?; | ||
Raphaël Gomès
|
r51870 | self.entry_for_checked_rev(rev)?.data() | ||
Antoine Cezar
|
r46103 | } | ||
Georges Racinet
|
r51268 | /// Return the [`ChangelogEntry`] for the given revision number. | ||
Martin von Zweigbergk
|
r49939 | pub fn entry_for_rev( | ||
&self, | ||||
Raphaël Gomès
|
r51870 | rev: UncheckedRevision, | ||
) -> Result<ChangelogEntry, RevlogError> { | ||||
let revlog_entry = self.revlog.get_entry(rev)?; | ||||
Ok(ChangelogEntry { revlog_entry }) | ||||
} | ||||
/// Same as [`Self::entry_for_rev`] for checked revisions. | ||||
fn entry_for_checked_rev( | ||||
&self, | ||||
Martin von Zweigbergk
|
r49939 | rev: Revision, | ||
Georges Racinet
|
r51268 | ) -> Result<ChangelogEntry, RevlogError> { | ||
Raphaël Gomès
|
r51870 | let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; | ||
Georges Racinet
|
r51268 | Ok(ChangelogEntry { revlog_entry }) | ||
Martin von Zweigbergk
|
r49939 | } | ||
Georges Racinet
|
r51266 | /// Return the [`ChangelogRevisionData`] for the given revision number. | ||
Georges Racinet
|
r51268 | /// | ||
/// This is a useful shortcut in case the caller does not need the | ||||
/// generic revlog information (parents, hashes etc). Otherwise | ||||
/// consider taking a [`ChangelogEntry`] with | ||||
/// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. | ||||
Simon Sapin
|
r48783 | pub fn data_for_rev( | ||
Antoine Cezar
|
r46103 | &self, | ||
Raphaël Gomès
|
r51870 | rev: UncheckedRevision, | ||
Simon Sapin
|
r49372 | ) -> Result<ChangelogRevisionData, RevlogError> { | ||
Georges Racinet
|
r51268 | self.entry_for_rev(rev)?.data() | ||
Antoine Cezar
|
r46103 | } | ||
Simon Sapin
|
r47478 | |||
Raphaël Gomès
|
r51870 | pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { | ||
Simon Sapin
|
r48781 | self.revlog.node_from_rev(rev) | ||
Simon Sapin
|
r47478 | } | ||
Martin von Zweigbergk
|
r49939 | |||
pub fn rev_from_node( | ||||
&self, | ||||
node: NodePrefix, | ||||
) -> Result<Revision, RevlogError> { | ||||
self.revlog.rev_from_node(node) | ||||
} | ||||
Georges Racinet
|
r52618 | |||
pub fn get_index(&self) -> &Index { | ||||
&self.revlog.index | ||||
} | ||||
Antoine Cezar
|
r46103 | } | ||
Raphaël Gomès
|
r51871 | impl Graph for Changelog { | ||
fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { | ||||
self.revlog.parents(rev) | ||||
} | ||||
} | ||||
Georges Racinet
|
r51268 | /// A specialized `RevlogEntry` for `changelog` data format | ||
/// | ||||
/// This is a `RevlogEntry` with the added semantics that the associated | ||||
/// data should meet the requirements for `changelog`, materialized by | ||||
/// the fact that `data()` constructs a `ChangelogRevisionData`. | ||||
/// In case that promise would be broken, the `data` method returns an error. | ||||
#[derive(Clone)] | ||||
pub struct ChangelogEntry<'changelog> { | ||||
/// Same data, as a generic `RevlogEntry`. | ||||
pub(crate) revlog_entry: RevlogEntry<'changelog>, | ||||
} | ||||
impl<'changelog> ChangelogEntry<'changelog> { | ||||
pub fn data<'a>( | ||||
&'a self, | ||||
) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { | ||||
let bytes = self.revlog_entry.data()?; | ||||
if bytes.is_empty() { | ||||
Ok(ChangelogRevisionData::null()) | ||||
} else { | ||||
Ok(ChangelogRevisionData::new(bytes).map_err(|err| { | ||||
RevlogError::Other(HgError::CorruptedRepository(format!( | ||||
"Invalid changelog data for revision {}: {:?}", | ||||
self.revlog_entry.revision(), | ||||
err | ||||
))) | ||||
})?) | ||||
} | ||||
} | ||||
/// Obtain a reference to the underlying `RevlogEntry`. | ||||
/// | ||||
/// This allows the caller to access the information that is common | ||||
/// to all revlog entries: revision number, node id, parent revisions etc. | ||||
pub fn as_revlog_entry(&self) -> &RevlogEntry { | ||||
&self.revlog_entry | ||||
} | ||||
Georges Racinet
|
r51271 | |||
pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { | ||||
Ok(self | ||||
.revlog_entry | ||||
.p1_entry()? | ||||
.map(|revlog_entry| Self { revlog_entry })) | ||||
} | ||||
pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { | ||||
Ok(self | ||||
.revlog_entry | ||||
.p2_entry()? | ||||
.map(|revlog_entry| Self { revlog_entry })) | ||||
} | ||||
Georges Racinet
|
r51268 | } | ||
Antoine Cezar
|
/// The data bytes of one `changelog` revision, together with the offsets
/// needed to slice them into fields.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// Raw data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// Offset where the hex manifest ends (newline excluded).
    manifest_end: usize,
    /// Offset where the user+email ends (newline excluded).
    user_end: usize,
    /// Offset where the timestamp+timezone+extras line ends (newline
    /// excluded).
    timestamp_end: usize,
    /// Offset where the file list ends (newline excluded).
    files_end: usize,
}
Martin von Zweigbergk
|
r49987 | impl<'changelog> ChangelogRevisionData<'changelog> { | ||
fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> { | ||||
Martin von Zweigbergk
|
r49938 | let mut line_iter = bytes.split(|b| b == &b'\n'); | ||
let manifest_end = line_iter | ||||
.next() | ||||
.expect("Empty iterator from split()?") | ||||
.len(); | ||||
let user_slice = line_iter.next().ok_or_else(|| { | ||||
HgError::corrupted("Changeset data truncated after manifest line") | ||||
})?; | ||||
let user_end = manifest_end + 1 + user_slice.len(); | ||||
let timestamp_slice = line_iter.next().ok_or_else(|| { | ||||
HgError::corrupted("Changeset data truncated after user line") | ||||
})?; | ||||
let timestamp_end = user_end + 1 + timestamp_slice.len(); | ||||
let mut files_end = timestamp_end + 1; | ||||
loop { | ||||
let line = line_iter.next().ok_or_else(|| { | ||||
HgError::corrupted("Changeset data truncated in files list") | ||||
})?; | ||||
if line.is_empty() { | ||||
if files_end == bytes.len() { | ||||
// The list of files ended with a single newline (there | ||||
// should be two) | ||||
return Err(HgError::corrupted( | ||||
"Changeset data truncated after files list", | ||||
)); | ||||
} | ||||
files_end -= 1; | ||||
break; | ||||
} | ||||
files_end += line.len() + 1; | ||||
} | ||||
Ok(Self { | ||||
bytes, | ||||
manifest_end, | ||||
user_end, | ||||
timestamp_end, | ||||
files_end, | ||||
}) | ||||
Martin von Zweigbergk
|
r49937 | } | ||
fn null() -> Self { | ||||
Martin von Zweigbergk
|
r49987 | Self::new(Cow::Borrowed( | ||
b"0000000000000000000000000000000000000000\n\n0 0\n\n", | ||||
)) | ||||
Martin von Zweigbergk
|
r49938 | .unwrap() | ||
Martin von Zweigbergk
|
r49937 | } | ||
Antoine Cezar
|
r46103 | /// Return an iterator over the lines of the entry. | ||
pub fn lines(&self) -> impl Iterator<Item = &[u8]> { | ||||
Martin von Zweigbergk
|
r49936 | self.bytes.split(|b| b == &b'\n') | ||
Antoine Cezar
|
r46103 | } | ||
/// Return the node id of the `manifest` referenced by this `changelog` | ||||
/// entry. | ||||
Simon Sapin
|
r48778 | pub fn manifest_node(&self) -> Result<Node, HgError> { | ||
Martin von Zweigbergk
|
r49938 | let manifest_node_hex = &self.bytes[..self.manifest_end]; | ||
Martin von Zweigbergk
|
r49937 | Node::from_hex_for_repo(manifest_node_hex) | ||
Antoine Cezar
|
r46103 | } | ||
Martin von Zweigbergk
|
r49938 | |||
/// The full user string (usually a name followed by an email enclosed in | ||||
/// angle brackets) | ||||
pub fn user(&self) -> &[u8] { | ||||
&self.bytes[self.manifest_end + 1..self.user_end] | ||||
} | ||||
/// The full timestamp line (timestamp in seconds, offset in seconds, and | ||||
/// possibly extras) | ||||
// TODO: We should expose this in a more useful way | ||||
pub fn timestamp_line(&self) -> &[u8] { | ||||
&self.bytes[self.user_end + 1..self.timestamp_end] | ||||
} | ||||
Arun Kulshreshtha
|
r52286 | /// Parsed timestamp. | ||
pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> { | ||||
parse_timestamp(self.timestamp_line()) | ||||
} | ||||
/// Optional commit extras. | ||||
pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | ||||
parse_timestamp_line_extra(self.timestamp_line()) | ||||
Arun Kulshreshtha
|
r52284 | } | ||
Martin von Zweigbergk
|
r49938 | /// The files changed in this revision. | ||
pub fn files(&self) -> impl Iterator<Item = &HgPath> { | ||||
Arun Kulshreshtha
|
r52256 | if self.timestamp_end == self.files_end { | ||
Either::Left(iter::empty()) | ||||
} else { | ||||
Either::Right( | ||||
self.bytes[self.timestamp_end + 1..self.files_end] | ||||
.split(|b| b == &b'\n') | ||||
.map(HgPath::new), | ||||
) | ||||
} | ||||
Martin von Zweigbergk
|
r49938 | } | ||
/// The change description. | ||||
pub fn description(&self) -> &[u8] { | ||||
&self.bytes[self.files_end + 2..] | ||||
} | ||||
Antoine Cezar
|
r46103 | } | ||
Martin von Zweigbergk
|
r49938 | |||
Martin von Zweigbergk
|
r49987 | impl Debug for ChangelogRevisionData<'_> { | ||
Martin von Zweigbergk
|
r49938 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | ||
f.debug_struct("ChangelogRevisionData") | ||||
.field("bytes", &debug_bytes(&self.bytes)) | ||||
.field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) | ||||
.field( | ||||
"user", | ||||
&debug_bytes( | ||||
&self.bytes[self.manifest_end + 1..self.user_end], | ||||
), | ||||
) | ||||
.field( | ||||
"timestamp", | ||||
&debug_bytes( | ||||
&self.bytes[self.user_end + 1..self.timestamp_end], | ||||
), | ||||
) | ||||
.field( | ||||
"files", | ||||
&debug_bytes( | ||||
&self.bytes[self.timestamp_end + 1..self.files_end], | ||||
), | ||||
) | ||||
.field( | ||||
"description", | ||||
&debug_bytes(&self.bytes[self.files_end + 2..]), | ||||
) | ||||
.finish() | ||||
} | ||||
} | ||||
/// Render `bytes` as text, passing each byte through
/// `std::ascii::escape_default`.
fn debug_bytes(bytes: &[u8]) -> String {
    let escaped: Vec<u8> =
        bytes.iter().copied().flat_map(escape_default).collect();
    String::from_utf8_lossy(&escaped).into_owned()
}
Arun Kulshreshtha
|
r52286 | /// Parse the raw bytes of the timestamp line from a changelog entry. | ||
/// | ||||
/// According to the documentation in `hg help dates` and the | ||||
/// implementation in `changelog.py`, the format of the timestamp line | ||||
/// is `time tz extra\n` where: | ||||
/// | ||||
/// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp | ||||
/// as seconds since the UNIX epoch. | ||||
/// | ||||
/// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting | ||||
/// seconds WEST of UTC (so negative for timezones east of UTC, which is the | ||||
/// opposite of the sign in ISO 8601 timestamps). | ||||
/// | ||||
/// - `extra` is an optional set of NUL-delimited key-value pairs, with the key | ||||
/// and value in each pair separated by an ASCII colon. Keys are limited to | ||||
/// ASCII letters, digits, hyphens, and underscores, whereas values can be | ||||
/// arbitrary bytes. | ||||
fn parse_timestamp( | ||||
timestamp_line: &[u8], | ||||
) -> Result<DateTime<FixedOffset>, HgError> { | ||||
let mut parts = timestamp_line.splitn(3, |c| *c == b' '); | ||||
Arun Kulshreshtha
|
r52284 | |||
Arun Kulshreshtha
|
r52286 | let timestamp_bytes = parts | ||
.next() | ||||
.ok_or_else(|| HgError::corrupted("missing timestamp"))?; | ||||
let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| { | ||||
HgError::corrupted(format!("timestamp is not valid UTF-8: {e}")) | ||||
})?; | ||||
let timestamp_utc = timestamp_str | ||||
.parse() | ||||
.map_err(|e| { | ||||
HgError::corrupted(format!("failed to parse timestamp: {e}")) | ||||
}) | ||||
.and_then(|secs| { | ||||
Arseniy Alekseyev
|
r52810 | DateTime::from_timestamp(secs, 0).ok_or_else(|| { | ||
Arun Kulshreshtha
|
r52286 | HgError::corrupted(format!( | ||
"integer timestamp out of valid range: {secs}" | ||||
)) | ||||
Arun Kulshreshtha
|
r52284 | }) | ||
Arun Kulshreshtha
|
r52286 | }) | ||
// Attempt to parse the timestamp as a float if we can't parse | ||||
// it as an int. It doesn't seem like float timestamps are actually | ||||
// used in practice, but the Python code supports them. | ||||
.or_else(|_| parse_float_timestamp(timestamp_str))?; | ||||
Arun Kulshreshtha
|
r52284 | |||
Arun Kulshreshtha
|
r52286 | let timezone_bytes = parts | ||
.next() | ||||
.ok_or_else(|| HgError::corrupted("missing timezone"))?; | ||||
let timezone_secs: i32 = str::from_utf8(timezone_bytes) | ||||
.map_err(|e| { | ||||
HgError::corrupted(format!("timezone is not valid UTF-8: {e}")) | ||||
})? | ||||
.parse() | ||||
.map_err(|e| { | ||||
HgError::corrupted(format!("timezone is not an integer: {e}")) | ||||
})?; | ||||
let timezone = FixedOffset::west_opt(timezone_secs) | ||||
.ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?; | ||||
Arun Kulshreshtha
|
r52284 | |||
Arseniy Alekseyev
|
r52810 | Ok(DateTime::from_naive_utc_and_offset( | ||
timestamp_utc.naive_utc(), | ||||
timezone, | ||||
)) | ||||
Arun Kulshreshtha
|
r52284 | } | ||
/// Attempt to parse the given string as floating-point timestamp, and | ||||
/// convert the result into a `chrono::NaiveDateTime`. | ||||
fn parse_float_timestamp( | ||||
timestamp_str: &str, | ||||
Arseniy Alekseyev
|
r52810 | ) -> Result<DateTime<Utc>, HgError> { | ||
Arun Kulshreshtha
|
r52284 | let timestamp = timestamp_str.parse::<f64>().map_err(|e| { | ||
HgError::corrupted(format!("failed to parse timestamp: {e}")) | ||||
})?; | ||||
// To construct a `NaiveDateTime` we'll need to convert the float | ||||
// into signed integer seconds and unsigned integer nanoseconds. | ||||
let mut secs = timestamp.trunc() as i64; | ||||
let mut subsecs = timestamp.fract(); | ||||
// If the timestamp is negative, we need to express the fractional | ||||
// component as positive nanoseconds since the previous second. | ||||
if timestamp < 0.0 { | ||||
secs -= 1; | ||||
subsecs += 1.0; | ||||
} | ||||
// This cast should be safe because the fractional component is | ||||
// by definition less than 1.0, so this value should not exceed | ||||
// 1 billion, which is representable as an f64 without loss of | ||||
// precision and should fit into a u32 without overflowing. | ||||
// | ||||
// (Any loss of precision in the fractional component will have | ||||
// already happened at the time of initial parsing; in general, | ||||
// f64s are insufficiently precise to provide nanosecond-level | ||||
// precision with present-day timestamps.) | ||||
let nsecs = (subsecs * 1_000_000_000.0) as u32; | ||||
Arseniy Alekseyev
|
r52810 | DateTime::from_timestamp(secs, nsecs).ok_or_else(|| { | ||
Arun Kulshreshtha
|
r52284 | HgError::corrupted(format!( | ||
"float timestamp out of valid range: {timestamp}" | ||||
)) | ||||
}) | ||||
} | ||||
Arun Kulshreshtha
|
r52286 | /// Decode changeset extra fields. | ||
Arun Kulshreshtha
|
r52284 | /// | ||
/// Extras are null-delimited key-value pairs where the key consists of ASCII | ||||
/// alphanumeric characters plus hyphens and underscores, and the value can | ||||
/// contain arbitrary bytes. | ||||
Arun Kulshreshtha
|
r52286 | fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | ||
Arun Kulshreshtha
|
r52284 | extra | ||
.split(|c| *c == b'\0') | ||||
.map(|pair| { | ||||
let pair = unescape_extra(pair); | ||||
let mut iter = pair.splitn(2, |c| *c == b':'); | ||||
let key_bytes = | ||||
iter.next().filter(|k| !k.is_empty()).ok_or_else(|| { | ||||
HgError::corrupted("empty key in changeset extras") | ||||
})?; | ||||
let key = str::from_utf8(key_bytes) | ||||
.ok() | ||||
.filter(|k| { | ||||
k.chars().all(|c| { | ||||
c.is_ascii_alphanumeric() || c == '_' || c == '-' | ||||
}) | ||||
}) | ||||
.ok_or_else(|| { | ||||
let key = String::from_utf8_lossy(key_bytes); | ||||
HgError::corrupted(format!( | ||||
"invalid key in changeset extras: {key}", | ||||
)) | ||||
})? | ||||
.to_string(); | ||||
let value = iter.next().map(Into::into).ok_or_else(|| { | ||||
HgError::corrupted(format!( | ||||
"missing value for changeset extra: {key}" | ||||
)) | ||||
})?; | ||||
Ok((key, value)) | ||||
}) | ||||
.collect() | ||||
} | ||||
Arun Kulshreshtha
|
r52286 | /// Parse the extra fields from a changeset's timestamp line. | ||
fn parse_timestamp_line_extra( | ||||
timestamp_line: &[u8], | ||||
) -> Result<BTreeMap<String, Vec<u8>>, HgError> { | ||||
Ok(timestamp_line | ||||
.splitn(3, |c| *c == b' ') | ||||
.nth(2) | ||||
.map(decode_extra) | ||||
.transpose()? | ||||
.unwrap_or_default()) | ||||
} | ||||
Arun Kulshreshtha
|
/// Undo the escaping Mercurial applies to changelog extras.
///
/// `_string_escape` in `changelog.py` escapes exactly 4 characters (null,
/// backslash, newline, and carriage return), so those are the only escape
/// sequences decoded here.
///
/// The Python side additionally works around escaped nuls followed by an
/// ASCII octal digit, because Python's built-in `string_escape` codec
/// would read such a sequence as an escaped octal byte value. No such
/// workaround is needed here since octal is not decoded at all.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut pos = 0;
    while pos < bytes.len() {
        let current = bytes[pos];
        pos += 1;
        if current != b'\\' {
            decoded.push(current);
            continue;
        }
        match bytes.get(pos).copied() {
            // A lone backslash at the very end is kept as it is.
            None => decoded.push(b'\\'),
            Some(code) => {
                pos += 1;
                match code {
                    b'0' => decoded.push(b'\0'),
                    b'\\' => decoded.push(b'\\'),
                    b'n' => decoded.push(b'\n'),
                    b'r' => decoded.push(b'\r'),
                    // In theory unreachable: every backslash of the
                    // original input should have been escaped with another
                    // backslash, so no sequence other than the 4 above
                    // should show up. Keep such a sequence untouched.
                    other => {
                        decoded.push(b'\\');
                        decoded.push(other);
                    }
                }
            }
        }
    }
    decoded
}
Martin von Zweigbergk
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::VfsImpl;
    use crate::{
        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
        NULL_REVISION,
    };
    use pretty_assertions::assert_eq;

    /// Every kind of truncated changeset data must be rejected by
    /// `ChangelogRevisionData::new`.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err(),);
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err(),);
    }

    /// Well-formed changeset data is split into the expected fields.
    #[test]
    fn test_create_changelogrevisiondata() {
        // The empty line after `file2` separates the file list from the
        // description; the continuation lines must stay unindented since
        // their leading whitespace would be part of the data.
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// The null revision yields `ChangelogRevisionData::null()`.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = VfsImpl {
            base: temp.path().to_owned(),
        };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        std::fs::write(temp.path().join("foo.d"), b"").unwrap();
        let revlog = Revlog::open(
            &vfs,
            "foo.i",
            None,
            RevlogOpenOptions::new(
                false,
                RevlogDataConfig::default(),
                RevlogDeltaConfig::default(),
                RevlogFeatureConfig::default(),
            ),
        )
        .unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION.into())?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }

    #[test]
    fn test_empty_files_list() {
        assert!(ChangelogRevisionData::null()
            .files()
            .collect_vec()
            .is_empty());
    }

    #[test]
    fn test_unescape_basic() {
        // '\0', '\\', '\n', and '\r' are correctly unescaped.
        let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
        let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_unsupported_sequence() {
        // Other escape sequences are left unaltered. The range is
        // inclusive so that byte 0xFF is exercised as well.
        for c in 0u8..=255 {
            match c {
                b'0' | b'\\' | b'n' | b'r' => continue,
                c => {
                    let expected = &[b'\\', c][..];
                    let unescaped = unescape_extra(expected);
                    assert_eq!(expected, &unescaped[..]);
                }
            }
        }
    }

    #[test]
    fn test_unescape_trailing_backslash() {
        // Trailing backslashes are OK.
        let expected = br"hi\";
        let unescaped = unescape_extra(expected);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_nul_followed_by_octal() {
        // Escaped NUL chars followed by octal digits are decoded correctly.
        let expected = b"\x0012";
        let escaped = br"\012";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_parse_float_timestamp() {
        let test_cases = [
            // Zero should map to the UNIX epoch.
            ("0.0", "1970-01-01 00:00:00 UTC"),
            // Negative zero should be the same as positive zero.
            ("-0.0", "1970-01-01 00:00:00 UTC"),
            // Values without fractional components should work like integers.
            // (Assuming the timestamp is within the limits of f64 precision.)
            ("1115154970.0", "2005-05-03 21:16:10 UTC"),
            // We expect some loss of precision in the fractional component
            // when parsing arbitrary floating-point values.
            ("1115154970.123456789", "2005-05-03 21:16:10.123456716 UTC"),
            // But representable f64 values should parse losslessly.
            ("1115154970.123456716", "2005-05-03 21:16:10.123456716 UTC"),
            // Negative fractional components are subtracted from the epoch.
            ("-1.333", "1969-12-31 23:59:58.667 UTC"),
        ];
        for (input, expected) in test_cases {
            let res = parse_float_timestamp(input).unwrap().to_string();
            assert_eq!(res, expected);
        }
    }

    /// Test-only inverse of `unescape_extra`: escape null, backslash,
    /// newline and carriage return.
    fn escape_extra(bytes: &[u8]) -> Vec<u8> {
        let mut output = Vec::with_capacity(bytes.len());
        for c in bytes.iter().copied() {
            output.extend_from_slice(match c {
                b'\0' => &b"\\0"[..],
                b'\\' => &b"\\\\"[..],
                b'\n' => &b"\\n"[..],
                b'\r' => &b"\\r"[..],
                _ => {
                    output.push(c);
                    continue;
                }
            });
        }
        output
    }

    /// Test-only inverse of `decode_extra`: escape each `key:value` pair
    /// and join the pairs with NUL bytes.
    fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
    where
        K: AsRef<[u8]>,
        V: AsRef<[u8]>,
    {
        let extras = pairs.into_iter().map(|(k, v)| {
            escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
        });
        // Use fully-qualified syntax to avoid a future naming conflict with
        // the standard library: https://github.com/rust-lang/rust/issues/79524
        Itertools::intersperse(extras, b"\0".to_vec()).concat()
    }

    #[test]
    fn test_decode_extra() {
        let extra = [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>();

        let encoded = encode_extra(&extra);
        let decoded = decode_extra(&encoded).unwrap();

        assert_eq!(extra, decoded);
    }

    #[test]
    fn test_corrupt_extra() {
        let test_cases = [
            (&b""[..], "empty input"),
            (&b"\0"[..], "unexpected null byte"),
            (&b":empty-key"[..], "empty key"),
            (&b"\0leading-null:"[..], "leading null"),
            (&b"trailing-null:\0"[..], "trailing null"),
            (&b"missing-value"[..], "missing value"),
            (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
            (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
        ];

        for (extra, msg) in test_cases {
            assert!(
                decode_extra(extra).is_err(),
                "corrupt extra should have failed to parse: {}",
                msg
            );
        }
    }

    #[test]
    fn test_parse_timestamp_line() {
        let extra = [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>();

        let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
        line.extend_from_slice(&encode_extra(&extra));

        let timestamp = parse_timestamp(&line).unwrap();
        assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

        let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
        assert_eq!(extra, parsed_extra);
    }
}