##// END OF EJS Templates
hg-core: separate timestamp and extra methods
Arun Kulshreshtha -
r52286:6603a144 default
parent child Browse files
Show More
@@ -1,746 +1,744 b''
1 1 use std::ascii::escape_default;
2 2 use std::borrow::Cow;
3 3 use std::collections::BTreeMap;
4 4 use std::fmt::{Debug, Formatter};
5 5 use std::{iter, str};
6 6
7 7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
8 8 use itertools::{Either, Itertools};
9 9
10 10 use crate::errors::HgError;
11 11 use crate::revlog::Revision;
12 12 use crate::revlog::{Node, NodePrefix};
13 13 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::vfs::Vfs;
16 16 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
17 17
18 18 /// A specialized `Revlog` to work with changelog data format.
19 19 pub struct Changelog {
20 20 /// The generic `revlog` format.
21 21 pub(crate) revlog: Revlog,
22 22 }
23 23
24 24 impl Changelog {
25 25 /// Open the `changelog` of a repository given by its root.
26 26 pub fn open(
27 27 store_vfs: &Vfs,
28 28 options: RevlogOpenOptions,
29 29 ) -> Result<Self, HgError> {
30 30 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
31 31 Ok(Self { revlog })
32 32 }
33 33
34 34 /// Return the `ChangelogRevisionData` for the given node ID.
35 35 pub fn data_for_node(
36 36 &self,
37 37 node: NodePrefix,
38 38 ) -> Result<ChangelogRevisionData, RevlogError> {
39 39 let rev = self.revlog.rev_from_node(node)?;
40 40 self.entry_for_checked_rev(rev)?.data()
41 41 }
42 42
43 43 /// Return the [`ChangelogEntry`] for the given revision number.
44 44 pub fn entry_for_rev(
45 45 &self,
46 46 rev: UncheckedRevision,
47 47 ) -> Result<ChangelogEntry, RevlogError> {
48 48 let revlog_entry = self.revlog.get_entry(rev)?;
49 49 Ok(ChangelogEntry { revlog_entry })
50 50 }
51 51
52 52 /// Same as [`Self::entry_for_rev`] for checked revisions.
53 53 fn entry_for_checked_rev(
54 54 &self,
55 55 rev: Revision,
56 56 ) -> Result<ChangelogEntry, RevlogError> {
57 57 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
58 58 Ok(ChangelogEntry { revlog_entry })
59 59 }
60 60
61 61 /// Return the [`ChangelogRevisionData`] for the given revision number.
62 62 ///
63 63 /// This is a useful shortcut in case the caller does not need the
64 64 /// generic revlog information (parents, hashes etc). Otherwise
65 65 /// consider taking a [`ChangelogEntry`] with
66 66 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
67 67 pub fn data_for_rev(
68 68 &self,
69 69 rev: UncheckedRevision,
70 70 ) -> Result<ChangelogRevisionData, RevlogError> {
71 71 self.entry_for_rev(rev)?.data()
72 72 }
73 73
74 74 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
75 75 self.revlog.node_from_rev(rev)
76 76 }
77 77
78 78 pub fn rev_from_node(
79 79 &self,
80 80 node: NodePrefix,
81 81 ) -> Result<Revision, RevlogError> {
82 82 self.revlog.rev_from_node(node)
83 83 }
84 84 }
85 85
86 86 impl Graph for Changelog {
87 87 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
88 88 self.revlog.parents(rev)
89 89 }
90 90 }
91 91
92 92 /// A specialized `RevlogEntry` for `changelog` data format
93 93 ///
94 94 /// This is a `RevlogEntry` with the added semantics that the associated
95 95 /// data should meet the requirements for `changelog`, materialized by
96 96 /// the fact that `data()` constructs a `ChangelogRevisionData`.
97 97 /// In case that promise would be broken, the `data` method returns an error.
98 98 #[derive(Clone)]
99 99 pub struct ChangelogEntry<'changelog> {
100 100 /// Same data, as a generic `RevlogEntry`.
101 101 pub(crate) revlog_entry: RevlogEntry<'changelog>,
102 102 }
103 103
104 104 impl<'changelog> ChangelogEntry<'changelog> {
105 105 pub fn data<'a>(
106 106 &'a self,
107 107 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
108 108 let bytes = self.revlog_entry.data()?;
109 109 if bytes.is_empty() {
110 110 Ok(ChangelogRevisionData::null())
111 111 } else {
112 112 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
113 113 RevlogError::Other(HgError::CorruptedRepository(format!(
114 114 "Invalid changelog data for revision {}: {:?}",
115 115 self.revlog_entry.revision(),
116 116 err
117 117 )))
118 118 })?)
119 119 }
120 120 }
121 121
122 122 /// Obtain a reference to the underlying `RevlogEntry`.
123 123 ///
124 124 /// This allows the caller to access the information that is common
125 125 /// to all revlog entries: revision number, node id, parent revisions etc.
126 126 pub fn as_revlog_entry(&self) -> &RevlogEntry {
127 127 &self.revlog_entry
128 128 }
129 129
130 130 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 131 Ok(self
132 132 .revlog_entry
133 133 .p1_entry()?
134 134 .map(|revlog_entry| Self { revlog_entry }))
135 135 }
136 136
137 137 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
138 138 Ok(self
139 139 .revlog_entry
140 140 .p2_entry()?
141 141 .map(|revlog_entry| Self { revlog_entry }))
142 142 }
143 143 }
144 144
145 145 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
146 146 #[derive(PartialEq)]
147 147 pub struct ChangelogRevisionData<'changelog> {
148 148 /// The data bytes of the `changelog` entry.
149 149 bytes: Cow<'changelog, [u8]>,
150 150 /// The end offset for the hex manifest (not including the newline)
151 151 manifest_end: usize,
152 152 /// The end offset for the user+email (not including the newline)
153 153 user_end: usize,
154 154 /// The end offset for the timestamp+timezone+extras (not including the
155 155 /// newline)
156 156 timestamp_end: usize,
157 157 /// The end offset for the file list (not including the newline)
158 158 files_end: usize,
159 159 }
160 160
161 161 impl<'changelog> ChangelogRevisionData<'changelog> {
162 162 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
163 163 let mut line_iter = bytes.split(|b| b == &b'\n');
164 164 let manifest_end = line_iter
165 165 .next()
166 166 .expect("Empty iterator from split()?")
167 167 .len();
168 168 let user_slice = line_iter.next().ok_or_else(|| {
169 169 HgError::corrupted("Changeset data truncated after manifest line")
170 170 })?;
171 171 let user_end = manifest_end + 1 + user_slice.len();
172 172 let timestamp_slice = line_iter.next().ok_or_else(|| {
173 173 HgError::corrupted("Changeset data truncated after user line")
174 174 })?;
175 175 let timestamp_end = user_end + 1 + timestamp_slice.len();
176 176 let mut files_end = timestamp_end + 1;
177 177 loop {
178 178 let line = line_iter.next().ok_or_else(|| {
179 179 HgError::corrupted("Changeset data truncated in files list")
180 180 })?;
181 181 if line.is_empty() {
182 182 if files_end == bytes.len() {
183 183 // The list of files ended with a single newline (there
184 184 // should be two)
185 185 return Err(HgError::corrupted(
186 186 "Changeset data truncated after files list",
187 187 ));
188 188 }
189 189 files_end -= 1;
190 190 break;
191 191 }
192 192 files_end += line.len() + 1;
193 193 }
194 194
195 195 Ok(Self {
196 196 bytes,
197 197 manifest_end,
198 198 user_end,
199 199 timestamp_end,
200 200 files_end,
201 201 })
202 202 }
203 203
204 204 fn null() -> Self {
205 205 Self::new(Cow::Borrowed(
206 206 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
207 207 ))
208 208 .unwrap()
209 209 }
210 210
211 211 /// Return an iterator over the lines of the entry.
212 212 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
213 213 self.bytes.split(|b| b == &b'\n')
214 214 }
215 215
216 216 /// Return the node id of the `manifest` referenced by this `changelog`
217 217 /// entry.
218 218 pub fn manifest_node(&self) -> Result<Node, HgError> {
219 219 let manifest_node_hex = &self.bytes[..self.manifest_end];
220 220 Node::from_hex_for_repo(manifest_node_hex)
221 221 }
222 222
223 223 /// The full user string (usually a name followed by an email enclosed in
224 224 /// angle brackets)
225 225 pub fn user(&self) -> &[u8] {
226 226 &self.bytes[self.manifest_end + 1..self.user_end]
227 227 }
228 228
229 229 /// The full timestamp line (timestamp in seconds, offset in seconds, and
230 230 /// possibly extras)
231 231 // TODO: We should expose this in a more useful way
232 232 pub fn timestamp_line(&self) -> &[u8] {
233 233 &self.bytes[self.user_end + 1..self.timestamp_end]
234 234 }
235 235
236 /// Parsed timestamp line, including optional extras.
237 pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> {
238 TimestampAndExtra::from_bytes(self.timestamp_line())
236 /// Parsed timestamp.
237 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
238 parse_timestamp(self.timestamp_line())
239 }
240
241 /// Optional commit extras.
242 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
243 parse_timestamp_line_extra(self.timestamp_line())
239 244 }
240 245
241 246 /// The files changed in this revision.
242 247 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
243 248 if self.timestamp_end == self.files_end {
244 249 Either::Left(iter::empty())
245 250 } else {
246 251 Either::Right(
247 252 self.bytes[self.timestamp_end + 1..self.files_end]
248 253 .split(|b| b == &b'\n')
249 254 .map(HgPath::new),
250 255 )
251 256 }
252 257 }
253 258
254 259 /// The change description.
255 260 pub fn description(&self) -> &[u8] {
256 261 &self.bytes[self.files_end + 2..]
257 262 }
258 263 }
259 264
260 265 impl Debug for ChangelogRevisionData<'_> {
261 266 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
262 267 f.debug_struct("ChangelogRevisionData")
263 268 .field("bytes", &debug_bytes(&self.bytes))
264 269 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
265 270 .field(
266 271 "user",
267 272 &debug_bytes(
268 273 &self.bytes[self.manifest_end + 1..self.user_end],
269 274 ),
270 275 )
271 276 .field(
272 277 "timestamp",
273 278 &debug_bytes(
274 279 &self.bytes[self.user_end + 1..self.timestamp_end],
275 280 ),
276 281 )
277 282 .field(
278 283 "files",
279 284 &debug_bytes(
280 285 &self.bytes[self.timestamp_end + 1..self.files_end],
281 286 ),
282 287 )
283 288 .field(
284 289 "description",
285 290 &debug_bytes(&self.bytes[self.files_end + 2..]),
286 291 )
287 292 .finish()
288 293 }
289 294 }
290 295
291 296 fn debug_bytes(bytes: &[u8]) -> String {
292 297 String::from_utf8_lossy(
293 298 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
294 299 )
295 300 .to_string()
296 301 }
297 302
298 /// Parsed timestamp line, including the timestamp and optional extras.
299 #[derive(Clone, Debug)]
300 pub struct TimestampAndExtra {
301 pub timestamp: DateTime<FixedOffset>,
302 pub extra: BTreeMap<String, Vec<u8>>,
303 }
303 /// Parse the raw bytes of the timestamp line from a changelog entry.
304 ///
305 /// According to the documentation in `hg help dates` and the
306 /// implementation in `changelog.py`, the format of the timestamp line
307 /// is `time tz extra\n` where:
308 ///
309 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
310 /// as seconds since the UNIX epoch.
311 ///
312 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
313 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
314 /// opposite of the sign in ISO 8601 timestamps).
315 ///
316 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
317 /// and value in each pair separated by an ASCII colon. Keys are limited to
318 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
319 /// arbitrary bytes.
320 fn parse_timestamp(
321 timestamp_line: &[u8],
322 ) -> Result<DateTime<FixedOffset>, HgError> {
323 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
304 324
305 impl TimestampAndExtra {
306 /// Parse the raw bytes of the timestamp line from a changelog entry.
307 ///
308 /// According to the documentation in `hg help dates` and the
309 /// implementation in `changelog.py`, the format of the timestamp line
310 /// is `time tz extra\n` where:
311 ///
312 /// - `time` is an ASCII-encoded signed int or float denoting a UTC
313 /// timestamp as seconds since the UNIX epoch.
314 ///
315 /// - `tz` is the timezone offset as an ASCII-encoded signed integer
316 /// denoting seconds WEST of UTC (so negative for timezones east of UTC,
317 /// which is the opposite of the sign in ISO 8601 timestamps).
318 ///
319 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the
320 /// key and value in each pair separated by an ASCII colon. Keys are
321 /// limited to ASCII letters, digits, hyphens, and underscores, whereas
322 /// values can be arbitrary bytes.
323 fn from_bytes(line: &[u8]) -> Result<Self, HgError> {
324 let mut parts = line.splitn(3, |c| *c == b' ');
325
326 let timestamp_bytes = parts
327 .next()
328 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
329 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
330 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
331 })?;
332 let timestamp_utc = timestamp_str
333 .parse()
334 .map_err(|e| {
335 HgError::corrupted(format!("failed to parse timestamp: {e}"))
325 let timestamp_bytes = parts
326 .next()
327 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
328 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
329 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
330 })?;
331 let timestamp_utc = timestamp_str
332 .parse()
333 .map_err(|e| {
334 HgError::corrupted(format!("failed to parse timestamp: {e}"))
335 })
336 .and_then(|secs| {
337 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
338 HgError::corrupted(format!(
339 "integer timestamp out of valid range: {secs}"
340 ))
336 341 })
337 .and_then(|secs| {
338 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
339 HgError::corrupted(format!(
340 "integer timestamp out of valid range: {secs}"
341 ))
342 })
343 })
344 // Attempt to parse the timestamp as a float if we can't parse
345 // it as an int. It doesn't seem like float timestamps are actually
346 // used in practice, but the Python code supports them.
347 .or_else(|_| parse_float_timestamp(timestamp_str))?;
342 })
343 // Attempt to parse the timestamp as a float if we can't parse
344 // it as an int. It doesn't seem like float timestamps are actually
345 // used in practice, but the Python code supports them.
346 .or_else(|_| parse_float_timestamp(timestamp_str))?;
348 347
349 let timezone_bytes = parts
350 .next()
351 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
352 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
353 .map_err(|e| {
354 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
355 })?
356 .parse()
357 .map_err(|e| {
358 HgError::corrupted(format!("timezone is not an integer: {e}"))
359 })?;
360 let timezone =
361 FixedOffset::west_opt(timezone_secs).ok_or_else(|| {
362 HgError::corrupted("timezone offset out of bounds")
363 })?;
348 let timezone_bytes = parts
349 .next()
350 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
351 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
352 .map_err(|e| {
353 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
354 })?
355 .parse()
356 .map_err(|e| {
357 HgError::corrupted(format!("timezone is not an integer: {e}"))
358 })?;
359 let timezone = FixedOffset::west_opt(timezone_secs)
360 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
364 361
365 let timestamp =
366 DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);
367 let extra = parts
368 .next()
369 .map(parse_extra)
370 .transpose()?
371 .unwrap_or_default();
372
373 Ok(Self { timestamp, extra })
374 }
362 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
375 363 }
376 364
377 365 /// Attempt to parse the given string as floating-point timestamp, and
378 366 /// convert the result into a `chrono::NaiveDateTime`.
379 367 fn parse_float_timestamp(
380 368 timestamp_str: &str,
381 369 ) -> Result<NaiveDateTime, HgError> {
382 370 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
383 371 HgError::corrupted(format!("failed to parse timestamp: {e}"))
384 372 })?;
385 373
386 374 // To construct a `NaiveDateTime` we'll need to convert the float
387 375 // into signed integer seconds and unsigned integer nanoseconds.
388 376 let mut secs = timestamp.trunc() as i64;
389 377 let mut subsecs = timestamp.fract();
390 378
391 379 // If the timestamp is negative, we need to express the fractional
392 380 // component as positive nanoseconds since the previous second.
393 381 if timestamp < 0.0 {
394 382 secs -= 1;
395 383 subsecs += 1.0;
396 384 }
397 385
398 386 // This cast should be safe because the fractional component is
399 387 // by definition less than 1.0, so this value should not exceed
400 388 // 1 billion, which is representable as an f64 without loss of
401 389 // precision and should fit into a u32 without overflowing.
402 390 //
403 391 // (Any loss of precision in the fractional component will have
404 392 // already happened at the time of initial parsing; in general,
405 393 // f64s are insufficiently precise to provide nanosecond-level
406 394 // precision with present-day timestamps.)
407 395 let nsecs = (subsecs * 1_000_000_000.0) as u32;
408 396
409 397 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
410 398 HgError::corrupted(format!(
411 399 "float timestamp out of valid range: {timestamp}"
412 400 ))
413 401 })
414 402 }
415 403
416 /// Parse the "extra" fields from a changeset's timestamp line.
404 /// Decode changeset extra fields.
417 405 ///
418 406 /// Extras are null-delimited key-value pairs where the key consists of ASCII
419 407 /// alphanumeric characters plus hyphens and underscores, and the value can
420 408 /// contain arbitrary bytes.
421 fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
409 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
422 410 extra
423 411 .split(|c| *c == b'\0')
424 412 .map(|pair| {
425 413 let pair = unescape_extra(pair);
426 414 let mut iter = pair.splitn(2, |c| *c == b':');
427 415
428 416 let key_bytes =
429 417 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
430 418 HgError::corrupted("empty key in changeset extras")
431 419 })?;
432 420
433 421 let key = str::from_utf8(key_bytes)
434 422 .ok()
435 423 .filter(|k| {
436 424 k.chars().all(|c| {
437 425 c.is_ascii_alphanumeric() || c == '_' || c == '-'
438 426 })
439 427 })
440 428 .ok_or_else(|| {
441 429 let key = String::from_utf8_lossy(key_bytes);
442 430 HgError::corrupted(format!(
443 431 "invalid key in changeset extras: {key}",
444 432 ))
445 433 })?
446 434 .to_string();
447 435
448 436 let value = iter.next().map(Into::into).ok_or_else(|| {
449 437 HgError::corrupted(format!(
450 438 "missing value for changeset extra: {key}"
451 439 ))
452 440 })?;
453 441
454 442 Ok((key, value))
455 443 })
456 444 .collect()
457 445 }
458 446
447 /// Parse the extra fields from a changeset's timestamp line.
448 fn parse_timestamp_line_extra(
449 timestamp_line: &[u8],
450 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
451 Ok(timestamp_line
452 .splitn(3, |c| *c == b' ')
453 .nth(2)
454 .map(decode_extra)
455 .transpose()?
456 .unwrap_or_default())
457 }
458
/// Undo the escaping Mercurial applies to changelog extras.
///
/// `_string_escape` in `changelog.py` escapes exactly four bytes (NUL,
/// backslash, newline and carriage return), so those are the only escape
/// sequences decoded here.
///
/// The Python implementation carries a workaround for an escaped NUL
/// followed by an ASCII octal digit, because Python's `string_escape` codec
/// would read that as an octal byte value. No octal decoding happens here,
/// so the workaround is not needed.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        rest = match rest {
            [] => break,
            [b'\\', b'0', tail @ ..] => {
                decoded.push(b'\0');
                tail
            }
            [b'\\', b'\\', tail @ ..] => {
                decoded.push(b'\\');
                tail
            }
            [b'\\', b'n', tail @ ..] => {
                decoded.push(b'\n');
                tail
            }
            [b'\\', b'r', tail @ ..] => {
                decoded.push(b'\r');
                tail
            }
            // In theory unreachable: every backslash in the original value
            // was itself escaped, so only the four sequences above can
            // appear. Anything else is passed through unchanged.
            [b'\\', other, tail @ ..] => {
                decoded.extend_from_slice(&[b'\\', *other]);
                tail
            }
            // An ordinary byte, or a lone backslash at the very end.
            [literal, tail @ ..] => {
                decoded.push(*literal);
                tail
            }
        };
    }

    decoded
}
494 494
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::Vfs;
    use crate::NULL_REVISION;
    use pretty_assertions::assert_eq;

    /// Truncated changeset data must be rejected at every section boundary.
    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err(),);
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err(),);
    }

    /// A well-formed entry exposes each section through its accessor.
    #[test]
    fn test_create_changelogrevisiondata() {
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567
Some One <someone@example.com>
0 0
file1
file2

some
commit
message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    /// The null revision yields the null changeset data.
    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                .unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION.into())?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }

    #[test]
    fn test_empty_files_list() {
        assert!(ChangelogRevisionData::null()
            .files()
            .collect_vec()
            .is_empty());
    }

    #[test]
    fn test_unescape_basic() {
        // '\0', '\\', '\n', and '\r' are correctly unescaped.
        let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
        let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_unsupported_sequence() {
        // Other escape sequences are left unaltered. The range is
        // inclusive so that byte 255 is exercised too.
        for c in 0u8..=255 {
            if matches!(c, b'0' | b'\\' | b'n' | b'r') {
                continue;
            }
            let expected = &[b'\\', c][..];
            let unescaped = unescape_extra(expected);
            assert_eq!(expected, &unescaped[..]);
        }
    }

    #[test]
    fn test_unescape_trailing_backslash() {
        // Trailing backslashes are OK.
        let expected = br"hi\";
        let unescaped = unescape_extra(expected);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_nul_followed_by_octal() {
        // Escaped NUL chars followed by octal digits are decoded correctly.
        let expected = b"\012";
        let escaped = br"\012";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_parse_float_timestamp() {
        let test_cases = [
            // Zero should map to the UNIX epoch.
            ("0.0", "1970-01-01 00:00:00"),
            // Negative zero should be the same as positive zero.
            ("-0.0", "1970-01-01 00:00:00"),
            // Values without fractional components should work like integers.
            // (Assuming the timestamp is within the limits of f64 precision.)
            ("1115154970.0", "2005-05-03 21:16:10"),
            // We expect some loss of precision in the fractional component
            // when parsing arbitrary floating-point values.
            ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
            // But representable f64 values should parse losslessly.
            ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
            // Negative fractional components are subtracted from the epoch.
            ("-1.333", "1969-12-31 23:59:58.667"),
        ];

        for (input, expected) in test_cases {
            let res = parse_float_timestamp(input).unwrap().to_string();
            assert_eq!(res, expected);
        }
    }

    /// Test-only inverse of `unescape_extra`.
    fn escape_extra(bytes: &[u8]) -> Vec<u8> {
        let mut output = Vec::with_capacity(bytes.len());

        for c in bytes.iter().copied() {
            output.extend_from_slice(match c {
                b'\0' => &b"\\0"[..],
                b'\\' => &b"\\\\"[..],
                b'\n' => &b"\\n"[..],
                b'\r' => &b"\\r"[..],
                _ => {
                    output.push(c);
                    continue;
                }
            });
        }

        output
    }

    /// Test-only encoder producing the NUL-delimited `key:value` form that
    /// `decode_extra` reads.
    fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
    where
        K: AsRef<[u8]>,
        V: AsRef<[u8]>,
    {
        let extras = pairs.into_iter().map(|(k, v)| {
            escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
        });
        // Use fully-qualified syntax to avoid a future naming conflict with
        // the standard library: https://github.com/rust-lang/rust/issues/79524
        Itertools::intersperse(extras, b"\0".to_vec()).concat()
    }

    #[test]
    fn test_decode_extra() {
        let extra = [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>();

        let encoded = encode_extra(&extra);
        let decoded = decode_extra(&encoded).unwrap();

        assert_eq!(extra, decoded);
    }

    #[test]
    fn test_corrupt_extra() {
        let test_cases = [
            (&b""[..], "empty input"),
            (&b"\0"[..], "unexpected null byte"),
            (&b":empty-key"[..], "empty key"),
            (&b"\0leading-null:"[..], "leading null"),
            (&b"trailing-null:\0"[..], "trailing null"),
            (&b"missing-value"[..], "missing value"),
            (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
            (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
        ];

        for (extra, msg) in test_cases {
            assert!(
                decode_extra(extra).is_err(),
                "corrupt extra should have failed to parse: {}",
                msg
            );
        }
    }

    #[test]
    fn test_parse_timestamp_line() {
        let extra = [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>();

        let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
        line.extend_from_slice(&encode_extra(&extra));

        let timestamp = parse_timestamp(&line).unwrap();
        assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

        let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
        assert_eq!(extra, parsed_extra);
    }
}
General Comments 0
You need to be logged in to leave comments. Login now