##// END OF EJS Templates
rust-changelog: accessing the index...
Georges Racinet -
r52618:bbe59cc5 default
parent child Browse files
Show More
@@ -1,744 +1,749 b''
1 1 use std::ascii::escape_default;
2 2 use std::borrow::Cow;
3 3 use std::collections::BTreeMap;
4 4 use std::fmt::{Debug, Formatter};
5 5 use std::{iter, str};
6 6
7 7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
8 8 use itertools::{Either, Itertools};
9 9
10 10 use crate::errors::HgError;
11 use crate::revlog::Index;
11 12 use crate::revlog::Revision;
12 13 use crate::revlog::{Node, NodePrefix};
13 14 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 15 use crate::utils::hg_path::HgPath;
15 16 use crate::vfs::Vfs;
16 17 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
17 18
18 19 /// A specialized `Revlog` to work with changelog data format.
19 20 pub struct Changelog {
20 21 /// The generic `revlog` format.
21 22 pub(crate) revlog: Revlog,
22 23 }
23 24
24 25 impl Changelog {
25 26 /// Open the `changelog` of a repository given by its root.
26 27 pub fn open(
27 28 store_vfs: &Vfs,
28 29 options: RevlogOpenOptions,
29 30 ) -> Result<Self, HgError> {
30 31 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
31 32 Ok(Self { revlog })
32 33 }
33 34
34 35 /// Return the `ChangelogRevisionData` for the given node ID.
35 36 pub fn data_for_node(
36 37 &self,
37 38 node: NodePrefix,
38 39 ) -> Result<ChangelogRevisionData, RevlogError> {
39 40 let rev = self.revlog.rev_from_node(node)?;
40 41 self.entry_for_checked_rev(rev)?.data()
41 42 }
42 43
43 44 /// Return the [`ChangelogEntry`] for the given revision number.
44 45 pub fn entry_for_rev(
45 46 &self,
46 47 rev: UncheckedRevision,
47 48 ) -> Result<ChangelogEntry, RevlogError> {
48 49 let revlog_entry = self.revlog.get_entry(rev)?;
49 50 Ok(ChangelogEntry { revlog_entry })
50 51 }
51 52
52 53 /// Same as [`Self::entry_for_rev`] for checked revisions.
53 54 fn entry_for_checked_rev(
54 55 &self,
55 56 rev: Revision,
56 57 ) -> Result<ChangelogEntry, RevlogError> {
57 58 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
58 59 Ok(ChangelogEntry { revlog_entry })
59 60 }
60 61
61 62 /// Return the [`ChangelogRevisionData`] for the given revision number.
62 63 ///
63 64 /// This is a useful shortcut in case the caller does not need the
64 65 /// generic revlog information (parents, hashes etc). Otherwise
65 66 /// consider taking a [`ChangelogEntry`] with
66 67 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
67 68 pub fn data_for_rev(
68 69 &self,
69 70 rev: UncheckedRevision,
70 71 ) -> Result<ChangelogRevisionData, RevlogError> {
71 72 self.entry_for_rev(rev)?.data()
72 73 }
73 74
74 75 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
75 76 self.revlog.node_from_rev(rev)
76 77 }
77 78
78 79 pub fn rev_from_node(
79 80 &self,
80 81 node: NodePrefix,
81 82 ) -> Result<Revision, RevlogError> {
82 83 self.revlog.rev_from_node(node)
83 84 }
85
86 pub fn get_index(&self) -> &Index {
87 &self.revlog.index
88 }
84 89 }
85 90
86 91 impl Graph for Changelog {
87 92 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
88 93 self.revlog.parents(rev)
89 94 }
90 95 }
91 96
92 97 /// A specialized `RevlogEntry` for `changelog` data format
93 98 ///
94 99 /// This is a `RevlogEntry` with the added semantics that the associated
95 100 /// data should meet the requirements for `changelog`, materialized by
96 101 /// the fact that `data()` constructs a `ChangelogRevisionData`.
97 102 /// In case that promise would be broken, the `data` method returns an error.
98 103 #[derive(Clone)]
99 104 pub struct ChangelogEntry<'changelog> {
100 105 /// Same data, as a generic `RevlogEntry`.
101 106 pub(crate) revlog_entry: RevlogEntry<'changelog>,
102 107 }
103 108
104 109 impl<'changelog> ChangelogEntry<'changelog> {
105 110 pub fn data<'a>(
106 111 &'a self,
107 112 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
108 113 let bytes = self.revlog_entry.data()?;
109 114 if bytes.is_empty() {
110 115 Ok(ChangelogRevisionData::null())
111 116 } else {
112 117 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
113 118 RevlogError::Other(HgError::CorruptedRepository(format!(
114 119 "Invalid changelog data for revision {}: {:?}",
115 120 self.revlog_entry.revision(),
116 121 err
117 122 )))
118 123 })?)
119 124 }
120 125 }
121 126
122 127 /// Obtain a reference to the underlying `RevlogEntry`.
123 128 ///
124 129 /// This allows the caller to access the information that is common
125 130 /// to all revlog entries: revision number, node id, parent revisions etc.
126 131 pub fn as_revlog_entry(&self) -> &RevlogEntry {
127 132 &self.revlog_entry
128 133 }
129 134
130 135 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 136 Ok(self
132 137 .revlog_entry
133 138 .p1_entry()?
134 139 .map(|revlog_entry| Self { revlog_entry }))
135 140 }
136 141
137 142 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
138 143 Ok(self
139 144 .revlog_entry
140 145 .p2_entry()?
141 146 .map(|revlog_entry| Self { revlog_entry }))
142 147 }
143 148 }
144 149
/// The data bytes of a `changelog` entry together with the offsets needed to
/// slice them into manifest, user, timestamp, file list and description.
#[derive(PartialEq)]
pub struct ChangelogRevisionData<'changelog> {
    /// Raw data bytes of the `changelog` entry.
    bytes: Cow<'changelog, [u8]>,
    /// Offset just past the hex manifest node (its newline excluded).
    manifest_end: usize,
    /// Offset just past the user+email line (its newline excluded).
    user_end: usize,
    /// Offset just past the timestamp+timezone+extras line (its newline
    /// excluded).
    timestamp_end: usize,
    /// Offset just past the file list (its newline excluded).
    files_end: usize,
}
160 165
161 166 impl<'changelog> ChangelogRevisionData<'changelog> {
162 167 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
163 168 let mut line_iter = bytes.split(|b| b == &b'\n');
164 169 let manifest_end = line_iter
165 170 .next()
166 171 .expect("Empty iterator from split()?")
167 172 .len();
168 173 let user_slice = line_iter.next().ok_or_else(|| {
169 174 HgError::corrupted("Changeset data truncated after manifest line")
170 175 })?;
171 176 let user_end = manifest_end + 1 + user_slice.len();
172 177 let timestamp_slice = line_iter.next().ok_or_else(|| {
173 178 HgError::corrupted("Changeset data truncated after user line")
174 179 })?;
175 180 let timestamp_end = user_end + 1 + timestamp_slice.len();
176 181 let mut files_end = timestamp_end + 1;
177 182 loop {
178 183 let line = line_iter.next().ok_or_else(|| {
179 184 HgError::corrupted("Changeset data truncated in files list")
180 185 })?;
181 186 if line.is_empty() {
182 187 if files_end == bytes.len() {
183 188 // The list of files ended with a single newline (there
184 189 // should be two)
185 190 return Err(HgError::corrupted(
186 191 "Changeset data truncated after files list",
187 192 ));
188 193 }
189 194 files_end -= 1;
190 195 break;
191 196 }
192 197 files_end += line.len() + 1;
193 198 }
194 199
195 200 Ok(Self {
196 201 bytes,
197 202 manifest_end,
198 203 user_end,
199 204 timestamp_end,
200 205 files_end,
201 206 })
202 207 }
203 208
204 209 fn null() -> Self {
205 210 Self::new(Cow::Borrowed(
206 211 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
207 212 ))
208 213 .unwrap()
209 214 }
210 215
211 216 /// Return an iterator over the lines of the entry.
212 217 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
213 218 self.bytes.split(|b| b == &b'\n')
214 219 }
215 220
216 221 /// Return the node id of the `manifest` referenced by this `changelog`
217 222 /// entry.
218 223 pub fn manifest_node(&self) -> Result<Node, HgError> {
219 224 let manifest_node_hex = &self.bytes[..self.manifest_end];
220 225 Node::from_hex_for_repo(manifest_node_hex)
221 226 }
222 227
223 228 /// The full user string (usually a name followed by an email enclosed in
224 229 /// angle brackets)
225 230 pub fn user(&self) -> &[u8] {
226 231 &self.bytes[self.manifest_end + 1..self.user_end]
227 232 }
228 233
229 234 /// The full timestamp line (timestamp in seconds, offset in seconds, and
230 235 /// possibly extras)
231 236 // TODO: We should expose this in a more useful way
232 237 pub fn timestamp_line(&self) -> &[u8] {
233 238 &self.bytes[self.user_end + 1..self.timestamp_end]
234 239 }
235 240
236 241 /// Parsed timestamp.
237 242 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
238 243 parse_timestamp(self.timestamp_line())
239 244 }
240 245
241 246 /// Optional commit extras.
242 247 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
243 248 parse_timestamp_line_extra(self.timestamp_line())
244 249 }
245 250
246 251 /// The files changed in this revision.
247 252 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
248 253 if self.timestamp_end == self.files_end {
249 254 Either::Left(iter::empty())
250 255 } else {
251 256 Either::Right(
252 257 self.bytes[self.timestamp_end + 1..self.files_end]
253 258 .split(|b| b == &b'\n')
254 259 .map(HgPath::new),
255 260 )
256 261 }
257 262 }
258 263
259 264 /// The change description.
260 265 pub fn description(&self) -> &[u8] {
261 266 &self.bytes[self.files_end + 2..]
262 267 }
263 268 }
264 269
265 270 impl Debug for ChangelogRevisionData<'_> {
266 271 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
267 272 f.debug_struct("ChangelogRevisionData")
268 273 .field("bytes", &debug_bytes(&self.bytes))
269 274 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
270 275 .field(
271 276 "user",
272 277 &debug_bytes(
273 278 &self.bytes[self.manifest_end + 1..self.user_end],
274 279 ),
275 280 )
276 281 .field(
277 282 "timestamp",
278 283 &debug_bytes(
279 284 &self.bytes[self.user_end + 1..self.timestamp_end],
280 285 ),
281 286 )
282 287 .field(
283 288 "files",
284 289 &debug_bytes(
285 290 &self.bytes[self.timestamp_end + 1..self.files_end],
286 291 ),
287 292 )
288 293 .field(
289 294 "description",
290 295 &debug_bytes(&self.bytes[self.files_end + 2..]),
291 296 )
292 297 .finish()
293 298 }
294 299 }
295 300
/// Render `bytes` as text for debug output, escaping every byte with
/// `std::ascii::escape_default`.
fn debug_bytes(bytes: &[u8]) -> String {
    // `escape_default` only ever yields ASCII bytes, so each of them maps
    // to exactly one `char`.
    bytes
        .iter()
        .flat_map(|byte| escape_default(*byte))
        .map(char::from)
        .collect()
}
302 307
303 308 /// Parse the raw bytes of the timestamp line from a changelog entry.
304 309 ///
305 310 /// According to the documentation in `hg help dates` and the
306 311 /// implementation in `changelog.py`, the format of the timestamp line
307 312 /// is `time tz extra\n` where:
308 313 ///
309 314 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
310 315 /// as seconds since the UNIX epoch.
311 316 ///
312 317 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
313 318 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
314 319 /// opposite of the sign in ISO 8601 timestamps).
315 320 ///
316 321 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
317 322 /// and value in each pair separated by an ASCII colon. Keys are limited to
318 323 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
319 324 /// arbitrary bytes.
320 325 fn parse_timestamp(
321 326 timestamp_line: &[u8],
322 327 ) -> Result<DateTime<FixedOffset>, HgError> {
323 328 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
324 329
325 330 let timestamp_bytes = parts
326 331 .next()
327 332 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
328 333 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
329 334 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
330 335 })?;
331 336 let timestamp_utc = timestamp_str
332 337 .parse()
333 338 .map_err(|e| {
334 339 HgError::corrupted(format!("failed to parse timestamp: {e}"))
335 340 })
336 341 .and_then(|secs| {
337 342 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
338 343 HgError::corrupted(format!(
339 344 "integer timestamp out of valid range: {secs}"
340 345 ))
341 346 })
342 347 })
343 348 // Attempt to parse the timestamp as a float if we can't parse
344 349 // it as an int. It doesn't seem like float timestamps are actually
345 350 // used in practice, but the Python code supports them.
346 351 .or_else(|_| parse_float_timestamp(timestamp_str))?;
347 352
348 353 let timezone_bytes = parts
349 354 .next()
350 355 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
351 356 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
352 357 .map_err(|e| {
353 358 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
354 359 })?
355 360 .parse()
356 361 .map_err(|e| {
357 362 HgError::corrupted(format!("timezone is not an integer: {e}"))
358 363 })?;
359 364 let timezone = FixedOffset::west_opt(timezone_secs)
360 365 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
361 366
362 367 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
363 368 }
364 369
365 370 /// Attempt to parse the given string as floating-point timestamp, and
366 371 /// convert the result into a `chrono::NaiveDateTime`.
367 372 fn parse_float_timestamp(
368 373 timestamp_str: &str,
369 374 ) -> Result<NaiveDateTime, HgError> {
370 375 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
371 376 HgError::corrupted(format!("failed to parse timestamp: {e}"))
372 377 })?;
373 378
374 379 // To construct a `NaiveDateTime` we'll need to convert the float
375 380 // into signed integer seconds and unsigned integer nanoseconds.
376 381 let mut secs = timestamp.trunc() as i64;
377 382 let mut subsecs = timestamp.fract();
378 383
379 384 // If the timestamp is negative, we need to express the fractional
380 385 // component as positive nanoseconds since the previous second.
381 386 if timestamp < 0.0 {
382 387 secs -= 1;
383 388 subsecs += 1.0;
384 389 }
385 390
386 391 // This cast should be safe because the fractional component is
387 392 // by definition less than 1.0, so this value should not exceed
388 393 // 1 billion, which is representable as an f64 without loss of
389 394 // precision and should fit into a u32 without overflowing.
390 395 //
391 396 // (Any loss of precision in the fractional component will have
392 397 // already happened at the time of initial parsing; in general,
393 398 // f64s are insufficiently precise to provide nanosecond-level
394 399 // precision with present-day timestamps.)
395 400 let nsecs = (subsecs * 1_000_000_000.0) as u32;
396 401
397 402 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
398 403 HgError::corrupted(format!(
399 404 "float timestamp out of valid range: {timestamp}"
400 405 ))
401 406 })
402 407 }
403 408
404 409 /// Decode changeset extra fields.
405 410 ///
406 411 /// Extras are null-delimited key-value pairs where the key consists of ASCII
407 412 /// alphanumeric characters plus hyphens and underscores, and the value can
408 413 /// contain arbitrary bytes.
409 414 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
410 415 extra
411 416 .split(|c| *c == b'\0')
412 417 .map(|pair| {
413 418 let pair = unescape_extra(pair);
414 419 let mut iter = pair.splitn(2, |c| *c == b':');
415 420
416 421 let key_bytes =
417 422 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
418 423 HgError::corrupted("empty key in changeset extras")
419 424 })?;
420 425
421 426 let key = str::from_utf8(key_bytes)
422 427 .ok()
423 428 .filter(|k| {
424 429 k.chars().all(|c| {
425 430 c.is_ascii_alphanumeric() || c == '_' || c == '-'
426 431 })
427 432 })
428 433 .ok_or_else(|| {
429 434 let key = String::from_utf8_lossy(key_bytes);
430 435 HgError::corrupted(format!(
431 436 "invalid key in changeset extras: {key}",
432 437 ))
433 438 })?
434 439 .to_string();
435 440
436 441 let value = iter.next().map(Into::into).ok_or_else(|| {
437 442 HgError::corrupted(format!(
438 443 "missing value for changeset extra: {key}"
439 444 ))
440 445 })?;
441 446
442 447 Ok((key, value))
443 448 })
444 449 .collect()
445 450 }
446 451
447 452 /// Parse the extra fields from a changeset's timestamp line.
448 453 fn parse_timestamp_line_extra(
449 454 timestamp_line: &[u8],
450 455 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
451 456 Ok(timestamp_line
452 457 .splitn(3, |c| *c == b' ')
453 458 .nth(2)
454 459 .map(decode_extra)
455 460 .transpose()?
456 461 .unwrap_or_default())
457 462 }
458 463
/// Undo the escaping that Mercurial applies to changelog extras.
///
/// `_string_escape` in `changelog.py` escapes exactly 4 characters (null,
/// backslash, newline, and carriage return), so those are the only
/// sequences decoded here.
///
/// The Python code additionally works around escaped nuls followed by an
/// ASCII octal digit, because Python's built-in `string_escape` codec would
/// read them as an escaped octal byte value. No such workaround is needed
/// here since octal escapes are not decoded at all.
fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        rest = match rest {
            [] => break,
            [b'\\', b'0', tail @ ..] => {
                decoded.push(b'\0');
                tail
            }
            [b'\\', b'\\', tail @ ..] => {
                decoded.push(b'\\');
                tail
            }
            [b'\\', b'n', tail @ ..] => {
                decoded.push(b'\n');
                tail
            }
            [b'\\', b'r', tail @ ..] => {
                decoded.push(b'\r');
                tail
            }
            // This case should never occur in theory because any backslash
            // in the original input should have been escaped with another
            // backslash, so no sequence other than the 4 above should be
            // observable. Such a sequence is copied through unchanged.
            [b'\\', other, tail @ ..] => {
                decoded.extend_from_slice(&[b'\\', *other]);
                tail
            }
            // An ordinary byte, or a lone trailing backslash: copy it as is.
            [byte, tail @ ..] => {
                decoded.push(*byte);
                tail
            }
        };
    }

    decoded
}
494 499
/// Unit tests for changelog data parsing: section offsets, timestamps and
/// extras.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::Vfs;
    use crate::NULL_REVISION;
    use pretty_assertions::assert_eq;

    #[test]
    fn test_create_changelogrevisiondata_invalid() {
        // Completely empty
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
        // No newline after manifest
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
        // No newline after user
        assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
        // No newline after timestamp
        assert!(
            ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
        );
        // Missing newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2"
        ))
        .is_err(),);
        // Only one newline after files
        assert!(ChangelogRevisionData::new(Cow::Borrowed(
            b"abcd\n\n0 0\nfile1\nfile2\n"
        ))
        .is_err(),);
    }

    #[test]
    fn test_create_changelogrevisiondata() {
        // Newlines are spelled out as escapes so that the fixture does not
        // depend on how this source file is indented.
        let data = ChangelogRevisionData::new(Cow::Borrowed(
            b"0123456789abcdef0123456789abcdef01234567\n\
              Some One <someone@example.com>\n\
              0 0\n\
              file1\n\
              file2\n\
              \n\
              some\n\
              commit\n\
              message",
        ))
        .unwrap();
        assert_eq!(
            data.manifest_node().unwrap(),
            Node::from_hex("0123456789abcdef0123456789abcdef01234567")
                .unwrap()
        );
        assert_eq!(data.user(), b"Some One <someone@example.com>");
        assert_eq!(data.timestamp_line(), b"0 0");
        assert_eq!(
            data.files().collect_vec(),
            vec![HgPath::new("file1"), HgPath::new("file2")]
        );
        assert_eq!(data.description(), b"some\ncommit\nmessage");
    }

    #[test]
    fn test_data_from_rev_null() -> Result<(), RevlogError> {
        // an empty revlog will be enough for this case
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog =
            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                .unwrap();

        let changelog = Changelog { revlog };
        assert_eq!(
            changelog.data_for_rev(NULL_REVISION.into())?,
            ChangelogRevisionData::null()
        );
        // same with the intermediate entry object
        assert_eq!(
            changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
            ChangelogRevisionData::null()
        );
        Ok(())
    }

    #[test]
    fn test_empty_files_list() {
        assert!(ChangelogRevisionData::null()
            .files()
            .collect_vec()
            .is_empty());
    }

    #[test]
    fn test_unescape_basic() {
        // '\0', '\\', '\n', and '\r' are correctly unescaped.
        let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
        let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_unsupported_sequence() {
        // Other escape sequences are left unaltered. The range is inclusive
        // so that byte 255 is exercised as well.
        for c in 0u8..=255 {
            match c {
                b'0' | b'\\' | b'n' | b'r' => continue,
                c => {
                    let expected = &[b'\\', c][..];
                    let unescaped = unescape_extra(expected);
                    assert_eq!(expected, &unescaped[..]);
                }
            }
        }
    }

    #[test]
    fn test_unescape_trailing_backslash() {
        // Trailing backslashes are OK.
        let expected = br"hi\";
        let unescaped = unescape_extra(expected);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_unescape_nul_followed_by_octal() {
        // Escaped NUL chars followed by octal digits are decoded correctly.
        let expected = b"\x0012";
        let escaped = br"\012";
        let unescaped = unescape_extra(escaped);
        assert_eq!(&expected[..], &unescaped[..]);
    }

    #[test]
    fn test_parse_float_timestamp() {
        let test_cases = [
            // Zero should map to the UNIX epoch.
            ("0.0", "1970-01-01 00:00:00"),
            // Negative zero should be the same as positive zero.
            ("-0.0", "1970-01-01 00:00:00"),
            // Values without fractional components should work like integers.
            // (Assuming the timestamp is within the limits of f64 precision.)
            ("1115154970.0", "2005-05-03 21:16:10"),
            // We expect some loss of precision in the fractional component
            // when parsing arbitrary floating-point values.
            ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
            // But representable f64 values should parse losslessly.
            ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
            // Negative fractional components are subtracted from the epoch.
            ("-1.333", "1969-12-31 23:59:58.667"),
        ];

        for (input, expected) in test_cases {
            let res = parse_float_timestamp(input).unwrap().to_string();
            assert_eq!(res, expected);
        }
    }

    /// Test-only inverse of `unescape_extra`: escape null, backslash,
    /// newline and carriage return, and copy every other byte unchanged.
    fn escape_extra(bytes: &[u8]) -> Vec<u8> {
        let mut output = Vec::with_capacity(bytes.len());

        for c in bytes.iter().copied() {
            output.extend_from_slice(match c {
                b'\0' => &b"\\0"[..],
                b'\\' => &b"\\\\"[..],
                b'\n' => &b"\\n"[..],
                b'\r' => &b"\\r"[..],
                _ => {
                    output.push(c);
                    continue;
                }
            });
        }

        output
    }

    /// Test-only encoder for extras: escape each `key:value` pair and join
    /// the pairs with NUL bytes.
    fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
    where
        K: AsRef<[u8]>,
        V: AsRef<[u8]>,
    {
        let extras = pairs.into_iter().map(|(k, v)| {
            escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
        });
        // Use fully-qualified syntax to avoid a future naming conflict with
        // the standard library: https://github.com/rust-lang/rust/issues/79524
        Itertools::intersperse(extras, b"\0".to_vec()).concat()
    }

    #[test]
    fn test_decode_extra() {
        let extra = [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>();

        let encoded = encode_extra(&extra);
        let decoded = decode_extra(&encoded).unwrap();

        assert_eq!(extra, decoded);
    }

    #[test]
    fn test_corrupt_extra() {
        let test_cases = [
            (&b""[..], "empty input"),
            (&b"\0"[..], "unexpected null byte"),
            (&b":empty-key"[..], "empty key"),
            (&b"\0leading-null:"[..], "leading null"),
            (&b"trailing-null:\0"[..], "trailing null"),
            (&b"missing-value"[..], "missing value"),
            (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
            (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
        ];

        for (extra, msg) in test_cases {
            assert!(
                decode_extra(extra).is_err(),
                "corrupt extra should have failed to parse: {}",
                msg
            );
        }
    }

    #[test]
    fn test_parse_timestamp_line() {
        let extra = [
            ("branch".into(), b"default".to_vec()),
            ("key-with-hyphens".into(), b"value1".to_vec()),
            ("key_with_underscores".into(), b"value2".to_vec()),
            ("empty-value".into(), b"".to_vec()),
            ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
        ]
        .into_iter()
        .collect::<BTreeMap<String, Vec<u8>>>();

        let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
        line.extend_from_slice(&encode_extra(&extra));

        let timestamp = parse_timestamp(&line).unwrap();
        assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");

        let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
        assert_eq!(extra, parsed_extra);
    }
}
General Comments 0
You need to be logged in to leave comments. Login now