# HG changeset patch # User Arseniy Alekseyev # Date 2023-05-18 16:53:17 # Node ID 39ed7b2953bb0dfb4b83a73e7041d921f217140a # Parent d1cab48354bca53bc4be6f7124ca542c960b6e04 rust: mostly avoid streaming zstd decompression Streaming ZStd decompression seems slightly slower, and the API we use makes it very inconvenient to re-use the decompression context. Instead of using that, use the buffer-backed version, because we can give a reasonable-ish size estimate. diff --git a/rust/hg-core/src/revlog/mod.rs b/rust/hg-core/src/revlog/mod.rs --- a/rust/hg-core/src/revlog/mod.rs +++ b/rust/hg-core/src/revlog/mod.rs @@ -596,13 +596,26 @@ impl<'revlog> RevlogEntry<'revlog> { } fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> { + let cap = self.uncompressed_len.max(0) as usize; if self.is_delta() { - let mut buf = Vec::with_capacity(self.compressed_len as usize); - zstd::stream::copy_decode(self.bytes, &mut buf) - .map_err(|e| corrupted(e.to_string()))?; + // [cap] is usually an over-estimate of the space needed because + // it's the length of delta-decoded data, but we're interested + // in the size of the delta. + // This means we have to [shrink_to_fit] to avoid holding on + // to a large chunk of memory, but it also means we must have a + // fallback branch, for the case when the delta is longer than + // the original data (surprisingly, this does happen in practice) + let mut buf = Vec::with_capacity(cap); + match zstd_decompress_to_buffer(self.bytes, &mut buf) { + Ok(_) => buf.shrink_to_fit(), + Err(_) => { + buf.clear(); + zstd::stream::copy_decode(self.bytes, &mut buf) + .map_err(|e| corrupted(e.to_string()))?; + } + }; Ok(buf) } else { - let cap = self.uncompressed_len.max(0) as usize; let mut buf = Vec::with_capacity(cap); let len = zstd_decompress_to_buffer(self.bytes, &mut buf) .map_err(|e| corrupted(e.to_string()))?;