# HG changeset patch # User Simon Sapin # Date 2021-07-08 10:18:21 # Node ID ff97e793ed36f40a1e43f4991226e18d9a34616a # Parent 37825a85d3b013ca5fe1907c26894058e749e4db dirstate-v2: Introduce a docket file .hg/dirstate now only contains some metadata to point to a separate data file named .hg/dirstate.{}.d with a random hexadecimal identifier. For now every update creates a new data file and removes the old one, but later we’ll (usually) append to an existing file. Separating into two files allows doing the "write to a temporary file then atomically rename into destination" dance with only a small docket file, without always rewriting a lot of data. Differential Revision: https://phab.mercurial-scm.org/D11088 diff --git a/contrib/dirstatenonnormalcheck.py b/contrib/dirstatenonnormalcheck.py --- a/contrib/dirstatenonnormalcheck.py +++ b/contrib/dirstatenonnormalcheck.py @@ -37,12 +37,12 @@ def checkconsistency(ui, orig, dmap, _no ui.develwarn(b"[map] %s\n" % b_nonnormalcomputed, config=b'dirstate') -def _checkdirstate(orig, self, arg): +def _checkdirstate(orig, self, *args, **kwargs): """Check nonnormal set consistency before and after the call to orig""" checkconsistency( self._ui, orig, self._map, self._map.nonnormalset, b"before" ) - r = orig(self, arg) + r = orig(self, *args, **kwargs) checkconsistency( self._ui, orig, self._map, self._map.nonnormalset, b"after" ) diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -7,6 +7,7 @@ from __future__ import absolute_import +import binascii import codecs import collections import contextlib @@ -987,6 +988,24 @@ def debugstate(ui, repo, **opts): @command( + b'debugdirstateignorepatternshash', + [], + _(b''), +) +def debugdirstateignorepatternshash(ui, repo, **opts): + """show the hash of ignore patterns stored in dirstate if v2, + or nothing for dirstate-v1 + """ + if repo.dirstate._use_dirstate_v2: + hash_offset = 16 # Four 32-bit 
integers before this field + hash_len = 20 # 160 bits for SHA-1 + data_filename = repo.dirstate._map.docket.data_filename() + with repo.vfs(data_filename) as f: + hash_bytes = f.read(hash_offset + hash_len)[-hash_len:] + ui.write(binascii.hexlify(hash_bytes) + b'\n') + + +@command( b'debugdiscovery', [ (b'', b'old', None, _(b'use old-style discovery')), diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py --- a/mercurial/dirstate.py +++ b/mercurial/dirstate.py @@ -906,13 +906,13 @@ class dirstate(object): tr.addfilegenerator( b'dirstate', (self._filename,), - self._writedirstate, + lambda f: self._writedirstate(tr, f), location=b'plain', ) return st = self._opener(filename, b"w", atomictemp=True, checkambig=True) - self._writedirstate(st) + self._writedirstate(tr, st) def addparentchangecallback(self, category, callback): """add a callback to be called when the wd parents are changed @@ -925,7 +925,7 @@ class dirstate(object): """ self._plchangecallbacks[category] = callback - def _writedirstate(self, st): + def _writedirstate(self, tr, st): # notify callbacks about parents change if self._origpl is not None and self._origpl != self._pl: for c, callback in sorted( @@ -955,7 +955,7 @@ class dirstate(object): now = end # trust our estimate that the end is near now break - self._map.write(st, now) + self._map.write(tr, st, now) self._lastnormaltime = 0 self._dirty = False @@ -1580,7 +1580,8 @@ class dirstate(object): # output file will be used to create backup of dirstate at this point. 
if self._dirty or not self._opener.exists(filename): self._writedirstate( - self._opener(filename, b"w", atomictemp=True, checkambig=True) + tr, + self._opener(filename, b"w", atomictemp=True, checkambig=True), ) if tr: @@ -1590,7 +1591,7 @@ class dirstate(object): tr.addfilegenerator( b'dirstate', (self._filename,), - self._writedirstate, + lambda f: self._writedirstate(tr, f), location=b'plain', ) diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py --- a/mercurial/dirstatemap.py +++ b/mercurial/dirstatemap.py @@ -18,6 +18,10 @@ from . import ( util, ) +from .dirstateutils import ( + docket as docketmod, +) + parsers = policy.importmod('parsers') rustmod = policy.importrust('dirstate') @@ -416,7 +420,7 @@ class dirstatemap(object): self.__getitem__ = self._map.__getitem__ self.get = self._map.get - def write(self, st, now): + def write(self, _tr, st, now): st.write( parsers.pack_dirstate(self._map, self.copymap, self.parents(), now) ) @@ -466,6 +470,7 @@ if rustmod is not None: self._nodelen = 20 # Also update Rust code when changing this! 
self._parents = None self._dirtyparents = False + self._docket = None # for consistent view between _pl() and _read() invocations self._pendingmode = None @@ -565,6 +570,16 @@ if rustmod is not None: self._pendingmode = mode return fp + def _readdirstatefile(self, size=-1): + try: + with self._opendirstatefile() as fp: + return fp.read(size) + except IOError as err: + if err.errno != errno.ENOENT: + raise + # File doesn't exist, so the current state is empty + return b'' + def setparents(self, p1, p2): self._parents = (p1, p2) self._dirtyparents = True @@ -572,39 +587,40 @@ if rustmod is not None: def parents(self): if not self._parents: if self._use_dirstate_v2: - offset = len(rustmod.V2_FORMAT_MARKER) + self._parents = self.docket.parents else: - offset = 0 - read_len = offset + self._nodelen * 2 - try: - fp = self._opendirstatefile() - st = fp.read(read_len) - fp.close() - except IOError as err: - if err.errno != errno.ENOENT: - raise - # File doesn't exist, so the current state is empty - st = b'' - - l = len(st) - if l == read_len: - st = st[offset:] - self._parents = ( - st[: self._nodelen], - st[self._nodelen : 2 * self._nodelen], - ) - elif l == 0: - self._parents = ( - self._nodeconstants.nullid, - self._nodeconstants.nullid, - ) - else: - raise error.Abort( - _(b'working directory state appears damaged!') - ) + read_len = self._nodelen * 2 + st = self._readdirstatefile(read_len) + l = len(st) + if l == read_len: + self._parents = ( + st[: self._nodelen], + st[self._nodelen : 2 * self._nodelen], + ) + elif l == 0: + self._parents = ( + self._nodeconstants.nullid, + self._nodeconstants.nullid, + ) + else: + raise error.Abort( + _(b'working directory state appears damaged!') + ) return self._parents + @property + def docket(self): + if not self._docket: + if not self._use_dirstate_v2: + raise error.ProgrammingError( + b'dirstate only has a docket in v2 format' + ) + self._docket = docketmod.DirstateDocket.parse( + self._readdirstatefile(), 
self._nodeconstants + ) + return self._docket + @propertycache def _rustmap(self): """ @@ -615,20 +631,18 @@ if rustmod is not None: self._opener.join(self._filename) ) - try: - fp = self._opendirstatefile() - try: - st = fp.read() - finally: - fp.close() - except IOError as err: - if err.errno != errno.ENOENT: - raise - st = b'' - - self._rustmap, parents = rustmod.DirstateMap.new( - self._use_dirstate_tree, self._use_dirstate_v2, st - ) + if self._use_dirstate_v2: + if self.docket.uuid: + # TODO: use mmap when possible + data = self._opener.read(self.docket.data_filename()) + else: + data = b'' + self._rustmap = rustmod.DirstateMap.new_v2(data) + parents = self.docket.parents + else: + self._rustmap, parents = rustmod.DirstateMap.new_v1( + self._use_dirstate_tree, self._readdirstatefile() + ) if parents and not self._dirtyparents: self.setparents(*parents) @@ -638,13 +652,29 @@ if rustmod is not None: self.get = self._rustmap.get return self._rustmap - def write(self, st, now): - parents = self.parents() - packed = self._rustmap.write( - self._use_dirstate_v2, parents[0], parents[1], now - ) - st.write(packed) - st.close() + def write(self, tr, st, now): + if self._use_dirstate_v2: + packed = self._rustmap.write_v2(now) + old_docket = self.docket + new_docket = docketmod.DirstateDocket.with_new_uuid( + self.parents(), len(packed) + ) + self._opener.write(new_docket.data_filename(), packed) + # Write the new docket after the new data file has been + # written. Because `st` was opened with `atomictemp=True`, + # the actual `.hg/dirstate` file is only affected on close. + st.write(new_docket.serialize()) + st.close() + # Remove the old data file after the new docket pointing to + # the new data file was written. 
+ if old_docket.uuid: + self._opener.unlink(old_docket.data_filename()) + self._docket = new_docket + else: + p1, p2 = self.parents() + packed = self._rustmap.write_v1(p1, p2, now) + st.write(packed) + st.close() self._dirtyparents = False @propertycache diff --git a/mercurial/dirstateutils/__init__.py b/mercurial/dirstateutils/__init__.py new file mode 100644 diff --git a/mercurial/dirstateutils/docket.py b/mercurial/dirstateutils/docket.py new file mode 100644 --- /dev/null +++ b/mercurial/dirstateutils/docket.py @@ -0,0 +1,62 @@ +# docket.py - docket file for dirstate-v2 +# +# Copyright Mercurial Contributors +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +import struct + +from ..revlogutils import docket as docket_mod + + +V2_FORMAT_MARKER = b"dirstate-v2\n" + +# * 12 bytes: format marker +# * 32 bytes: node ID of the working directory's first parent +# * 32 bytes: node ID of the working directory's second parent +# * 4 bytes: big-endian used size of the data file +# * 1 byte: length of the data file's UUID +# * variable: data file's UUID +# +# Node IDs are null-padded if shorter than 32 bytes. 
+# A data file shorter than the specified used size is corrupted (truncated) +HEADER = struct.Struct(">{}s32s32sLB".format(len(V2_FORMAT_MARKER))) + + +class DirstateDocket(object): + data_filename_pattern = b'dirstate.%s.d' + + def __init__(self, parents, data_size, uuid): + self.parents = parents + self.data_size = data_size + self.uuid = uuid + + @classmethod + def with_new_uuid(cls, parents, data): + return cls(parents, data, docket_mod.make_uid()) + + @classmethod + def parse(cls, data, nodeconstants): + if not data: + parents = (nodeconstants.nullid, nodeconstants.nullid) + return cls(parents, 0, None) + marker, p1, p2, data_size, uuid_size = HEADER.unpack_from(data) + if marker != V2_FORMAT_MARKER: + raise ValueError("expected dirstate-v2 marker") + uuid = data[HEADER.size : HEADER.size + uuid_size] + p1 = p1[: nodeconstants.nodelen] + p2 = p2[: nodeconstants.nodelen] + return cls((p1, p2), data_size, uuid) + + def serialize(self): + p1, p2 = self.parents + header = HEADER.pack( + V2_FORMAT_MARKER, p1, p2, self.data_size, len(self.uuid) + ) + return header + self.uuid + + def data_filename(self): + return self.data_filename_pattern % self.uuid diff --git a/mercurial/upgrade_utils/engine.py b/mercurial/upgrade_utils/engine.py --- a/mercurial/upgrade_utils/engine.py +++ b/mercurial/upgrade_utils/engine.py @@ -643,6 +643,7 @@ def upgrade_dirstate(ui, srcrepo, upgrad srcrepo.dirstate._use_dirstate_v2 = new == b'v2' srcrepo.dirstate._map._use_dirstate_v2 = srcrepo.dirstate._use_dirstate_v2 srcrepo.dirstate._dirty = True + srcrepo.vfs.unlink(b'dirstate') srcrepo.dirstate.write(None) scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements) diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -410,9 +410,7 @@ impl<'on_disk> DirstateMap<'on_disk> { } #[timed] - pub fn new_v2( - on_disk: 
&'on_disk [u8], - ) -> Result<(Self, Option), DirstateError> { + pub fn new_v2(on_disk: &'on_disk [u8]) -> Result { Ok(on_disk::read(on_disk)?) } @@ -1039,11 +1037,7 @@ impl<'on_disk> super::dispatch::Dirstate } #[timed] - fn pack_v2( - &mut self, - parents: DirstateParents, - now: Timestamp, - ) -> Result, DirstateError> { + fn pack_v2(&mut self, now: Timestamp) -> Result, DirstateError> { // TODO: how do we want to handle this in 2038? let now: i32 = now.0.try_into().expect("time overflow"); let mut paths = Vec::new(); @@ -1062,7 +1056,7 @@ impl<'on_disk> super::dispatch::Dirstate self.clear_known_ambiguous_mtimes(&paths)?; - on_disk::write(self, parents) + on_disk::write(self) } fn status<'a>( diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs --- a/rust/hg-core/src/dirstate_tree/dispatch.rs +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs @@ -183,11 +183,7 @@ pub trait DirstateMapMethods { /// format. /// /// Note: this is only supported by the tree dirstate map. - fn pack_v2( - &mut self, - parents: DirstateParents, - now: Timestamp, - ) -> Result, DirstateError>; + fn pack_v2(&mut self, now: Timestamp) -> Result, DirstateError>; /// Run the status algorithm. 
/// @@ -387,11 +383,7 @@ impl DirstateMapMethods for DirstateMap self.pack(parents, now) } - fn pack_v2( - &mut self, - _parents: DirstateParents, - _now: Timestamp, - ) -> Result, DirstateError> { + fn pack_v2(&mut self, _now: Timestamp) -> Result, DirstateError> { panic!( "should have used dirstate_tree::DirstateMap to use the v2 format" ) diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -19,6 +19,7 @@ use crate::DirstateParents; use crate::EntryState; use bytes_cast::unaligned::{I32Be, I64Be, U32Be}; use bytes_cast::BytesCast; +use format_bytes::format_bytes; use std::borrow::Cow; use std::convert::TryFrom; use std::time::{Duration, SystemTime, UNIX_EPOCH}; @@ -28,18 +29,34 @@ use std::time::{Duration, SystemTime, UN /// `.hg/requires` already governs which format should be used. pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n"; +/// Keep space for 256-bit hashes +const STORED_NODE_ID_BYTES: usize = 32; + +/// … even though only 160 bits are used for now, with SHA-1 +const USED_NODE_ID_BYTES: usize = 20; + pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; +// Must match `HEADER` in `mercurial/dirstateutils/docket.py` +#[derive(BytesCast)] +#[repr(C)] +struct DocketHeader { + marker: [u8; V2_FORMAT_MARKER.len()], + parent_1: [u8; STORED_NODE_ID_BYTES], + parent_2: [u8; STORED_NODE_ID_BYTES], + data_size: Size, + uuid_size: u8, +} + +pub struct Docket<'on_disk> { + header: &'on_disk DocketHeader, + uuid: &'on_disk [u8], +} + #[derive(BytesCast)] #[repr(C)] struct Header { - marker: [u8; V2_FORMAT_MARKER.len()], - - /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this - /// `parents` field being at this offset, immediately after `marker`. 
- parents: DirstateParents, - root: ChildNodes, nodes_with_entry_count: Size, nodes_with_copy_source_count: Size, @@ -172,7 +189,8 @@ type OptPathSlice = Slice; /// Make sure that size-affecting changes are made knowingly fn _static_assert_size_of() { - let _ = std::mem::transmute::; + let _ = std::mem::transmute::; + let _ = std::mem::transmute::; let _ = std::mem::transmute::; } @@ -194,11 +212,31 @@ impl From for crat } } -fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> { - let (header, _) = - Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; - if header.marker == *V2_FORMAT_MARKER { - Ok(header) +impl<'on_disk> Docket<'on_disk> { + pub fn parents(&self) -> DirstateParents { + use crate::Node; + let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES]) + .unwrap() + .clone(); + let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES]) + .unwrap() + .clone(); + DirstateParents { p1, p2 } + } + + pub fn data_filename(&self) -> String { + String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap() + } +} + +pub fn read_docket( + on_disk: &[u8], +) -> Result, DirstateV2ParseError> { + let (header, uuid) = + DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; + let uuid_size = header.uuid_size as usize; + if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { + Ok(Docket { header, uuid }) } else { Err(DirstateV2ParseError) } @@ -206,14 +244,12 @@ fn read_header(on_disk: &[u8]) -> Result pub(super) fn read<'on_disk>( on_disk: &'on_disk [u8], -) -> Result< - (DirstateMap<'on_disk>, Option), - DirstateV2ParseError, -> { +) -> Result, DirstateV2ParseError> { if on_disk.is_empty() { - return Ok((DirstateMap::empty(on_disk), None)); + return Ok(DirstateMap::empty(on_disk)); } - let header = read_header(on_disk)?; + let (header, _) = + Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; let dirstate_map = DirstateMap { on_disk, root: 
dirstate_map::ChildNodes::OnDisk(read_slice::( @@ -226,8 +262,7 @@ pub(super) fn read<'on_disk>( .get(), ignore_patterns_hash: header.ignore_patterns_hash, }; - let parents = Some(header.parents.clone()); - Ok((dirstate_map, parents)) + Ok(dirstate_map) } impl Node { @@ -447,17 +482,12 @@ where .ok_or_else(|| DirstateV2ParseError) } -pub(crate) fn parse_dirstate_parents( - on_disk: &[u8], -) -> Result<&DirstateParents, HgError> { - Ok(&read_header(on_disk)?.parents) -} - pub(crate) fn for_each_tracked_path<'on_disk>( on_disk: &'on_disk [u8], mut f: impl FnMut(&'on_disk HgPath), ) -> Result<(), DirstateV2ParseError> { - let header = read_header(on_disk)?; + let (header, _) = + Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; fn recur<'on_disk>( on_disk: &'on_disk [u8], nodes: Slice, @@ -478,7 +508,6 @@ pub(crate) fn for_each_tracked_path<'on_ pub(super) fn write( dirstate_map: &mut DirstateMap, - parents: DirstateParents, ) -> Result, DirstateError> { let header_len = std::mem::size_of::
(); @@ -497,8 +526,6 @@ pub(super) fn write( write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?; let header = Header { - marker: *V2_FORMAT_MARKER, - parents: parents, root, nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), nodes_with_copy_source_count: dirstate_map diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs --- a/rust/hg-core/src/operations/list_tracked_files.rs +++ b/rust/hg-core/src/operations/list_tracked_files.rs @@ -6,7 +6,7 @@ // GNU General Public License version 2 or any later version. use crate::dirstate::parsers::parse_dirstate_entries; -use crate::dirstate_tree::on_disk::for_each_tracked_path; +use crate::dirstate_tree::on_disk::{for_each_tracked_path, read_docket}; use crate::errors::HgError; use crate::repo::Repo; use crate::revlog::changelog::Changelog; @@ -27,8 +27,13 @@ pub struct Dirstate { impl Dirstate { pub fn new(repo: &Repo) -> Result { + let mut content = repo.hg_vfs().read("dirstate")?; + if repo.has_dirstate_v2() { + let docket = read_docket(&content)?; + content = repo.hg_vfs().read(docket.data_filename())?; + } Ok(Self { - content: repo.hg_vfs().read("dirstate")?, + content, dirstate_v2: repo.has_dirstate_v2(), }) } diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs +++ b/rust/hg-core/src/repo.rs @@ -241,11 +241,12 @@ impl Repo { return Ok(crate::dirstate::DirstateParents::NULL); } let parents = if self.has_dirstate_v2() { - crate::dirstate_tree::on_disk::parse_dirstate_parents(&dirstate)? + crate::dirstate_tree::on_disk::read_docket(&dirstate)?.parents() } else { crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? 
+ .clone() }; - Ok(parents.clone()) + Ok(parents) } } diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -57,17 +57,15 @@ py_class!(pub class DirstateMap |py| { /// Returns a `(dirstate_map, parents)` tuple @staticmethod - def new( + def new_v1( use_dirstate_tree: bool, - use_dirstate_v2: bool, on_disk: PyBytes, ) -> PyResult { let dirstate_error = |e: DirstateError| { PyErr::new::(py, format!("Dirstate error: {:?}", e)) }; - let (inner, parents) = if use_dirstate_tree || use_dirstate_v2 { - let (map, parents) = - OwningDirstateMap::new(py, on_disk, use_dirstate_v2) + let (inner, parents) = if use_dirstate_tree { + let (map, parents) = OwningDirstateMap::new_v1(py, on_disk) .map_err(dirstate_error)?; (Box::new(map) as _, parents) } else { @@ -81,6 +79,20 @@ py_class!(pub class DirstateMap |py| { Ok((map, parents).to_py_object(py).into_object()) } + /// Returns a DirstateMap + @staticmethod + def new_v2( + on_disk: PyBytes, + ) -> PyResult { + let dirstate_error = |e: DirstateError| { + PyErr::new::(py, format!("Dirstate error: {:?}", e)) + }; + let inner = OwningDirstateMap::new_v2(py, on_disk) + .map_err(dirstate_error)?; + let map = Self::create_instance(py, Box::new(inner))?; + Ok(map.into_object()) + } + def clear(&self) -> PyResult { self.inner(py).borrow_mut().clear(); Ok(py.None()) @@ -304,25 +316,37 @@ py_class!(pub class DirstateMap |py| { .to_py_object(py)) } - def write( + def write_v1( &self, - use_dirstate_v2: bool, p1: PyObject, p2: PyObject, now: PyObject ) -> PyResult { let now = Timestamp(now.extract(py)?); + + let mut inner = self.inner(py).borrow_mut(); let parents = DirstateParents { p1: extract_node_id(py, &p1)?, p2: extract_node_id(py, &p2)?, }; + let result = inner.pack_v1(parents, now); + match result { + Ok(packed) => Ok(PyBytes::new(py, &packed)), + Err(_) => Err(PyErr::new::( + py, 
+ "Dirstate error".to_string(), + )), + } + } + + def write_v2( + &self, + now: PyObject + ) -> PyResult { + let now = Timestamp(now.extract(py)?); let mut inner = self.inner(py).borrow_mut(); - let result = if use_dirstate_v2 { - inner.pack_v2(parents, now) - } else { - inner.pack_v1(parents, now) - }; + let result = inner.pack_v2(now); match result { Ok(packed) => Ok(PyBytes::new(py, &packed)), Err(_) => Err(PyErr::new::( diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs --- a/rust/hg-cpython/src/dirstate/dispatch.rs +++ b/rust/hg-cpython/src/dirstate/dispatch.rs @@ -124,12 +124,8 @@ impl DirstateMapMethods for OwningDirsta self.get_mut().pack_v1(parents, now) } - fn pack_v2( - &mut self, - parents: DirstateParents, - now: Timestamp, - ) -> Result, DirstateError> { - self.get_mut().pack_v2(parents, now) + fn pack_v2(&mut self, now: Timestamp) -> Result, DirstateError> { + self.get_mut().pack_v2(now) } fn status<'a>( diff --git a/rust/hg-cpython/src/dirstate/owning.rs b/rust/hg-cpython/src/dirstate/owning.rs --- a/rust/hg-cpython/src/dirstate/owning.rs +++ b/rust/hg-cpython/src/dirstate/owning.rs @@ -28,17 +28,12 @@ pub(super) struct OwningDirstateMap { } impl OwningDirstateMap { - pub fn new( + pub fn new_v1( py: Python, on_disk: PyBytes, - use_dirstate_v2: bool, ) -> Result<(Self, Option), DirstateError> { let bytes: &'_ [u8] = on_disk.data(py); - let (map, parents) = if use_dirstate_v2 { - DirstateMap::new_v2(bytes)? - } else { - DirstateMap::new_v1(bytes)? - }; + let (map, parents) = DirstateMap::new_v1(bytes)?; // Like in `bytes` above, this `'_` lifetime parameter borrows from // the bytes buffer owned by `on_disk`. 
@@ -50,6 +45,23 @@ impl OwningDirstateMap { Ok((Self { on_disk, ptr }, parents)) } + pub fn new_v2( + py: Python, + on_disk: PyBytes, + ) -> Result { + let bytes: &'_ [u8] = on_disk.data(py); + let map = DirstateMap::new_v2(bytes)?; + + // Like in `bytes` above, this `'_` lifetime parameter borrows from + // the bytes buffer owned by `on_disk`. + let ptr: *mut DirstateMap<'_> = Box::into_raw(Box::new(map)); + + // Erase the pointed type entirely in order to erase the lifetime. + let ptr: *mut () = ptr.cast(); + + Ok(Self { on_disk, ptr }) + } + pub fn get_mut<'a>(&'a mut self) -> &'a mut DirstateMap<'a> { // SAFETY: We cast the type-erased pointer back to the same type it had // in `new`, except with a different lifetime parameter. This time we diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs --- a/rust/rhg/src/commands/status.rs +++ b/rust/rhg/src/commands/status.rs @@ -10,6 +10,7 @@ use crate::ui::Ui; use clap::{Arg, SubCommand}; use hg; use hg::dirstate_tree::dirstate_map::DirstateMap; +use hg::dirstate_tree::on_disk; use hg::errors::HgResultExt; use hg::errors::IoResultExt; use hg::matchers::AlwaysMatcher; @@ -165,17 +166,33 @@ pub fn run(invocation: &crate::CliInvoca }; let repo = invocation.repo?; - let dirstate_data = - repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?; - let dirstate_data = match &dirstate_data { - Some(mmap) => &**mmap, - None => b"", - }; + let dirstate_data_mmap; let (mut dmap, parents) = if repo.has_dirstate_v2() { - DirstateMap::new_v2(dirstate_data)? + let parents; + let dirstate_data; + if let Some(docket_data) = + repo.hg_vfs().read("dirstate").io_not_found_as_none()? 
+ { + let docket = on_disk::read_docket(&docket_data)?; + parents = Some(docket.parents()); + dirstate_data_mmap = repo + .hg_vfs() + .mmap_open(docket.data_filename()) + .io_not_found_as_none()?; + dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b""); + } else { + parents = None; + dirstate_data = b""; + } + let dmap = DirstateMap::new_v2(dirstate_data)?; + (dmap, parents) } else { + dirstate_data_mmap = + repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?; + let dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b""); DirstateMap::new_v1(dirstate_data)? }; + let options = StatusOptions { // TODO should be provided by the dirstate parsing and // hence be stored on dmap. Using a value that assumes we aren't diff --git a/rust/rhg/src/error.rs b/rust/rhg/src/error.rs --- a/rust/rhg/src/error.rs +++ b/rust/rhg/src/error.rs @@ -3,6 +3,7 @@ use crate::ui::UiError; use crate::NoRepoInCwdError; use format_bytes::format_bytes; use hg::config::{ConfigError, ConfigParseError, ConfigValueParseError}; +use hg::dirstate_tree::on_disk::DirstateV2ParseError; use hg::errors::HgError; use hg::exit_codes; use hg::repo::RepoError; @@ -199,3 +200,9 @@ impl From for CommandErro } } } + +impl From for CommandError { + fn from(error: DirstateV2ParseError) -> Self { + HgError::from(error).into() + } +} diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -1291,6 +1291,7 @@ packages = [ 'mercurial.cext', 'mercurial.cffi', 'mercurial.defaultrc', + 'mercurial.dirstateutils', 'mercurial.helptext', 'mercurial.helptext.internals', 'mercurial.hgweb', diff --git a/tests/fakedirstatewritetime.py b/tests/fakedirstatewritetime.py --- a/tests/fakedirstatewritetime.py +++ b/tests/fakedirstatewritetime.py @@ -67,8 +67,8 @@ def fakewrite(ui, func): # The Rust implementation does not use public parse/pack dirstate # to prevent conversion round-trips orig_dirstatemap_write = dirstatemapmod.dirstatemap.write - wrapper = lambda self, st, now: orig_dirstatemap_write( - 
self, st, fakenow + wrapper = lambda self, tr, st, now: orig_dirstatemap_write( + self, tr, st, fakenow ) dirstatemapmod.dirstatemap.write = wrapper diff --git a/tests/test-completion.t b/tests/test-completion.t --- a/tests/test-completion.t +++ b/tests/test-completion.t @@ -93,6 +93,7 @@ Show debug commands if there are no othe debugdate debugdeltachain debugdirstate + debugdirstateignorepatternshash debugdiscovery debugdownload debugextensions @@ -282,6 +283,7 @@ Show all commands + options debugdata: changelog, manifest, dir debugdate: extended debugdeltachain: changelog, manifest, dir, template + debugdirstateignorepatternshash: debugdirstate: nodates, dates, datesort, dirs debugdiscovery: old, nonheads, rev, seed, local-as-revs, remote-as-revs, ssh, remotecmd, insecure, template debugdownload: output diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -1008,6 +1008,8 @@ Test list of internal help commands dump information about delta chains in a revlog debugdirstate show the contents of the current dirstate + debugdirstateignorepatternshash + show the hash of ignore patterns stored in dirstate if v2, debugdiscovery runs the changeset discovery protocol in isolation debugdownload diff --git a/tests/test-hgignore.t b/tests/test-hgignore.t --- a/tests/test-hgignore.t +++ b/tests/test-hgignore.t @@ -405,20 +405,19 @@ Windows paths are accepted on input #if dirstate-v2 -Check the hash of ignore patterns written in the dirstate at offset -12 + 20 + 20 + 4 + 4 + 4 + 4 = 68 +Check the hash of ignore patterns written in the dirstate $ hg status > /dev/null $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff - >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode()) + $ hg debugdirstateignorepatternshash 6e315b60f15fb5dfa02be00f3e2c8f923051f5ff $ echo rel > 
.hg/testhgignorerel $ hg status > /dev/null $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e - >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode()) + $ hg debugdirstateignorepatternshash dea19cc7119213f24b6b582a4bae7b0cb063e34e #endif