# HG changeset patch # User Pierre-Yves David # Date 2023-05-28 03:52:58 # Node ID 5e60abf811f332190bc8878d920eb08d0803cafd # Parent 8fc10bfd9887e622418550f25e65936b10ca13ea stream-clone: make it the responsability of the store entry to stream content The store entry has more context, this will especially be true when it comes to revlogs. So we move the details of how to retrieve binary content to the StoreEntry. The stream clone code now focus on the protocol bits. diff --git a/mercurial/store.py b/mercurial/store.py --- a/mercurial/store.py +++ b/mercurial/store.py @@ -469,12 +469,36 @@ class StoreFile: def file_size(self, vfs): if self._file_size is None: + if vfs is None: + msg = b"calling vfs-less file_size without prior call: %s" + msg %= self.unencoded_path + raise error.ProgrammingError(msg) try: self._file_size = vfs.stat(self.unencoded_path).st_size except FileNotFoundError: self._file_size = 0 return self._file_size + def get_stream(self, vfs, copies): + """return data "stream" information for this file + + (unencoded_file_path, content_iterator, content_size) + """ + size = self.file_size(None) + + def get_stream(): + actual_path = copies[vfs.join(self.unencoded_path)] + with open(actual_path, 'rb') as fp: + yield None # ready to stream + if size <= 65536: + yield fp.read(size) + else: + yield from util.filechunkiter(fp, limit=size) + + s = get_stream() + next(s) + return (self.unencoded_path, s, size) + @attr.s(slots=True, init=False) class BaseStoreEntry: @@ -485,6 +509,14 @@ class BaseStoreEntry: def files(self) -> List[StoreFile]: raise NotImplementedError + def get_streams(self, vfs, copies=None): + """return a list of data stream associated to files for this entry + + return [(unencoded_file_path, content_iterator, content_size), …] + """ + assert vfs is not None + return [f.get_stream(vfs, copies) for f in self.files()] + @attr.s(slots=True, init=False) class SimpleStoreEntry(BaseStoreEntry): diff --git a/mercurial/streamclone.py b/mercurial/streamclone.py --- a/mercurial/streamclone.py +++ b/mercurial/streamclone.py @@ -11,7 +11,6 @@ import os import struct from .i18n import _ -from .pycompat import open from .interfaces import repository from . import ( bookmarks, @@ -658,36 +657,25 @@ def _emit2(repo, entries): totalbytecount = 0 for src, vfs, e in entries: - for f in e.files(): + for name, stream, size in e.get_streams(vfs, copies=copy): yield src - name = f.unencoded_path yield util.uvarintencode(len(name)) - actual_path = copy[vfs.join(name)] - fp = open(actual_path, b'rb') - size = f.file_size(vfs) + yield util.uvarintencode(size) + yield name bytecount = 0 - try: - yield util.uvarintencode(size) - yield name - if size <= 65536: - chunks = (fp.read(size),) - else: - chunks = util.filechunkiter(fp, limit=size) - for chunk in chunks: - bytecount += len(chunk) - totalbytecount += len(chunk) - progress.update(totalbytecount) - yield chunk - if bytecount != size: - # Would most likely be caused by a race due to `hg - # strip` or a revlog split - msg = _( - b'clone could only read %d bytes from %s, but ' - b'expected %d bytes' - ) - raise error.Abort(msg % (bytecount, name, size)) - finally: - fp.close() + for chunk in stream: + bytecount += len(chunk) + totalbytecount += len(chunk) + progress.update(totalbytecount) + yield chunk + if bytecount != size: + # Would most likely be caused by a race due to `hg + # strip` or a revlog split + msg = _( + b'clone could only read %d bytes from %s, but ' + b'expected %d bytes' + ) + raise error.Abort(msg % (bytecount, name, size)) def _test_sync_point_walk_1(repo):