diff --git a/mercurial/changelog.py b/mercurial/changelog.py --- a/mercurial/changelog.py +++ b/mercurial/changelog.py @@ -481,7 +481,7 @@ class changelog(revlog.revlog): self._delaybuf = None self._divert = False # split when we're done - self._enforceinlinesize(tr) + self._enforceinlinesize(tr, side_write=False) def _writepending(self, tr): """create a file containing the unfinalized state for @@ -512,9 +512,9 @@ class changelog(revlog.revlog): return False - def _enforceinlinesize(self, tr): + def _enforceinlinesize(self, tr, side_write=True): if not self._delayed: - revlog.revlog._enforceinlinesize(self, tr) + revlog.revlog._enforceinlinesize(self, tr, side_write=side_write) def read(self, nodeorrev): """Obtain data from a parsed changelog revision. diff --git a/mercurial/filelog.py b/mercurial/filelog.py --- a/mercurial/filelog.py +++ b/mercurial/filelog.py @@ -25,7 +25,7 @@ from .revlogutils import ( @interfaceutil.implementer(repository.ifilestorage) class filelog: - def __init__(self, opener, path): + def __init__(self, opener, path, try_split=False): self._revlog = revlog.revlog( opener, # XXX should use the unencoded path @@ -33,6 +33,7 @@ class filelog: radix=b'/'.join((b'data', path)), censorable=True, canonical_parent_order=False, # see comment in revlog.py + try_split=try_split, ) # Full name of the user visible file, relative to the repository root. # Used by LFS. 
@@ -256,8 +257,8 @@ class filelog: class narrowfilelog(filelog): """Filelog variation to be used with narrow stores.""" - def __init__(self, opener, path, narrowmatch): - super(narrowfilelog, self).__init__(opener, path) + def __init__(self, opener, path, narrowmatch, try_split=False): + super(narrowfilelog, self).__init__(opener, path, try_split=try_split) self._narrowmatch = narrowmatch def renamed(self, node): diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -1240,7 +1240,12 @@ class revlogfilestorage: if path.startswith(b'/'): path = path[1:] - return filelog.filelog(self.svfs, path) + try_split = ( + self.currenttransaction() is not None + or txnutil.mayhavepending(self.root) + ) + + return filelog.filelog(self.svfs, path, try_split=try_split) @interfaceutil.implementer(repository.ilocalrepositoryfilestorage) @@ -1251,7 +1256,13 @@ class revlognarrowfilestorage: if path.startswith(b'/'): path = path[1:] - return filelog.narrowfilelog(self.svfs, path, self._storenarrowmatch) + try_split = ( + self.currenttransaction() is not None + or txnutil.mayhavepending(self.root) + ) + return filelog.narrowfilelog( + self.svfs, path, self._storenarrowmatch, try_split=try_split + ) def makefilestorage(requirements, features, **kwargs): diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -302,6 +302,7 @@ class revlog: persistentnodemap=False, concurrencychecker=None, trypending=False, + try_split=False, canonical_parent_order=True, ): """ @@ -328,6 +329,7 @@ class revlog: self._nodemap_file = None self.postfix = postfix self._trypending = trypending + self._try_split = try_split self.opener = opener if persistentnodemap: self._nodemap_file = nodemaputil.get_nodemap_file(self) @@ -511,6 +513,8 @@ class revlog: entry_point = b'%s.i.%s' % (self.radix, self.postfix) elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix): entry_point = 
b'%s.i.a' % self.radix + elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix): + entry_point = b'%s.i.s' % self.radix else: entry_point = b'%s.i' % self.radix @@ -2015,7 +2019,7 @@ class revlog: raise error.CensoredNodeError(self.display_id, node, text) raise - def _enforceinlinesize(self, tr): + def _enforceinlinesize(self, tr, side_write=True): """Check if the revlog is too big for inline and convert if so. This should be called after revisions are added to the revlog. If the @@ -2032,7 +2036,8 @@ class revlog: raise error.RevlogError( _(b"%s not found in the transaction") % self._indexfile ) - trindex = None + if troffset: + tr.addbackup(self._indexfile, for_offset=True) tr.add(self._datafile, 0) existing_handles = False @@ -2048,6 +2053,29 @@ class revlog: # No need to deal with sidedata writing handle as it is only # relevant with revlog-v2 which is never inline, not reaching # this code + if side_write: + old_index_file_path = self._indexfile + new_index_file_path = self._indexfile + b'.s' + opener = self.opener + + fncache = getattr(opener, 'fncache', None) + if fncache is not None: + fncache.addignore(new_index_file_path) + + # the "split" index replace the real index when the transaction is finalized + def finalize_callback(tr): + opener.rename( + new_index_file_path, + old_index_file_path, + checkambig=True, + ) + + tr.registertmp(new_index_file_path) + if self.target[1] is not None: + finalize_id = b'000-revlog-split-%d-%s' % self.target + else: + finalize_id = b'000-revlog-split-%d' % self.target[0] + tr.addfinalize(finalize_id, finalize_callback) new_dfh = self._datafp(b'w+') new_dfh.truncate(0) # drop any potentially existing data @@ -2055,17 +2083,10 @@ class revlog: with self._indexfp() as read_ifh: for r in self: new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1]) - if ( - trindex is None - and troffset - <= self.start(r) + r * self.index.entry_size - ): - trindex = r new_dfh.flush() - if trindex is None: - trindex = 0 - + 
if side_write: + self._indexfile = new_index_file_path with self.__index_new_fp() as fp: self._format_flags &= ~FLAG_INLINE_DATA self._inline = False @@ -2079,16 +2100,9 @@ class revlog: if self._docket is not None: self._docket.index_end = fp.tell() - # There is a small transactional race here. If the rename of - # the index fails, we should remove the datafile. It is more - # important to ensure that the data file is not truncated - # when the index is replaced as otherwise data is lost. - tr.replace(self._datafile, self.start(trindex)) - - # the temp file replace the real index when we exit the context - # manager - - tr.replace(self._indexfile, trindex * self.index.entry_size) + # If we don't use side-write, the temp file replace the real + # index when we exit the context manager + nodemaputil.setup_persistent_nodemap(tr, self) self._segmentfile = randomaccessfile.randomaccessfile( self.opener, diff --git a/tests/test-transaction-rollback-on-revlog-split.t b/tests/test-transaction-rollback-on-revlog-split.t --- a/tests/test-transaction-rollback-on-revlog-split.t +++ b/tests/test-transaction-rollback-on-revlog-split.t @@ -9,12 +9,12 @@ Helper extension to intercept renames an > from mercurial import extensions, util > > def extsetup(ui): - > def close(orig, *args, **kwargs): - > path = util.normpath(args[0]._atomictempfile__name) - > if path.endswith(b'/.hg/store/data/file.i'): + > def rename(orig, src, dest, *args, **kwargs): + > path = util.normpath(dest) + > if path.endswith(b'data/file.i'): > os.kill(os.getpid(), signal.SIGKILL) - > return orig(*args, **kwargs) - > extensions.wrapfunction(util.atomictempfile, 'close', close) + > return orig(src, dest, *args, **kwargs) + > extensions.wrapfunction(util, 'rename', rename) > EOF $ cat > $TESTTMP/intercept_after_rename.py << EOF @@ -30,6 +30,14 @@ Helper extension to intercept renames an > os.kill(os.getpid(), signal.SIGKILL) > return r > extensions.wrapfunction(util.atomictempfile, 'close', close) + > def 
extsetup(ui): + > def rename(orig, src, dest, *args, **kwargs): + > path = util.normpath(dest) + > r = orig(src, dest, *args, **kwargs) + > if path.endswith(b'data/file.i'): + > os.kill(os.getpid(), signal.SIGKILL) + > return r + > extensions.wrapfunction(util, 'rename', rename) > EOF $ cat > $TESTTMP/killme.py << EOF @@ -75,7 +83,7 @@ setup a repository for tests $ printf '%20d' '1' > file $ hg commit -Aqmc $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1 - $ hg commit -AqmD + $ hg commit -AqmD --traceback Reference size: $ f -s file @@ -127,32 +135,33 @@ Reference size: #endif -The revlog have been split on disk +The inline revlog still exists, but a split version exists next to it $ f -s .hg/store/data/file* .hg/store/data/file.d: size=132139 - .hg/store/data/file.i: size=256 + .hg/store/data/file.i: size=132395 + .hg/store/data/file.i.s: size=256 - $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1 - data/file.i 128 The first file.i entry should match the "Reference size" above. The first file.d entry is the temporary record during the split, -The second entry after the split happened. The sum of the second file.d -and the second file.i entry should match the first file.i entry. +A "temporary file" entry exists for the split index. 
$ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file data/file.i 1174 data/file.d 0 - data/file.d 1046 - data/file.i 128 + $ cat .hg/store/journal.backupfiles | tr -s '\000' ' ' | tr -s '\00' ' '| grep data/file + data/file.i data/journal.backup.file.i 0 + data/file.i.s 0 + +recover is rolling the split back, the fncache is still valid + $ hg recover rolling back interrupted transaction (verify step skipped, run `hg verify` to check your repository content) $ f -s .hg/store/data/file* - .hg/store/data/file.d: size=1046 - .hg/store/data/file.i: size=128 + .hg/store/data/file.i: size=1174 $ hg tip changeset: 1:cfa8d6e60429 tag: tip @@ -161,12 +170,8 @@ and the second file.i entry should match summary: b $ hg verify -q - warning: revlog 'data/file.d' not in fncache! - 1 warnings encountered! - hint: run "hg debugrebuildfncache" to recover from corrupt fncache $ hg debugrebuildfncache --only-data - adding data/file.d - 1 items added, 0 removed from fncache + fncache already up to date $ hg verify -q $ cd .. 
@@ -189,36 +194,43 @@ Reference size: $ cat > .hg/hgrc <<EOF > [extensions] > intercept_rename = $TESTTMP/intercept_before_rename.py - > [hooks] - > pretxnchangegroup = python:$TESTTMP/killme.py:killme > EOF #if chg $ hg pull ../troffset-computation pulling from ../troffset-computation + searching for changes + adding changesets + adding manifests + adding file changes [255] #else $ hg pull ../troffset-computation pulling from ../troffset-computation + searching for changes + adding changesets + adding manifests + adding file changes Killed [137] #endif -The data file is created, but the revlog is still inline +The inline revlog still exists, but a split version exists next to it $ f -s .hg/store/data/file* .hg/store/data/file.d: size=132139 .hg/store/data/file.i: size=132395 + .hg/store/data/file.i.s: size=256 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file data/file.i 1174 data/file.d 0 - data/file.d 1046 + +recover is rolling the split back, the fncache is still valid $ hg recover rolling back interrupted transaction (verify step skipped, run `hg verify` to check your repository content) $ f -s .hg/store/data/file* - .hg/store/data/file.d: size=1046 .hg/store/data/file.i: size=1174 $ hg tip changeset: 1:cfa8d6e60429 @@ -235,8 +247,6 @@ Test a hard crash right after the index Now retry the procedure but intercept the rename of the index. 
-Things get corrupted /o\ - $ hg clone --quiet --rev 1 troffset-computation troffset-computation-crash-after-rename $ cd troffset-computation-crash-after-rename @@ -249,21 +259,27 @@ Reference size: $ cat > .hg/hgrc <<EOF > [extensions] > intercept_rename = $TESTTMP/intercept_after_rename.py - > [hooks] - > pretxnchangegroup = python:$TESTTMP/killme.py:killme > EOF #if chg $ hg pull ../troffset-computation pulling from ../troffset-computation + searching for changes + adding changesets + adding manifests + adding file changes [255] #else $ hg pull ../troffset-computation pulling from ../troffset-computation + searching for changes + adding changesets + adding manifests + adding file changes Killed [137] #endif -the revlog has been split on disk +The inline revlog was overwritten on disk $ f -s .hg/store/data/file* .hg/store/data/file.d: size=132139 @@ -272,16 +288,14 @@ the revlog has been split on disk $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file data/file.i 1174 data/file.d 0 - data/file.d 1046 + +recover is rolling the split back, the fncache is still valid $ hg recover rolling back interrupted transaction - abort: attempted to truncate data/file.i to 1174 bytes, but it was already 256 bytes - - [255] + (verify step skipped, run `hg verify` to check your repository content) $ f -s .hg/store/data/file* - .hg/store/data/file.d: size=1046 - .hg/store/data/file.i: size=256 + .hg/store/data/file.i: size=1174 $ hg tip changeset: 1:cfa8d6e60429 tag: tip @@ -290,23 +304,6 @@ the revlog has been split on disk summary: b $ hg verify -q - abandoned transaction found - run hg recover - warning: revlog 'data/file.d' not in fncache! 
- file@0: data length off by -131093 bytes - file@2: unpacking fa1120531cc1: partial read of revlog data/file.d; expected 21 bytes from offset 1046, got 0 - file@3: unpacking a631378adaa3: partial read of revlog data/file.d; expected 131072 bytes from offset 1067, got -21 - file@?: rev 2 points to nonexistent changeset 2 - (expected ) - file@?: fa1120531cc1 not in manifests - file@?: rev 3 points to nonexistent changeset 3 - (expected ) - file@?: a631378adaa3 not in manifests - not checking dirstate because of previous errors - 3 warnings encountered! - hint: run "hg debugrebuildfncache" to recover from corrupt fncache - 7 integrity errors encountered! - (first damaged changeset appears to be 0) - [1] $ cd .. Have the transaction rollback itself without any hard crash @@ -332,11 +329,12 @@ Repeat the original test but let hg roll abort: pretxnchangegroup hook exited with status 1 [40] -File are still split on disk, with the expected size. +The split was rolled back $ f -s .hg/store/data/file* - .hg/store/data/file.d: size=1046 - .hg/store/data/file.i: size=128 + .hg/store/data/file.d: size=0 + .hg/store/data/file.i: size=1174 + $ hg tip changeset: 1:cfa8d6e60429 @@ -346,9 +344,6 @@ File are still split on disk, with the e summary: b $ hg verify -q - warning: revlog 'data/file.d' not in fncache! - 1 warnings encountered! - hint: run "hg debugrebuildfncache" to recover from corrupt fncache $ cd .. Read race