##// END OF EJS Templates
branching: merge stable into default
branching: merge stable into default

File last commit:

r52808:62238928 default
r52856:5e2f0fec merge default
Show More
rewrite.py
907 lines | 29.9 KiB | text/x-python | PythonLexer
revlog: rewrite `censors.py` to `rewrite.py`...
r48257 # censor code related to censoring revision
# coding: utf8
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2015 Google, Inc <martinvonz@google.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Harbison
typing: add `from __future__ import annotations` to remaining source files...
r52757 from __future__ import annotations
Raphaël Gomès
debugcommands: introduce a debug command to repair repos affected by issue6528...
r48623 import binascii
revlog: rewrite `censors.py` to `rewrite.py`...
r48257 import contextlib
import os
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 import struct
revlog: rewrite `censors.py` to `rewrite.py`...
r48257
from ..node import (
nullrev,
)
from .constants import (
COMP_MODE_PLAIN,
ENTRY_DATA_COMPRESSED_LENGTH,
ENTRY_DATA_COMPRESSION_MODE,
ENTRY_DATA_OFFSET,
ENTRY_DATA_UNCOMPRESSED_LENGTH,
ENTRY_DELTA_BASE,
ENTRY_LINK_REV,
ENTRY_NODE_ID,
ENTRY_PARENT_1,
ENTRY_PARENT_2,
ENTRY_SIDEDATA_COMPRESSED_LENGTH,
ENTRY_SIDEDATA_COMPRESSION_MODE,
ENTRY_SIDEDATA_OFFSET,
issue6528: also filter delta on the fly when applying a changegroup...
r48629 REVIDX_ISCENSORED,
revlog: rewrite `censors.py` to `rewrite.py`...
r48257 REVLOGV0,
REVLOGV1,
)
from ..i18n import _
from .. import (
error,
issue6528: also filter delta on the fly when applying a changegroup...
r48629 mdiff,
revlog: rewrite `censors.py` to `rewrite.py`...
r48257 pycompat,
revlogutils,
util,
)
from ..utils import (
storageutil,
)
from . import (
constants,
deltas,
)
censor: accept multiple revision in a single call...
def v1_censor(rl, tr, censor_nodes, tombstone=b''):
    """Censor the given nodes in a "version 1" revlog.

    Every revision of `rl` is copied into a temporary revlog (postfix
    `tmpcensored`). Revisions listed in `censor_nodes` are replaced by a
    packed tombstone flagged `REVIDX_ISCENSORED`. The temporary files are
    then renamed over the original ones and the in-memory state of `rl` is
    reloaded.

    Raises `error.Abort` when a censored revision (freshly censored or
    already censored in `rl`) is stored as a delta.
    """
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # avoid cycle
    from .. import revlog

    revs_to_censor = {rl.rev(n) for n in censor_nodes}
    packed_tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    replacement = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
        data_config=rl.data_config,
        delta_config=rl.delta_config,
        feature_config=rl.feature_config,
        may_inline=rl._inline,
    )
    # inline splitting will prepare some transaction work that will get
    # confused by the final file move. So if there is a risk of not being
    # inline at the end, we prevent the new revlog to be inline in the first
    # place.
    assert not replacement._inline or rl._inline

    for rev in rl.revs():
        rev_node = rl.node(rev)
        p1, p2 = rl.parents(rev_node)

        if rev in revs_to_censor:
            # store the tombstone in place of the real content
            replacement.addrawrevision(
                packed_tombstone,
                tr,
                rl.linkrev(rev),
                p1,
                p2,
                rev_node,
                constants.REVIDX_ISCENSORED,
            )

            if replacement.deltaparent(rev) != nullrev:
                message = _(
                    b'censored revision stored as delta; cannot censor'
                )
                hint = _(
                    b'censoring of revlogs is not fully implemented;'
                    b' please report this bug'
                )
                raise error.Abort(message, hint=hint)
            continue

        if not rl.iscensored(rev):
            rawtext = rl.rawdata(rev)
        else:
            # already censored revision: copy its stored chunk verbatim
            if rl.deltaparent(rev) != nullrev:
                message = _(
                    b'cannot censor due to censored '
                    b'revision having delta stored'
                )
                raise error.Abort(message)
            rawtext = rl._inner._chunk(rev)

        replacement.addrawrevision(
            rawtext, tr, rl.linkrev(rev), p1, p2, rev_node, rl.flags(rev)
        )

    # back up the files we are about to overwrite
    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(replacement._indexfile, rl._indexfile)
    if not replacement._inline:
        assert not rl._inline
        rl.opener.rename(replacement._datafile, rl._datafile)
    else:
        assert rl._inline

    # drop stale in-memory state and reload from the new files
    rl.clearcaches()
    chunk_cache = rl._loadindex()
    rl._load_inner(chunk_cache)
revlog: rewrite `censors.py` to `rewrite.py`...
r48257
censor: accept multiple revision in a single call...
def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
    """Censor the given nodes in a "version 2" revlog.

    `censor_nodes` is an iterable of node ids; each is resolved to a
    revision number through `revlog.rev` and the resulting set is handed to
    `_rewrite_v2` together with `tombstone`.

    An empty `censor_nodes` is a no-op.
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version
    censor_revs = {revlog.rev(node) for node in censor_nodes}
    if not censor_revs:
        # `_rewrite_v2` starts from `min(censor_revs)`, which raises a bare
        # ValueError on an empty set. With nothing to censor there is
        # nothing to rewrite either.
        return
    _rewrite_v2(revlog, tr, censor_revs, tombstone)


def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """rewrite a revlog to censor some of its content

    General principle

    We create new revlog files (index/data/sidedata) to copy the content of
    the existing data without the censored data.

    We need to recompute new delta for any revision that used the censored
    revision as delta base. As the cumulative size of the new delta may be
    large, we store them in a temporary file until they are stored in their
    final destination.

    All data before the censored data can be blindly copied. The rest needs
    to be copied as we go and the associated index entry needs adjustment.
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    # keep a handle on the current index: `_setup_new_files` reloads the
    # revlog index later on
    old_index = revlog.index
    docket = revlog._docket

    packed_tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # everything strictly before the first censored revision is kept as is
    first_excl_rev = min(censor_revs)
    first_excl_entry = old_index[first_excl_rev]
    index_cutoff = old_index.entry_size * first_excl_rev
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev → (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # we don't need to open the old index file since its content already
        # exists in a usable form in `old_index`.
        with all_files() as open_files:
            (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            ) = open_files

            # write the first censored revision and everything after it
            for rev in range(first_excl_rev, len(old_index)):
                if rev not in censor_revs:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )
                else:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        packed_tombstone,
                    )

    docket.write(transaction=None, stripping=True)
revlog: rewrite `censors.py` to `rewrite.py`...
r48257
censor: extract the part about recomputing delta in a function...
def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Compute new delta for revisions whose delta is based on revision that
    will not survive as is.

    The new delta payloads are appended to `tmp_storage`.

    Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
    where `data_start`/`data_end` are offsets inside `tmp_storage`.
    """
    computer = deltas.deltacomputer(revlog)
    recomputed = {}
    start_rev = min(excluded_revs)
    with revlog.reading():
        for rev in range(start_rev, len(old_index)):
            if rev in excluded_revs:
                # excluded revisions are handled separately, there is no
                # delta to recompute for them.
                continue
            entry = old_index[rev]
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                # the existing delta remains valid
                continue

            # This revision uses an excluded revision as the base of its
            # delta. We need a new delta.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # this revision is empty, we can delta against nullrev
                recomputed[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                continue

            raw = revlog.rawdata(rev)
            info = revlogutils.revisioninfo(
                node=entry[ENTRY_NODE_ID],
                p1=revlog.node(entry[ENTRY_PARENT_1]),
                p2=revlog.node(entry[ENTRY_PARENT_2]),
                btext=[raw],
                textlen=len(raw),
                cachedelta=None,
                flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
            )
            delta_info = computer.finddeltainfo(
                info, excluded_bases=excluded_revs, target_rev=rev
            )
            default_comp = revlog._docket.default_compression_header
            comp_mode, delta_info = deltas.delta_compression(
                default_comp, delta_info
            )
            # using `tell` is a bit lazy, but we are not here for speed
            begin = tmp_storage.tell()
            tmp_storage.write(delta_info.data[1])
            finish = tmp_storage.tell()
            recomputed[rev] = (delta_info.base, begin, finish, comp_mode)
    return recomputed
censor: extract the part about creating and opening new files in a function...
def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """
    Create the new index/data/sidedata files, each pre-filled with the first
    `*_cutoff` bytes of its old counterpart, update the docket end offsets
    accordingly and reload the revlog index.

    return a context manager to open all the relevant files:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The old_index_file is not here because it is accessed through the
    `old_index` object of the caller function.
    """
    docket = revlog._docket
    opener = revlog.opener

    old_index_filepath = opener.join(docket.index_filepath())
    old_data_filepath = opener.join(docket.data_filepath())
    old_sidedata_filepath = opener.join(docket.sidedata_filepath())

    new_index_filepath = opener.join(docket.new_index_file())
    new_data_filepath = opener.join(docket.new_data_file())
    new_sidedata_filepath = opener.join(docket.new_sidedata_file())

    # seed the new files with the part that does not need rewriting
    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    opener.register_file(docket.index_filepath())
    opener.register_file(docket.data_filepath())
    opener.register_file(docket.sidedata_filepath())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # the stack closes every file, in reverse opening order, on exit
        with contextlib.ExitStack() as stack:
            old_data_file = stack.enter_context(open(old_data_filepath, 'rb'))
            old_sidedata_file = stack.enter_context(
                open(old_sidedata_filepath, 'rb')
            )
            new_index_file = stack.enter_context(
                open(new_index_filepath, 'r+b')
            )
            new_data_file = stack.enter_context(open(new_data_filepath, 'r+b'))
            new_sidedata_file = stack.enter_context(
                open(new_sidedata_filepath, 'r+b')
            )

            # position each new file at its end and check that it holds
            # exactly the copied prefix
            new_index_file.seek(0, os.SEEK_END)
            assert new_index_file.tell() == index_cutoff
            new_data_file.seek(0, os.SEEK_END)
            assert new_data_file.tell() == data_cutoff
            new_sidedata_file.seek(0, os.SEEK_END)
            assert new_sidedata_file.tell() == sidedata_cutoff

            yield (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            )

    return all_files_opener
censor: extract the part about writing the other revision in a function...
def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """append a normal revision to the index after the rewritten one(s)

    The revision data comes from `tmp_storage` when `rev` has an entry in
    `rewritten_entries`, and from the old data file otherwise. Sidedata, if
    any, is copied from the old sidedata file. The docket end offsets are
    updated to the new file positions.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]
    # the first index field packs the data offset (high bits) and the flags
    # (low 16 bits)
    flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
    old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16

    if rev in rewritten_entries:
        # use the delta that was recomputed beforehand
        (
            data_delta_base,
            start,
            end,
            d_comp_mode,
        ) = rewritten_entries[rev]
        new_data_size = end - start
        tmp_storage.seek(start)
        new_data = tmp_storage.read(new_data_size)
    else:
        # the stored data is still valid, copy it verbatim
        old_data_file.seek(old_data_offset)
        new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        new_data = old_data_file.read(new_data_size)
        data_delta_base = entry[ENTRY_DELTA_BASE]
        d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]

    # It might be faster to group continuous read/write operation,
    # however, this is censor, an operation that is not focussed
    # around stellar performance. So I have not written this
    # optimisation yet.
    new_data_offset = new_data_file.tell()
    new_data_file.write(new_data)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    new_sidedata_offset = new_sidedata_file.tell()
    if sidedata_size > 0:
        old_sidedata_file.seek(entry[ENTRY_SIDEDATA_OFFSET])
        new_sidedata_file.write(old_sidedata_file.read(sidedata_size))

    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
    sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
    assert data_delta_base <= rev, (data_delta_base, rev)

    new_entry = revlogutils.entry(
        flags=flags,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=data_uncompressed_length,
        data_delta_base=data_delta_base,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=new_sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=d_comp_mode,
        sidedata_compression_mode=sd_com_mode,
    )
    revlog.index.append(new_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
    docket.sidedata_end = new_sidedata_file.tell()
censor: extract the part about writing the censored revision in a function...
r48259
def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """rewrite and append a censored revision

    `tombstone` is written uncompressed to the new data file and a matching
    index entry, flagged `REVIDX_ISCENSORED` and with `rev` itself as delta
    base, is appended. The docket index and data end offsets are updated.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]

    # XXX consider trying the default compression too
    tombstone_size = len(tombstone)
    tombstone_offset = new_data_file.tell()
    new_data_file.write(tombstone)

    # we are not adding any sidedata as they might leak info about the
    # censored version
    censored_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=tombstone_offset,
        data_compressed_length=tombstone_size,
        data_uncompressed_length=tombstone_size,
        data_delta_base=rev,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(censored_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
Raphaël Gomès
debugcommands: introduce a debug command to repair repos affected by issue6528...
r48623
def _get_filename_from_filelog_index(path):
# Drop the extension and the `data/` prefix
path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
if len(path_part) < 2:
msg = _(b"cannot recognize filelog from filename: '%s'")
msg %= path
raise error.Abort(msg)
return path_part[1]
def _filelog_from_filename(repo, path):
    """Build the filelog for `path` on top of `repo.svfs`.

    Stolen from `engine.py`.
    """
    from .. import filelog  # avoid cycle

    return filelog.filelog(repo.svfs, path)


def _write_swapped_parents(repo, rl, rev, offset, fp):
    """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`

    The packed entry is written at position `offset` of `fp`. Raises
    `error.ProgrammingError` when the repository lock is not held.
    """
    from ..pure import parsers  # avoid cycle

    if repo._currentlock(repo._lockref) is None:
        # Let's be paranoid about it
        msg = "repo needs to be locked to rewrite parents"
        raise error.ProgrammingError(msg)

    fields = list(rl.index[rev])
    # positions 5 and 6 of the index entry are the two parent revisions
    fields[5], fields[6] = fields[6], fields[5]
    packed = parsers.IndexObject.index_format.pack(*fields[:8])
    fp.seek(offset)
    fp.write(packed)


def _reorder_filelog_parents(repo, fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
    new version to disk, overwriting the old one with a rename.

    The index is first copied to a temporary `.tmp-parents-fix` file, the
    copy is patched in place, and it is finally renamed over the original
    index. The temporary file is removed if anything fails before the
    rename.

    Raises `error.ProgrammingError` if `fl` is not backed by a version 1
    revlog.
    """
    from ..pure import parsers  # avoid cycle

    ui = repo.ui
    assert len(to_fix) > 0
    rl = fl._revlog
    if rl._format_version != constants.REVLOGV1:
        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
        raise error.ProgrammingError(msg)

    index_file = rl._indexfile
    new_file_path = index_file + b'.tmp-parents-fix'
    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")

    with ui.uninterruptible():
        try:
            util.copyfile(
                rl.opener.join(index_file),
                rl.opener.join(new_file_path),
                checkambig=rl.data_config.check_ambig,
            )

            with rl.opener(new_file_path, mode=b"r+") as fp:
                if rl._inline:
                    # index entries are interleaved with revision data, so
                    # their offsets must be looked up in a parsed index
                    index = parsers.InlinedIndexObject(fp.read())
                    wanted = set(to_fix)
                    for rev in fl.revs():
                        if rev in wanted:
                            offset = index._calculate_index(rev)
                            _write_swapped_parents(repo, rl, rev, offset, fp)
                            ui.write(repaired_msg % (rev, index_file))
                else:
                    # fixed size entries: the offset is a simple product
                    index_format = parsers.IndexObject.index_format
                    for rev in to_fix:
                        offset = rev * index_format.size
                        _write_swapped_parents(repo, rl, rev, offset, fp)
                        ui.write(repaired_msg % (rev, index_file))

            rl.opener.rename(new_file_path, index_file)
            rl.clearcaches()
            rl._loadindex()
        finally:
            # `new_file_path` is relative to the opener: resolve it the same
            # way as for the copy above, otherwise the cleanup targets an
            # unrelated path and the temporary file is left behind on error.
            util.tryunlink(rl.opener.join(new_file_path))
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
def _is_revision_affected(fl, filerev, metadata_cache=None):
    """Tell whether revision `filerev` of filelog `fl` is hit by issue6528.

    Thin wrapper feeding lazy accessors on `fl._revlog` to
    `_is_revision_affected_inner`.
    """

    def full_text():
        return fl._revlog.rawdata(filerev)

    def parent_revs():
        return fl._revlog.parentrevs(filerev)

    return _is_revision_affected_inner(
        full_text, parent_revs, filerev, metadata_cache
    )


def _is_revision_affected_inner(
full_text,
parents_revs,
filerev,
metadata_cache=None,
):
Raphaël Gomès
debugcommands: introduce a debug command to repair repos affected by issue6528...
r48623 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
special meaning compared to the reverse in the context of filelog-based
copytracing. issue6528 exists because new code assumed that parent ordering
didn't matter, so this detects if the revision contains metadata (since
it's only used for filelog-based copytracing) and its parents are in the
"wrong" order."""
try:
issue6528: implement _is_revision_affected using callback...
r48626 raw_text = full_text()
Raphaël Gomès
debugcommands: introduce a debug command to repair repos affected by issue6528...
r48623 except error.CensoredNodeError:
# We don't care about censored nodes as they never carry metadata
return False
Raphaël Gomès
rewrite: fix issue6599...
r49063
# raw text can be a `memoryview`, which doesn't implement `startswith`
has_meta = bytes(raw_text[:2]) == b'\x01\n'
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 if metadata_cache is not None:
metadata_cache[filerev] = has_meta
Raphaël Gomès
debugcommands: introduce a debug command to repair repos affected by issue6528...
r48623 if has_meta:
issue6528: implement _is_revision_affected using callback...
r48626 (p1, p2) = parents_revs()
Raphaël Gomès
debugcommands: introduce a debug command to repair repos affected by issue6528...
r48623 if p1 != nullrev and p2 == nullrev:
return True
return False
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
    """Fast-path check of `filerev` in filelog `fl` for issue6528.

    Wraps the revlog accessors in lazy callables and delegates to
    `_is_revision_affected_fast_inner`.
    """
    rl = fl._revlog

    def is_censored():
        return rl.iscensored(filerev)

    def delta_base():
        return rl.deltaparent(filerev)

    def delta():
        return rl._inner._chunk(filerev)

    def full_text():
        return rl.rawdata(filerev)

    def parent_revs():
        return rl.parentrevs(filerev)

    # This function is used by repair_issue6528, but not by
    # filter_delta_issue6528. As such, we do not want to trust
    # parent revisions of the delta base to decide whether
    # the delta base has metadata.
    return _is_revision_affected_fast_inner(
        is_censored,
        delta_base,
        delta,
        full_text,
        parent_revs,
        None,  # don't trust the parent revisions
        filerev,
        metadata_cache,
    )


def _is_revision_affected_fast_inner(
is_censored,
delta_base,
delta,
full_text,
parent_revs,
Joerg Sonnenberger
revlogutils: teach issue6528 filtering about grandparents...
r52808 deltabase_parentrevs,
issue6528: implement _is_revision_affected_fast using callback...
r48627 filerev,
metadata_cache,
):
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 """Optimization fast-path for `_is_revision_affected`.
`metadata_cache` is a dict of `{rev: has_metadata}` which allows any
revision to check if its base has metadata, saving computation of the full
text, instead looking at the current delta.
This optimization only works if the revisions are looked at in order."""
issue6528: implement _is_revision_affected_fast using callback...
r48627 if is_censored():
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 # Censored revisions don't contain metadata, so they cannot be affected
metadata_cache[filerev] = False
return False
issue6528: implement _is_revision_affected_fast using callback...
r48627 p1, p2 = parent_revs()
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 if p1 == nullrev or p2 != nullrev:
Joerg Sonnenberger
revlogutils: remember known metadata parents for issue6528...
r52807 metadata_cache[filerev] = True
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 return False
issue6528: implement _is_revision_affected_fast using callback...
r48627 delta_parent = delta_base()
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 parent_has_metadata = metadata_cache.get(delta_parent)
if parent_has_metadata is None:
Joerg Sonnenberger
revlogutils: teach issue6528 filtering about grandparents...
r52808 if deltabase_parentrevs is not None:
deltabase_parentrevs = deltabase_parentrevs()
if deltabase_parentrevs == (nullrev, nullrev):
# Need to check the content itself as there is no flag.
parent_has_metadata = None
elif deltabase_parentrevs[0] == nullrev:
# Second parent is !null, assume repository is correct
# and has flagged this file revision as having metadata.
parent_has_metadata = True
elif deltabase_parentrevs[1] == nullrev:
# First parent is !null, so assume it has no metadata.
parent_has_metadata = False
if parent_has_metadata is None:
return _is_revision_affected_inner(
full_text,
parent_revs,
filerev,
metadata_cache,
)
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624
issue6528: implement _is_revision_affected_fast using callback...
r48627 chunk = delta()
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 if not len(chunk):
# No diff for this revision
Joerg Sonnenberger
revlogutils: for issue6528 fix, cache results for null changes
r52805 metadata_cache[filerev] = parent_has_metadata
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 return parent_has_metadata
header_length = 12
if len(chunk) < header_length:
raise error.Abort(_(b"patch cannot be decoded"))
start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
if start < 2: # len(b'\x01\n') == 2
# This delta does *something* to the metadata marker (if any).
# Check it the slow way
issue6528: implement _is_revision_affected_fast using callback...
r48627 is_affected = _is_revision_affected_inner(
full_text,
parent_revs,
filerev,
metadata_cache,
)
Raphaël Gomès
repair: improve performance of detection of revisions affected by issue6528...
r48624 return is_affected
# The diff did not remove or add the metadata header, it's then in the same
# situation as its parent
metadata_cache[filerev] = parent_has_metadata
return parent_has_metadata
Raphaël Gomès
debugcommands: introduce a debug command to repair repos affected by issue6528...
def _from_report(ui, repo, context, from_report, dry_run):
    """
    Fix the revisions given in the `from_report` file, but still checks if the
    revisions are indeed affected to prevent an unfortunate cyclic situation
    where we'd swap well-ordered parents again.

    Each non-empty line of the report is read as comma-separated hex
    filenodes, a space, then the file name.

    See the doc for `debug_fix_issue6528` for the format documentation.
    """
    ui.write(_(b"loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as f:
        for line in f.read().split(b'\n'):
            if not line:
                continue
            filenodes, filename = line.split(b' ', 1)
            fl = _filelog_from_filename(repo, filename)
            candidates = {
                fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
            }

            # only keep the revisions that are confirmed to be affected
            confirmed = set()
            for filerev in candidates:
                if _is_revision_affected(fl, filerev):
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, filename))
                    confirmed.add(filerev)
                else:
                    msg = _(b"revision %s of file '%s' is not affected\n")
                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
                    ui.warn(msg)

            if not confirmed:
                msg = _(b"no affected revisions were found for '%s'\n")
                ui.write(msg % filename)
                continue
            if not dry_run:
                _reorder_filelog_parents(repo, fl, sorted(confirmed))
issue6528: also filter delta on the fly when applying a changegroup...
def filter_delta_issue6528(revlog, deltas_iter):
    """filter incoming deltas to repair issue 6528 on the fly

    Yields every item of `deltas_iter`; items detected as affected are
    yielded with their two parent nodes swapped. Raises `error.LookupError`
    when the delta base or a parent is unknown to `revlog`.
    """
    metadata_cache = {nullrev: False}

    deltacomputer = deltas.deltacomputer(revlog)

    def known_rev(some_node):
        # resolve a node to its revision, refusing unknown nodes
        if not revlog.index.has_node(some_node):
            raise error.LookupError(
                some_node, revlog.radix, _(b'unknown parent')
            )
        return revlog.rev(some_node)

    for rev, d in enumerate(deltas_iter, len(revlog)):
        (
            node,
            p1_node,
            p2_node,
            linknode,
            deltabase,
            delta,
            flags,
            sidedata,
        ) = d

        base_rev = known_rev(deltabase)
        p1_rev = known_rev(p1_node)
        p2_rev = known_rev(p2_node)

        def is_censored():
            return bool(flags & REVIDX_ISCENSORED)

        def delta_base():
            return base_rev

        def current_delta():
            return delta

        def parent_revs():
            return (p1_rev, p2_rev)

        def deltabase_parentrevs():
            return revlog.parentrevs(base_rev)

        def full_text():
            # note: being able to reuse the full text computation in the
            # underlying addrevision would be useful however this is a bit too
            # intrusive for the "quick" issue6528 fix we are writing before
            # the 5.8 release
            textlen = mdiff.patchedsize(revlog.size(base_rev), delta)

            revinfo = revlogutils.revisioninfo(
                node,
                p1_node,
                p2_node,
                [None],
                textlen,
                (base_rev, delta),
                flags,
            )
            return deltacomputer.buildtext(revinfo)

        # all the callables above are only used during this call, so they
        # always see the values of the current iteration
        is_affected = _is_revision_affected_fast_inner(
            is_censored,
            delta_base,
            current_delta,
            full_text,
            parent_revs,
            deltabase_parentrevs,
            rev,
            metadata_cache,
        )
        if is_affected:
            # same item with p1 and p2 swapped
            d = (
                node,
                p2_node,
                p1_node,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            )
        yield d
Raphaël Gomès
debugcommands: add a `--paranoid` option to `debug-repair-issue-6528`...
def repair_issue6528(
    ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
):
    """Look for filelog revisions affected by issue6528 and handle them.

    With `from_report`, the work is delegated to `_from_report`. Otherwise
    every filelog of the store is scanned and each affected revision is
    reported through `ui.warn`. Unless `dry_run` is set, the affected
    revisions are then either written to the `to_report` file (one line per
    file: comma-separated hex nodes, a space, the file name) or repaired in
    place.

    With `paranoid`, the fast detection is cross-checked against the slow
    one and `error.Abort` is raised on disagreement.
    """

    @contextlib.contextmanager
    def context():
        if not (dry_run or to_report):
            with repo.wlock(), repo.lock():
                yield
        else:
            # No need for locking
            yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        files = [
            entry
            for entry in repo.store.data_entries()
            if entry.is_revlog and entry.is_filelog
        ]

        progress = ui.makeprogress(
            _(b"looking for affected revisions"),
            unit=_(b"filelogs"),
            total=len(files),
        )
        found_nothing = True

        for entry in files:
            progress.increment()
            filename = entry.target_id
            fl = _filelog_from_filename(repo, entry.target_id)

            # Set of filerevs (or hex filenodes if `to_report`) that need fixing
            to_fix = set()
            metadata_cache = {nullrev: False}
            for filerev in fl.revs():
                affected = _is_revision_affected_fast(
                    repo, fl, filerev, metadata_cache
                )
                if paranoid:
                    slow = _is_revision_affected(fl, filerev)
                    if slow != affected:
                        msg = _(b"paranoid check failed for '%s' at node %s")
                        node = binascii.hexlify(fl.node(filerev))
                        raise error.Abort(msg % (filename, node))
                if not affected:
                    continue

                msg = b"found affected revision %d for file '%s'\n"
                ui.warn(msg % (filerev, filename))
                found_nothing = False
                if dry_run:
                    continue
                if to_report:
                    to_fix.add(binascii.hexlify(fl.node(filerev)))
                else:
                    to_fix.add(filerev)

            if to_fix:
                to_fix = sorted(to_fix)
                if to_report:
                    report_entries.append((filename, to_fix))
                else:
                    _reorder_filelog_parents(repo, fl, to_fix)

        if found_nothing:
            ui.write(_(b"no affected revisions were found\n"))

        if to_report and report_entries:
            with open(to_report, mode="wb") as f:
                for path, to_fix in report_entries:
                    f.write(b"%s %s\n" % (b",".join(to_fix), path))

        progress.complete()