rewrite.py
909 lines
| 30.0 KiB
| text/x-python
|
PythonLexer
r48257 | # censor code related to censoring revision | |||
# coding: utf8 | ||||
# | ||||
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net> | ||||
# Copyright 2015 Google, Inc <martinvonz@google.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Matt Harbison
|
r52757 | from __future__ import annotations | ||
Raphaël Gomès
|
r48623 | import binascii | ||
r48257 | import contextlib | |||
import os | ||||
Raphaël Gomès
|
r48624 | import struct | ||
r48257 | ||||
from ..node import ( | ||||
nullrev, | ||||
) | ||||
from .constants import ( | ||||
COMP_MODE_PLAIN, | ||||
ENTRY_DATA_COMPRESSED_LENGTH, | ||||
ENTRY_DATA_COMPRESSION_MODE, | ||||
ENTRY_DATA_OFFSET, | ||||
ENTRY_DATA_UNCOMPRESSED_LENGTH, | ||||
ENTRY_DELTA_BASE, | ||||
ENTRY_LINK_REV, | ||||
ENTRY_NODE_ID, | ||||
ENTRY_PARENT_1, | ||||
ENTRY_PARENT_2, | ||||
ENTRY_SIDEDATA_COMPRESSED_LENGTH, | ||||
ENTRY_SIDEDATA_COMPRESSION_MODE, | ||||
ENTRY_SIDEDATA_OFFSET, | ||||
r48629 | REVIDX_ISCENSORED, | |||
r48257 | REVLOGV0, | |||
REVLOGV1, | ||||
) | ||||
from ..i18n import _ | ||||
from .. import ( | ||||
error, | ||||
r48629 | mdiff, | |||
r48257 | pycompat, | |||
revlogutils, | ||||
util, | ||||
) | ||||
from ..utils import ( | ||||
storageutil, | ||||
) | ||||
from . import ( | ||||
constants, | ||||
deltas, | ||||
) | ||||
def v1_censor(rl, tr, censor_nodes, tombstone=b''):
    """Censor the given nodes of a "version 1" revlog.

    The revlog is not modified in place.  Every revision of ``rl`` is copied
    into a temporary revlog (postfix ``tmpcensored``); the revisions listed in
    ``censor_nodes`` are replaced by a packed tombstone flagged with
    ``REVIDX_ISCENSORED``.  Backups of the original file(s) are registered
    with the transaction ``tr``, the temporary file(s) are renamed over the
    original one(s), and the index of ``rl`` is reloaded.

    Raises ``error.Abort`` when the tombstone ends up stored as a delta, or
    when a revision that is already censored is stored as a delta.
    """
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # imported here to avoid an import cycle
    from .. import revlog

    revs_to_censor = {rl.rev(n) for n in censor_nodes}
    packed_tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
        data_config=rl.data_config,
        delta_config=rl.delta_config,
        feature_config=rl.feature_config,
        may_inline=rl._inline,
    )
    # inline splitting will prepare some transaction work that will get
    # confused by the final file move. So if there is a risk of not being
    # inline at the end, we prevent the new revlog to be inline in the first
    # place.
    assert not (newrl._inline and not rl._inline)

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev in revs_to_censor:
            newrl.addrawrevision(
                packed_tombstone,
                tr,
                rl.linkrev(rev),
                p1,
                p2,
                node,
                constants.REVIDX_ISCENSORED,
            )
            # the tombstone must be stored as a full text, never as a delta
            if newrl.deltaparent(rev) != nullrev:
                m = _(b'censored revision stored as delta; cannot censor')
                h = _(
                    b'censoring of revlogs is not fully implemented;'
                    b' please report this bug'
                )
                raise error.Abort(m, hint=h)
            continue

        if not rl.iscensored(rev):
            rawtext = rl.rawdata(rev)
        elif rl.deltaparent(rev) != nullrev:
            m = _(
                b'cannot censor due to censored '
                b'revision having delta stored'
            )
            raise error.Abort(m)
        else:
            # already censored: copy the stored chunk verbatim
            rawtext = rl._inner._chunk(rev)

        newrl.addrawrevision(
            rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
        )

    # register backups of the files we are about to overwrite
    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(newrl._indexfile, rl._indexfile)
    # both revlogs must agree on being inline or not
    assert bool(newrl._inline) == bool(rl._inline)
    if not newrl._inline:
        rl.opener.rename(newrl._datafile, rl._datafile)

    # drop cached state and reload the index from the renamed file(s)
    rl.clearcaches()
    index, chunk_cache = rl._loadindex()
    rl._load_inner(index, chunk_cache)

def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
    """Censor the given nodes of a revlog that is neither v0 nor v1.

    The nodes are resolved to revision numbers and the actual work is
    delegated to ``_rewrite_v2``.
    """
    version = revlog._format_version
    assert version != REVLOGV0, version
    assert version != REVLOGV1, version
    revs = set(map(revlog.rev, censor_nodes))
    _rewrite_v2(revlog, tr, revs, tombstone)
def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """rewrite a revlog to censor some of its content

    General principle

    We create new revlog files (index/data/sidedata) to copy the content of
    the existing data without the censored data.

    We need to recompute new delta for any revision that used the censored
    revision as delta base. As the cumulative size of the new delta may be
    large, we store them in a temporary file until they are stored in their
    final destination.

    All data before the censored data can be blindly copied. The rest needs
    to be copied as we go and the associated index entry needs adjustment.

    ``censor_revs`` is a non-empty collection of revision numbers (``min()``
    is applied to it).  ``tombstone`` is packed as the ``censored`` metadata
    and written in place of each censored revision.  ``tr`` is accepted but
    not used in this body: the docket is written with ``transaction=None``.
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    # keep a reference to the current index: `_setup_new_files` reloads the
    # revlog index, and the rewrite helpers read old entries from this one
    old_index = revlog.index
    docket = revlog._docket

    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # everything stored before the first censored revision is kept verbatim
    first_excl_rev = min(censor_revs)

    first_excl_entry = revlog.index[first_excl_rev]
    index_cutoff = revlog.index.entry_size * first_excl_rev
    # the low 16 bits of this field hold the flags, the rest is the offset
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev → (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # we dont need to open the old index file since its content already
        # exist in a usable form in `old_index`.
        with all_files() as open_files:
            (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            ) = open_files

            # Write the censored revision(s) and all subsequent revisions,
            # in revision order.
            for rev in range(first_excl_rev, len(old_index)):
                if rev in censor_revs:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        tombstone,
                    )
                else:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )
    # persist the docket, whose *_end fields were updated by the helpers
    docket.write(transaction=None, stripping=True)

def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Compute replacement deltas for revisions based on an excluded one.

    Every revision at or after the first excluded revision whose delta base
    is in ``excluded_revs`` gets a new delta.  The new delta payload is
    appended to ``tmp_storage``.

    Return a mapping: {rev → (new_base, data_start, data_end,
    compression_mode)} where ``data_start``/``data_end`` are offsets into
    ``tmp_storage``.
    """
    delta_computer = deltas.deltacomputer(revlog)
    recomputed = {}
    start_rev = min(excluded_revs)
    with revlog.reading():
        for rev in range(start_rev, len(old_index)):
            if rev in excluded_revs:
                # excluded revisions are handled separately by the caller,
                # there is no delta to recompute for them.
                continue
            entry = old_index[rev]
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                # the delta base is not excluded, the delta stays valid
                continue

            # This revision uses an excluded revision as the base of its
            # delta. We need a new delta.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # this revision is empty, we can delta against nullrev
                recomputed[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                continue

            raw = revlog.rawdata(rev)
            info = revlogutils.revisioninfo(
                node=entry[ENTRY_NODE_ID],
                p1=revlog.node(entry[ENTRY_PARENT_1]),
                p2=revlog.node(entry[ENTRY_PARENT_2]),
                btext=[raw],
                textlen=len(raw),
                cachedelta=None,
                flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
            )
            found = delta_computer.finddeltainfo(
                info, excluded_bases=excluded_revs, target_rev=rev
            )
            default_comp = revlog._docket.default_compression_header
            comp_mode, found = deltas.delta_compression(default_comp, found)
            # using `tell` is a bit lazy, but we are not here for speed
            tmp_start = tmp_storage.tell()
            tmp_storage.write(found.data[1])
            tmp_end = tmp_storage.tell()
            recomputed[rev] = (found.base, tmp_start, tmp_end, comp_mode)
    return recomputed
def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """Prepare the new index/data/sidedata files and return their opener.

    The first ``*_cutoff`` bytes of each current file are copied to the
    corresponding new file, the docket end offsets are set to the cutoffs and
    the revlog index is reloaded from that docket.

    Return a context manager factory opening all the relevant files:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The old_index_file is not here because it is accessed through the
    `old_index` object of the caller function.
    """
    docket = revlog._docket
    opener = revlog.opener

    # The paths of the current files must be resolved *before* asking the
    # docket for new files.
    old_index_filepath = opener.join(docket.index_filepath())
    old_data_filepath = opener.join(docket.data_filepath())
    old_sidedata_filepath = opener.join(docket.sidedata_filepath())

    new_index_filepath = opener.join(docket.new_index_file())
    new_data_filepath = opener.join(docket.new_data_file())
    new_sidedata_filepath = opener.join(docket.new_sidedata_file())

    # seed the new files with the part that does not need any rewrite
    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    opener.register_file(docket.index_filepath())
    opener.register_file(docket.data_filepath())
    opener.register_file(docket.sidedata_filepath())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    index, chunk_cache = revlog._loadindex(docket=docket)
    revlog._load_inner(index, chunk_cache)

    @contextlib.contextmanager
    def all_files_opener():
        # Files are opened in a fixed order and closed in reverse order when
        # the context exits.
        with contextlib.ExitStack() as stack:
            old_data_file = stack.enter_context(open(old_data_filepath, 'rb'))
            old_sidedata_file = stack.enter_context(
                open(old_sidedata_filepath, 'rb')
            )
            new_index_file = stack.enter_context(
                open(new_index_filepath, 'r+b')
            )
            new_data_file = stack.enter_context(open(new_data_filepath, 'r+b'))
            new_sidedata_file = stack.enter_context(
                open(new_sidedata_filepath, 'r+b')
            )

            # position every new file at its end, which must be the cutoff
            for fp, cutoff in (
                (new_index_file, index_cutoff),
                (new_data_file, data_cutoff),
                (new_sidedata_file, sidedata_cutoff),
            ):
                fp.seek(0, os.SEEK_END)
                assert fp.tell() == cutoff

            yield (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            )

    return all_files_opener
def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """Append a non-censored revision after the rewritten one(s).

    The revision data comes from ``tmp_storage`` when a new delta was
    precomputed for ``rev`` (it is then listed in ``rewritten_entries``), and
    from the old data file otherwise.  Sidedata, if any, is copied from the
    old sidedata file.  A new index entry with the updated offsets is
    appended to ``revlog.index`` and written to the new index file, and the
    docket end offsets are updated.
    """
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    old_entry = old_index[rev]
    offset_and_flags = old_entry[ENTRY_DATA_OFFSET]
    flags = offset_and_flags & 0xFFFF

    if rev in rewritten_entries:
        # a replacement delta was stored in the temporary file
        delta_base, tmp_start, tmp_end, data_comp_mode = rewritten_entries[rev]
        data_size = tmp_end - tmp_start
        tmp_storage.seek(tmp_start)
        data = tmp_storage.read(data_size)
    else:
        # the stored chunk can be reused as is
        old_data_file.seek(offset_and_flags >> 16)
        data_size = old_entry[ENTRY_DATA_COMPRESSED_LENGTH]
        data = old_data_file.read(data_size)
        delta_base = old_entry[ENTRY_DELTA_BASE]
        data_comp_mode = old_entry[ENTRY_DATA_COMPRESSION_MODE]

    # It might be faster to group continuous read/write operation,
    # however, this is censor, an operation that is not focussed
    # around stellar performance. So I have not written this
    # optimisation yet.
    data_offset = new_data_file.tell()
    new_data_file.write(data)

    sidedata_size = old_entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    sidedata_offset = new_sidedata_file.tell()
    if sidedata_size > 0:
        old_sidedata_file.seek(old_entry[ENTRY_SIDEDATA_OFFSET])
        new_sidedata_file.write(old_sidedata_file.read(sidedata_size))

    assert delta_base <= rev, (delta_base, rev)

    new_entry = revlogutils.entry(
        flags=flags,
        data_offset=data_offset,
        data_compressed_length=data_size,
        data_uncompressed_length=old_entry[ENTRY_DATA_UNCOMPRESSED_LENGTH],
        data_delta_base=delta_base,
        link_rev=old_entry[ENTRY_LINK_REV],
        parent_rev_1=old_entry[ENTRY_PARENT_1],
        parent_rev_2=old_entry[ENTRY_PARENT_2],
        node_id=old_entry[ENTRY_NODE_ID],
        sidedata_offset=sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=data_comp_mode,
        sidedata_compression_mode=old_entry[ENTRY_SIDEDATA_COMPRESSION_MODE],
    )
    revlog.index.append(new_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()
    docket.sidedata_end = new_sidedata_file.tell()

def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """Append the censored version of ``rev`` to the new files.

    ``tombstone`` is written uncompressed to the new data file in place of
    the revision content.  The new index entry keeps the link revision,
    parents and node of the old entry, is flagged ``REVIDX_ISCENSORED``, uses
    ``rev`` itself as delta base and carries no sidedata.
    """
    (
        _old_data_file,
        _old_sidedata_file,
        new_index_file,
        new_data_file,
        _new_sidedata_file,
    ) = all_files
    old_entry = old_index[rev]

    # XXX consider trying the default compression too
    tombstone_size = len(tombstone)
    tombstone_offset = new_data_file.tell()
    new_data_file.write(tombstone)

    # we are not adding any sidedata as they might leak info about the
    # censored version
    censored_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=tombstone_offset,
        data_compressed_length=tombstone_size,
        data_uncompressed_length=tombstone_size,
        data_delta_base=rev,
        link_rev=old_entry[ENTRY_LINK_REV],
        parent_rev_1=old_entry[ENTRY_PARENT_1],
        parent_rev_2=old_entry[ENTRY_PARENT_2],
        node_id=old_entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(censored_entry)
    new_index_file.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = new_index_file.tell()
    docket.data_end = new_data_file.tell()


def _get_filename_from_filelog_index(path):
    """Return the tracked file name encoded in a filelog index path.

    The part after the last ``.`` (the extension) and everything up to the
    first ``/`` (the `data/` prefix) are dropped.  Raises ``error.Abort`` when
    the remaining path contains no ``/``.
    """
    without_extension = path.rsplit(b'.', 1)[0]
    _prefix, separator, filename = without_extension.partition(b'/')
    if not separator:
        msg = _(b"cannot recognize filelog from filename: '%s'")
        raise error.Abort(msg % path)
    return filename
def _filelog_from_filename(repo, path):
    """Build the filelog of `path` on top of `repo.svfs`.

    Stolen from `engine.py`.
    """
    # imported lazily to avoid an import cycle
    from .. import filelog as filelog_mod

    return filelog_mod.filelog(repo.svfs, path)
def _write_swapped_parents(repo, rl, rev, offset, fp):
    """Overwrite the index entry of `rev` in `fp` with p1 and p2 swapped.

    The entry is read from ``rl.index``, its fields 5 and 6 are exchanged,
    and the first eight fields are packed and written at ``offset`` in ``fp``.
    Raises ``error.ProgrammingError`` when the repository is not locked.
    """
    from ..pure import parsers  # avoid cycle

    if repo._currentlock(repo._lockref) is None:
        # Let's be paranoid about it
        raise error.ProgrammingError(
            "repo needs to be locked to rewrite parents"
        )

    packer = parsers.IndexObject.index_format
    fields = list(rl.index[rev])
    # NOTE(review): 5 and 6 are presumably the p1/p2 slots of an index
    # entry (ENTRY_PARENT_1 / ENTRY_PARENT_2) -- confirm against constants.
    fields[5], fields[6] = fields[6], fields[5]
    packed = packer.pack(*fields[:8])

    fp.seek(offset)
    fp.write(packed)
def _reorder_filelog_parents(repo, fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
    new version to disk, overwriting the old one with a rename.

    `to_fix` is a non-empty collection of revision numbers.  The index file
    is first copied to a temporary file next to it, the entries are patched
    in that copy, and the copy is then renamed over the index file before
    the revlog index is reloaded.  The temporary file is removed if anything
    fails before the rename.

    Raises ``error.ProgrammingError`` if the filelog is not a version 1
    revlog.
    """
    from ..pure import parsers  # avoid cycle

    ui = repo.ui
    assert len(to_fix) > 0
    rl = fl._revlog
    if rl._format_version != constants.REVLOGV1:
        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
        raise error.ProgrammingError(msg)

    index_file = rl._indexfile
    new_file_path = index_file + b'.tmp-parents-fix'
    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
    # membership is tested once per revision in the inline case
    wanted = set(to_fix)

    with ui.uninterruptible():
        try:
            util.copyfile(
                rl.opener.join(index_file),
                rl.opener.join(new_file_path),
                checkambig=rl.data_config.check_ambig,
            )

            with rl.opener(new_file_path, mode=b"r+") as fp:
                if rl._inline:
                    # entries are interleaved with data, so their offsets
                    # have to be computed from the parsed content
                    index = parsers.InlinedIndexObject(fp.read())
                    for rev in fl.revs():
                        if rev in wanted:
                            offset = index._calculate_index(rev)
                            _write_swapped_parents(repo, rl, rev, offset, fp)
                            ui.write(repaired_msg % (rev, index_file))
                else:
                    # fixed-size entries: the offset is a simple product
                    index_format = parsers.IndexObject.index_format
                    for rev in to_fix:
                        offset = rev * index_format.size
                        _write_swapped_parents(repo, rl, rev, offset, fp)
                        ui.write(repaired_msg % (rev, index_file))

            rl.opener.rename(new_file_path, index_file)
            rl.clearcaches()
            index, chunk_cache = rl._loadindex()
            rl._load_inner(index, chunk_cache)
        finally:
            # `new_file_path` is relative to the opener: it must be joined
            # like it was for the copy above, otherwise the unlink targets a
            # path relative to the current working directory and the
            # temporary file is left behind on failure.
            util.tryunlink(rl.opener.join(new_file_path))


def _is_revision_affected(fl, filerev, metadata_cache=None):
    """Tell whether `filerev` of filelog `fl` is affected by issue6528.

    This is the slow path: it hands lazy accessors for the raw text and the
    parent revisions to `_is_revision_affected_inner`.
    """

    def full_text():
        return fl._revlog.rawdata(filerev)

    def parent_revs():
        return fl._revlog.parentrevs(filerev)

    return _is_revision_affected_inner(
        full_text, parent_revs, filerev, metadata_cache
    )
def _is_revision_affected_inner(
    full_text,
    parents_revs,
    filerev,
    metadata_cache=None,
):
    """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
    special meaning compared to the reverse in the context of filelog-based
    copytracing. issue6528 exists because new code assumed that parent ordering
    didn't matter, so this detects if the revision contains metadata (since
    it's only used for filelog-based copytracing) and its parents are in the
    "wrong" order.

    `full_text` and `parents_revs` are callables returning the raw text and
    the `(p1, p2)` revisions.  When `metadata_cache` is given, whether the
    revision has metadata is recorded in it under `filerev`."""
    try:
        raw_text = full_text()
    except error.CensoredNodeError:
        # We don't care about censored nodes as they never carry metadata
        return False

    # raw text can be a `memoryview`, which doesn't implement `startswith`
    has_meta = bytes(raw_text[:2]) == b'\x01\n'

    if metadata_cache is not None:
        metadata_cache[filerev] = has_meta

    if not has_meta:
        return False

    p1, p2 = parents_revs()
    return p1 != nullrev and p2 == nullrev


def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
    """Fast-path check of `filerev` of filelog `fl` for issue6528.

    Lazy accessors on the underlying revlog are handed to
    `_is_revision_affected_fast_inner`.  `repo` is not used here.
    """
    rl = fl._revlog

    # This function is used by repair_issue6528, but not by
    # filter_delta_issue6528. As such, we do not want to trust
    # parent revisions of the delta base to decide whether
    # the delta base has metadata.
    return _is_revision_affected_fast_inner(
        is_censored=lambda: rl.iscensored(filerev),
        delta_base=lambda: rl.deltaparent(filerev),
        delta=lambda: rl._inner._chunk(filerev),
        full_text=lambda: rl.rawdata(filerev),
        parent_revs=lambda: rl.parentrevs(filerev),
        deltabase_parentrevs=None,  # don't trust the parent revisions
        filerev=filerev,
        metadata_cache=metadata_cache,
    )
def _is_revision_affected_fast_inner(
    is_censored,
    delta_base,
    delta,
    full_text,
    parent_revs,
    deltabase_parentrevs,
    filerev,
    metadata_cache,
):
    """Optimization fast-path for `_is_revision_affected`.

    `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
    revision to check if its base has metadata, saving computation of the full
    text, instead looking at the current delta.

    This optimization only works if the revisions are looked at in order.

    `is_censored`, `delta_base`, `delta`, `full_text` and `parent_revs` are
    zero-argument callables, only invoked when their value is needed.
    `deltabase_parentrevs` is either None or a callable returning the parent
    revisions of the delta base; it is only consulted when the delta base is
    missing from `metadata_cache`.

    Raises `error.Abort` when the delta is non-empty but shorter than a patch
    header."""
    if is_censored():
        # Censored revisions don't contain metadata, so they cannot be affected
        metadata_cache[filerev] = False
        return False

    p1, p2 = parent_revs()
    if p1 == nullrev or p2 != nullrev:
        # Only the (p1 set, p2 null) shape can be affected.
        # NOTE(review): the cache is set to True here without looking at the
        # content -- confirm this is the intended assumption.
        metadata_cache[filerev] = True
        return False

    delta_parent = delta_base()
    parent_has_metadata = metadata_cache.get(delta_parent)
    if parent_has_metadata is None:
        if deltabase_parentrevs is not None:
            # the parameter is rebound to the value returned by the callable
            deltabase_parentrevs = deltabase_parentrevs()
            if deltabase_parentrevs == (nullrev, nullrev):
                # Need to check the content itself as there is no flag.
                parent_has_metadata = None
            elif deltabase_parentrevs[0] == nullrev:
                # Second parent is !null, assume repository is correct
                # and has flagged this file revision as having metadata.
                parent_has_metadata = True
            elif deltabase_parentrevs[1] == nullrev:
                # First parent is !null, so assume it has no metadata.
                parent_has_metadata = False
    if parent_has_metadata is None:
        # Nothing is known about the delta base: use the slow path, which
        # also fills `metadata_cache` for this revision.
        return _is_revision_affected_inner(
            full_text,
            parent_revs,
            filerev,
            metadata_cache,
        )

    chunk = delta()
    if not len(chunk):
        # No diff for this revision
        metadata_cache[filerev] = parent_has_metadata
        return parent_has_metadata

    # only the first 12 bytes (three big-endian 32-bit integers) are decoded
    header_length = 12
    if len(chunk) < header_length:
        raise error.Abort(_(b"patch cannot be decoded"))

    start, _end, _length = struct.unpack(b">lll", chunk[:header_length])

    if start < 2:  # len(b'\x01\n') == 2
        # This delta does *something* to the metadata marker (if any).
        # Check it the slow way
        is_affected = _is_revision_affected_inner(
            full_text,
            parent_revs,
            filerev,
            metadata_cache,
        )
        return is_affected

    # The diff did not remove or add the metadata header, it's then in the same
    # situation as its parent
    metadata_cache[filerev] = parent_has_metadata
    return parent_has_metadata


def _from_report(ui, repo, context, from_report, dry_run):
    """
    Fix the revisions given in the `from_report` file, but still checks if the
    revisions are indeed affected to prevent an unfortunate cyclic situation
    where we'd swap well-ordered parents again.

    Each non-empty line of the report holds comma-separated hex filenodes, a
    space, then the file name.  With `dry_run` set, affected revisions are
    only reported, nothing is rewritten.

    See the doc for `debug_fix_issue6528` for the format documentation.
    """
    ui.write(_(b"loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as f:
        for line in f.read().split(b'\n'):
            if not line:
                continue
            filenodes, filename = line.split(b' ', 1)
            fl = _filelog_from_filename(repo, filename)
            candidates = {
                fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
            }

            # only keep the revisions that really need their parents swapped
            confirmed = set()
            for filerev in candidates:
                if _is_revision_affected(fl, filerev):
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, filename))
                    confirmed.add(filerev)
                else:
                    msg = _(b"revision %s of file '%s' is not affected\n")
                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
                    ui.warn(msg)

            if not confirmed:
                msg = _(b"no affected revisions were found for '%s'\n")
                ui.write(msg % filename)
                continue
            if dry_run:
                continue
            _reorder_filelog_parents(repo, fl, sorted(confirmed))
def filter_delta_issue6528(revlog, deltas_iter):
    """filter incoming deltas to repair issue 6528 on the fly

    This is a generator: every item of `deltas_iter` is yielded again, with
    its two parent nodes swapped when the revision is detected as affected.
    Raises ``error.LookupError`` when the delta base or a parent of an
    incoming item is not known to `revlog`.
    """
    metadata_cache = {nullrev: False}

    deltacomputer = deltas.deltacomputer(revlog)

    def known_rev(some_node):
        """return the revision of a node that must already be in the index"""
        if not revlog.index.has_node(some_node):
            raise error.LookupError(
                some_node, revlog.radix, _(b'unknown parent')
            )
        return revlog.rev(some_node)

    # incoming revisions are numbered after the ones already in the revlog
    for rev, d in enumerate(deltas_iter, len(revlog)):
        (
            node,
            p1_node,
            p2_node,
            linknode,
            deltabase,
            delta,
            flags,
            sidedata,
        ) = d

        base_rev = known_rev(deltabase)
        p1_rev = known_rev(p1_node)
        p2_rev = known_rev(p2_node)

        def full_text():
            # note: being able to reuse the full text computation in the
            # underlying addrevision would be useful however this is a bit too
            # intrusive the for the "quick" issue6528 we are writing before the
            # 5.8 release
            textlen = mdiff.patchedsize(revlog.size(base_rev), delta)

            revinfo = revlogutils.revisioninfo(
                node,
                p1_node,
                p2_node,
                [None],
                textlen,
                (base_rev, delta),
                flags,
            )
            return deltacomputer.buildtext(revinfo)

        # the callables below are only invoked during this iteration
        is_affected = _is_revision_affected_fast_inner(
            lambda: bool(flags & REVIDX_ISCENSORED),
            lambda: base_rev,
            lambda: delta,
            full_text,
            lambda: (p1_rev, p2_rev),
            lambda: revlog.parentrevs(base_rev),
            rev,
            metadata_cache,
        )
        if is_affected:
            # same item with p1 and p2 exchanged
            d = (
                node,
                p2_node,
                p1_node,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            )
        yield d


def repair_issue6528(
    ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
):
    """Find, and optionally fix, the filelog revisions hit by issue6528.

    - `dry_run`: only report affected revisions; nothing is rewritten and no
      lock is taken.  No report is written either, since no revision is
      collected in that mode.
    - `to_report`: path of a file to write instead of fixing; each line holds
      the comma-separated hex filenodes, a space, then the file name.  No
      lock is taken.
    - `from_report`: path of such a file; only the revisions it lists are
      checked and fixed (see `_from_report`).
    - `paranoid`: cross-check the fast detection against the slow one for
      every revision and raise ``error.Abort`` on a mismatch.
    """

    @contextlib.contextmanager
    def context():
        if dry_run or to_report:  # No need for locking
            yield
        else:
            with repo.wlock(), repo.lock():
                yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        files = [
            entry
            for entry in repo.store.data_entries()
            if entry.is_revlog and entry.is_filelog
        ]

        progress = ui.makeprogress(
            _(b"looking for affected revisions"),
            unit=_(b"filelogs"),
            total=len(files),
        )
        found_nothing = True

        # Complete the progress bar even when an exception (e.g. the
        # paranoid check below) interrupts the scan.
        try:
            for entry in files:
                progress.increment()
                filename = entry.target_id
                fl = _filelog_from_filename(repo, entry.target_id)

                # Set of filerevs (or hex filenodes if `to_report`) that need
                # fixing
                to_fix = set()
                # the fast path relies on revisions being visited in order,
                # with one cache per filelog
                metadata_cache = {nullrev: False}
                for filerev in fl.revs():
                    affected = _is_revision_affected_fast(
                        repo, fl, filerev, metadata_cache
                    )
                    if paranoid:
                        slow = _is_revision_affected(fl, filerev)
                        if slow != affected:
                            msg = _(b"paranoid check failed for '%s' at node %s")
                            node = binascii.hexlify(fl.node(filerev))
                            raise error.Abort(msg % (filename, node))
                    if affected:
                        msg = b"found affected revision %d for file '%s'\n"
                        ui.warn(msg % (filerev, filename))
                        found_nothing = False
                        if not dry_run:
                            if to_report:
                                to_fix.add(binascii.hexlify(fl.node(filerev)))
                            else:
                                to_fix.add(filerev)

                if to_fix:
                    to_fix = sorted(to_fix)
                    if to_report:
                        report_entries.append((filename, to_fix))
                    else:
                        _reorder_filelog_parents(repo, fl, to_fix)

            if found_nothing:
                ui.write(_(b"no affected revisions were found\n"))

            if to_report and report_entries:
                with open(to_report, mode="wb") as f:
                    for path, to_fix in report_entries:
                        f.write(b"%s %s\n" % (b",".join(to_fix), path))
        finally:
            progress.complete()