##// END OF EJS Templates
dirstate-map: factor out the change to _dirs and _alldirs on adding...
dirstate-map: factor out the change to _dirs and _alldirs on adding This logic is complicated enough to deserves its own function. So it now does. This will make it easier to reuse that logic in later changeset. Differential Revision: https://phab.mercurial-scm.org/D11129

File last commit:

r48265:5045ba2a default
r48487:e59bd672 default
Show More
rewrite.py
474 lines | 15.4 KiB | text/x-python | PythonLexer
revlog: rewrite `censors.py` to `rewrite.py`...
r48257 # censor code related to censoring revision
# coding: utf8
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2015 Google, Inc <martinvonz@google.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import contextlib
import os
from ..node import (
nullrev,
)
from .constants import (
COMP_MODE_PLAIN,
ENTRY_DATA_COMPRESSED_LENGTH,
ENTRY_DATA_COMPRESSION_MODE,
ENTRY_DATA_OFFSET,
ENTRY_DATA_UNCOMPRESSED_LENGTH,
ENTRY_DELTA_BASE,
ENTRY_LINK_REV,
ENTRY_NODE_ID,
ENTRY_PARENT_1,
ENTRY_PARENT_2,
ENTRY_SIDEDATA_COMPRESSED_LENGTH,
ENTRY_SIDEDATA_COMPRESSION_MODE,
ENTRY_SIDEDATA_OFFSET,
REVLOGV0,
REVLOGV1,
)
from ..i18n import _
from .. import (
error,
pycompat,
revlogutils,
util,
)
from ..utils import (
storageutil,
)
from . import (
constants,
deltas,
)
def v1_censor(rl, tr, censornode, tombstone=b''):
    """Censor one revision of a "version 1" revlog.

    The revlog is not edited in place. Every revision of ``rl`` is copied
    into a temporary revlog (postfix ``tmpcensored``), the content of
    ``censornode`` being swapped for a censor tombstone on the way. The
    temporary files are then renamed over the original ones, the caches of
    ``rl`` are cleared and its index is loaded again.

    rl:         revlog to censor, its format version must be ``REVLOGV1``
    tr:         transaction used to add the revisions and register backups
    censornode: node id of the revision whose content gets replaced
    tombstone:  bytes stored under the ``censored`` metadata key of the
                replacement content

    Raises ``error.Abort`` when the tombstone ends up stored as a delta in
    the new revlog, or when an already censored revision of ``rl`` is stored
    as a delta.
    """
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # imported here to avoid an import cycle
    from .. import revlog

    target_rev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting a revlog in place is hard, so everything is copied into a
    # brand new revlog that later takes the place of the old one.
    #
    # This is a bit dangerous: a mismatch of state is easy to get.
    replacement = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    replacement._format_version = rl._format_version
    replacement._format_flags = rl._format_flags
    replacement._generaldelta = rl._generaldelta
    replacement._parse_index = rl._parse_index

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev != target_rev:
            # ordinary revision: carry its raw content over unchanged
            if not rl.iscensored(rev):
                rawtext = rl.rawdata(rev)
            elif rl.deltaparent(rev) == nullrev:
                # previously censored revision, read its stored chunk
                rawtext = rl._chunk(rev)
            else:
                msg = _(
                    b'cannot censor due to censored '
                    b'revision having delta stored'
                )
                raise error.Abort(msg)
            replacement.addrawrevision(
                rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
            )
            continue

        # the revision being censored: store the tombstone in its place
        replacement.addrawrevision(
            tombstone,
            tr,
            rl.linkrev(target_rev),
            p1,
            p2,
            censornode,
            constants.REVIDX_ISCENSORED,
        )
        if replacement.deltaparent(rev) != nullrev:
            msg = _(b'censored revision stored as delta; cannot censor')
            hint = _(
                b'censoring of revlogs is not fully implemented;'
                b' please report this bug'
            )
            raise error.Abort(msg, hint=hint)

    # a non-inline revlog keeps its data in a separate file that needs the
    # same backup/rename treatment as the index
    has_datafile = not rl._inline

    tr.addbackup(rl._indexfile, location=b'store')
    if has_datafile:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(replacement._indexfile, rl._indexfile)
    if has_datafile:
        rl.opener.rename(replacement._datafile, rl._datafile)

    rl.clearcaches()
    rl._loadindex()
censor: rename `rl` to `revlog` in the main function...
def v2_censor(revlog, tr, censornode, tombstone=b''):
    """Censor one revision of a "version 2" revlog.

    Thin entry point: ``censornode`` is resolved to its revision number and
    the actual work is delegated to ``_rewrite_v2`` with that single
    revision as the set to censor.
    """
    assert revlog._format_version not in (
        REVLOGV0,
        REVLOGV1,
    ), revlog._format_version
    _rewrite_v2(revlog, tr, {revlog.rev(censornode)}, tombstone)
def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """rewrite a revlog to censor some of its content

    General principle

    We create new revlog files (index/data/sidedata) to copy the content of
    the existing data without the censored data.

    We need to recompute new delta for any revision that used the censored
    revision as delta base. As the cumulative size of the new delta may be
    large, we store them in a temporary file until they are stored in their
    final destination.

    All data before the censored data can be blindly copied. The rest needs
    to be copied as we go and the associated index entry needs adjustment.

    revlog:      the revlog to rewrite, must not use format v0 or v1
    tr:          not used by this function (the docket is written with
                 ``transaction=None``)
    censor_revs: non-empty collection of revision numbers to censor
    tombstone:   bytes stored under the ``censored`` metadata key of the
                 content replacing each censored revision
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    # Keep a reference to the index as it is now: `_setup_new_files` makes
    # the revlog load its index again, and the old entries are still needed.
    old_index = revlog.index
    docket = revlog._docket

    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    first_excl_rev = min(censor_revs)

    # everything stored before the first censored revision is kept as is
    first_excl_entry = old_index[first_excl_rev]
    index_cutoff = old_index.entry_size * first_excl_rev
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev -> (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # we don't need to open the old index file since its content already
        # exists in a usable form in `old_index`.
        with all_files() as open_files:
            # write the censored revision(s) and every revision after the
            # first of them
            for rev in range(first_excl_rev, len(old_index)):
                if rev in censor_revs:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        tombstone,
                    )
                else:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )

    docket.write(transaction=None, stripping=True)
revlog: rewrite `censors.py` to `rewrite.py`...
r48257
censor: extract the part about recomputing delta in a function...
def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Prepare replacement deltas for revisions based on an excluded one.

    Every revision from the first excluded one onward is inspected. Those
    whose stored delta base belongs to ``excluded_revs`` get a new delta,
    computed with ``excluded_revs`` passed as excluded bases; the resulting
    delta payload is appended to ``tmp_storage``.

    Return a mapping: {rev -> (new_base, data_start, data_end,
    compression_mode)}, ``data_start`` and ``data_end`` being offsets in
    ``tmp_storage``.
    """
    computer = deltas.deltacomputer(revlog)
    recomputed = {}
    start_rev = min(excluded_revs)
    with revlog._segmentfile._open_read() as dfh:
        for rev in range(start_rev, len(old_index)):
            if rev in excluded_revs:
                # excluded revisions are not handled here, there is no
                # delta to recompute for them
                continue
            entry = old_index[rev]
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                # the current delta base is not excluded, nothing to redo
                continue

            # From here on the revision is known to use an excluded
            # revision as its delta base, so it needs a new delta.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # empty revision: a zero-length delta against nullrev
                recomputed[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                continue

            text = revlog.rawdata(rev, _df=dfh)
            p1_node = revlog.node(entry[ENTRY_PARENT_1])
            p2_node = revlog.node(entry[ENTRY_PARENT_2])
            info = revlogutils.revisioninfo(
                node=entry[ENTRY_NODE_ID],
                p1=p1_node,
                p2=p2_node,
                btext=[text],
                textlen=len(text),
                cachedelta=None,
                flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
            )
            delta = computer.finddeltainfo(
                info, dfh, excluded_bases=excluded_revs, target_rev=rev
            )
            comp_mode, delta = deltas.delta_compression(
                revlog._docket.default_compression_header, delta
            )
            # relying on `tell` is a bit lazy, but speed is not the point
            begin = tmp_storage.tell()
            tmp_storage.write(delta.data[1])
            stop = tmp_storage.tell()
            recomputed[rev] = (delta.base, begin, stop, comp_mode)

    return recomputed
censor: extract the part about creating and opening new files in a function...
def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """Create the new revlog files and prepare access to old and new ones.

    The first ``*_cutoff`` bytes of the current index, data and sidedata
    files are copied into newly named files, the docket end offsets are set
    to the cutoffs, and the revlog clears its caches and loads its index
    again from the docket.

    Return a context manager that opens all the relevant files and yields
    them as a tuple:

    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The new files are positioned at their end when yielded.

    The old index file is not part of the tuple because its content is
    accessed through the `old_index` object of the caller function.
    """
    docket = revlog._docket
    join = revlog.opener.join

    # paths of the current files, resolved before new names are requested
    src_index_path = join(docket.index_filepath())
    src_data_path = join(docket.data_filepath())
    src_sidedata_path = join(docket.sidedata_filepath())

    dst_index_path = join(docket.new_index_file())
    dst_data_path = join(docket.new_data_file())
    dst_sidedata_path = join(docket.new_sidedata_file())

    # seed each new file with the part of the old one that is kept as is
    copy_plan = (
        (src_index_path, dst_index_path, index_cutoff),
        (src_data_path, dst_data_path, data_cutoff),
        (src_sidedata_path, dst_sidedata_path, sidedata_cutoff),
    )
    for src_path, dst_path, cutoff in copy_plan:
        util.copyfile(src_path, dst_path, nb_bytes=cutoff)

    # the docket is asked for its paths again at this point, each one right
    # before it gets registered
    path_getters = (
        docket.index_filepath,
        docket.data_filepath,
        docket.sidedata_filepath,
    )
    for get_path in path_getters:
        revlog.opener.register_file(get_path())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # the opening is hidden in a helper function to please check-code,
        # black and various python versions at the same time
        with open(src_data_path, 'rb') as old_data_file:
            with open(src_sidedata_path, 'rb') as old_sidedata_file:
                with open(dst_index_path, 'r+b') as new_index_file:
                    with open(dst_data_path, 'r+b') as new_data_file:
                        with open(
                            dst_sidedata_path, 'r+b'
                        ) as new_sidedata_file:
                            # move to the end of each new file and check
                            # it holds exactly the copied prefix
                            expectations = (
                                (new_index_file, index_cutoff),
                                (new_data_file, data_cutoff),
                                (new_sidedata_file, sidedata_cutoff),
                            )
                            for fobj, expected_end in expectations:
                                fobj.seek(0, os.SEEK_END)
                                assert fobj.tell() == expected_end
                            yield (
                                old_data_file,
                                old_sidedata_file,
                                new_index_file,
                                new_data_file,
                                new_sidedata_file,
                            )

    return all_files_opener
censor: extract the part about writing the other revision in a function...
def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """Append a regular (non censored) revision to the new revlog files.

    The revision data comes from ``tmp_storage`` when ``rev`` has an entry
    in ``rewritten_entries`` (its delta was recomputed), and from the old
    data file otherwise. Sidedata, if any, is copied from the old sidedata
    file. A new index entry pointing at the new offsets is appended to
    ``revlog.index`` and written to the new index file, then the docket end
    offsets are updated to the current position of the three new files.

    ``all_files`` is the tuple of open files yielded by the context manager
    that ``_setup_new_files`` returns.
    """
    (
        src_data,
        src_sidedata,
        dst_index,
        dst_data,
        dst_sidedata,
    ) = all_files
    entry = old_index[rev]
    # this index field packs the data offset (high bits) with the
    # revision flags (low 16 bits)
    offset_and_flags = entry[ENTRY_DATA_OFFSET]

    if rev in rewritten_entries:
        # recomputed delta, waiting for us in the temporary storage
        delta_base, begin, stop, data_comp_mode = rewritten_entries[rev]
        payload_size = stop - begin
        tmp_storage.seek(begin)
        payload = tmp_storage.read(payload_size)
    else:
        # untouched revision, its stored data is copied verbatim
        src_data.seek(offset_and_flags >> 16)
        payload_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        payload = src_data.read(payload_size)
        delta_base = entry[ENTRY_DELTA_BASE]
        data_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]

    # Grouping contiguous read/write operations might be faster. However
    # censoring is not an operation where stellar performance matters, so
    # that optimisation has not been written.
    data_offset = dst_data.tell()
    dst_data.write(payload)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    sidedata_offset = dst_sidedata.tell()
    if sidedata_size > 0:
        src_sidedata.seek(entry[ENTRY_SIDEDATA_OFFSET])
        dst_sidedata.write(src_sidedata.read(sidedata_size))

    assert delta_base <= rev, (delta_base, rev)

    new_entry = revlogutils.entry(
        flags=offset_and_flags & 0xFFFF,
        data_offset=data_offset,
        data_compressed_length=payload_size,
        data_uncompressed_length=entry[ENTRY_DATA_UNCOMPRESSED_LENGTH],
        data_delta_base=delta_base,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=data_comp_mode,
        sidedata_compression_mode=entry[ENTRY_SIDEDATA_COMPRESSION_MODE],
    )
    revlog.index.append(new_entry)
    dst_index.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = dst_index.tell()
    docket.data_end = dst_data.tell()
    docket.sidedata_end = dst_sidedata.tell()
censor: extract the part about writing the censored revision in a function...
r48259
def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """Write the censored replacement of ``rev`` to the new revlog files.

    ``tombstone`` is written uncompressed to the new data file. A new index
    entry flagged ``REVIDX_ISCENSORED`` is appended to ``revlog.index`` and
    written to the new index file; it reuses the link revision, parents and
    node id of the old entry. The index and data end offsets of the docket
    are then updated.

    ``all_files`` is the tuple of open files yielded by the context manager
    that ``_setup_new_files`` returns; only the new index and new data files
    are used here.
    """
    (
        _old_data,
        _old_sidedata,
        index_fh,
        data_fh,
        _new_sidedata,
    ) = all_files
    old_entry = old_index[rev]

    # XXX consider trying the default compression too
    tombstone_size = len(tombstone)
    tombstone_offset = data_fh.tell()
    data_fh.write(tombstone)

    # No sidedata is carried over: it might leak information about the
    # censored version.
    censored_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=tombstone_offset,
        data_compressed_length=tombstone_size,
        data_uncompressed_length=tombstone_size,
        # the revision is its own delta base
        data_delta_base=rev,
        link_rev=old_entry[ENTRY_LINK_REV],
        parent_rev_1=old_entry[ENTRY_PARENT_1],
        parent_rev_2=old_entry[ENTRY_PARENT_2],
        node_id=old_entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(censored_entry)
    index_fh.write(revlog.index.entry_binary(rev))

    docket = revlog._docket
    docket.index_end = index_fh.tell()
    docket.data_end = data_fh.tell()