sidedata.py
175 lines
| 6.2 KiB
| text/x-python
|
PythonLexer
# sidedata.py - Logic around storing extra data alongside revlog revisions
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""core code for "sidedata" support | ||||
The "sidedata" are stored alongside the revision without actually being part of
its content and not affecting its hash. Its main use case is to cache
important information related to a changeset.

The current implementation is experimental and subject to changes. Do not rely
on it in production.

Sidedata are stored in the revlog itself, thanks to a new version of the
revlog. The following format is currently used::

    initial header:
        <number of sidedata; 2 bytes>
    sidedata (repeated N times):
        <sidedata-key; 2 bytes>
        <sidedata-entry-length: 4 bytes>
        <sidedata-content-sha1-digest: 20 bytes>
        <sidedata-content; X bytes>
    normal raw text:
        <all bytes remaining in the rawtext>

This is a simple and effective format. It should be enough to experiment with
the concept.
""" | ||||
Matt Harbison
|
r52755 | from __future__ import annotations | ||
r43302 | ||||
Raphaël Gomès
|
r47848 | import collections | ||
r43302 | import struct | |||
Raphaël Gomès
|
r47848 | from .. import error, requirements as requirementsmod | ||
from ..revlogutils import constants, flagutil | ||||
Augie Fackler
|
r44517 | from ..utils import hashutil | ||
r43302 | ||||
## sidedata type constant
# reserve a block for testing purposes.
SD_TEST1 = 1
SD_TEST2 = 2
SD_TEST3 = 3
SD_TEST4 = 4
SD_TEST5 = 5
SD_TEST6 = 6
SD_TEST7 = 7
# key to store copies related information
SD_P1COPIES = 8
SD_P2COPIES = 9
SD_FILESADDED = 10
SD_FILESREMOVED = 11
SD_FILES = 12

# internal format constant
# Blob header: one big-endian unsigned 16-bit integer, the number of entries.
SIDEDATA_HEADER = struct.Struct('>H')
# Entry header: big-endian unsigned 16-bit key, unsigned 32-bit content
# length, then a 20-byte digest of the content.
SIDEDATA_ENTRY = struct.Struct('>HL20s')
r43302 | ||||
Augie Fackler
|
r43346 | |||
Raphaël Gomès
|
def serialize_sidedata(sidedata):
    """Serialize a sidedata mapping into a single bytes blob.

    `sidedata` maps sidedata keys (integers that fit the 16-bit key field
    of `SIDEDATA_ENTRY`) to their content as bytes.

    The blob starts with `SIDEDATA_HEADER` (the number of entries),
    followed by one `SIDEDATA_ENTRY` per item (key, content length, sha1
    digest of the content) in sorted order, followed by the content of
    every item in that same order.
    """
    entries = sorted(sidedata.items())
    buf = [SIDEDATA_HEADER.pack(len(entries))]
    # All the fixed-size entry headers come first...
    buf.extend(
        SIDEDATA_ENTRY.pack(key, len(value), hashutil.sha1(value).digest())
        for key, value in entries
    )
    # ...then every content, back to back, in the same order.
    buf.extend(value for _key, value in entries)
    return b''.join(buf)
r43303 | ||||
Augie Fackler
|
r43346 | |||
Raphaël Gomès
|
def deserialize_sidedata(blob):
    """Parse a blob produced by `serialize_sidedata` into a dictionary.

    `blob` is a bytes-like object. The returned dictionary maps each
    sidedata key to its content as `bytes`.

    Raises `error.SidedataHashError` when the sha1 digest of an entry's
    content does not match the digest stored in its entry header.
    """
    (nbentry,) = SIDEDATA_HEADER.unpack_from(blob, 0)
    offset = SIDEDATA_HEADER.size
    # Contents are stored back to back, after all the entry headers.
    dataoffset = offset + SIDEDATA_ENTRY.size * nbentry
    sidedata = {}
    for _i in range(nbentry):
        # unpack_from reads in place instead of slicing out a copy first
        key, size, storeddigest = SIDEDATA_ENTRY.unpack_from(blob, offset)
        offset += SIDEDATA_ENTRY.size
        # read the data associated with that entry
        nextdataoffset = dataoffset + size
        entrytext = bytes(blob[dataoffset:nextdataoffset])
        readdigest = hashutil.sha1(entrytext).digest()
        if storeddigest != readdigest:
            raise error.SidedataHashError(key, storeddigest, readdigest)
        sidedata[key] = entrytext
        dataoffset = nextdataoffset
    return sidedata
Raphaël Gomès
|
r47848 | |||
def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
    """
    Returns a tuple of `(repo, computers, removers)`:

    * `repo` is used as an argument for computers
    * `computers` is a dictionary mapping revlog kinds to the list of
      entries registered in `repo._sidedata_computers` for the sidedata
      categories that were asked for but are missing. These entries are
      consumed by `run_sidedata_helpers` as `(keys, computer, flags)`:

      * `keys` are the sidedata keys to be affected
      * `flags` is a bitmask (an integer) of flags to remove when
        removing the category.
      * `computer` is the function `(repo, store, rev, sidedata)` that
        returns a tuple of
        `(new sidedata dict, (flags to add, flags to remove))`.
        For example, it will return `({}, (0, 1 << 15))` to return no
        sidedata, with no flags to add and one flag to remove.

    * `removers` has the same shape as `computers` and holds the entries
      of the categories that are present, but not needed; their keys will
      be removed.

    The categories to generate are those in `remote_sd_categories` but not
    in `repo._wanted_sidedata`, and the categories to remove are the
    opposite difference. When `pull` is true the two sets are swapped.

    If both `computers` and `removers` are empty, sidedata will simply not
    be transformed.
    """
    # Computers for computing sidedata on-the-fly
    sd_computers = collections.defaultdict(list)
    # Computers for categories to remove from sidedata
    sd_removers = collections.defaultdict(list)

    to_generate = remote_sd_categories - repo._wanted_sidedata
    to_remove = repo._wanted_sidedata - remote_sd_categories
    if pull:
        to_generate, to_remove = to_remove, to_generate

    for revlog_kind, computers in repo._sidedata_computers.items():
        for category, computer in computers.items():
            if category in to_generate:
                sd_computers[revlog_kind].append(computer)
            if category in to_remove:
                sd_removers[revlog_kind].append(computer)

    return repo, sd_computers, sd_removers
def run_sidedata_helpers(store, sidedata_helpers, sidedata, rev):
    """Returns the sidedata for the given revision after running through
    the given helpers.

    - `store`: the revlog this applies to (changelog, manifest, or filelog
      instance)
    - `sidedata_helpers`: see `get_sidedata_helpers`
    - `sidedata`: previous sidedata at the given rev, if any
    - `rev`: affected rev of `store`

    The return value is `(sidedata, (flags_to_add, flags_to_remove))`, where
    both flag values are the bitwise OR of what the helpers reported.

    Only the helpers registered for `store.revlog_kind` are run. Keys are
    removed in place from the current sidedata dictionary, which is the
    `sidedata` argument itself unless a computer returned another object.
    """
    repo, sd_computers, sd_removers = sidedata_helpers
    kind = store.revlog_kind
    flags_to_add = 0
    flags_to_remove = 0

    # Each computer receives the sidedata returned by the previous one.
    for _keys, sd_computer, _flags in sd_computers.get(kind, []):
        sidedata, flags = sd_computer(repo, store, rev, sidedata)
        flags_to_add |= flags[0]
        flags_to_remove |= flags[1]

    for keys, _computer, flags in sd_removers.get(kind, []):
        for key in keys:
            sidedata.pop(key, None)
        flags_to_remove |= flags

    return sidedata, (flags_to_add, flags_to_remove)
def set_sidedata_spec_for_repo(repo):
    """Declare on `repo` the sidedata handled by this module.

    `SD_FILES` is registered as wanted when the repository has the
    `COPIESSDC_REQUIREMENT` requirement, and
    `metadata.copies_sidedata_computer` is registered as the changelog
    computer for the `SD_FILES` category and key, associated with the
    `REVIDX_HASCOPIESINFO` flag.
    """
    # prevent cycle metadata -> revlogutils.sidedata -> metadata
    from .. import metadata

    if requirementsmod.COPIESSDC_REQUIREMENT in repo.requirements:
        repo.register_wanted_sidedata(SD_FILES)
    # NOTE(review): the computer is registered regardless of the requirement
    # check above; confirm this nesting against the upstream file.
    repo.register_sidedata_computer(
        constants.KIND_CHANGELOG,
        SD_FILES,
        (SD_FILES,),
        metadata.copies_sidedata_computer,
        flagutil.REVIDX_HASCOPIESINFO,
    )