store.py
1276 lines
| 37.8 KiB
| text/x-python
|
PythonLexer
/ mercurial / store.py
r51573 | # store.py - repository store handling for Mercurial) | |||
Adrian Buehlmann
|
r6839 | # | ||
Raphaël Gomès
|
r47575 | # Copyright 2008 Olivia Mackall <olivia@selenic.com> | ||
Adrian Buehlmann
|
r6839 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Adrian Buehlmann
|
r6839 | |||
Matt Harbison
|
r52756 | from __future__ import annotations | ||
r51372 | import collections | |||
Pulkit Goyal
|
r42144 | import functools | ||
Gregory Szorc
|
r27480 | import os | ||
r47752 | import re | |||
Gregory Szorc
|
r27480 | import stat | ||
Matt Harbison
|
r52622 | import typing | ||
Matt Harbison
|
r52775 | from typing import ( | ||
Generator, | ||||
List, | ||||
Optional, | ||||
) | ||||
Gregory Szorc
|
r27480 | |||
from .i18n import _ | ||||
r51364 | from .thirdparty import attr | |||
Matt Harbison
|
r52622 | |||
# Force pytype to use the non-vendored package | ||||
if typing.TYPE_CHECKING: | ||||
# noinspection PyPackageRequirements | ||||
import attr | ||||
Joerg Sonnenberger
|
r46729 | from .node import hex | ||
Arseniy Alekseyev
|
r51565 | from .revlogutils.constants import ( | ||
INDEX_HEADER, | ||||
r51573 | KIND_CHANGELOG, | |||
KIND_FILELOG, | ||||
KIND_MANIFESTLOG, | ||||
Arseniy Alekseyev
|
r51565 | ) | ||
Gregory Szorc
|
r27480 | from . import ( | ||
Augie Fackler
|
r43175 | changelog, | ||
Gregory Szorc
|
r27480 | error, | ||
r51520 | filelog, | |||
Augie Fackler
|
r43175 | manifest, | ||
Yuya Nishihara
|
r32372 | policy, | ||
Mateusz Kwapich
|
r30077 | pycompat, | ||
Arseniy Alekseyev
|
r51565 | revlog as revlogmod, | ||
Gregory Szorc
|
r27480 | util, | ||
Pierre-Yves David
|
r31234 | vfs as vfsmod, | ||
Gregory Szorc
|
r27480 | ) | ||
Augie Fackler
|
r44517 | from .utils import hashutil | ||
Adrian Buehlmann
|
r6840 | |||
Augie Fackler
|
# C implementations of the encoding helpers below (pure-Python fallbacks are
# used when the compiled `parsers` extension is unavailable)
parsers = policy.importmod('parsers')
Pulkit Goyal
|
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10**6
Yuya Nishihara
|
r32372 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r52621 | def _match_tracked_entry(entry: "BaseStoreEntry", matcher): | ||
Pulkit Goyal
|
r40529 | """parses a fncache entry and returns whether the entry is tracking a path | ||
matched by matcher or not. | ||||
If matcher is None, returns True""" | ||||
if matcher is None: | ||||
return True | ||||
Matt Harbison
|
r52621 | |||
# TODO: make this safe for other entry types. Currently, the various | ||||
# store.data_entry generators only yield RevlogStoreEntry, so the | ||||
# attributes do exist on `entry`. | ||||
# pytype: disable=attribute-error | ||||
r51391 | if entry.is_filelog: | |||
r51387 | return matcher(entry.target_id) | |||
r51391 | elif entry.is_manifestlog: | |||
r51387 | return matcher.visitdir(entry.target_id.rstrip(b'/')) | |||
Matt Harbison
|
r52621 | # pytype: enable=attribute-error | ||
r51387 | raise error.ProgrammingError(b"cannot process entry %r" % entry) | |||
Pulkit Goyal
|
r40658 | |||
Augie Fackler
|
r43346 | |||
Benoit Boissinot
|
r8531 | # This avoids a collision between a file named foo and a dir named | ||
# foo.i or foo.d | ||||
Adrian Buehlmann
|
r17607 | def _encodedir(path): | ||
Augie Fackler
|
r46554 | """ | ||
Yuya Nishihara
|
r34133 | >>> _encodedir(b'data/foo.i') | ||
Adrian Buehlmann
|
r13949 | 'data/foo.i' | ||
Yuya Nishihara
|
r34133 | >>> _encodedir(b'data/foo.i/bla.i') | ||
Adrian Buehlmann
|
r13949 | 'data/foo.i.hg/bla.i' | ||
Yuya Nishihara
|
r34133 | >>> _encodedir(b'data/foo.i.hg/bla.i') | ||
Adrian Buehlmann
|
r13949 | 'data/foo.i.hg.hg/bla.i' | ||
Yuya Nishihara
|
r34133 | >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n') | ||
Adrian Buehlmann
|
r17605 | 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n' | ||
Augie Fackler
|
r46554 | """ | ||
Augie Fackler
|
r43346 | return ( | ||
Augie Fackler
|
r43347 | path.replace(b".hg/", b".hg.hg/") | ||
.replace(b".i/", b".i.hg/") | ||||
.replace(b".d/", b".d.hg/") | ||||
Augie Fackler
|
r43346 | ) | ||
Benoit Boissinot
|
r8531 | |||
Adrian Buehlmann
|
# prefer the C implementation from the parsers extension when available,
# falling back to the pure-Python _encodedir above
encodedir = getattr(parsers, 'encodedir', _encodedir)
Augie Fackler
|
r43346 | |||
Benoit Boissinot
|
def decodedir(path):
    """Undo the '.hg' directory-suffix escaping performed by encodedir.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing to unescape if no '.hg/' marker is present
    if b".hg/" not in path:
        return path
    # reverse order of encodedir: '.hg.hg/' must be unescaped last
    out = path.replace(b".d.hg/", b".d/")
    out = out.replace(b".i.hg/", b".i/")
    return out.replace(b".hg.hg/", b".hg/")
Benoit Boissinot
|
r8531 | |||
timeless
|
r29071 | def _reserved(): | ||
Augie Fackler
|
r46554 | """characters that are problematic for filesystems | ||
timeless
|
r29071 | |||
* ascii escapes (0..31) | ||||
* ascii hi (126..255) | ||||
* windows specials | ||||
these characters will be escaped by encodefunctions | ||||
Augie Fackler
|
r46554 | """ | ||
Mateusz Kwapich
|
r30076 | winreserved = [ord(x) for x in u'\\:*?"<>|'] | ||
timeless
|
r29071 | for x in range(32): | ||
yield x | ||||
for x in range(126, 256): | ||||
yield x | ||||
for x in winreserved: | ||||
yield x | ||||
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
def _buildencodefun():
    """Build the (encode, decode) pair used for the reversible filename
    encoding: uppercase letters become '_x', reserved bytes become '~xx'.

    >>> enc, dec = _buildencodefun()
    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'
    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'
    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'
    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    # escape character used for uppercase letters ('A' -> '_a')
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # cmap: 1-byte input -> escaped output; start from the identity map
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # uppercase letters and '_' itself are escaped with a leading '_'
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()
    # dmap is the exact inverse of cmap (escaped token -> original byte)
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        # tokens are 1 ('a'), 2 ('_a') or 3 ('~2e') bytes long; try the
        # shortest match first and fail if no length matches
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        # s[c : c + 1] (not s[c]) so each element is bytes on Python 3
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
Adrian Buehlmann
|
r17608 | |||
# module-level encode/decode pair shared by the store path encoders below
_encodefname, _decodefname = _buildencodefun()
Adrian Buehlmann
|
r6839 | |||
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
def encodefilename(s):
    """Fully encode a store path: directory-suffix escaping, then per-byte
    filename encoding.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    with_dir_markers = encodedir(s)
    return _encodefname(with_dir_markers)
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
def decodefilename(s):
    """Invert encodefilename: per-byte decoding, then directory-suffix
    unescaping.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    without_escapes = _decodefname(s)
    return decodedir(without_escapes)
Adrian Buehlmann
|
r6839 | |||
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
def _buildlowerencodefun():
    """Build the non-reversible lowercasing encoder used by hashed paths.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    bchr = pycompat.bytechr
    # start from the identity mapping for plain ASCII...
    table = {bchr(code): bchr(code) for code in range(127)}
    # ...escape everything a filesystem may choke on...
    for code in _reserved():
        table[bchr(code)] = b"~%02x" % code
    # ...and simply lowercase capitals (information is lost here)
    for code in range(ord(b"A"), ord(b"Z") + 1):
        table[bchr(code)] = bchr(code).lower()

    def lowerencode(s):
        return b"".join(map(table.__getitem__, pycompat.iterbytestr(s)))

    return lowerencode
Adrian Buehlmann
|
r7229 | |||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
# prefer the C implementation when the parsers extension provides one
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
Adrian Buehlmann
|
r7229 | |||
Adrian Buehlmann
|
# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
# (checked by _auxencode against the part of a segment before its first '.')
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    # NOTE: `path` (a list of segments) is mutated in place and also returned
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # encode leading period or space ('.foo' -> '~2efoo')
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # length of the basename part before the first '.'
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
Adrian Buehlmann
|
r7229 | |||
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
# longest store path produced before switching to hashed encoding
_maxstorepathlen = 120
# how many leading characters of each directory level survive hashing
_dirprefixlen = 8
# total length budget for the shortened directory part of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
Bryan O'Sullivan
|
r17610 | |||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
def _hashencode(path, dotencode):
    """Non-reversible encoding for paths too long for the default scheme.

    Produces 'dh/' + shortened directory prefixes + as much of the basename
    as fits + sha1 digest of the full path + original extension, keeping the
    result within _maxstorepathlen. See _hybridencode's docstring for the
    full description of the scheme.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    # keep at most _dirprefixlen chars of each directory, stopping once the
    # joined directory string would exceed _maxshortdirslen
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)  # +1 for the joining '/'
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with the start of the basename while room remains
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.
    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    dir_encoded = encodedir(path)
    segments = _encodefname(dir_encoded).split(b'/')
    candidate = b'/'.join(_auxencode(segments, dotencode))
    # fall back to the non-reversible hashed form when too long
    if len(candidate) > _maxstorepathlen:
        return _hashencode(dir_encoded, dotencode)
    return candidate
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
def _pathencode(path):
    """Pure-Python store path encoder (dotencode variant, see _hybridencode).

    Rebound below to the C implementation from `parsers` when available."""
    dir_encoded = encodedir(path)
    # a raw path already over the limit is guaranteed to need hashing
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    segments = _encodefname(dir_encoded).split(b'/')
    result = b'/'.join(_auxencode(segments, True))
    if len(result) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    return result
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
# prefer the C implementation when the parsers extension provides one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
def _plainhybridencode(f):
    """hybrid-encode *f* without the leading-dot/space escaping"""
    return _hybridencode(f, dotencode=False)
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
r17726 | def _calcmode(vfs): | ||
Matt Mackall
|
r6898 | try: | ||
# files in .hg/ will be created using this mode | ||||
FUJIWARA Katsunori
|
r17726 | mode = vfs.stat().st_mode | ||
Augie Fackler
|
r43346 | # avoid some useless chmods | ||
Gregory Szorc
|
r25658 | if (0o777 & ~util.umask) == (0o777 & mode): | ||
Matt Mackall
|
r6898 | mode = None | ||
except OSError: | ||||
mode = None | ||||
return mode | ||||
Augie Fackler
|
r43346 | |||
Pulkit Goyal
|
# top-level files and directories making up the "data" part of a store
# NOTE(review): presumably consumed by copy/stream-clone style walkers that
# need to enumerate store content — confirm against callers
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]
Augie Fackler
|
r43346 | |||
# file extensions that belong to revlogs
REVLOG_FILES_EXT = (
    b'.i',
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
r47752 | ||||
r47657 | ||||
def is_revlog(f, kind, st):
    """Tell whether directory entry (name, kind, stat) is a revlog file.

    Only regular files whose name carries a revlog extension qualify; `st`
    is accepted for interface compatibility but not consulted."""
    return kind == stat.S_IFREG and f.endswith(REVLOG_FILES_EXT)
r47657 | ||||
def is_revlog_file(f):
    """Tell whether file name *f* carries one of the revlog extensions."""
    return f.endswith(REVLOG_FILES_EXT)
r47112 | ||||
Matt Mackall
|
r6903 | |||
@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    # store-relative, unencoded path of the file
    unencoded_path = attr.ib()
    # size in bytes, or None until lazily stat'ed via file_size()
    _file_size = attr.ib(default=None)
    # True if the file may change/disappear between listing and streaming
    is_volatile = attr.ib(default=False)

    # Missing file can be safely ignored, used by "copy/hardlink" local clone
    # for cache file not covered by lock.
    optional = False

    def file_size(self, vfs):
        # lazily stat the file; once known the size is cached, so later
        # calls may pass vfs=None.  A missing file counts as 0 bytes.
        if self._file_size is None:
            if vfs is None:
                msg = b"calling vfs-less file_size without prior call: %s"
                msg %= self.unencoded_path
                raise error.ProgrammingError(msg)
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                self._file_size = 0
        return self._file_size

    @property
    def has_size(self):
        # whether the size is already known (no vfs needed to read it)
        return self._file_size is not None

    def get_stream(self, vfs, volatiles):
        """return data "stream" information for this file

        (unencoded_file_path, content_iterator, content_size)
        """
        # requires the size to be known already (file_size(None) raises
        # otherwise)
        size = self.file_size(None)

        def get_stream():
            path = vfs.join(self.unencoded_path)
            with volatiles.open(path) as fp:
                yield None  # ready to stream
                if size <= 65536:
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

        # prime the generator so the file is opened (and volatile content
        # preserved) before this method returns
        s = get_stream()
        next(s)
        return (self.unencoded_path, s, size)
r51531 | ||||
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # conservative default: subclasses narrow this down
    maybe_volatile = True

    def files(self) -> List[StoreFile]:
        """return the StoreFile objects that make up this entry"""
        raise NotImplementedError

    def get_streams(
        self,
        repo=None,
        vfs=None,
        volatiles=None,
        max_changeset=None,
        preserve_file_count=False,
    ):
        """return a list of data stream associated to files for this entry

        return [(unencoded_file_path, content_iterator, content_size), …]
        """
        assert vfs is not None
        return [f.get_stream(vfs, volatiles) for f in self.files()]

    def preserve_volatiles(self, vfs, volatiles):
        """Use a VolatileManager to preserve the state of any volatile file

        This is useful for code that need a consistent view of the content
        like stream clone.
        """
        # skip the files() walk entirely when no file can be volatile
        if self.maybe_volatile:
            for f in self.files():
                if f.is_volatile:
                    volatiles(vfs.join(f.unencoded_path))
r51389 | ||||
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    maybe_volatile = attr.ib()
    # store-relative, unencoded path of the single file backing this entry
    _entry_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)
    # lazily built [StoreFile] list (cached after first files() call)
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None
        # a simple entry is only "maybe volatile" if its one file is
        self.maybe_volatile = is_volatile

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = [
                StoreFile(
                    unencoded_path=self._entry_path,
                    file_size=self._file_size,
                    is_volatile=self._is_volatile,
                )
            ]
        return self._files
r51365 | ||||
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    # one of the KIND_* constants (changelog / manifestlog / filelog)
    revlog_type = attr.ib(default=None)
    # tracked path for a filelog, directory for a manifestlog
    target_id = attr.ib(default=None)
    maybe_volatile = attr.ib(default=True)
    # unencoded path of the revlog files, without extension
    _path_prefix = attr.ib(default=None)
    # mapping of {extension: file_size} for the files of this revlog
    _details = attr.ib(default=None)
    # lazily built [StoreFile] list (cached after first files() call)
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        # every revlog must at least have its index file
        assert b'.i' in details, (path_prefix, details)
        # volatile only when at least one file has a volatile extension
        for ext in details:
            if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
                self.maybe_volatile = True
                break
        else:
            self.maybe_volatile = False
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        return self.revlog_type == KIND_CHANGELOG

    @property
    def is_manifestlog(self):
        return self.revlog_type == KIND_MANIFESTLOG

    @property
    def is_filelog(self):
        return self.revlog_type == KIND_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = []
            # _ext_key orders the suffixes so `.i` comes last
            for ext in sorted(self._details, key=_ext_key):
                path = self._path_prefix + ext
                file_size = self._details[ext]
                # files that are "volatile" and might change between
                # listing and streaming
                #
                # note: the ".nd" file are nodemap data and won't "change"
                # but they might be deleted.
                volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
                f = StoreFile(path, file_size, volatile)
                self._files.append(f)
        return self._files

    def get_streams(
        self,
        repo=None,
        vfs=None,
        volatiles=None,
        max_changeset=None,
        preserve_file_count=False,
    ):
        # Streaming an inline revlog races with it being split into .i/.d;
        # the branches below deal with that.  When sizes are pre-computed
        # and no race is possible, the plain per-file streaming is enough.
        pre_sized = all(f.has_size for f in self.files())
        if pre_sized and (
            repo is None
            or max_changeset is None
            # This use revlog-v2, ignore for now
            or any(k.endswith(b'.idx') for k in self._details.keys())
            # This is not inline, no race expected
            or b'.d' in self._details
        ):
            return super().get_streams(
                repo=repo,
                vfs=vfs,
                volatiles=volatiles,
                max_changeset=max_changeset,
                preserve_file_count=preserve_file_count,
            )
        elif not preserve_file_count:
            # stream non-revlog-core files as-is and let the revlog object
            # produce its own (possibly split) streams
            stream = [
                f.get_stream(vfs, volatiles)
                for f in self.files()
                if not f.unencoded_path.endswith((b'.i', b'.d'))
            ]
            rl = self.get_revlog_instance(repo).get_revlog()
            rl_stream = rl.get_streams(max_changeset)
            stream.extend(rl_stream)
            return stream

        # file count must be preserved: re-assemble an inline stream even if
        # the revlog was split underneath us
        name_to_size = {}
        for f in self.files():
            name_to_size[f.unencoded_path] = f.file_size(None)

        stream = [
            f.get_stream(vfs, volatiles)
            for f in self.files()
            if not f.unencoded_path.endswith(b'.i')
        ]

        index_path = self._path_prefix + b'.i'

        index_file = None
        try:
            index_file = vfs(index_path)
            header = index_file.read(INDEX_HEADER.size)
            if revlogmod.revlog.is_inline_index(header):
                size = name_to_size[index_path]

                # no split underneath, just return the stream
                def get_stream():
                    fp = index_file
                    try:
                        fp.seek(0)
                        yield None
                        if size <= 65536:
                            yield fp.read(size)
                        else:
                            yield from util.filechunkiter(fp, limit=size)
                    finally:
                        fp.close()

                s = get_stream()
                next(s)
                # ownership of the open file moved into the generator
                index_file = None
                stream.append((index_path, s, size))
            else:
                # the revlog was split: ask the revlog to re-inline it and
                # double-check the advertised sizes still match
                rl = self.get_revlog_instance(repo).get_revlog()
                rl_stream = rl.get_streams(max_changeset, force_inline=True)
                for name, s, size in rl_stream:
                    if name_to_size.get(name, 0) != size:
                        msg = _(b"expected %d bytes but %d provided for %s")
                        msg %= name_to_size.get(name, 0), size, name
                        raise error.Abort(msg)
                stream.extend(rl_stream)
        finally:
            if index_file is not None:
                index_file.close()

        files = self.files()
        assert len(stream) == len(files), (
            stream,
            files,
            self._path_prefix,
            self.target_id,
        )
        return stream

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            mandir = self.target_id
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=mandir
            )
        else:
            return filelog.filelog(repo.svfs, self.target_id)
r51366 | ||||
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog
    prefix without extension, the second level is all the file "suffix" that
    were seen for this revlog and arbitrary file data as value.
    """
    grouped = collections.defaultdict(dict)
    for unencoded, data in files_data:
        prefix, suffix = _split_revlog_ext(unencoded)
        grouped[prefix][suffix] = data
    return sorted(grouped.items())
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash extensions use a '-SOMELONGIDHASH.ext' suffix, so the split
    # point is the last '-' instead of the last '.'
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(sep)
    return filename[:idx], filename[idx:]
r51374 | def _ext_key(ext): | |||
"""a key to order revlog suffix | ||||
important to issue .i after other entry.""" | ||||
# the only important part of this order is to keep the `.i` last. | ||||
if ext.endswith(b'.n'): | ||||
return (0, ext) | ||||
elif ext.endswith(b'.nd'): | ||||
return (10, ext) | ||||
elif ext.endswith(b'.d'): | ||||
return (20, ext) | ||||
elif ext.endswith(b'.i'): | ||||
return (50, ext) | ||||
else: | ||||
return (40, ext) | ||||
Gregory Szorc
|
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        # raw (unencoded) access to the store directory
        vfs = vfstype(path)
        self.path = vfs.base
        # files in .hg/ will be created using this mode (None -> default)
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # all access through self.vfs goes through dir-name encoding
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        # `opener` is an alias of `self.vfs`
        self.opener = self.vfs
Adrian Buehlmann
|
r6840 | |||
def join(self, f): | ||||
Augie Fackler
|
r43347 | return self.path + b'/' + encodedir(f) | ||
Adrian Buehlmann
|
r6840 | |||
r51386 | def _walk(self, relpath, recurse, undecodable=None): | |||
Valentin Gatien-Baron
|
r48691 | '''yields (revlog_type, unencoded, size)''' | ||
Adrian Buehlmann
|
r13426 | path = self.path | ||
if relpath: | ||||
Augie Fackler
|
r43347 | path += b'/' + relpath | ||
Adrian Buehlmann
|
r13426 | striplen = len(self.path) + 1 | ||
Matt Mackall
|
r6899 | l = [] | ||
FUJIWARA Katsunori
|
r17728 | if self.rawvfs.isdir(path): | ||
Matt Mackall
|
r6899 | visit = [path] | ||
FUJIWARA Katsunori
|
r17747 | readdir = self.rawvfs.readdir | ||
Matt Mackall
|
r6899 | while visit: | ||
p = visit.pop() | ||||
FUJIWARA Katsunori
|
r17747 | for f, kind, st in readdir(p, stat=True): | ||
Augie Fackler
|
r43347 | fp = p + b'/' + f | ||
r51564 | if is_revlog(f, kind, st): | |||
Matt Mackall
|
r6900 | n = util.pconvert(fp[striplen:]) | ||
r51564 | l.append((decodedir(n), st.st_size)) | |||
Matt Mackall
|
r6899 | elif kind == stat.S_IFDIR and recurse: | ||
visit.append(fp) | ||||
r51371 | ||||
Bryan O'Sullivan
|
r17054 | l.sort() | ||
return l | ||||
Adrian Buehlmann
|
r6840 | |||
Kyle Lippincott
|
r47349 | def changelog(self, trypending, concurrencychecker=None): | ||
return changelog.changelog( | ||||
self.vfs, | ||||
trypending=trypending, | ||||
concurrencychecker=concurrencychecker, | ||||
) | ||||
Augie Fackler
|
r43175 | |||
Matt Harbison
|
r52963 | def manifestlog(self, repo, storenarrowmatch) -> manifest.manifestlog: | ||
Joerg Sonnenberger
|
r47538 | rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs) | ||
Augie Fackler
|
r43346 | return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch) | ||
Augie Fackler
|
r43175 | |||
r51397 | def data_entries( | |||
r51364 | self, matcher=None, undecodable=None | |||
r51366 | ) -> Generator[BaseStoreEntry, None, None]: | |||
Valentin Gatien-Baron
|
r48691 | """Like walk, but excluding the changelog and root manifest. | ||
When [undecodable] is None, revlogs names that can't be | ||||
decoded cause an exception. When it is provided, it should | ||||
be a list and the filenames that can't be decoded are added | ||||
to it instead. This is very rarely needed.""" | ||||
r51375 | dirs = [ | |||
r51573 | (b'data', KIND_FILELOG, False), | |||
(b'meta', KIND_MANIFESTLOG, True), | ||||
r51375 | ] | |||
r51522 | for base_dir, rl_type, strip_filename in dirs: | |||
r51386 | files = self._walk(base_dir, True, undecodable=undecodable) | |||
r51375 | for revlog, details in _gather_revlog(files): | |||
r51389 | revlog_target_id = revlog.split(b'/', 1)[1] | |||
r51522 | if strip_filename and b'/' in revlog: | |||
revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0] | ||||
revlog_target_id += b'/' | ||||
r51389 | yield RevlogStoreEntry( | |||
path_prefix=revlog, | ||||
revlog_type=rl_type, | ||||
target_id=revlog_target_id, | ||||
r51564 | details=details, | |||
r51389 | ) | |||
Adrian Buehlmann
|
r6840 | |||
r51407 | def top_entries( | |||
self, phase=False, obsolescence=False | ||||
) -> Generator[BaseStoreEntry, None, None]: | ||||
r51406 | if phase and self.vfs.exists(b'phaseroots'): | |||
yield SimpleStoreEntry( | ||||
entry_path=b'phaseroots', | ||||
is_volatile=True, | ||||
) | ||||
r51407 | if obsolescence and self.vfs.exists(b'obsstore'): | |||
# XXX if we had the file size it could be non-volatile | ||||
yield SimpleStoreEntry( | ||||
entry_path=b'obsstore', | ||||
is_volatile=True, | ||||
) | ||||
r51374 | files = reversed(self._walk(b'', False)) | |||
changelogs = collections.defaultdict(dict) | ||||
manifestlogs = collections.defaultdict(dict) | ||||
r51564 | for u, s in files: | |||
r47657 | if u.startswith(b'00changelog'): | |||
r51374 | name, ext = _split_revlog_ext(u) | |||
r51564 | changelogs[name][ext] = s | |||
r47657 | elif u.startswith(b'00manifest'): | |||
r51374 | name, ext = _split_revlog_ext(u) | |||
r51564 | manifestlogs[name][ext] = s | |||
r47657 | else: | |||
r51366 | yield SimpleStoreEntry( | |||
r51388 | entry_path=u, | |||
r51564 | is_volatile=False, | |||
r51366 | file_size=s, | |||
) | ||||
r51374 | # yield manifest before changelog | |||
top_rl = [ | ||||
r51573 | (manifestlogs, KIND_MANIFESTLOG), | |||
(changelogs, KIND_CHANGELOG), | ||||
r51374 | ] | |||
assert len(manifestlogs) <= 1 | ||||
assert len(changelogs) <= 1 | ||||
for data, revlog_type in top_rl: | ||||
for revlog, details in sorted(data.items()): | ||||
r51389 | yield RevlogStoreEntry( | |||
path_prefix=revlog, | ||||
revlog_type=revlog_type, | ||||
target_id=b'', | ||||
r51564 | details=details, | |||
r51389 | ) | |||
Durham Goode
|
r19177 | |||
r51405 | def walk( | |||
r51407 | self, matcher=None, phase=False, obsolescence=False | |||
r51405 | ) -> Generator[BaseStoreEntry, None, None]: | |||
Arseniy Alekseyev
|
r51328 | """return files related to data storage (ie: revlogs) | ||
r47612 | ||||
r51405 | yields instance from BaseStoreEntry subclasses | |||
Pulkit Goyal
|
r40376 | |||
if a matcher is passed, storage files of only those tracked paths | ||||
are passed with matches the matcher | ||||
Augie Fackler
|
r46554 | """ | ||
Adrian Buehlmann
|
r6840 | # yield data files first | ||
r51397 | for x in self.data_entries(matcher): | |||
Adrian Buehlmann
|
r6840 | yield x | ||
r51407 | for x in self.top_entries(phase=phase, obsolescence=obsolescence): | |||
Adrian Buehlmann
|
r6840 | yield x | ||
Matt Mackall
|
r6903 | def copylist(self): | ||
Pulkit Goyal
|
r46055 | return _data | ||
Matt Mackall
|
r6903 | |||
Durham Goode
|
r20883 | def write(self, tr): | ||
Adrian Buehlmann
|
r13391 | pass | ||
Durham Goode
|
r20884 | def invalidatecaches(self): | ||
pass | ||||
Durham Goode
|
r20885 | def markremoved(self, fn): | ||
pass | ||||
smuralid
|
r17744 | def __contains__(self, path): | ||
'''Checks if the store contains path''' | ||||
Augie Fackler
|
r43347 | path = b"/".join((b"data", path)) | ||
smuralid
|
r17744 | # file? | ||
Augie Fackler
|
r43347 | if self.vfs.exists(path + b".i"): | ||
smuralid
|
r17744 | return True | ||
# dir? | ||||
Augie Fackler
|
r43347 | if not path.endswith(b"/"): | ||
path = path + b"/" | ||||
FUJIWARA Katsunori
|
r19903 | return self.vfs.exists(path) | ||
smuralid
|
r17744 | |||
Augie Fackler
|
r43346 | |||
Matt Mackall
|
class encodedstore(basicstore):
    """store variant that filename-encodes every path under ``.hg/store``"""

    def __init__(self, path, vfstype):
        store_vfs = vfstype(path + b'/store')
        self.path = store_vfs.base
        self.createmode = _calcmode(store_vfs)
        store_vfs.createmode = self.createmode
        # raw access on one side, filename-encoded access on the other
        self.rawvfs = store_vfs
        self.vfs = vfsmod.filtervfs(store_vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        # decode every filename produced by the plain walk; names that fail
        # to decode either abort or are collected into `undecodable`.
        decoded = []
        for encoded_name, data in super()._walk(relpath, recurse):
            try:
                plain_name = decodefilename(encoded_name)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % encoded_name
                    raise error.StorageError(msg)
                undecodable.append(encoded_name)
            else:
                decoded.append((plain_name, data))
        return decoded

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # filter the parent class entries through the matcher
        all_entries = super().data_entries(undecodable=undecodable)
        return (
            entry
            for entry in all_entries
            if _match_tracked_entry(entry, matcher)
        )

    def join(self, f):
        """return the on-disk (encoded) path for store-relative path `f`"""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        """return the list of file paths to copy when cloning this store"""
        result = [b'requires', b'00changelog.i']
        result.extend(b'store/' + f for f in _data)
        return result
|
r43346 | |||
Matt Mackall
|
r6903 | |||
Gregory Szorc
|
class fncache:
    """in-memory representation of the store's ``fncache`` file

    Entries are store-relative file names (one per line on disk), loaded
    lazily.  New names are accumulated in ``addls`` and only merged into
    ``entries`` / flushed to disk by ``write``.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names for which `add` must be a no-op
        self._ignores = set()
        # set of names read from disk, or None while not yet loaded
        self.entries = None
        # True when an entry was removed and the file must be rewritten
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    @property
    def is_loaded(self):
        # `entries` stays None until the on-disk file has been read
        return self.entries is not None

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if not self.is_loaded:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read fixed-size chunks, splitting whole lines out of each chunk;
        # a partial trailing line is carried over to the next iteration
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacked a final newline
            # NOTE(review): the message grammar ("does not ends") is off, but
            # it is a user-visible runtime string; not changed here.
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the offending line number(s)
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """flush pending changes to disk within transaction `tr`"""
        if self._dirty:
            # something was removed: rewrite the whole file
            assert self.is_loaded
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the in-memory set so appended names get re-read on demand
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """mark `fn` so that later `add` calls for it are ignored"""
        self._ignores.add(fn)

    def add(self, fn):
        """schedule `fn` for addition (no-op if ignored or already present)"""
        if fn in self._ignores:
            return
        if not self.is_loaded:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """remove `fn` from the cache (no-op if absent)"""
        if not self.is_loaded:
            self._load()
        if fn in self.addls:
            # not written out yet: simply forget the pending addition
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        # pending additions count as present without forcing a load
        if fn in self.addls:
            return True
        if not self.is_loaded:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if not self.is_loaded:
            self._load()
        return iter(self.entries | self.addls)
Adrian Buehlmann
|
r7229 | |||
Augie Fackler
|
r43346 | |||
Boris Feld
|
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records new store files in an fncache object

    Opening a revlog file under ``data/`` or ``meta/`` in a writable mode
    registers its (unencoded) path with the fncache.
    """

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache: fncache = fnc
        # path-encoding function applied before touching the backing vfs
        self.encode = encode
        self.uses_dotencode = encode is _pathencode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and is_revlog_file(path)
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = not self.fncache.is_loaded and (
                # if the file has size zero, it should be considered as missing.
                # Such zero-size files are the result of truncation when a
                # transaction is aborted.
                self.vfs.exists(encoded)
                and self.vfs.stat(encoded).st_size
            )
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
        # encode every path component before delegating to the backing vfs
        insidef = (self.encode(f) for f in insidef)
        if path:
            return self.vfs.join(self.encode(path), *insidef)
        else:
            return self.vfs.join(path, *insidef)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
Augie Fackler
|
r43346 | |||
Adrian Buehlmann
|
class fncachestore(basicstore):
    """store variant that uses the fncache file to enumerate its revlogs"""

    def __init__(self, path, vfstype, dotencode):
        # `dotencode` selects the path-encoding flavor used on disk
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # all writes go through the fncache-aware proxy vfs
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """return the on-disk (encoded) path for store-relative path `f`"""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """return the on-disk size of the (already encoded) `path`"""
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        #
        # file sizes are not available from the fncache, hence `None` data
        files = ((f, None) for f in self.fncache if is_revlog_file(f))
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = KIND_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = KIND_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        """return the list of file paths to copy when cloning this store"""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        # delegate to the fncache, which owns all pending state
        self.fncache.write(tr)

    def invalidatecaches(self):
        # reset the fncache to its unloaded state
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        # a name listed in the fncache may be stale; check the filesystem
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False