store.py
333 lines
| 11.1 KiB
| text/x-python
|
PythonLexer
/ mercurial / store.py
Adrian Buehlmann
|
r6839 | # store.py - repository store handling for Mercurial | ||
# | ||||
# Copyright 2008 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
# GNU General Public License version 2, incorporated herein by reference. | ||||
Adrian Buehlmann
|
r6839 | |||
Adrian Buehlmann
|
r7229 | from i18n import _ | ||
Simon Heimberg
|
r8312 | import osutil, util | ||
import os, stat | ||||
Adrian Buehlmann
|
r6840 | |||
Adrian Buehlmann
|
r7229 | _sha = util.sha1 | ||
Benoit Boissinot
|
# Directory names ending in '.hg', '.i' or '.d' get an extra '.hg' suffix.
# This avoids a collision between a file named foo (stored as foo.i/foo.d)
# and a directory named foo.i or foo.d.
def encodedir(path):
    '''escape directory names below data/ that end in .hg, .i or .d

    Paths that do not start with 'data/' are returned unchanged.
    '''
    if path.startswith('data/'):
        # '.hg/' has to be escaped first: the two later replacements
        # introduce '.hg/' themselves and must not be escaped again
        for suffix in (".hg/", ".i/", ".d/"):
            path = path.replace(suffix, suffix[:-1] + ".hg/")
    return path
def decodedir(path):
    '''undo encodedir for paths below data/

    Paths that do not start with 'data/' are returned unchanged.
    '''
    if path.startswith('data/'):
        # mirror image of encodedir: the '.hg.hg/' escape is undone last
        for suffix in (".d/", ".i/", ".hg/"):
            path = path.replace(suffix[:-1] + ".hg/", suffix)
    return path
Adrian Buehlmann
|
def _buildencodefun():
    '''build the reversible filename (encode, decode) function pair

    Encoding maps every character through a table: uppercase letters and
    '_' become '_' followed by the lowercase letter, control characters,
    '~', everything at or above 0x7e and the Windows reserved characters
    become '~xx' (two hex digits), all other characters map to themselves.
    The input is passed through encodedir first; decoding applies decodedir
    last.
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']
    # identity mapping for the 7-bit range as a starting point
    cmap = dict((chr(n), chr(n)) for n in range(127))
    for n in list(range(32)) + list(range(126, 256)) + reserved:
        cmap[chr(n)] = "~%02x" % n
    for n in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[chr(n)] = escape + chr(n).lower()
    # the inverse table; encoded tokens are unique so nothing is lost
    dmap = dict((token, char) for char, token in cmap.items())

    def decode(s):
        pos = 0
        end = len(s)
        while pos < end:
            # a token is 1 ('a'), 2 ('_a') or 3 ('~7e') characters long
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                # no token matches at this position
                raise KeyError

    def encodename(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decodename(s):
        chars = list(decode(s))
        return decodedir("".join(chars))

    return encodename, decodename

encodefilename, decodefilename = _buildencodefun()
Adrian Buehlmann
|
def _build_lower_encodefun():
    '''build the non-reversible lowercasing filename encoder

    Same '~xx' escaping as the reversible encoding for control characters,
    '~', everything at or above 0x7e and the Windows reserved characters,
    but uppercase letters are simply lowercased.
    '''
    reserved = [ord(c) for c in '\\:*?"<>|']
    # identity mapping for the 7-bit range as a starting point
    cmap = dict((chr(n), chr(n)) for n in range(127))
    for n in list(range(32)) + list(range(126, 256)) + reserved:
        cmap[chr(n)] = "~%02x" % n
    for n in range(ord("A"), ord("Z") + 1):
        cmap[chr(n)] = chr(n).lower()

    def encodename(s):
        return "".join([cmap[c] for c in s])
    return encodename

lowerencode = _build_lower_encodefun()
# device names that Windows does not allow as the stem of a file name
_windows_reserved_filenames = (
    'con prn aux nul '
    'com1 com2 com3 com4 com5 com6 com7 com8 com9 '
    'lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9').split()

def auxencode(path):
    '''mask path components that Windows cannot handle

    For every '/'-separated component: if the part before the first '.' is
    a Windows reserved name, its third character is replaced by '~xx'
    ('aux' -> 'au~78'); a trailing period or space is replaced by '~xx' as
    well ('foo...' -> 'foo..~2e'). Empty components are kept as they are.
    '''
    encoded = []
    for name in path.split('/'):
        if not name:
            # nothing to mask, but keep the component so separators survive
            encoded.append(name)
            continue
        stem = name.split('.', 1)[0]
        if stem in _windows_reserved_filenames:
            # encode third letter ('aux' -> 'au~78')
            name = "%s~%02x%s" % (name[:2], ord(name[2]), name[3:])
        last = name[-1]
        if last == '.' or last == ' ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            name = name[:-1] + "~%02x" % ord(last)
        encoded.append(name)
    return '/'.join(encoded)
# longest encoded path that is still stored in its reversible form
MAX_PATH_LEN_IN_HGSTORE = 120
# number of characters kept from each directory level in the hashed form
DIR_PREFIX_LEN = 8
# upper bound for the shortened directory part of a hashed path
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4

def hybridencode(path):
    '''encodes path with a length limit

    Only paths that begin with 'data/' are encoded; anything else is
    returned unchanged.

    Default encoding (reversible):

    Uppercase letters 'X' become '_x' and reserved or illegal characters
    become '~xx', xx being the two digit hex code of the character (see
    encodefilename). Path components that are Windows reserved filenames
    are masked by encoding their third character ('aux' -> 'au~78', see
    auxencode).

    Hashed encoding (not reversible):

    Used instead when the default-encoded path is longer than
    MAX_PATH_LEN_IN_HGSTORE. The result starts with 'dh/' in place of
    'data/', followed by up to DIR_PREFIX_LEN characters of each directory
    level of the lowerencoded path (as many levels as fit into
    _MAX_SHORTENED_DIRS_LEN), followed by the filler, the sha digest of the
    full path and the extension (e.g. '.i' or '.d').

    The filler is the beginning of the basename of the lowerencoded path
    (everything after the last path separator). It is as long as possible:
    characters are taken from the basename until the encoded path has
    MAX_PATH_LEN_IN_HGSTORE characters or the basename is used up.
    '''
    if not path.startswith('data/'):
        return path
    # escape directories ending with .i and .d
    path = encodedir(path)
    tail = path[len('data/'):]
    plain = 'data/' + auxencode(encodefilename(tail))
    if len(plain) <= MAX_PATH_LEN_IN_HGSTORE:
        return plain

    # too long for the reversible form: build the hashed one
    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    levels = lowered.split('/')
    basename = levels.pop()
    shortdirs = []
    for level in levels:
        prefix = level[:DIR_PREFIX_LEN]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        # length of the directories kept so far plus separator and prefix
        needed = len('/'.join(shortdirs)) + 1 + len(prefix)
        if needed > _MAX_SHORTENED_DIRS_LEN:
            break
        shortdirs.append(prefix)
    head = 'dh/' + ''.join([d + '/' for d in shortdirs])
    suffix = digest + ext
    room = MAX_PATH_LEN_IN_HGSTORE - len(head) - len(suffix)
    filler = ''
    if room > 0:
        filler = basename[:room]
    return head + filler + suffix
Matt Mackall
|
def _calcmode(path):
    '''return the mode to create files under path with, or None

    None is returned when path cannot be stat'ed, or when the permission
    bits of path already equal what the current umask would produce, so no
    explicit chmod is needed.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # all user/group/other permission bits (octal 777)
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    # avoid some useless chmods
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
Matt Mackall
|
# names (relative to the store) that copylist() reports
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore(object):
    '''base class for local repository stores

    Files are kept directly below path; file names passed to the opener
    and to join() only get their directory names escaped (see encodedir).
    '''

    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def storeopener(f, *args, **kw):
            # escape directory names before reaching the filesystem
            return rawopener(encodedir(f), *args, **kw)
        self.opener = storeopener

    def join(self, f):
        '''return the location of f inside the store'''
        return self.pathjoiner(self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size) tuples

        Lists the regular files ending in '.d' or '.i' found in relpath
        (relative to the store path); subdirectories are visited only when
        recurse is true. The list is empty if relpath is not a directory.
        '''
        top = self.pathjoiner(self.path, relpath)
        found = []
        if not os.path.isdir(top):
            return found
        # store path plus one separator is cut off every reported name
        prefixlen = len(self.path) + len(os.sep)
        pending = [top]
        while pending:
            current = pending.pop()
            for name, kind, info in osutil.listdir(current, stat=True):
                full = self.pathjoiner(current, name)
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[prefixlen:])
                    found.append((decodedir(encoded), encoded, info.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in self._walk('', False)[::-1]:
            yield entry

    def copylist(self):
        '''return the names to copy, relative to the store's parent path'''
        names = ['requires']
        names.extend(_data.split())
        return names
Matt Mackall
|
class encodedstore(basicstore):
    '''store below <path>/store whose file names go through encodefilename'''

    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def storeopener(f, *args, **kw):
            # apply the reversible filename encoding on every access
            return rawopener(encodefilename(f), *args, **kw)
        self.opener = storeopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None for names that cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                # not a valid encoded name
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the location of f inside the store'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return the names to copy, relative to the store's parent path'''
        names = ['requires', '00changelog.i']
        for f in _data.split():
            names.append(self.pathjoiner('store', f))
        return names
Matt Mackall
|
r6903 | |||
Benoit Boissinot
|
class fncache(object):
    '''set of file names recorded in the store's 'fncache' file

    The file holds one name per line. It is read lazily on first use and
    kept in self.entries afterwards.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # set of decoded names; None until the file has been read
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file

        Raises util.Abort on a line that is empty or not newline
        terminated. self.entries is only replaced once the whole file has
        been read, so a failed load never leaves a truncated set behind.
        '''
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        entries = set()
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                entries.add(decodedir(line[:-1]))
        finally:
            # close the file even when aborting on an invalid entry
            fp.close()
        self.entries = entries

    def rewrite(self, files):
        '''replace the content of the fncache file with the names in files'''
        # files is walked twice below; materialize it so that a one-shot
        # iterable does not leave self.entries empty
        files = list(files)
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(encodedir(p) + '\n')
        finally:
            fp.close()
        self.entries = set(files)

    def add(self, fn):
        '''append fn to the fncache file and remember it in memory'''
        if self.entries is None:
            self._load()
        fp = self.opener('fncache', 'ab')
        try:
            fp.write(encodedir(fn) + '\n')
        finally:
            fp.close()
        # keep the in-memory set in sync with the file, otherwise
        # 'fn in self' stays false and callers append fn again and again
        self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
Adrian Buehlmann
|
r7229 | |||
class fncachestore(basicstore):
    '''store below <path>/store using hybridencode and an fncache file

    Every file below data/ that is opened for anything but reading is
    recorded in the fncache, which datafiles() uses to list the store.
    '''

    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        op = opener(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc

        def fncacheopener(path, mode='r', *args, **kw):
            writing = mode not in ('r', 'rb')
            # record data files the first time they are opened for writing
            if writing and path.startswith('data/') and path not in fnc:
                fnc.add(path)
            return op(hybridencode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the location of f inside the store'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the fncache entries

        Entries whose file cannot be stat'ed are skipped; if there were
        any, the fncache is rewritten without them once all entries have
        been visited.
        '''
        stale = False
        present = []
        for name in self.fncache:
            encoded = hybridencode(name)
            try:
                size = os.stat(self.pathjoiner(self.path, encoded)).st_size
            except OSError:
                # nonexistent entry
                stale = True
                continue
            yield name, encoded, size
            present.append(name)
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(present)

    def copylist(self):
        '''return the names to copy, relative to the store's parent path'''
        storenames = (_data + ' dh fncache').split()
        names = ['requires', '00changelog.i']
        for f in storenames:
            names.append(self.pathjoiner('store', f))
        return names
Patrick Mezard
|
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the repository requirements

    'store' together with 'fncache' selects fncachestore, 'store' alone
    selects encodedstore, anything else basicstore. pathjoiner defaults to
    os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        cls = basicstore
    elif 'fncache' in requirements:
        cls = fncachestore
    else:
        cls = encodedstore
    return cls(path, opener, pathjoiner)