manifest.py
1557 lines
| 51.5 KiB
| text/x-python
|
PythonLexer
/ mercurial / manifest.py
mpm@selenic.com
|
r1089 | # manifest.py - manifest revision class for mercurial | ||
# | ||||
Thomas Arendsen Hein
|
r4635 | # Copyright 2005-2007 Matt Mackall <mpm@selenic.com> | ||
mpm@selenic.com
|
r1089 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
mpm@selenic.com
|
r1089 | |||
Gregory Szorc
|
r27502 | from __future__ import absolute_import | ||
import array | ||||
import heapq | ||||
Martin von Zweigbergk
|
r24573 | import os | ||
Gregory Szorc
|
r27502 | import struct | ||
from .i18n import _ | ||||
from . import ( | ||||
error, | ||||
mdiff, | ||||
parsers, | ||||
revlog, | ||||
util, | ||||
) | ||||
mpm@selenic.com
|
r1089 | |||
Drew Gottlieb
|
r24322 | propertycache = util.propertycache | ||
Augie Fackler
|
r24225 | |||
Martin von Zweigbergk
|
r24572 | def _parsev1(data): | ||
Martin von Zweigbergk
|
r24524 | # This method does a little bit of excessive-looking | ||
# precondition checking. This is so that the behavior of this | ||||
# class exactly matches its C counterpart to try and help | ||||
# prevent surprise breakage for anyone that develops against | ||||
# the pure version. | ||||
if data and data[-1] != '\n': | ||||
raise ValueError('Manifest did not end in a newline.') | ||||
prev = None | ||||
for l in data.splitlines(): | ||||
if prev is not None and prev > l: | ||||
raise ValueError('Manifest lines not in sorted order.') | ||||
prev = l | ||||
f, n = l.split('\0') | ||||
if len(n) > 40: | ||||
yield f, revlog.bin(n[:40]), n[40:] | ||||
else: | ||||
yield f, revlog.bin(n), '' | ||||
Martin von Zweigbergk
|
r24572 | def _parsev2(data): | ||
metadataend = data.find('\n') | ||||
# Just ignore metadata for now | ||||
pos = metadataend + 1 | ||||
prevf = '' | ||||
while pos < len(data): | ||||
end = data.find('\n', pos + 1) # +1 to skip stem length byte | ||||
if end == -1: | ||||
raise ValueError('Manifest ended with incomplete file entry.') | ||||
stemlen = ord(data[pos]) | ||||
items = data[pos + 1:end].split('\0') | ||||
f = prevf[:stemlen] + items[0] | ||||
if prevf > f: | ||||
raise ValueError('Manifest entries not in sorted order.') | ||||
fl = items[1] | ||||
# Just ignore metadata (items[2:] for now) | ||||
n = data[end + 1:end + 21] | ||||
yield f, n, fl | ||||
pos = end + 22 | ||||
prevf = f | ||||
def _parse(data): | ||||
"""Generates (path, node, flags) tuples from a manifest text""" | ||||
if data.startswith('\0'): | ||||
return iter(_parsev2(data)) | ||||
else: | ||||
return iter(_parsev1(data)) | ||||
Martin von Zweigbergk
|
r24573 | def _text(it, usemanifestv2): | ||
Martin von Zweigbergk
|
r24525 | """Given an iterator over (path, node, flags) tuples, returns a manifest | ||
text""" | ||||
Martin von Zweigbergk
|
r24573 | if usemanifestv2: | ||
return _textv2(it) | ||||
else: | ||||
return _textv1(it) | ||||
def _textv1(it): | ||||
Martin von Zweigbergk
|
r24525 | files = [] | ||
lines = [] | ||||
_hex = revlog.hex | ||||
for f, n, fl in it: | ||||
files.append(f) | ||||
# if this is changed to support newlines in filenames, | ||||
# be sure to check the templates/ dir again (especially *-raw.tmpl) | ||||
lines.append("%s\0%s%s\n" % (f, _hex(n), fl)) | ||||
_checkforbidden(files) | ||||
return ''.join(lines) | ||||
Martin von Zweigbergk
|
r24573 | def _textv2(it): | ||
files = [] | ||||
lines = ['\0\n'] | ||||
prevf = '' | ||||
for f, n, fl in it: | ||||
files.append(f) | ||||
stem = os.path.commonprefix([prevf, f]) | ||||
stemlen = min(len(stem), 255) | ||||
lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) | ||||
prevf = f | ||||
_checkforbidden(files) | ||||
return ''.join(lines) | ||||
Maciej Fijalkowski
|
r30042 | class lazymanifestiter(object): | ||
def __init__(self, lm): | ||||
self.pos = 0 | ||||
self.lm = lm | ||||
Augie Fackler
|
r24225 | |||
Maciej Fijalkowski
|
r30042 | def __iter__(self): | ||
return self | ||||
Augie Fackler
|
r24223 | |||
Maciej Fijalkowski
|
r30042 | def next(self): | ||
try: | ||||
data, pos = self.lm._get(self.pos) | ||||
except IndexError: | ||||
raise StopIteration | ||||
if pos == -1: | ||||
self.pos += 1 | ||||
return data[0] | ||||
self.pos += 1 | ||||
zeropos = data.find('\x00', pos) | ||||
return data[pos:zeropos] | ||||
Augie Fackler
|
r24224 | |||
Maciej Fijalkowski
|
r30042 | class lazymanifestiterentries(object): | ||
def __init__(self, lm): | ||||
self.lm = lm | ||||
self.pos = 0 | ||||
Augie Fackler
|
r24225 | |||
def __iter__(self): | ||||
Maciej Fijalkowski
|
r30042 | return self | ||
def next(self): | ||||
try: | ||||
data, pos = self.lm._get(self.pos) | ||||
except IndexError: | ||||
raise StopIteration | ||||
if pos == -1: | ||||
self.pos += 1 | ||||
return data | ||||
zeropos = data.find('\x00', pos) | ||||
hashval = unhexlify(data, self.lm.extrainfo[self.pos], | ||||
zeropos + 1, 40) | ||||
flags = self.lm._getflags(data, self.pos, zeropos) | ||||
self.pos += 1 | ||||
return (data[pos:zeropos], hashval, flags) | ||||
def unhexlify(data, extra, pos, length): | ||||
s = data[pos:pos + length].decode('hex') | ||||
if extra: | ||||
s += chr(extra & 0xff) | ||||
return s | ||||
def _cmp(a, b): | ||||
return (a > b) - (a < b) | ||||
class _lazymanifest(object): | ||||
def __init__(self, data, positions=None, extrainfo=None, extradata=None): | ||||
if positions is None: | ||||
self.positions = self.findlines(data) | ||||
self.extrainfo = [0] * len(self.positions) | ||||
self.data = data | ||||
self.extradata = [] | ||||
else: | ||||
self.positions = positions[:] | ||||
self.extrainfo = extrainfo[:] | ||||
self.extradata = extradata[:] | ||||
self.data = data | ||||
def findlines(self, data): | ||||
if not data: | ||||
return [] | ||||
pos = data.find("\n") | ||||
if pos == -1 or data[-1] != '\n': | ||||
raise ValueError("Manifest did not end in a newline.") | ||||
positions = [0] | ||||
prev = data[:data.find('\x00')] | ||||
while pos < len(data) - 1 and pos != -1: | ||||
positions.append(pos + 1) | ||||
nexts = data[pos + 1:data.find('\x00', pos + 1)] | ||||
if nexts < prev: | ||||
raise ValueError("Manifest lines not in sorted order.") | ||||
prev = nexts | ||||
pos = data.find("\n", pos + 1) | ||||
return positions | ||||
def _get(self, index): | ||||
# get the position encoded in pos: | ||||
# positive number is an index in 'data' | ||||
# negative number is in extrapieces | ||||
pos = self.positions[index] | ||||
if pos >= 0: | ||||
return self.data, pos | ||||
return self.extradata[-pos - 1], -1 | ||||
def _getkey(self, pos): | ||||
if pos >= 0: | ||||
return self.data[pos:self.data.find('\x00', pos + 1)] | ||||
return self.extradata[-pos - 1][0] | ||||
def bsearch(self, key): | ||||
first = 0 | ||||
last = len(self.positions) - 1 | ||||
while first <= last: | ||||
midpoint = (first + last)//2 | ||||
nextpos = self.positions[midpoint] | ||||
candidate = self._getkey(nextpos) | ||||
r = _cmp(key, candidate) | ||||
if r == 0: | ||||
return midpoint | ||||
else: | ||||
if r < 0: | ||||
last = midpoint - 1 | ||||
else: | ||||
first = midpoint + 1 | ||||
return -1 | ||||
Augie Fackler
|
r24225 | |||
Maciej Fijalkowski
|
r30042 | def bsearch2(self, key): | ||
# same as the above, but will always return the position | ||||
# done for performance reasons | ||||
first = 0 | ||||
last = len(self.positions) - 1 | ||||
while first <= last: | ||||
midpoint = (first + last)//2 | ||||
nextpos = self.positions[midpoint] | ||||
candidate = self._getkey(nextpos) | ||||
r = _cmp(key, candidate) | ||||
if r == 0: | ||||
return (midpoint, True) | ||||
else: | ||||
if r < 0: | ||||
last = midpoint - 1 | ||||
else: | ||||
first = midpoint + 1 | ||||
return (first, False) | ||||
def __contains__(self, key): | ||||
return self.bsearch(key) != -1 | ||||
def _getflags(self, data, needle, pos): | ||||
start = pos + 41 | ||||
end = data.find("\n", start) | ||||
if end == -1: | ||||
end = len(data) - 1 | ||||
if start == end: | ||||
return '' | ||||
return self.data[start:end] | ||||
Martin von Zweigbergk
|
r24297 | |||
Maciej Fijalkowski
|
r30042 | def __getitem__(self, key): | ||
if not isinstance(key, str): | ||||
raise TypeError("getitem: manifest keys must be a string.") | ||||
needle = self.bsearch(key) | ||||
if needle == -1: | ||||
raise KeyError | ||||
data, pos = self._get(needle) | ||||
if pos == -1: | ||||
return (data[1], data[2]) | ||||
zeropos = data.find('\x00', pos) | ||||
assert 0 <= needle <= len(self.positions) | ||||
assert len(self.extrainfo) == len(self.positions) | ||||
hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) | ||||
flags = self._getflags(data, needle, zeropos) | ||||
return (hashval, flags) | ||||
def __delitem__(self, key): | ||||
needle, found = self.bsearch2(key) | ||||
if not found: | ||||
raise KeyError | ||||
cur = self.positions[needle] | ||||
self.positions = self.positions[:needle] + self.positions[needle + 1:] | ||||
self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] | ||||
if cur >= 0: | ||||
self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] | ||||
def __setitem__(self, key, value): | ||||
if not isinstance(key, str): | ||||
raise TypeError("setitem: manifest keys must be a string.") | ||||
if not isinstance(value, tuple) or len(value) != 2: | ||||
raise TypeError("Manifest values must be a tuple of (node, flags).") | ||||
hashval = value[0] | ||||
if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: | ||||
raise TypeError("node must be a 20-byte string") | ||||
flags = value[1] | ||||
if len(hashval) == 22: | ||||
hashval = hashval[:-1] | ||||
if not isinstance(flags, str) or len(flags) > 1: | ||||
raise TypeError("flags must a 0 or 1 byte string, got %r", flags) | ||||
needle, found = self.bsearch2(key) | ||||
if found: | ||||
# put the item | ||||
pos = self.positions[needle] | ||||
if pos < 0: | ||||
self.extradata[-pos - 1] = (key, hashval, value[1]) | ||||
else: | ||||
# just don't bother | ||||
self.extradata.append((key, hashval, value[1])) | ||||
self.positions[needle] = -len(self.extradata) | ||||
else: | ||||
# not found, put it in with extra positions | ||||
self.extradata.append((key, hashval, value[1])) | ||||
self.positions = (self.positions[:needle] + [-len(self.extradata)] | ||||
+ self.positions[needle:]) | ||||
self.extrainfo = (self.extrainfo[:needle] + [0] + | ||||
self.extrainfo[needle:]) | ||||
Martin von Zweigbergk
|
r24298 | |||
Matt Mackall
|
r2831 | def copy(self): | ||
Maciej Fijalkowski
|
r30042 | # XXX call _compact like in C? | ||
return _lazymanifest(self.data, self.positions, self.extrainfo, | ||||
self.extradata) | ||||
def _compact(self): | ||||
# hopefully not called TOO often | ||||
if len(self.extradata) == 0: | ||||
return | ||||
l = [] | ||||
last_cut = 0 | ||||
i = 0 | ||||
offset = 0 | ||||
self.extrainfo = [0] * len(self.positions) | ||||
while i < len(self.positions): | ||||
if self.positions[i] >= 0: | ||||
cur = self.positions[i] | ||||
last_cut = cur | ||||
while True: | ||||
self.positions[i] = offset | ||||
i += 1 | ||||
if i == len(self.positions) or self.positions[i] < 0: | ||||
break | ||||
offset += self.positions[i] - cur | ||||
cur = self.positions[i] | ||||
end_cut = self.data.find('\n', cur) | ||||
if end_cut != -1: | ||||
end_cut += 1 | ||||
offset += end_cut - cur | ||||
l.append(self.data[last_cut:end_cut]) | ||||
else: | ||||
while i < len(self.positions) and self.positions[i] < 0: | ||||
cur = self.positions[i] | ||||
t = self.extradata[-cur - 1] | ||||
l.append(self._pack(t)) | ||||
self.positions[i] = offset | ||||
if len(t[1]) > 20: | ||||
self.extrainfo[i] = ord(t[1][21]) | ||||
offset += len(l[-1]) | ||||
i += 1 | ||||
self.data = ''.join(l) | ||||
self.extradata = [] | ||||
def _pack(self, d): | ||||
return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' | ||||
def text(self): | ||||
self._compact() | ||||
return self.data | ||||
Augie Fackler
|
r24225 | |||
def diff(self, m2, clean=False): | ||||
'''Finds changes between the current manifest and m2.''' | ||||
Maciej Fijalkowski
|
r30042 | # XXX think whether efficiency matters here | ||
Augie Fackler
|
r24225 | diff = {} | ||
Maciej Fijalkowski
|
r30042 | for fn, e1, flags in self.iterentries(): | ||
Augie Fackler
|
r24225 | if fn not in m2: | ||
Maciej Fijalkowski
|
r30042 | diff[fn] = (e1, flags), (None, '') | ||
Augie Fackler
|
r24225 | else: | ||
e2 = m2[fn] | ||||
Maciej Fijalkowski
|
r30042 | if (e1, flags) != e2: | ||
diff[fn] = (e1, flags), e2 | ||||
Augie Fackler
|
r24225 | elif clean: | ||
diff[fn] = None | ||||
Maciej Fijalkowski
|
r30042 | for fn, e2, flags in m2.iterentries(): | ||
Augie Fackler
|
r24225 | if fn not in self: | ||
Maciej Fijalkowski
|
r30042 | diff[fn] = (None, ''), (e2, flags) | ||
Augie Fackler
|
r24225 | |||
return diff | ||||
Maciej Fijalkowski
|
r30042 | def iterentries(self): | ||
return lazymanifestiterentries(self) | ||||
def iterkeys(self): | ||||
return lazymanifestiter(self) | ||||
def __iter__(self): | ||||
return lazymanifestiter(self) | ||||
def __len__(self): | ||||
return len(self.positions) | ||||
Augie Fackler
|
r24225 | def filtercopy(self, filterfn): | ||
Maciej Fijalkowski
|
r30042 | # XXX should be optimized | ||
Augie Fackler
|
r24225 | c = _lazymanifest('') | ||
Martin von Zweigbergk
|
r24298 | for f, n, fl in self.iterentries(): | ||
Augie Fackler
|
r24225 | if filterfn(f): | ||
c[f] = n, fl | ||||
return c | ||||
Augie Fackler
|
r24226 | try: | ||
_lazymanifest = parsers.lazymanifest | ||||
except AttributeError: | ||||
pass | ||||
Augie Fackler
|
r24225 | class manifestdict(object): | ||
def __init__(self, data=''): | ||||
Martin von Zweigbergk
|
r24572 | if data.startswith('\0'): | ||
#_lazymanifest can not parse v2 | ||||
self._lm = _lazymanifest('') | ||||
for f, n, fl in _parsev2(data): | ||||
self._lm[f] = n, fl | ||||
else: | ||||
self._lm = _lazymanifest(data) | ||||
Augie Fackler
|
r24225 | |||
def __getitem__(self, key): | ||||
return self._lm[key][0] | ||||
Martin von Zweigbergk
|
r24277 | def find(self, key): | ||
return self._lm[key] | ||||
Augie Fackler
|
r24225 | def __len__(self): | ||
return len(self._lm) | ||||
def __setitem__(self, key, node): | ||||
self._lm[key] = node, self.flags(key, '') | ||||
def __contains__(self, key): | ||||
return key in self._lm | ||||
def __delitem__(self, key): | ||||
del self._lm[key] | ||||
Martin von Zweigbergk
|
r24295 | def __iter__(self): | ||
Martin von Zweigbergk
|
r24298 | return self._lm.__iter__() | ||
Augie Fackler
|
r24225 | |||
def iterkeys(self): | ||||
Martin von Zweigbergk
|
r24295 | return self._lm.iterkeys() | ||
def keys(self): | ||||
return list(self.iterkeys()) | ||||
Augie Fackler
|
r24225 | |||
Martin von Zweigbergk
|
r24184 | def filesnotin(self, m2): | ||
'''Set of files in this manifest that are not in the other''' | ||||
Tony Tung
|
r29056 | diff = self.diff(m2) | ||
files = set(filepath | ||||
for filepath, hashflags in diff.iteritems() | ||||
if hashflags[1][0] is None) | ||||
Martin von Zweigbergk
|
r24184 | return files | ||
Drew Gottlieb
|
r24322 | @propertycache | ||
def _dirs(self): | ||||
Drew Gottlieb
|
r24635 | return util.dirs(self) | ||
Drew Gottlieb
|
r24322 | |||
def dirs(self): | ||||
return self._dirs | ||||
Drew Gottlieb
|
r24324 | def hasdir(self, dir): | ||
return dir in self._dirs | ||||
Martin von Zweigbergk
|
r24685 | def _filesfastpath(self, match): | ||
'''Checks whether we can correctly and quickly iterate over matcher | ||||
files instead of over manifest files.''' | ||||
files = match.files() | ||||
return (len(files) < 100 and (match.isexact() or | ||||
Martin von Zweigbergk
|
r25276 | (match.prefix() and all(fn in self for fn in files)))) | ||
Martin von Zweigbergk
|
r24685 | |||
Drew Gottlieb
|
r24646 | def walk(self, match): | ||
'''Generates matching file names. | ||||
Equivalent to manifest.matches(match).iterkeys(), but without creating | ||||
an entirely new manifest. | ||||
It also reports nonexistent files by marking them bad with match.bad(). | ||||
''' | ||||
Martin von Zweigbergk
|
r24683 | if match.always(): | ||
for f in iter(self): | ||||
yield f | ||||
return | ||||
Drew Gottlieb
|
r24646 | fset = set(match.files()) | ||
# avoid the entire walk if we're only looking for specific files | ||||
Martin von Zweigbergk
|
r24685 | if self._filesfastpath(match): | ||
Martin von Zweigbergk
|
r24667 | for fn in sorted(fset): | ||
yield fn | ||||
Martin von Zweigbergk
|
r24682 | return | ||
Drew Gottlieb
|
r24646 | |||
for fn in self: | ||||
if fn in fset: | ||||
# specified pattern is the exact name | ||||
fset.remove(fn) | ||||
if match(fn): | ||||
yield fn | ||||
# for dirstate.walk, files=['.'] means "walk the whole tree". | ||||
# follow that here, too | ||||
fset.discard('.') | ||||
for fn in sorted(fset): | ||||
if not self.hasdir(fn): | ||||
match.bad(fn, None) | ||||
Martin von Zweigbergk
|
r23305 | def matches(self, match): | ||
'''generate a new manifest filtered by the match argument''' | ||||
if match.always(): | ||||
return self.copy() | ||||
Martin von Zweigbergk
|
r24685 | if self._filesfastpath(match): | ||
Martin von Zweigbergk
|
r24666 | m = manifestdict() | ||
lm = self._lm | ||||
for fn in match.files(): | ||||
if fn in lm: | ||||
m._lm[fn] = lm[fn] | ||||
return m | ||||
Martin von Zweigbergk
|
r23305 | |||
Martin von Zweigbergk
|
r24700 | m = manifestdict() | ||
Martin von Zweigbergk
|
r24664 | m._lm = self._lm.filtercopy(match) | ||
return m | ||||
Martin von Zweigbergk
|
r23305 | |||
Augie Fackler
|
r23756 | def diff(self, m2, clean=False): | ||
'''Finds changes between the current manifest and m2. | ||||
Args: | ||||
m2: the manifest to which this manifest should be compared. | ||||
clean: if true, include files unchanged between these manifests | ||||
with a None value in the returned dictionary. | ||||
The result is returned as a dict with filename as key and | ||||
values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the | ||||
nodeid in the current/other manifest and fl1/fl2 is the flag | ||||
in the current/other manifest. Where the file does not exist, | ||||
the nodeid will be None and the flags will be the empty | ||||
string. | ||||
''' | ||||
Augie Fackler
|
r24225 | return self._lm.diff(m2._lm, clean) | ||
def setflag(self, key, flag): | ||||
self._lm[key] = self[key], flag | ||||
def get(self, key, default=None): | ||||
try: | ||||
return self._lm[key][0] | ||||
except KeyError: | ||||
return default | ||||
Martin von Zweigbergk
|
r22965 | |||
Augie Fackler
|
r24225 | def flags(self, key, default=''): | ||
try: | ||||
return self._lm[key][1] | ||||
except KeyError: | ||||
return default | ||||
Martin von Zweigbergk
|
r22965 | |||
Augie Fackler
|
r24225 | def copy(self): | ||
Martin von Zweigbergk
|
r24700 | c = manifestdict() | ||
Augie Fackler
|
r24225 | c._lm = self._lm.copy() | ||
return c | ||||
def iteritems(self): | ||||
Martin von Zweigbergk
|
r24298 | return (x[:2] for x in self._lm.iterentries()) | ||
Matt Mackall
|
r2831 | |||
Martin von Zweigbergk
|
r28203 | def iterentries(self): | ||
return self._lm.iterentries() | ||||
Martin von Zweigbergk
|
r24573 | def text(self, usemanifestv2=False): | ||
if usemanifestv2: | ||||
return _textv2(self._lm.iterentries()) | ||||
else: | ||||
# use (probably) native version for v1 | ||||
return self._lm.text() | ||||
Augie Fackler
|
r22408 | |||
Augie Fackler
|
r22931 | def fastdelta(self, base, changes): | ||
"""Given a base manifest text as an array.array and a list of changes | ||||
relative to that text, compute a delta that can be used by revlog. | ||||
""" | ||||
delta = [] | ||||
dstart = None | ||||
dend = None | ||||
dline = [""] | ||||
start = 0 | ||||
# zero copy representation of base as a buffer | ||||
addbuf = util.buffer(base) | ||||
Durham Goode
|
r26871 | changes = list(changes) | ||
if len(changes) < 1000: | ||||
# start with a readonly loop that finds the offset of | ||||
# each line and creates the deltas | ||||
for f, todelete in changes: | ||||
# bs will either be the index of the item or the insert point | ||||
start, end = _msearch(addbuf, f, start) | ||||
if not todelete: | ||||
h, fl = self._lm[f] | ||||
l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) | ||||
else: | ||||
if start == end: | ||||
# item we want to delete was not found, error out | ||||
raise AssertionError( | ||||
_("failed to remove %s from manifest") % f) | ||||
l = "" | ||||
if dstart is not None and dstart <= start and dend >= start: | ||||
if dend < end: | ||||
dend = end | ||||
if l: | ||||
dline.append(l) | ||||
else: | ||||
if dstart is not None: | ||||
delta.append([dstart, dend, "".join(dline)]) | ||||
dstart = start | ||||
Augie Fackler
|
r22931 | dend = end | ||
Durham Goode
|
r26871 | dline = [l] | ||
Augie Fackler
|
r22931 | |||
Durham Goode
|
r26871 | if dstart is not None: | ||
delta.append([dstart, dend, "".join(dline)]) | ||||
# apply the delta to the base, and get a delta for addrevision | ||||
deltatext, arraytext = _addlistdelta(base, delta) | ||||
else: | ||||
# For large changes, it's much cheaper to just build the text and | ||||
# diff it. | ||||
arraytext = array.array('c', self.text()) | ||||
deltatext = mdiff.textdiff(base, arraytext) | ||||
Augie Fackler
|
r22931 | return arraytext, deltatext | ||
Augie Fackler
|
r22930 | def _msearch(m, s, lo=0, hi=None): | ||
'''return a tuple (start, end) that says where to find s within m. | ||||
If the string is found m[start:end] are the line containing | ||||
that string. If start == end the string was not found and | ||||
they indicate the proper sorted insertion point. | ||||
m should be a buffer or a string | ||||
s is a string''' | ||||
def advance(i, c): | ||||
while i < lenm and m[i] != c: | ||||
i += 1 | ||||
return i | ||||
if not s: | ||||
return (lo, lo) | ||||
lenm = len(m) | ||||
if not hi: | ||||
hi = lenm | ||||
while lo < hi: | ||||
mid = (lo + hi) // 2 | ||||
start = mid | ||||
while start > 0 and m[start - 1] != '\n': | ||||
start -= 1 | ||||
end = advance(start, '\0') | ||||
if m[start:end] < s: | ||||
# we know that after the null there are 40 bytes of sha1 | ||||
# this translates to the bisect lo = mid + 1 | ||||
lo = advance(end + 40, '\n') + 1 | ||||
else: | ||||
# this translates to the bisect hi = mid | ||||
hi = start | ||||
end = advance(lo, '\0') | ||||
found = m[lo:end] | ||||
if s == found: | ||||
# we know that after the null there are 40 bytes of sha1 | ||||
end = advance(end + 40, '\n') | ||||
return (lo, end + 1) | ||||
else: | ||||
return (lo, lo) | ||||
Augie Fackler
|
r22415 | def _checkforbidden(l): | ||
Augie Fackler
|
r22408 | """Check filenames for illegal characters.""" | ||
for f in l: | ||||
if '\n' in f or '\r' in f: | ||||
raise error.RevlogError( | ||||
_("'\\n' and '\\r' disallowed in filenames: %r") % f) | ||||
Augie Fackler
|
r22409 | # apply the changes collected during the bisect loop to our addlist | ||
# return a delta suitable for addrevision | ||||
Augie Fackler
|
r22415 | def _addlistdelta(addlist, x): | ||
Augie Fackler
|
r22409 | # for large addlist arrays, building a new array is cheaper | ||
# than repeatedly modifying the existing one | ||||
currentposition = 0 | ||||
newaddlist = array.array('c') | ||||
for start, end, content in x: | ||||
newaddlist += addlist[currentposition:start] | ||||
if content: | ||||
newaddlist += array.array('c', content) | ||||
currentposition = end | ||||
newaddlist += addlist[currentposition:] | ||||
deltatext = "".join(struct.pack(">lll", start, end, len(content)) | ||||
+ content for start, end, content in x) | ||||
return deltatext, newaddlist | ||||
Martin von Zweigbergk
|
r24401 | def _splittopdir(f): | ||
if '/' in f: | ||||
dir, subpath = f.split('/', 1) | ||||
return dir + '/', subpath | ||||
else: | ||||
return '', f | ||||
Augie Fackler
|
r26402 | _noop = lambda s: None | ||
Martin von Zweigbergk
|
r25222 | |||
Martin von Zweigbergk
|
r24401 | class treemanifest(object): | ||
Martin von Zweigbergk
|
r24403 | def __init__(self, dir='', text=''): | ||
self._dir = dir | ||||
Martin von Zweigbergk
|
r25091 | self._node = revlog.nullid | ||
Augie Fackler
|
r26402 | self._loadfunc = _noop | ||
self._copyfunc = _noop | ||||
Martin von Zweigbergk
|
r25220 | self._dirty = False | ||
Martin von Zweigbergk
|
r24401 | self._dirs = {} | ||
# Using _lazymanifest here is a little slower than plain old dicts | ||||
self._files = {} | ||||
self._flags = {} | ||||
Martin von Zweigbergk
|
r25220 | if text: | ||
def readsubtree(subdir, subm): | ||||
raise AssertionError('treemanifest constructor only accepts ' | ||||
'flat manifests') | ||||
self.parse(text, readsubtree) | ||||
self._dirty = True # Mark flat manifest dirty after parsing | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r24403 | def _subpath(self, path): | ||
return self._dir + path | ||||
Martin von Zweigbergk
|
r24401 | def __len__(self): | ||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | size = len(self._files) | ||
for m in self._dirs.values(): | ||||
size += m.__len__() | ||||
return size | ||||
Drew Gottlieb
|
r24551 | def _isempty(self): | ||
Martin von Zweigbergk
|
r25222 | self._load() # for consistency; already loaded by all callers | ||
Drew Gottlieb
|
r24551 | return (not self._files and (not self._dirs or | ||
Augie Fackler
|
r25151 | all(m._isempty() for m in self._dirs.values()))) | ||
Drew Gottlieb
|
r24551 | |||
Augie Fackler
|
r26400 | def __repr__(self): | ||
return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' % | ||||
Martin von Zweigbergk
|
r25222 | (self._dir, revlog.hex(self._node), | ||
Augie Fackler
|
r26402 | bool(self._loadfunc is _noop), | ||
Augie Fackler
|
r26400 | self._dirty, id(self))) | ||
Martin von Zweigbergk
|
r25091 | |||
def dir(self): | ||||
'''The directory that this tree manifest represents, including a | ||||
trailing '/'. Empty string for the repo root directory.''' | ||||
return self._dir | ||||
def node(self): | ||||
'''This node of this instance. nullid for unsaved instances. Should | ||||
be updated when the instance is read or written from a revlog. | ||||
''' | ||||
Martin von Zweigbergk
|
r25220 | assert not self._dirty | ||
Martin von Zweigbergk
|
r25091 | return self._node | ||
def setnode(self, node): | ||||
self._node = node | ||||
Martin von Zweigbergk
|
r25220 | self._dirty = False | ||
Martin von Zweigbergk
|
r24403 | |||
Martin von Zweigbergk
|
r28206 | def iterentries(self): | ||
self._load() | ||||
for p, n in sorted(self._dirs.items() + self._files.items()): | ||||
if p in self._files: | ||||
yield self._subpath(p), n, self._flags.get(p, '') | ||||
else: | ||||
for x in n.iterentries(): | ||||
yield x | ||||
Martin von Zweigbergk
|
r24401 | def iteritems(self): | ||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | for p, n in sorted(self._dirs.items() + self._files.items()): | ||
if p in self._files: | ||||
Martin von Zweigbergk
|
r24403 | yield self._subpath(p), n | ||
Martin von Zweigbergk
|
r24401 | else: | ||
Martin von Zweigbergk
|
r24403 | for f, sn in n.iteritems(): | ||
yield f, sn | ||||
Martin von Zweigbergk
|
r24401 | |||
def iterkeys(self): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | for p in sorted(self._dirs.keys() + self._files.keys()): | ||
if p in self._files: | ||||
Martin von Zweigbergk
|
r24403 | yield self._subpath(p) | ||
Martin von Zweigbergk
|
r24401 | else: | ||
for f in self._dirs[p].iterkeys(): | ||||
Martin von Zweigbergk
|
r24403 | yield f | ||
Martin von Zweigbergk
|
r24401 | |||
def keys(self): | ||||
return list(self.iterkeys()) | ||||
def __iter__(self): | ||||
return self.iterkeys() | ||||
def __contains__(self, f): | ||||
if f is None: | ||||
return False | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
return False | ||||
return self._dirs[dir].__contains__(subpath) | ||||
else: | ||||
return f in self._files | ||||
def get(self, f, default=None): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
return default | ||||
return self._dirs[dir].get(subpath, default) | ||||
else: | ||||
return self._files.get(f, default) | ||||
def __getitem__(self, f): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
return self._dirs[dir].__getitem__(subpath) | ||||
else: | ||||
return self._files[f] | ||||
def flags(self, f): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
return '' | ||||
return self._dirs[dir].flags(subpath) | ||||
else: | ||||
if f in self._dirs: | ||||
return '' | ||||
return self._flags.get(f, '') | ||||
def find(self, f): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
return self._dirs[dir].find(subpath) | ||||
else: | ||||
return self._files[f], self._flags.get(f, '') | ||||
def __delitem__(self, f): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
self._dirs[dir].__delitem__(subpath) | ||||
# If the directory is now empty, remove it | ||||
Drew Gottlieb
|
r24551 | if self._dirs[dir]._isempty(): | ||
Martin von Zweigbergk
|
r24401 | del self._dirs[dir] | ||
else: | ||||
del self._files[f] | ||||
if f in self._flags: | ||||
del self._flags[f] | ||||
Martin von Zweigbergk
|
r25220 | self._dirty = True | ||
Martin von Zweigbergk
|
r24401 | |||
def __setitem__(self, f, n): | ||||
assert n is not None | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
Martin von Zweigbergk
|
r24403 | self._dirs[dir] = treemanifest(self._subpath(dir)) | ||
Martin von Zweigbergk
|
r24401 | self._dirs[dir].__setitem__(subpath, n) | ||
else: | ||||
Martin von Zweigbergk
|
r24467 | self._files[f] = n[:21] # to match manifestdict's behavior | ||
Martin von Zweigbergk
|
r25220 | self._dirty = True | ||
Martin von Zweigbergk
|
r24401 | |||
Augie Fackler
|
r26402 | def _load(self): | ||
if self._loadfunc is not _noop: | ||||
lf, self._loadfunc = self._loadfunc, _noop | ||||
lf(self) | ||||
elif self._copyfunc is not _noop: | ||||
cf, self._copyfunc = self._copyfunc, _noop | ||||
cf(self) | ||||
Martin von Zweigbergk
|
r24401 | def setflag(self, f, flags): | ||
"""Set the flags (symlink, executable) for path f.""" | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
Martin von Zweigbergk
|
r24403 | self._dirs[dir] = treemanifest(self._subpath(dir)) | ||
Martin von Zweigbergk
|
r24401 | self._dirs[dir].setflag(subpath, flags) | ||
else: | ||||
self._flags[f] = flags | ||||
Martin von Zweigbergk
|
r25220 | self._dirty = True | ||
Martin von Zweigbergk
|
r24401 | |||
def copy(self): | ||||
Martin von Zweigbergk
|
r24403 | copy = treemanifest(self._dir) | ||
Martin von Zweigbergk
|
r25091 | copy._node = self._node | ||
Martin von Zweigbergk
|
r25220 | copy._dirty = self._dirty | ||
Augie Fackler
|
r26402 | if self._copyfunc is _noop: | ||
def _copyfunc(s): | ||||
self._load() | ||||
for d in self._dirs: | ||||
s._dirs[d] = self._dirs[d].copy() | ||||
s._files = dict.copy(self._files) | ||||
s._flags = dict.copy(self._flags) | ||||
if self._loadfunc is _noop: | ||||
_copyfunc(copy) | ||||
else: | ||||
copy._copyfunc = _copyfunc | ||||
else: | ||||
copy._copyfunc = self._copyfunc | ||||
Martin von Zweigbergk
|
r24401 | return copy | ||
def filesnotin(self, m2): | ||||
'''Set of files in this manifest that are not in the other''' | ||||
Martin von Zweigbergk
|
r24405 | files = set() | ||
def _filesnotin(t1, t2): | ||||
Martin von Zweigbergk
|
r25220 | if t1._node == t2._node and not t1._dirty and not t2._dirty: | ||
return | ||||
Martin von Zweigbergk
|
r25222 | t1._load() | ||
t2._load() | ||||
Martin von Zweigbergk
|
r24405 | for d, m1 in t1._dirs.iteritems(): | ||
if d in t2._dirs: | ||||
m2 = t2._dirs[d] | ||||
_filesnotin(m1, m2) | ||||
else: | ||||
files.update(m1.iterkeys()) | ||||
for fn in t1._files.iterkeys(): | ||||
if fn not in t2._files: | ||||
files.add(t1._subpath(fn)) | ||||
_filesnotin(self, m2) | ||||
Martin von Zweigbergk
|
r24401 | return files | ||
@propertycache | ||||
def _alldirs(self): | ||||
Drew Gottlieb
|
r24635 | return util.dirs(self) | ||
Martin von Zweigbergk
|
r24401 | |||
def dirs(self): | ||||
return self._alldirs | ||||
def hasdir(self, dir): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24406 | topdir, subdir = _splittopdir(dir) | ||
if topdir: | ||||
if topdir in self._dirs: | ||||
return self._dirs[topdir].hasdir(subdir) | ||||
return False | ||||
return (dir + '/') in self._dirs | ||||
Martin von Zweigbergk
|
r24401 | |||
Drew Gottlieb
|
r24646 | def walk(self, match): | ||
'''Generates matching file names. | ||||
Equivalent to manifest.matches(match).iterkeys(), but without creating | ||||
an entirely new manifest. | ||||
It also reports nonexistent files by marking them bad with match.bad(). | ||||
''' | ||||
Martin von Zweigbergk
|
r24683 | if match.always(): | ||
for f in iter(self): | ||||
yield f | ||||
return | ||||
Drew Gottlieb
|
r24646 | fset = set(match.files()) | ||
Drew Gottlieb
|
r24647 | for fn in self._walk(match): | ||
Drew Gottlieb
|
r24646 | if fn in fset: | ||
# specified pattern is the exact name | ||||
fset.remove(fn) | ||||
Drew Gottlieb
|
r24647 | yield fn | ||
Drew Gottlieb
|
r24646 | |||
# for dirstate.walk, files=['.'] means "walk the whole tree". | ||||
# follow that here, too | ||||
fset.discard('.') | ||||
for fn in sorted(fset): | ||||
if not self.hasdir(fn): | ||||
match.bad(fn, None) | ||||
Drew Gottlieb
|
r25188 | def _walk(self, match): | ||
'''Recursively generates matching file names for walk().''' | ||||
if not match.visitdir(self._dir[:-1] or '.'): | ||||
return | ||||
Drew Gottlieb
|
r24647 | |||
# yield this dir's files and walk its submanifests | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Drew Gottlieb
|
r24647 | for p in sorted(self._dirs.keys() + self._files.keys()): | ||
if p in self._files: | ||||
fullp = self._subpath(p) | ||||
if match(fullp): | ||||
yield fullp | ||||
else: | ||||
Drew Gottlieb
|
r25188 | for f in self._dirs[p]._walk(match): | ||
Drew Gottlieb
|
r24647 | yield f | ||
Martin von Zweigbergk
|
r24401 | def matches(self, match): | ||
'''generate a new manifest filtered by the match argument''' | ||||
if match.always(): | ||||
return self.copy() | ||||
Drew Gottlieb
|
r24552 | return self._matches(match) | ||
Drew Gottlieb
|
r25188 | def _matches(self, match): | ||
Drew Gottlieb
|
r24552 | '''recursively generate a new manifest filtered by the match argument. | ||
Drew Gottlieb
|
r25188 | ''' | ||
Martin von Zweigbergk
|
r27343 | |||
visit = match.visitdir(self._dir[:-1] or '.') | ||||
if visit == 'all': | ||||
return self.copy() | ||||
Drew Gottlieb
|
r24552 | ret = treemanifest(self._dir) | ||
Martin von Zweigbergk
|
r27343 | if not visit: | ||
Drew Gottlieb
|
r25188 | return ret | ||
Drew Gottlieb
|
r24552 | |||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Drew Gottlieb
|
r24552 | for fn in self._files: | ||
fullp = self._subpath(fn) | ||||
if not match(fullp): | ||||
continue | ||||
ret._files[fn] = self._files[fn] | ||||
if fn in self._flags: | ||||
ret._flags[fn] = self._flags[fn] | ||||
for dir, subm in self._dirs.iteritems(): | ||||
Drew Gottlieb
|
r25188 | m = subm._matches(match) | ||
Drew Gottlieb
|
r24552 | if not m._isempty(): | ||
ret._dirs[dir] = m | ||||
Martin von Zweigbergk
|
r25220 | if not ret._isempty(): | ||
ret._dirty = True | ||||
Drew Gottlieb
|
r24552 | return ret | ||
Martin von Zweigbergk
|
r24401 | |||
def diff(self, m2, clean=False): | ||||
'''Finds changes between the current manifest and m2. | ||||
Args: | ||||
m2: the manifest to which this manifest should be compared. | ||||
clean: if true, include files unchanged between these manifests | ||||
with a None value in the returned dictionary. | ||||
The result is returned as a dict with filename as key and | ||||
values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the | ||||
nodeid in the current/other manifest and fl1/fl2 is the flag | ||||
in the current/other manifest. Where the file does not exist, | ||||
the nodeid will be None and the flags will be the empty | ||||
string. | ||||
''' | ||||
Martin von Zweigbergk
|
r24404 | result = {} | ||
emptytree = treemanifest() | ||||
def _diff(t1, t2): | ||||
Martin von Zweigbergk
|
r25220 | if t1._node == t2._node and not t1._dirty and not t2._dirty: | ||
return | ||||
Martin von Zweigbergk
|
r25222 | t1._load() | ||
t2._load() | ||||
Martin von Zweigbergk
|
r24404 | for d, m1 in t1._dirs.iteritems(): | ||
m2 = t2._dirs.get(d, emptytree) | ||||
_diff(m1, m2) | ||||
for d, m2 in t2._dirs.iteritems(): | ||||
if d not in t1._dirs: | ||||
_diff(emptytree, m2) | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r24404 | for fn, n1 in t1._files.iteritems(): | ||
fl1 = t1._flags.get(fn, '') | ||||
n2 = t2._files.get(fn, None) | ||||
fl2 = t2._flags.get(fn, '') | ||||
if n1 != n2 or fl1 != fl2: | ||||
result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2)) | ||||
elif clean: | ||||
result[t1._subpath(fn)] = None | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r24404 | for fn, n2 in t2._files.iteritems(): | ||
if fn not in t1._files: | ||||
fl2 = t2._flags.get(fn, '') | ||||
result[t2._subpath(fn)] = ((None, ''), (n2, fl2)) | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r24404 | _diff(self, m2) | ||
return result | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r25221 | def unmodifiedsince(self, m2): | ||
return not self._dirty and not m2._dirty and self._node == m2._node | ||||
Martin von Zweigbergk
|
r25091 | def parse(self, text, readsubtree): | ||
Martin von Zweigbergk
|
r24781 | for f, n, fl in _parse(text): | ||
Martin von Zweigbergk
|
r27271 | if fl == 't': | ||
Martin von Zweigbergk
|
r25091 | f = f + '/' | ||
self._dirs[f] = readsubtree(self._subpath(f), n) | ||||
Martin von Zweigbergk
|
r25220 | elif '/' in f: | ||
# This is a flat manifest, so use __setitem__ and setflag rather | ||||
# than assigning directly to _files and _flags, so we can | ||||
# assign a path in a subdirectory, and to mark dirty (compared | ||||
# to nullid). | ||||
Martin von Zweigbergk
|
r25091 | self[f] = n | ||
if fl: | ||||
self.setflag(f, fl) | ||||
Martin von Zweigbergk
|
r25220 | else: | ||
# Assigning to _files and _flags avoids marking as dirty, | ||||
# and should be a little faster. | ||||
self._files[f] = n | ||||
if fl: | ||||
self._flags[f] = fl | ||||
Martin von Zweigbergk
|
r24781 | |||
Martin von Zweigbergk
|
r24573 | def text(self, usemanifestv2=False): | ||
Martin von Zweigbergk
|
r24401 | """Get the full data of this manifest as a bytestring.""" | ||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r28207 | return _text(self.iterentries(), usemanifestv2) | ||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r25091 | def dirtext(self, usemanifestv2=False): | ||
"""Get the full data of this directory as a bytestring. Make sure that | ||||
any submanifests have been written first, so their nodeids are correct. | ||||
""" | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r25091 | flags = self.flags | ||
Martin von Zweigbergk
|
r27271 | dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs] | ||
Martin von Zweigbergk
|
r25091 | files = [(f, self._files[f], flags(f)) for f in self._files] | ||
return _text(sorted(dirs + files), usemanifestv2) | ||||
Martin von Zweigbergk
|
r25222 | def read(self, gettext, readsubtree): | ||
Augie Fackler
|
r26402 | def _load_for_read(s): | ||
s.parse(gettext(), readsubtree) | ||||
s._dirty = False | ||||
self._loadfunc = _load_for_read | ||||
Martin von Zweigbergk
|
r25222 | |||
Martin von Zweigbergk
|
r25091 | def writesubtrees(self, m1, m2, writesubtree): | ||
Martin von Zweigbergk
|
r25222 | self._load() # for consistency; should never have any effect here | ||
Durham Goode
|
r29888 | m1._load() | ||
m2._load() | ||||
Martin von Zweigbergk
|
r25091 | emptytree = treemanifest() | ||
for d, subm in self._dirs.iteritems(): | ||||
subp1 = m1._dirs.get(d, emptytree)._node | ||||
subp2 = m2._dirs.get(d, emptytree)._node | ||||
if subp1 == revlog.nullid: | ||||
subp1, subp2 = subp2, subp1 | ||||
writesubtree(subm, subp1, subp2) | ||||
Durham Goode
|
r29824 | class manifestrevlog(revlog.revlog): | ||
'''A revlog that stores manifest texts. This is responsible for caching the | ||||
full-text manifest contents. | ||||
''' | ||||
Durham Goode
|
r29941 | def __init__(self, opener, dir='', dirlogcache=None): | ||
Durham Goode
|
r29824 | # During normal operations, we expect to deal with not more than four | ||
# revs at a time (such as during commit --amend). When rebasing large | ||||
# stacks of commits, the number can go up, hence the config knob below. | ||||
cachesize = 4 | ||||
Durham Goode
|
r29940 | usetreemanifest = False | ||
usemanifestv2 = False | ||||
Durham Goode
|
r29824 | opts = getattr(opener, 'options', None) | ||
if opts is not None: | ||||
cachesize = opts.get('manifestcachesize', cachesize) | ||||
Durham Goode
|
r29940 | usetreemanifest = opts.get('treemanifest', usetreemanifest) | ||
usemanifestv2 = opts.get('manifestv2', usemanifestv2) | ||||
self._treeondisk = usetreemanifest | ||||
self._usemanifestv2 = usemanifestv2 | ||||
Durham Goode
|
r29824 | self._fulltextcache = util.lrucachedict(cachesize) | ||
Durham Goode
|
r29940 | indexfile = "00manifest.i" | ||
if dir: | ||||
assert self._treeondisk, 'opts is %r' % opts | ||||
if not dir.endswith('/'): | ||||
dir = dir + '/' | ||||
indexfile = "meta/" + dir + "00manifest.i" | ||||
self._dir = dir | ||||
Durham Goode
|
r29941 | # The dirlogcache is kept on the root manifest log | ||
if dir: | ||||
self._dirlogcache = dirlogcache | ||||
else: | ||||
self._dirlogcache = {'': self} | ||||
Durham Goode
|
r29940 | |||
FUJIWARA Katsunori
|
r29998 | super(manifestrevlog, self).__init__(opener, indexfile, | ||
checkambig=bool(dir)) | ||||
Durham Goode
|
r29940 | |||
Durham Goode
|
r29824 | @property | ||
def fulltextcache(self): | ||||
return self._fulltextcache | ||||
def clearcaches(self): | ||||
super(manifestrevlog, self).clearcaches() | ||||
self._fulltextcache.clear() | ||||
Durham Goode
|
r29941 | self._dirlogcache = {'': self} | ||
def dirlog(self, dir): | ||||
if dir: | ||||
assert self._treeondisk | ||||
if dir not in self._dirlogcache: | ||||
self._dirlogcache[dir] = manifestrevlog(self.opener, dir, | ||||
self._dirlogcache) | ||||
return self._dirlogcache[dir] | ||||
Durham Goode
|
r29824 | |||
Durham Goode
|
r29961 | def add(self, m, transaction, link, p1, p2, added, removed): | ||
if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta') | ||||
and not self._usemanifestv2): | ||||
# If our first parent is in the manifest cache, we can | ||||
# compute a delta here using properties we know about the | ||||
# manifest up-front, which may save time later for the | ||||
# revlog layer. | ||||
_checkforbidden(added) | ||||
# combine the changed lists into one sorted iterator | ||||
work = heapq.merge([(x, False) for x in added], | ||||
[(x, True) for x in removed]) | ||||
arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work) | ||||
cachedelta = self.rev(p1), deltatext | ||||
text = util.buffer(arraytext) | ||||
n = self.addrevision(text, transaction, link, p1, p2, cachedelta) | ||||
else: | ||||
# The first parent manifest isn't already loaded, so we'll | ||||
# just encode a fulltext of the manifest and pass that | ||||
# through to the revlog layer, and let it handle the delta | ||||
# process. | ||||
if self._treeondisk: | ||||
m1 = self.read(p1) | ||||
m2 = self.read(p2) | ||||
n = self._addtree(m, transaction, link, m1, m2) | ||||
arraytext = None | ||||
else: | ||||
text = m.text(self._usemanifestv2) | ||||
n = self.addrevision(text, transaction, link, p1, p2) | ||||
arraytext = array.array('c', text) | ||||
Martin von Zweigbergk
|
r30209 | if arraytext is not None: | ||
self.fulltextcache[n] = arraytext | ||||
Durham Goode
|
r29961 | |||
return n | ||||
def _addtree(self, m, transaction, link, m1, m2): | ||||
# If the manifest is unchanged compared to one parent, | ||||
# don't write a new revision | ||||
if m.unmodifiedsince(m1) or m.unmodifiedsince(m2): | ||||
return m.node() | ||||
def writesubtree(subm, subp1, subp2): | ||||
sublog = self.dirlog(subm.dir()) | ||||
sublog.add(subm, transaction, link, subp1, subp2, None, None) | ||||
m.writesubtrees(m1, m2, writesubtree) | ||||
text = m.dirtext(self._usemanifestv2) | ||||
# Double-check whether contents are unchanged to one parent | ||||
if text == m1.dirtext(self._usemanifestv2): | ||||
n = m1.node() | ||||
elif text == m2.dirtext(self._usemanifestv2): | ||||
n = m2.node() | ||||
else: | ||||
n = self.addrevision(text, transaction, link, m1.node(), m2.node()) | ||||
# Save nodeid so parent manifest can calculate its nodeid | ||||
m.setnode(n) | ||||
return n | ||||
Durham Goode
|
r29825 | class manifestlog(object): | ||
"""A collection class representing the collection of manifest snapshots | ||||
referenced by commits in the repository. | ||||
In this situation, 'manifest' refers to the abstract concept of a snapshot | ||||
of the list of files in the given commit. Consumers of the output of this | ||||
class do not care about the implementation details of the actual manifests | ||||
they receive (i.e. tree or flat or lazily loaded, etc).""" | ||||
Durham Goode
|
r29826 | def __init__(self, opener, repo): | ||
self._repo = repo | ||||
Durham Goode
|
r29825 | |||
Durham Goode
|
r29959 | usetreemanifest = False | ||
opts = getattr(opener, 'options', None) | ||||
if opts is not None: | ||||
usetreemanifest = opts.get('treemanifest', usetreemanifest) | ||||
self._treeinmem = usetreemanifest | ||||
Durham Goode
|
r30219 | self._oldmanifest = repo._constructmanifest() | ||
self._revlog = self._oldmanifest | ||||
Durham Goode
|
r30292 | # A cache of the manifestctx or treemanifestctx for each directory | ||
self._dirmancache = {} | ||||
Durham Goode
|
r29825 | # We'll separate this into it's own cache once oldmanifest is no longer | ||
# used | ||||
Durham Goode
|
r30219 | self._mancache = self._oldmanifest._mancache | ||
Durham Goode
|
r30292 | self._dirmancache[''] = self._mancache | ||
# A future patch makes this use the same config value as the existing | ||||
# mancache | ||||
self.cachesize = 4 | ||||
Durham Goode
|
r29826 | |||
Durham Goode
|
r29825 | def __getitem__(self, node): | ||
Durham Goode
|
r30290 | """Retrieves the manifest instance for the given node. Throws a | ||
LookupError if not found. | ||||
Durham Goode
|
r29825 | """ | ||
Durham Goode
|
r30291 | return self.get('', node) | ||
Durham Goode
|
r29825 | |||
Durham Goode
|
r30291 | def get(self, dir, node): | ||
"""Retrieves the manifest instance for the given node. Throws a | ||||
LookupError if not found. | ||||
""" | ||||
Durham Goode
|
r30292 | if node in self._dirmancache.get(dir, ()): | ||
cachemf = self._dirmancache[dir][node] | ||||
# The old manifest may put non-ctx manifests in the cache, so | ||||
# skip those since they don't implement the full api. | ||||
if (isinstance(cachemf, manifestctx) or | ||||
isinstance(cachemf, treemanifestctx)): | ||||
return cachemf | ||||
Durham Goode
|
r30291 | if dir: | ||
if self._revlog._treeondisk: | ||||
dirlog = self._revlog.dirlog(dir) | ||||
if node not in dirlog.nodemap: | ||||
raise LookupError(node, dirlog.indexfile, | ||||
_('no node')) | ||||
m = treemanifestctx(self._repo, dir, node) | ||||
else: | ||||
raise error.Abort( | ||||
_("cannot ask for manifest directory '%s' in a flat " | ||||
"manifest") % dir) | ||||
Durham Goode
|
r29907 | else: | ||
Durham Goode
|
r30291 | if node not in self._revlog.nodemap: | ||
raise LookupError(node, self._revlog.indexfile, | ||||
_('no node')) | ||||
if self._treeinmem: | ||||
m = treemanifestctx(self._repo, '', node) | ||||
else: | ||||
m = manifestctx(self._repo, node) | ||||
Durham Goode
|
r30292 | |||
if node != revlog.nullid: | ||||
mancache = self._dirmancache.get(dir) | ||||
if not mancache: | ||||
mancache = util.lrucachedict(self.cachesize) | ||||
self._dirmancache[dir] = mancache | ||||
mancache[node] = m | ||||
Durham Goode
|
r29825 | return m | ||
Durham Goode
|
r29962 | def add(self, m, transaction, link, p1, p2, added, removed): | ||
return self._revlog.add(m, transaction, link, p1, p2, added, removed) | ||||
Durham Goode
|
r29926 | class manifestctx(object): | ||
Durham Goode
|
r29825 | """A class representing a single revision of a manifest, including its | ||
contents, its parent revs, and its linkrev. | ||||
""" | ||||
Durham Goode
|
r30220 | def __init__(self, repo, node): | ||
self._repo = repo | ||||
Durham Goode
|
r29926 | self._data = None | ||
Durham Goode
|
r29825 | |||
self._node = node | ||||
Durham Goode
|
r29907 | |||
# TODO: We eventually want p1, p2, and linkrev exposed on this class, | ||||
# but let's add it later when something needs it and we can load it | ||||
# lazily. | ||||
#self.p1, self.p2 = revlog.parents(node) | ||||
#rev = revlog.rev(node) | ||||
#self.linkrev = revlog.linkrev(rev) | ||||
Durham Goode
|
r29825 | |||
def node(self): | ||||
return self._node | ||||
Durham Goode
|
r29926 | def read(self): | ||
if not self._data: | ||||
if self._node == revlog.nullid: | ||||
self._data = manifestdict() | ||||
else: | ||||
Durham Goode
|
r30220 | rl = self._repo.manifestlog._revlog | ||
text = rl.revision(self._node) | ||||
Durham Goode
|
r29926 | arraytext = array.array('c', text) | ||
Durham Goode
|
r30220 | rl._fulltextcache[self._node] = arraytext | ||
Durham Goode
|
r29926 | self._data = manifestdict(text) | ||
return self._data | ||||
Durham Goode
|
r30293 | def readfast(self, shallow=False): | ||
Durham Goode
|
r30294 | '''Calls either readdelta or read, based on which would be less work. | ||
readdelta is called if the delta is against the p1, and therefore can be | ||||
read quickly. | ||||
If `shallow` is True, nothing changes since this is a flat manifest. | ||||
''' | ||||
Durham Goode
|
r30220 | rl = self._repo.manifestlog._revlog | ||
Durham Goode
|
r29939 | r = rl.rev(self._node) | ||
deltaparent = rl.deltaparent(r) | ||||
if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r): | ||||
return self.readdelta() | ||||
return self.read() | ||||
Durham Goode
|
r30293 | def readdelta(self, shallow=False): | ||
Durham Goode
|
r30295 | '''Returns a manifest containing just the entries that are present | ||
in this manifest, but not in its p1 manifest. This is efficient to read | ||||
if the revlog delta is already p1. | ||||
Changing the value of `shallow` has no effect on flat manifests. | ||||
''' | ||||
Durham Goode
|
r30220 | revlog = self._repo.manifestlog._revlog | ||
Durham Goode
|
r29938 | if revlog._usemanifestv2: | ||
# Need to perform a slow delta | ||||
r0 = revlog.deltaparent(revlog.rev(self._node)) | ||||
Durham Goode
|
r30220 | m0 = manifestctx(self._repo, revlog.node(r0)).read() | ||
Durham Goode
|
r29938 | m1 = self.read() | ||
md = manifestdict() | ||||
for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems(): | ||||
if n1: | ||||
md[f] = n1 | ||||
if fl1: | ||||
md.setflag(f, fl1) | ||||
return md | ||||
r = revlog.rev(self._node) | ||||
d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) | ||||
return manifestdict(d) | ||||
Durham Goode
|
r29926 | class treemanifestctx(object): | ||
Durham Goode
|
r30221 | def __init__(self, repo, dir, node): | ||
self._repo = repo | ||||
Durham Goode
|
r29907 | self._dir = dir | ||
Durham Goode
|
r29926 | self._data = None | ||
Durham Goode
|
r29907 | |||
self._node = node | ||||
# TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that | ||||
# we can instantiate treemanifestctx objects for directories we don't | ||||
# have on disk. | ||||
#self.p1, self.p2 = revlog.parents(node) | ||||
#rev = revlog.rev(node) | ||||
#self.linkrev = revlog.linkrev(rev) | ||||
Durham Goode
|
r30221 | def _revlog(self): | ||
return self._repo.manifestlog._revlog.dirlog(self._dir) | ||||
Durham Goode
|
r29926 | def read(self): | ||
if not self._data: | ||||
Durham Goode
|
r30221 | rl = self._revlog() | ||
Durham Goode
|
r29926 | if self._node == revlog.nullid: | ||
self._data = treemanifest() | ||||
Durham Goode
|
r30221 | elif rl._treeondisk: | ||
Durham Goode
|
r29926 | m = treemanifest(dir=self._dir) | ||
def gettext(): | ||||
Durham Goode
|
r30221 | return rl.revision(self._node) | ||
Durham Goode
|
r29926 | def readsubtree(dir, subm): | ||
Durham Goode
|
r30221 | return treemanifestctx(self._repo, dir, subm).read() | ||
Durham Goode
|
r29926 | m.read(gettext, readsubtree) | ||
m.setnode(self._node) | ||||
self._data = m | ||||
else: | ||||
Durham Goode
|
r30221 | text = revlog.revision(self._node) | ||
Durham Goode
|
r29926 | arraytext = array.array('c', text) | ||
Durham Goode
|
r30221 | rl.fulltextcache[self._node] = arraytext | ||
Durham Goode
|
r29926 | self._data = treemanifest(dir=self._dir, text=text) | ||
return self._data | ||||
Durham Goode
|
r29907 | |||
def node(self): | ||||
return self._node | ||||
Durham Goode
|
r30293 | def readdelta(self, shallow=False): | ||
Durham Goode
|
r30295 | '''Returns a manifest containing just the entries that are present | ||
in this manifest, but not in its p1 manifest. This is efficient to read | ||||
if the revlog delta is already p1. | ||||
If `shallow` is True, this will read the delta for this directory, | ||||
without recursively reading subdirectory manifests. Instead, any | ||||
subdirectory entry will be reported as it appears in the manifest, i.e. | ||||
the subdirectory will be reported among files and distinguished only by | ||||
its 't' flag. | ||||
''' | ||||
Durham Goode
|
r30221 | revlog = self._revlog() | ||
Durham Goode
|
r30293 | if shallow and not revlog._usemanifestv2: | ||
r = revlog.rev(self._node) | ||||
d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) | ||||
return manifestdict(d) | ||||
else: | ||||
# Need to perform a slow delta | ||||
r0 = revlog.deltaparent(revlog.rev(self._node)) | ||||
m0 = treemanifestctx(self._repo, self._dir, revlog.node(r0)).read() | ||||
m1 = self.read() | ||||
md = treemanifest(dir=self._dir) | ||||
for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems(): | ||||
if n1: | ||||
md[f] = n1 | ||||
if fl1: | ||||
md.setflag(f, fl1) | ||||
return md | ||||
Durham Goode
|
r29938 | |||
Durham Goode
|
r30293 | def readfast(self, shallow=False): | ||
Durham Goode
|
r30294 | '''Calls either readdelta or read, based on which would be less work. | ||
readdelta is called if the delta is against the p1, and therefore can be | ||||
read quickly. | ||||
If `shallow` is True, it only returns the entries from this manifest, | ||||
and not any submanifests. | ||||
''' | ||||
Durham Goode
|
r30221 | rl = self._revlog() | ||
Durham Goode
|
r29939 | r = rl.rev(self._node) | ||
deltaparent = rl.deltaparent(r) | ||||
Durham Goode
|
r30293 | if (deltaparent != revlog.nullrev and | ||
deltaparent in rl.parentrevs(r)): | ||||
return self.readdelta(shallow=shallow) | ||||
if shallow: | ||||
return manifestdict(rl.revision(self._node)) | ||||
else: | ||||
return self.read() | ||||
Durham Goode
|
r29939 | |||
Durham Goode
|
r29824 | class manifest(manifestrevlog): | ||
Martin von Zweigbergk
|
r25185 | def __init__(self, opener, dir='', dirlogcache=None): | ||
'''The 'dir' and 'dirlogcache' arguments are for internal use by | ||||
manifest.manifest only. External users should create a root manifest | ||||
log with manifest.manifest(opener) and call dirlog() on it. | ||||
''' | ||||
Durham Goode
|
r24033 | # During normal operations, we expect to deal with not more than four | ||
# revs at a time (such as during commit --amend). When rebasing large | ||||
# stacks of commits, the number can go up, hence the config knob below. | ||||
cachesize = 4 | ||||
Martin von Zweigbergk
|
r24402 | usetreemanifest = False | ||
Durham Goode
|
r24033 | opts = getattr(opener, 'options', None) | ||
if opts is not None: | ||||
cachesize = opts.get('manifestcachesize', cachesize) | ||||
Martin von Zweigbergk
|
r24956 | usetreemanifest = opts.get('treemanifest', usetreemanifest) | ||
Durham Goode
|
r24033 | self._mancache = util.lrucachedict(cachesize) | ||
Martin von Zweigbergk
|
r24701 | self._treeinmem = usetreemanifest | ||
Durham Goode
|
r29941 | super(manifest, self).__init__(opener, dir=dir, dirlogcache=dirlogcache) | ||
Martin von Zweigbergk
|
r24402 | |||
def _newmanifest(self, data=''): | ||||
Martin von Zweigbergk
|
r24701 | if self._treeinmem: | ||
Martin von Zweigbergk
|
r25091 | return treemanifest(self._dir, data) | ||
Martin von Zweigbergk
|
r24402 | return manifestdict(data) | ||
mpm@selenic.com
|
r1089 | |||
Martin von Zweigbergk
|
r25185 | def dirlog(self, dir): | ||
Durham Goode
|
r29941 | """This overrides the base revlog implementation to allow construction | ||
'manifest' types instead of manifestrevlog types. This is only needed | ||||
until we migrate off the 'manifest' type.""" | ||||
Martin von Zweigbergk
|
r28203 | if dir: | ||
assert self._treeondisk | ||||
Martin von Zweigbergk
|
r25185 | if dir not in self._dirlogcache: | ||
self._dirlogcache[dir] = manifest(self.opener, dir, | ||||
self._dirlogcache) | ||||
return self._dirlogcache[dir] | ||||
mpm@selenic.com
|
r1089 | def read(self, node): | ||
Matt Mackall
|
r7634 | if node == revlog.nullid: | ||
Martin von Zweigbergk
|
r24402 | return self._newmanifest() # don't upset local cache | ||
Siddharth Agarwal
|
r18604 | if node in self._mancache: | ||
Durham Goode
|
r29926 | cached = self._mancache[node] | ||
if (isinstance(cached, manifestctx) or | ||||
isinstance(cached, treemanifestctx)): | ||||
cached = cached.read() | ||||
return cached | ||||
Martin von Zweigbergk
|
r25091 | if self._treeondisk: | ||
Martin von Zweigbergk
|
r25222 | def gettext(): | ||
return self.revision(node) | ||||
Martin von Zweigbergk
|
r25091 | def readsubtree(dir, subm): | ||
Martin von Zweigbergk
|
r25185 | return self.dirlog(dir).read(subm) | ||
Martin von Zweigbergk
|
r25091 | m = self._newmanifest() | ||
Martin von Zweigbergk
|
r25222 | m.read(gettext, readsubtree) | ||
Martin von Zweigbergk
|
r25091 | m.setnode(node) | ||
arraytext = None | ||||
else: | ||||
Martin von Zweigbergk
|
r25222 | text = self.revision(node) | ||
Martin von Zweigbergk
|
r25091 | m = self._newmanifest(text) | ||
arraytext = array.array('c', text) | ||||
Durham Goode
|
r29823 | self._mancache[node] = m | ||
Martin von Zweigbergk
|
r30209 | if arraytext is not None: | ||
self.fulltextcache[node] = arraytext | ||||
Martin von Zweigbergk
|
r24147 | return m | ||
mpm@selenic.com
|
r1089 | |||
Vadim Gelfer
|
r2320 | def find(self, node, f): | ||
'''look up entry for a single file efficiently. | ||||
Alexis S. L. Carvalho
|
r4159 | return (node, flags) pair if found, (None, None) if not.''' | ||
Martin von Zweigbergk
|
r24292 | m = self.read(node) | ||
Augie Fackler
|
r24225 | try: | ||
Martin von Zweigbergk
|
r24292 | return m.find(f) | ||
Augie Fackler
|
r24225 | except KeyError: | ||
Vadim Gelfer
|
r2320 | return None, None | ||
Gregory Szorc
|
r27466 | def clearcaches(self): | ||
super(manifest, self).clearcaches() | ||||
self._mancache.clear() | ||||