manifest.py
1022 lines
| 32.8 KiB
| text/x-python
|
PythonLexer
/ mercurial / manifest.py
mpm@selenic.com
|
r1089 | # manifest.py - manifest revision class for mercurial | ||
# | ||||
Thomas Arendsen Hein
|
r4635 | # Copyright 2005-2007 Matt Mackall <mpm@selenic.com> | ||
mpm@selenic.com
|
r1089 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
mpm@selenic.com
|
r1089 | |||
Matt Mackall
|
r3891 | from i18n import _ | ||
Drew Gottlieb
|
r24635 | import mdiff, parsers, error, revlog, util | ||
Simon Heimberg
|
r8312 | import array, struct | ||
Martin von Zweigbergk
|
r24573 | import os | ||
mpm@selenic.com
|
r1089 | |||
Drew Gottlieb
|
r24322 | propertycache = util.propertycache | ||
Augie Fackler
|
r24225 | |||
Martin von Zweigbergk
|
r24572 | def _parsev1(data): | ||
Martin von Zweigbergk
|
r24524 | # This method does a little bit of excessive-looking | ||
# precondition checking. This is so that the behavior of this | ||||
# class exactly matches its C counterpart to try and help | ||||
# prevent surprise breakage for anyone that develops against | ||||
# the pure version. | ||||
if data and data[-1] != '\n': | ||||
raise ValueError('Manifest did not end in a newline.') | ||||
prev = None | ||||
for l in data.splitlines(): | ||||
if prev is not None and prev > l: | ||||
raise ValueError('Manifest lines not in sorted order.') | ||||
prev = l | ||||
f, n = l.split('\0') | ||||
if len(n) > 40: | ||||
yield f, revlog.bin(n[:40]), n[40:] | ||||
else: | ||||
yield f, revlog.bin(n), '' | ||||
Martin von Zweigbergk
|
r24572 | def _parsev2(data): | ||
metadataend = data.find('\n') | ||||
# Just ignore metadata for now | ||||
pos = metadataend + 1 | ||||
prevf = '' | ||||
while pos < len(data): | ||||
end = data.find('\n', pos + 1) # +1 to skip stem length byte | ||||
if end == -1: | ||||
raise ValueError('Manifest ended with incomplete file entry.') | ||||
stemlen = ord(data[pos]) | ||||
items = data[pos + 1:end].split('\0') | ||||
f = prevf[:stemlen] + items[0] | ||||
if prevf > f: | ||||
raise ValueError('Manifest entries not in sorted order.') | ||||
fl = items[1] | ||||
# Just ignore metadata (items[2:] for now) | ||||
n = data[end + 1:end + 21] | ||||
yield f, n, fl | ||||
pos = end + 22 | ||||
prevf = f | ||||
def _parse(data): | ||||
"""Generates (path, node, flags) tuples from a manifest text""" | ||||
if data.startswith('\0'): | ||||
return iter(_parsev2(data)) | ||||
else: | ||||
return iter(_parsev1(data)) | ||||
Martin von Zweigbergk
|
r24573 | def _text(it, usemanifestv2): | ||
Martin von Zweigbergk
|
r24525 | """Given an iterator over (path, node, flags) tuples, returns a manifest | ||
text""" | ||||
Martin von Zweigbergk
|
r24573 | if usemanifestv2: | ||
return _textv2(it) | ||||
else: | ||||
return _textv1(it) | ||||
def _textv1(it): | ||||
Martin von Zweigbergk
|
r24525 | files = [] | ||
lines = [] | ||||
_hex = revlog.hex | ||||
for f, n, fl in it: | ||||
files.append(f) | ||||
# if this is changed to support newlines in filenames, | ||||
# be sure to check the templates/ dir again (especially *-raw.tmpl) | ||||
lines.append("%s\0%s%s\n" % (f, _hex(n), fl)) | ||||
_checkforbidden(files) | ||||
return ''.join(lines) | ||||
Martin von Zweigbergk
|
r24573 | def _textv2(it): | ||
files = [] | ||||
lines = ['\0\n'] | ||||
prevf = '' | ||||
for f, n, fl in it: | ||||
files.append(f) | ||||
stem = os.path.commonprefix([prevf, f]) | ||||
stemlen = min(len(stem), 255) | ||||
lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) | ||||
prevf = f | ||||
_checkforbidden(files) | ||||
return ''.join(lines) | ||||
Augie Fackler
|
r24225 | class _lazymanifest(dict): | ||
"""This is the pure implementation of lazymanifest. | ||||
It has not been optimized *at all* and is not lazy. | ||||
""" | ||||
Augie Fackler
|
r24223 | |||
Augie Fackler
|
r24225 | def __init__(self, data): | ||
dict.__init__(self) | ||||
Martin von Zweigbergk
|
r24524 | for f, n, fl in _parse(data): | ||
self[f] = n, fl | ||||
Augie Fackler
|
r24224 | |||
Augie Fackler
|
r23594 | def __setitem__(self, k, v): | ||
Augie Fackler
|
r24225 | node, flag = v | ||
assert node is not None | ||||
if len(node) > 21: | ||||
node = node[:21] # match c implementation behavior | ||||
dict.__setitem__(self, k, (node, flag)) | ||||
def __iter__(self): | ||||
Martin von Zweigbergk
|
r24298 | return iter(sorted(dict.keys(self))) | ||
Augie Fackler
|
r24225 | |||
Martin von Zweigbergk
|
r24297 | def iterkeys(self): | ||
return iter(sorted(dict.keys(self))) | ||||
Martin von Zweigbergk
|
r24298 | def iterentries(self): | ||
return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) | ||||
Matt Mackall
|
r2831 | def copy(self): | ||
Augie Fackler
|
r24225 | c = _lazymanifest('') | ||
c.update(self) | ||||
return c | ||||
def diff(self, m2, clean=False): | ||||
'''Finds changes between the current manifest and m2.''' | ||||
diff = {} | ||||
for fn, e1 in self.iteritems(): | ||||
if fn not in m2: | ||||
diff[fn] = e1, (None, '') | ||||
else: | ||||
e2 = m2[fn] | ||||
if e1 != e2: | ||||
diff[fn] = e1, e2 | ||||
elif clean: | ||||
diff[fn] = None | ||||
for fn, e2 in m2.iteritems(): | ||||
if fn not in self: | ||||
diff[fn] = (None, ''), e2 | ||||
return diff | ||||
def filtercopy(self, filterfn): | ||||
c = _lazymanifest('') | ||||
Martin von Zweigbergk
|
r24298 | for f, n, fl in self.iterentries(): | ||
Augie Fackler
|
r24225 | if filterfn(f): | ||
c[f] = n, fl | ||||
return c | ||||
def text(self): | ||||
"""Get the full data of this manifest as a bytestring.""" | ||||
Martin von Zweigbergk
|
r24573 | return _textv1(self.iterentries()) | ||
Augie Fackler
|
r24225 | |||
Augie Fackler
|
r24226 | try: | ||
_lazymanifest = parsers.lazymanifest | ||||
except AttributeError: | ||||
pass | ||||
Augie Fackler
|
r24225 | class manifestdict(object): | ||
def __init__(self, data=''): | ||||
Martin von Zweigbergk
|
r24572 | if data.startswith('\0'): | ||
#_lazymanifest can not parse v2 | ||||
self._lm = _lazymanifest('') | ||||
for f, n, fl in _parsev2(data): | ||||
self._lm[f] = n, fl | ||||
else: | ||||
self._lm = _lazymanifest(data) | ||||
Augie Fackler
|
r24225 | |||
def __getitem__(self, key): | ||||
return self._lm[key][0] | ||||
Martin von Zweigbergk
|
r24277 | def find(self, key): | ||
return self._lm[key] | ||||
Augie Fackler
|
r24225 | def __len__(self): | ||
return len(self._lm) | ||||
def __setitem__(self, key, node): | ||||
self._lm[key] = node, self.flags(key, '') | ||||
def __contains__(self, key): | ||||
return key in self._lm | ||||
def __delitem__(self, key): | ||||
del self._lm[key] | ||||
Martin von Zweigbergk
|
r24295 | def __iter__(self): | ||
Martin von Zweigbergk
|
r24298 | return self._lm.__iter__() | ||
Augie Fackler
|
r24225 | |||
def iterkeys(self): | ||||
Martin von Zweigbergk
|
r24295 | return self._lm.iterkeys() | ||
def keys(self): | ||||
return list(self.iterkeys()) | ||||
Augie Fackler
|
r24225 | |||
Martin von Zweigbergk
|
r24184 | def filesnotin(self, m2): | ||
'''Set of files in this manifest that are not in the other''' | ||||
Martin von Zweigbergk
|
r24298 | files = set(self) | ||
files.difference_update(m2) | ||||
Martin von Zweigbergk
|
r24184 | return files | ||
Drew Gottlieb
|
r24322 | @propertycache | ||
def _dirs(self): | ||||
Drew Gottlieb
|
r24635 | return util.dirs(self) | ||
Drew Gottlieb
|
r24322 | |||
def dirs(self): | ||||
return self._dirs | ||||
Drew Gottlieb
|
r24324 | def hasdir(self, dir): | ||
return dir in self._dirs | ||||
Martin von Zweigbergk
|
r24685 | def _filesfastpath(self, match): | ||
'''Checks whether we can correctly and quickly iterate over matcher | ||||
files instead of over manifest files.''' | ||||
files = match.files() | ||||
return (len(files) < 100 and (match.isexact() or | ||||
Martin von Zweigbergk
|
r25276 | (match.prefix() and all(fn in self for fn in files)))) | ||
Martin von Zweigbergk
|
r24685 | |||
Drew Gottlieb
|
r24646 | def walk(self, match): | ||
'''Generates matching file names. | ||||
Equivalent to manifest.matches(match).iterkeys(), but without creating | ||||
an entirely new manifest. | ||||
It also reports nonexistent files by marking them bad with match.bad(). | ||||
''' | ||||
Martin von Zweigbergk
|
r24683 | if match.always(): | ||
for f in iter(self): | ||||
yield f | ||||
return | ||||
Drew Gottlieb
|
r24646 | fset = set(match.files()) | ||
# avoid the entire walk if we're only looking for specific files | ||||
Martin von Zweigbergk
|
r24685 | if self._filesfastpath(match): | ||
Martin von Zweigbergk
|
r24667 | for fn in sorted(fset): | ||
yield fn | ||||
Martin von Zweigbergk
|
r24682 | return | ||
Drew Gottlieb
|
r24646 | |||
for fn in self: | ||||
if fn in fset: | ||||
# specified pattern is the exact name | ||||
fset.remove(fn) | ||||
if match(fn): | ||||
yield fn | ||||
# for dirstate.walk, files=['.'] means "walk the whole tree". | ||||
# follow that here, too | ||||
fset.discard('.') | ||||
for fn in sorted(fset): | ||||
if not self.hasdir(fn): | ||||
match.bad(fn, None) | ||||
Martin von Zweigbergk
|
r23305 | def matches(self, match): | ||
'''generate a new manifest filtered by the match argument''' | ||||
if match.always(): | ||||
return self.copy() | ||||
Martin von Zweigbergk
|
r24685 | if self._filesfastpath(match): | ||
Martin von Zweigbergk
|
r24666 | m = manifestdict() | ||
lm = self._lm | ||||
for fn in match.files(): | ||||
if fn in lm: | ||||
m._lm[fn] = lm[fn] | ||||
return m | ||||
Martin von Zweigbergk
|
r23305 | |||
Martin von Zweigbergk
|
r24700 | m = manifestdict() | ||
Martin von Zweigbergk
|
r24664 | m._lm = self._lm.filtercopy(match) | ||
return m | ||||
Martin von Zweigbergk
|
r23305 | |||
Augie Fackler
|
r23756 | def diff(self, m2, clean=False): | ||
'''Finds changes between the current manifest and m2. | ||||
Args: | ||||
m2: the manifest to which this manifest should be compared. | ||||
clean: if true, include files unchanged between these manifests | ||||
with a None value in the returned dictionary. | ||||
The result is returned as a dict with filename as key and | ||||
values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the | ||||
nodeid in the current/other manifest and fl1/fl2 is the flag | ||||
in the current/other manifest. Where the file does not exist, | ||||
the nodeid will be None and the flags will be the empty | ||||
string. | ||||
''' | ||||
Augie Fackler
|
r24225 | return self._lm.diff(m2._lm, clean) | ||
def setflag(self, key, flag): | ||||
self._lm[key] = self[key], flag | ||||
def get(self, key, default=None): | ||||
try: | ||||
return self._lm[key][0] | ||||
except KeyError: | ||||
return default | ||||
Martin von Zweigbergk
|
r22965 | |||
Augie Fackler
|
r24225 | def flags(self, key, default=''): | ||
try: | ||||
return self._lm[key][1] | ||||
except KeyError: | ||||
return default | ||||
Martin von Zweigbergk
|
r22965 | |||
Augie Fackler
|
r24225 | def copy(self): | ||
Martin von Zweigbergk
|
r24700 | c = manifestdict() | ||
Augie Fackler
|
r24225 | c._lm = self._lm.copy() | ||
return c | ||||
def iteritems(self): | ||||
Martin von Zweigbergk
|
r24298 | return (x[:2] for x in self._lm.iterentries()) | ||
Matt Mackall
|
r2831 | |||
Martin von Zweigbergk
|
r24573 | def text(self, usemanifestv2=False): | ||
if usemanifestv2: | ||||
return _textv2(self._lm.iterentries()) | ||||
else: | ||||
# use (probably) native version for v1 | ||||
return self._lm.text() | ||||
Augie Fackler
|
r22408 | |||
Augie Fackler
|
r22931 | def fastdelta(self, base, changes): | ||
"""Given a base manifest text as an array.array and a list of changes | ||||
relative to that text, compute a delta that can be used by revlog. | ||||
""" | ||||
delta = [] | ||||
dstart = None | ||||
dend = None | ||||
dline = [""] | ||||
start = 0 | ||||
# zero copy representation of base as a buffer | ||||
addbuf = util.buffer(base) | ||||
# start with a readonly loop that finds the offset of | ||||
# each line and creates the deltas | ||||
for f, todelete in changes: | ||||
# bs will either be the index of the item or the insert point | ||||
start, end = _msearch(addbuf, f, start) | ||||
if not todelete: | ||||
Augie Fackler
|
r24225 | h, fl = self._lm[f] | ||
l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) | ||||
Augie Fackler
|
r22931 | else: | ||
if start == end: | ||||
# item we want to delete was not found, error out | ||||
raise AssertionError( | ||||
_("failed to remove %s from manifest") % f) | ||||
l = "" | ||||
if dstart is not None and dstart <= start and dend >= start: | ||||
if dend < end: | ||||
dend = end | ||||
if l: | ||||
dline.append(l) | ||||
else: | ||||
if dstart is not None: | ||||
delta.append([dstart, dend, "".join(dline)]) | ||||
dstart = start | ||||
dend = end | ||||
dline = [l] | ||||
if dstart is not None: | ||||
delta.append([dstart, dend, "".join(dline)]) | ||||
# apply the delta to the base, and get a delta for addrevision | ||||
deltatext, arraytext = _addlistdelta(base, delta) | ||||
return arraytext, deltatext | ||||
Augie Fackler
|
r22930 | def _msearch(m, s, lo=0, hi=None): | ||
'''return a tuple (start, end) that says where to find s within m. | ||||
If the string is found m[start:end] are the line containing | ||||
that string. If start == end the string was not found and | ||||
they indicate the proper sorted insertion point. | ||||
m should be a buffer or a string | ||||
s is a string''' | ||||
def advance(i, c): | ||||
while i < lenm and m[i] != c: | ||||
i += 1 | ||||
return i | ||||
if not s: | ||||
return (lo, lo) | ||||
lenm = len(m) | ||||
if not hi: | ||||
hi = lenm | ||||
while lo < hi: | ||||
mid = (lo + hi) // 2 | ||||
start = mid | ||||
while start > 0 and m[start - 1] != '\n': | ||||
start -= 1 | ||||
end = advance(start, '\0') | ||||
if m[start:end] < s: | ||||
# we know that after the null there are 40 bytes of sha1 | ||||
# this translates to the bisect lo = mid + 1 | ||||
lo = advance(end + 40, '\n') + 1 | ||||
else: | ||||
# this translates to the bisect hi = mid | ||||
hi = start | ||||
end = advance(lo, '\0') | ||||
found = m[lo:end] | ||||
if s == found: | ||||
# we know that after the null there are 40 bytes of sha1 | ||||
end = advance(end + 40, '\n') | ||||
return (lo, end + 1) | ||||
else: | ||||
return (lo, lo) | ||||
Augie Fackler
|
r22415 | def _checkforbidden(l): | ||
Augie Fackler
|
r22408 | """Check filenames for illegal characters.""" | ||
for f in l: | ||||
if '\n' in f or '\r' in f: | ||||
raise error.RevlogError( | ||||
_("'\\n' and '\\r' disallowed in filenames: %r") % f) | ||||
Augie Fackler
|
r22409 | # apply the changes collected during the bisect loop to our addlist | ||
# return a delta suitable for addrevision | ||||
Augie Fackler
|
r22415 | def _addlistdelta(addlist, x): | ||
Augie Fackler
|
r22409 | # for large addlist arrays, building a new array is cheaper | ||
# than repeatedly modifying the existing one | ||||
currentposition = 0 | ||||
newaddlist = array.array('c') | ||||
for start, end, content in x: | ||||
newaddlist += addlist[currentposition:start] | ||||
if content: | ||||
newaddlist += array.array('c', content) | ||||
currentposition = end | ||||
newaddlist += addlist[currentposition:] | ||||
deltatext = "".join(struct.pack(">lll", start, end, len(content)) | ||||
+ content for start, end, content in x) | ||||
return deltatext, newaddlist | ||||
Martin von Zweigbergk
|
r24401 | def _splittopdir(f): | ||
if '/' in f: | ||||
dir, subpath = f.split('/', 1) | ||||
return dir + '/', subpath | ||||
else: | ||||
return '', f | ||||
Martin von Zweigbergk
|
r25222 | _noop = lambda: None | ||
Martin von Zweigbergk
|
r24401 | class treemanifest(object): | ||
Martin von Zweigbergk
|
r24403 | def __init__(self, dir='', text=''): | ||
self._dir = dir | ||||
Martin von Zweigbergk
|
r25091 | self._node = revlog.nullid | ||
Martin von Zweigbergk
|
r25222 | self._load = _noop | ||
Martin von Zweigbergk
|
r25220 | self._dirty = False | ||
Martin von Zweigbergk
|
r24401 | self._dirs = {} | ||
# Using _lazymanifest here is a little slower than plain old dicts | ||||
self._files = {} | ||||
self._flags = {} | ||||
Martin von Zweigbergk
|
r25220 | if text: | ||
def readsubtree(subdir, subm): | ||||
raise AssertionError('treemanifest constructor only accepts ' | ||||
'flat manifests') | ||||
self.parse(text, readsubtree) | ||||
self._dirty = True # Mark flat manifest dirty after parsing | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r24403 | def _subpath(self, path): | ||
return self._dir + path | ||||
Martin von Zweigbergk
|
r24401 | def __len__(self): | ||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | size = len(self._files) | ||
for m in self._dirs.values(): | ||||
size += m.__len__() | ||||
return size | ||||
Drew Gottlieb
|
r24551 | def _isempty(self): | ||
Martin von Zweigbergk
|
r25222 | self._load() # for consistency; already loaded by all callers | ||
Drew Gottlieb
|
r24551 | return (not self._files and (not self._dirs or | ||
Augie Fackler
|
r25151 | all(m._isempty() for m in self._dirs.values()))) | ||
Drew Gottlieb
|
r24551 | |||
Martin von Zweigbergk
|
r24403 | def __str__(self): | ||
Martin von Zweigbergk
|
r25222 | return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s>' % | ||
(self._dir, revlog.hex(self._node), | ||||
bool(self._load is _noop), | ||||
self._dirty)) | ||||
Martin von Zweigbergk
|
r25091 | |||
def dir(self): | ||||
'''The directory that this tree manifest represents, including a | ||||
trailing '/'. Empty string for the repo root directory.''' | ||||
return self._dir | ||||
def node(self): | ||||
'''This node of this instance. nullid for unsaved instances. Should | ||||
be updated when the instance is read or written from a revlog. | ||||
''' | ||||
Martin von Zweigbergk
|
r25220 | assert not self._dirty | ||
Martin von Zweigbergk
|
r25091 | return self._node | ||
def setnode(self, node): | ||||
self._node = node | ||||
Martin von Zweigbergk
|
r25220 | self._dirty = False | ||
Martin von Zweigbergk
|
r24403 | |||
Martin von Zweigbergk
|
r24401 | def iteritems(self): | ||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | for p, n in sorted(self._dirs.items() + self._files.items()): | ||
if p in self._files: | ||||
Martin von Zweigbergk
|
r24403 | yield self._subpath(p), n | ||
Martin von Zweigbergk
|
r24401 | else: | ||
Martin von Zweigbergk
|
r24403 | for f, sn in n.iteritems(): | ||
yield f, sn | ||||
Martin von Zweigbergk
|
r24401 | |||
def iterkeys(self): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | for p in sorted(self._dirs.keys() + self._files.keys()): | ||
if p in self._files: | ||||
Martin von Zweigbergk
|
r24403 | yield self._subpath(p) | ||
Martin von Zweigbergk
|
r24401 | else: | ||
for f in self._dirs[p].iterkeys(): | ||||
Martin von Zweigbergk
|
r24403 | yield f | ||
Martin von Zweigbergk
|
r24401 | |||
def keys(self): | ||||
return list(self.iterkeys()) | ||||
def __iter__(self): | ||||
return self.iterkeys() | ||||
def __contains__(self, f): | ||||
if f is None: | ||||
return False | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
return False | ||||
return self._dirs[dir].__contains__(subpath) | ||||
else: | ||||
return f in self._files | ||||
def get(self, f, default=None): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
return default | ||||
return self._dirs[dir].get(subpath, default) | ||||
else: | ||||
return self._files.get(f, default) | ||||
def __getitem__(self, f): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
return self._dirs[dir].__getitem__(subpath) | ||||
else: | ||||
return self._files[f] | ||||
def flags(self, f): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
return '' | ||||
return self._dirs[dir].flags(subpath) | ||||
else: | ||||
if f in self._dirs: | ||||
return '' | ||||
return self._flags.get(f, '') | ||||
def find(self, f): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
return self._dirs[dir].find(subpath) | ||||
else: | ||||
return self._files[f], self._flags.get(f, '') | ||||
def __delitem__(self, f): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
self._dirs[dir].__delitem__(subpath) | ||||
# If the directory is now empty, remove it | ||||
Drew Gottlieb
|
r24551 | if self._dirs[dir]._isempty(): | ||
Martin von Zweigbergk
|
r24401 | del self._dirs[dir] | ||
else: | ||||
del self._files[f] | ||||
if f in self._flags: | ||||
del self._flags[f] | ||||
Martin von Zweigbergk
|
r25220 | self._dirty = True | ||
Martin von Zweigbergk
|
r24401 | |||
def __setitem__(self, f, n): | ||||
assert n is not None | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
Martin von Zweigbergk
|
r24403 | self._dirs[dir] = treemanifest(self._subpath(dir)) | ||
Martin von Zweigbergk
|
r24401 | self._dirs[dir].__setitem__(subpath, n) | ||
else: | ||||
Martin von Zweigbergk
|
r24467 | self._files[f] = n[:21] # to match manifestdict's behavior | ||
Martin von Zweigbergk
|
r25220 | self._dirty = True | ||
Martin von Zweigbergk
|
r24401 | |||
def setflag(self, f, flags): | ||||
"""Set the flags (symlink, executable) for path f.""" | ||||
Martin von Zweigbergk
|
r25091 | assert 'd' not in flags | ||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24401 | dir, subpath = _splittopdir(f) | ||
if dir: | ||||
if dir not in self._dirs: | ||||
Martin von Zweigbergk
|
r24403 | self._dirs[dir] = treemanifest(self._subpath(dir)) | ||
Martin von Zweigbergk
|
r24401 | self._dirs[dir].setflag(subpath, flags) | ||
else: | ||||
self._flags[f] = flags | ||||
Martin von Zweigbergk
|
r25220 | self._dirty = True | ||
Martin von Zweigbergk
|
r24401 | |||
def copy(self): | ||||
Martin von Zweigbergk
|
r24403 | copy = treemanifest(self._dir) | ||
Martin von Zweigbergk
|
r25091 | copy._node = self._node | ||
Martin von Zweigbergk
|
r25220 | copy._dirty = self._dirty | ||
Martin von Zweigbergk
|
r25222 | def _load(): | ||
self._load() | ||||
for d in self._dirs: | ||||
copy._dirs[d] = self._dirs[d].copy() | ||||
copy._files = dict.copy(self._files) | ||||
copy._flags = dict.copy(self._flags) | ||||
copy._load = _noop | ||||
copy._load = _load | ||||
if self._load == _noop: | ||||
# Chaining _load if it's _noop is functionally correct, but the | ||||
# chain may end up excessively long (stack overflow), and | ||||
# will prevent garbage collection of 'self'. | ||||
copy._load() | ||||
Martin von Zweigbergk
|
r24401 | return copy | ||
def filesnotin(self, m2): | ||||
'''Set of files in this manifest that are not in the other''' | ||||
Martin von Zweigbergk
|
r24405 | files = set() | ||
def _filesnotin(t1, t2): | ||||
Martin von Zweigbergk
|
r25220 | if t1._node == t2._node and not t1._dirty and not t2._dirty: | ||
return | ||||
Martin von Zweigbergk
|
r25222 | t1._load() | ||
t2._load() | ||||
Martin von Zweigbergk
|
r24405 | for d, m1 in t1._dirs.iteritems(): | ||
if d in t2._dirs: | ||||
m2 = t2._dirs[d] | ||||
_filesnotin(m1, m2) | ||||
else: | ||||
files.update(m1.iterkeys()) | ||||
for fn in t1._files.iterkeys(): | ||||
if fn not in t2._files: | ||||
files.add(t1._subpath(fn)) | ||||
_filesnotin(self, m2) | ||||
Martin von Zweigbergk
|
r24401 | return files | ||
@propertycache | ||||
def _alldirs(self): | ||||
Drew Gottlieb
|
r24635 | return util.dirs(self) | ||
Martin von Zweigbergk
|
r24401 | |||
def dirs(self): | ||||
return self._alldirs | ||||
def hasdir(self, dir): | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24406 | topdir, subdir = _splittopdir(dir) | ||
if topdir: | ||||
if topdir in self._dirs: | ||||
return self._dirs[topdir].hasdir(subdir) | ||||
return False | ||||
return (dir + '/') in self._dirs | ||||
Martin von Zweigbergk
|
r24401 | |||
Drew Gottlieb
|
r24646 | def walk(self, match): | ||
'''Generates matching file names. | ||||
Equivalent to manifest.matches(match).iterkeys(), but without creating | ||||
an entirely new manifest. | ||||
It also reports nonexistent files by marking them bad with match.bad(). | ||||
''' | ||||
Martin von Zweigbergk
|
r24683 | if match.always(): | ||
for f in iter(self): | ||||
yield f | ||||
return | ||||
Drew Gottlieb
|
r24646 | fset = set(match.files()) | ||
Drew Gottlieb
|
r24647 | for fn in self._walk(match): | ||
Drew Gottlieb
|
r24646 | if fn in fset: | ||
# specified pattern is the exact name | ||||
fset.remove(fn) | ||||
Drew Gottlieb
|
r24647 | yield fn | ||
Drew Gottlieb
|
r24646 | |||
# for dirstate.walk, files=['.'] means "walk the whole tree". | ||||
# follow that here, too | ||||
fset.discard('.') | ||||
for fn in sorted(fset): | ||||
if not self.hasdir(fn): | ||||
match.bad(fn, None) | ||||
Drew Gottlieb
|
r25188 | def _walk(self, match): | ||
'''Recursively generates matching file names for walk().''' | ||||
if not match.visitdir(self._dir[:-1] or '.'): | ||||
return | ||||
Drew Gottlieb
|
r24647 | |||
# yield this dir's files and walk its submanifests | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Drew Gottlieb
|
r24647 | for p in sorted(self._dirs.keys() + self._files.keys()): | ||
if p in self._files: | ||||
fullp = self._subpath(p) | ||||
if match(fullp): | ||||
yield fullp | ||||
else: | ||||
Drew Gottlieb
|
r25188 | for f in self._dirs[p]._walk(match): | ||
Drew Gottlieb
|
r24647 | yield f | ||
Martin von Zweigbergk
|
r24401 | def matches(self, match): | ||
'''generate a new manifest filtered by the match argument''' | ||||
if match.always(): | ||||
return self.copy() | ||||
Drew Gottlieb
|
r24552 | return self._matches(match) | ||
Drew Gottlieb
|
r25188 | def _matches(self, match): | ||
Drew Gottlieb
|
r24552 | '''recursively generate a new manifest filtered by the match argument. | ||
Drew Gottlieb
|
r25188 | ''' | ||
Drew Gottlieb
|
r24552 | ret = treemanifest(self._dir) | ||
Drew Gottlieb
|
r25188 | |||
if not match.visitdir(self._dir[:-1] or '.'): | ||||
return ret | ||||
Drew Gottlieb
|
r24552 | |||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Drew Gottlieb
|
r24552 | for fn in self._files: | ||
fullp = self._subpath(fn) | ||||
if not match(fullp): | ||||
continue | ||||
ret._files[fn] = self._files[fn] | ||||
if fn in self._flags: | ||||
ret._flags[fn] = self._flags[fn] | ||||
for dir, subm in self._dirs.iteritems(): | ||||
Drew Gottlieb
|
r25188 | m = subm._matches(match) | ||
Drew Gottlieb
|
r24552 | if not m._isempty(): | ||
ret._dirs[dir] = m | ||||
Martin von Zweigbergk
|
r25220 | if not ret._isempty(): | ||
ret._dirty = True | ||||
Drew Gottlieb
|
r24552 | return ret | ||
Martin von Zweigbergk
|
r24401 | |||
def diff(self, m2, clean=False): | ||||
'''Finds changes between the current manifest and m2. | ||||
Args: | ||||
m2: the manifest to which this manifest should be compared. | ||||
clean: if true, include files unchanged between these manifests | ||||
with a None value in the returned dictionary. | ||||
The result is returned as a dict with filename as key and | ||||
values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the | ||||
nodeid in the current/other manifest and fl1/fl2 is the flag | ||||
in the current/other manifest. Where the file does not exist, | ||||
the nodeid will be None and the flags will be the empty | ||||
string. | ||||
''' | ||||
Martin von Zweigbergk
|
r24404 | result = {} | ||
emptytree = treemanifest() | ||||
def _diff(t1, t2): | ||||
Martin von Zweigbergk
|
r25220 | if t1._node == t2._node and not t1._dirty and not t2._dirty: | ||
return | ||||
Martin von Zweigbergk
|
r25222 | t1._load() | ||
t2._load() | ||||
Martin von Zweigbergk
|
r24404 | for d, m1 in t1._dirs.iteritems(): | ||
m2 = t2._dirs.get(d, emptytree) | ||||
_diff(m1, m2) | ||||
for d, m2 in t2._dirs.iteritems(): | ||||
if d not in t1._dirs: | ||||
_diff(emptytree, m2) | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r24404 | for fn, n1 in t1._files.iteritems(): | ||
fl1 = t1._flags.get(fn, '') | ||||
n2 = t2._files.get(fn, None) | ||||
fl2 = t2._flags.get(fn, '') | ||||
if n1 != n2 or fl1 != fl2: | ||||
result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2)) | ||||
elif clean: | ||||
result[t1._subpath(fn)] = None | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r24404 | for fn, n2 in t2._files.iteritems(): | ||
if fn not in t1._files: | ||||
fl2 = t2._flags.get(fn, '') | ||||
result[t2._subpath(fn)] = ((None, ''), (n2, fl2)) | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r24404 | _diff(self, m2) | ||
return result | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r25221 | def unmodifiedsince(self, m2): | ||
return not self._dirty and not m2._dirty and self._node == m2._node | ||||
Martin von Zweigbergk
|
r25091 | def parse(self, text, readsubtree): | ||
Martin von Zweigbergk
|
r24781 | for f, n, fl in _parse(text): | ||
Martin von Zweigbergk
|
r25091 | if fl == 'd': | ||
f = f + '/' | ||||
self._dirs[f] = readsubtree(self._subpath(f), n) | ||||
Martin von Zweigbergk
|
r25220 | elif '/' in f: | ||
# This is a flat manifest, so use __setitem__ and setflag rather | ||||
# than assigning directly to _files and _flags, so we can | ||||
# assign a path in a subdirectory, and to mark dirty (compared | ||||
# to nullid). | ||||
Martin von Zweigbergk
|
r25091 | self[f] = n | ||
if fl: | ||||
self.setflag(f, fl) | ||||
Martin von Zweigbergk
|
r25220 | else: | ||
# Assigning to _files and _flags avoids marking as dirty, | ||||
# and should be a little faster. | ||||
self._files[f] = n | ||||
if fl: | ||||
self._flags[f] = fl | ||||
Martin von Zweigbergk
|
r24781 | |||
Martin von Zweigbergk
|
r24573 | def text(self, usemanifestv2=False): | ||
Martin von Zweigbergk
|
r24401 | """Get the full data of this manifest as a bytestring.""" | ||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r24525 | flags = self.flags | ||
Martin von Zweigbergk
|
r24573 | return _text(((f, self[f], flags(f)) for f in self.keys()), | ||
usemanifestv2) | ||||
Martin von Zweigbergk
|
r24401 | |||
Martin von Zweigbergk
|
r25091 | def dirtext(self, usemanifestv2=False): | ||
"""Get the full data of this directory as a bytestring. Make sure that | ||||
any submanifests have been written first, so their nodeids are correct. | ||||
""" | ||||
Martin von Zweigbergk
|
r25222 | self._load() | ||
Martin von Zweigbergk
|
r25091 | flags = self.flags | ||
dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs] | ||||
files = [(f, self._files[f], flags(f)) for f in self._files] | ||||
return _text(sorted(dirs + files), usemanifestv2) | ||||
Martin von Zweigbergk
|
r25222 | def read(self, gettext, readsubtree): | ||
def _load(): | ||||
# Mark as loaded already here, so __setitem__ and setflag() don't | ||||
# cause infinite loops when they try to load. | ||||
self._load = _noop | ||||
self.parse(gettext(), readsubtree) | ||||
self._dirty = False | ||||
self._load = _load | ||||
Martin von Zweigbergk
|
r25091 | def writesubtrees(self, m1, m2, writesubtree): | ||
Martin von Zweigbergk
|
r25222 | self._load() # for consistency; should never have any effect here | ||
Martin von Zweigbergk
|
r25091 | emptytree = treemanifest() | ||
for d, subm in self._dirs.iteritems(): | ||||
subp1 = m1._dirs.get(d, emptytree)._node | ||||
subp2 = m2._dirs.get(d, emptytree)._node | ||||
if subp1 == revlog.nullid: | ||||
subp1, subp2 = subp2, subp1 | ||||
writesubtree(subm, subp1, subp2) | ||||
Matt Mackall
|
r7634 | class manifest(revlog.revlog): | ||
Martin von Zweigbergk
|
r25185 | def __init__(self, opener, dir='', dirlogcache=None): | ||
'''The 'dir' and 'dirlogcache' arguments are for internal use by | ||||
manifest.manifest only. External users should create a root manifest | ||||
log with manifest.manifest(opener) and call dirlog() on it. | ||||
''' | ||||
Durham Goode
|
r24033 | # During normal operations, we expect to deal with not more than four | ||
# revs at a time (such as during commit --amend). When rebasing large | ||||
# stacks of commits, the number can go up, hence the config knob below. | ||||
cachesize = 4 | ||||
Martin von Zweigbergk
|
r24402 | usetreemanifest = False | ||
Martin von Zweigbergk
|
r24526 | usemanifestv2 = False | ||
Durham Goode
|
r24033 | opts = getattr(opener, 'options', None) | ||
if opts is not None: | ||||
cachesize = opts.get('manifestcachesize', cachesize) | ||||
Martin von Zweigbergk
|
r24956 | usetreemanifest = opts.get('treemanifest', usetreemanifest) | ||
Martin von Zweigbergk
|
r24571 | usemanifestv2 = opts.get('manifestv2', usemanifestv2) | ||
Durham Goode
|
r24033 | self._mancache = util.lrucachedict(cachesize) | ||
Martin von Zweigbergk
|
r24701 | self._treeinmem = usetreemanifest | ||
self._treeondisk = usetreemanifest | ||||
Martin von Zweigbergk
|
r24526 | self._usemanifestv2 = usemanifestv2 | ||
Martin von Zweigbergk
|
r25091 | indexfile = "00manifest.i" | ||
if dir: | ||||
assert self._treeondisk | ||||
Martin von Zweigbergk
|
r25119 | if not dir.endswith('/'): | ||
dir = dir + '/' | ||||
Martin von Zweigbergk
|
r25091 | indexfile = "meta/" + dir + "00manifest.i" | ||
revlog.revlog.__init__(self, opener, indexfile) | ||||
self._dir = dir | ||||
Martin von Zweigbergk
|
r25185 | # The dirlogcache is kept on the root manifest log | ||
if dir: | ||||
self._dirlogcache = dirlogcache | ||||
else: | ||||
self._dirlogcache = {'': self} | ||||
Martin von Zweigbergk
|
r24402 | |||
def _newmanifest(self, data=''): | ||||
Martin von Zweigbergk
|
r24701 | if self._treeinmem: | ||
Martin von Zweigbergk
|
r25091 | return treemanifest(self._dir, data) | ||
Martin von Zweigbergk
|
r24402 | return manifestdict(data) | ||
mpm@selenic.com
|
r1089 | |||
Martin von Zweigbergk
|
r25185 | def dirlog(self, dir): | ||
assert self._treeondisk | ||||
if dir not in self._dirlogcache: | ||||
self._dirlogcache[dir] = manifest(self.opener, dir, | ||||
self._dirlogcache) | ||||
return self._dirlogcache[dir] | ||||
Martin von Zweigbergk
|
r24528 | def _slowreaddelta(self, node): | ||
r0 = self.deltaparent(self.rev(node)) | ||||
m0 = self.read(self.node(r0)) | ||||
m1 = self.read(node) | ||||
md = self._newmanifest() | ||||
for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems(): | ||||
if n1: | ||||
md[f] = n1 | ||||
if fl1: | ||||
md.setflag(f, fl1) | ||||
return md | ||||
Brendan Cully
|
r3196 | def readdelta(self, node): | ||
Martin von Zweigbergk
|
r24701 | if self._usemanifestv2 or self._treeondisk: | ||
Martin von Zweigbergk
|
r24528 | return self._slowreaddelta(node) | ||
Matt Mackall
|
r7362 | r = self.rev(node) | ||
Augie Fackler
|
r24224 | d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r)) | ||
Martin von Zweigbergk
|
r24402 | return self._newmanifest(d) | ||
Thomas Arendsen Hein
|
r3223 | |||
Matt Mackall
|
r13711 | def readfast(self, node): | ||
Augie Fackler
|
r24925 | '''use the faster of readdelta or read | ||
This will return a manifest which is either only the files | ||||
added/modified relative to p1, or all files in the | ||||
manifest. Which one is returned depends on the codepath used | ||||
to retrieve the data. | ||||
''' | ||||
Matt Mackall
|
r13711 | r = self.rev(node) | ||
Sune Foldager
|
r14208 | deltaparent = self.deltaparent(r) | ||
if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r): | ||||
Matt Mackall
|
r13711 | return self.readdelta(node) | ||
return self.read(node) | ||||
mpm@selenic.com
|
r1089 | def read(self, node): | ||
Matt Mackall
|
r7634 | if node == revlog.nullid: | ||
Martin von Zweigbergk
|
r24402 | return self._newmanifest() # don't upset local cache | ||
Siddharth Agarwal
|
r18604 | if node in self._mancache: | ||
return self._mancache[node][0] | ||||
Martin von Zweigbergk
|
r25091 | if self._treeondisk: | ||
Martin von Zweigbergk
|
r25222 | def gettext(): | ||
return self.revision(node) | ||||
Martin von Zweigbergk
|
r25091 | def readsubtree(dir, subm): | ||
Martin von Zweigbergk
|
r25185 | return self.dirlog(dir).read(subm) | ||
Martin von Zweigbergk
|
r25091 | m = self._newmanifest() | ||
Martin von Zweigbergk
|
r25222 | m.read(gettext, readsubtree) | ||
Martin von Zweigbergk
|
r25091 | m.setnode(node) | ||
arraytext = None | ||||
else: | ||||
Martin von Zweigbergk
|
r25222 | text = self.revision(node) | ||
Martin von Zweigbergk
|
r25091 | m = self._newmanifest(text) | ||
arraytext = array.array('c', text) | ||||
Martin von Zweigbergk
|
r24147 | self._mancache[node] = (m, arraytext) | ||
return m | ||||
mpm@selenic.com
|
r1089 | |||
Vadim Gelfer
|
r2320 | def find(self, node, f): | ||
'''look up entry for a single file efficiently. | ||||
Alexis S. L. Carvalho
|
r4159 | return (node, flags) pair if found, (None, None) if not.''' | ||
Martin von Zweigbergk
|
r24292 | m = self.read(node) | ||
Augie Fackler
|
r24225 | try: | ||
Martin von Zweigbergk
|
r24292 | return m.find(f) | ||
Augie Fackler
|
r24225 | except KeyError: | ||
Vadim Gelfer
|
r2320 | return None, None | ||
Martin von Zweigbergk
|
r24147 | def add(self, m, transaction, link, p1, p2, added, removed): | ||
Martin von Zweigbergk
|
r24701 | if (p1 in self._mancache and not self._treeinmem | ||
Martin von Zweigbergk
|
r24527 | and not self._usemanifestv2): | ||
Augie Fackler
|
r22788 | # If our first parent is in the manifest cache, we can | ||
# compute a delta here using properties we know about the | ||||
# manifest up-front, which may save time later for the | ||||
# revlog layer. | ||||
mpm@selenic.com
|
r1089 | |||
Augie Fackler
|
r22415 | _checkforbidden(added) | ||
mpm@selenic.com
|
r1089 | # combine the changed lists into one list for sorting | ||
Benoit Boissinot
|
r9415 | work = [(x, False) for x in added] | ||
work.extend((x, True) for x in removed) | ||||
Mads Kiilerich
|
r17428 | # this could use heapq.merge() (from Python 2.6+) or equivalent | ||
Benoit Boissinot
|
r9415 | # since the lists are already sorted | ||
mpm@selenic.com
|
r1089 | work.sort() | ||
Martin von Zweigbergk
|
r24147 | arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work) | ||
Augie Fackler
|
r22931 | cachedelta = self.rev(p1), deltatext | ||
Matt Mackall
|
r15657 | text = util.buffer(arraytext) | ||
Martin von Zweigbergk
|
r24780 | n = self.addrevision(text, transaction, link, p1, p2, cachedelta) | ||
Augie Fackler
|
r22788 | else: | ||
# The first parent manifest isn't already loaded, so we'll | ||||
# just encode a fulltext of the manifest and pass that | ||||
# through to the revlog layer, and let it handle the delta | ||||
# process. | ||||
Martin von Zweigbergk
|
r25091 | if self._treeondisk: | ||
m1 = self.read(p1) | ||||
m2 = self.read(p2) | ||||
n = self._addtree(m, transaction, link, m1, m2) | ||||
arraytext = None | ||||
else: | ||||
text = m.text(self._usemanifestv2) | ||||
n = self.addrevision(text, transaction, link, p1, p2) | ||||
arraytext = array.array('c', text) | ||||
mason@suse.com
|
r1534 | |||
Martin von Zweigbergk
|
r24147 | self._mancache[n] = (m, arraytext) | ||
mpm@selenic.com
|
r1089 | |||
return n | ||||
Martin von Zweigbergk
|
r25091 | |||
def _addtree(self, m, transaction, link, m1, m2): | ||||
Martin von Zweigbergk
|
r25221 | # If the manifest is unchanged compared to one parent, | ||
# don't write a new revision | ||||
if m.unmodifiedsince(m1) or m.unmodifiedsince(m2): | ||||
return m.node() | ||||
Martin von Zweigbergk
|
r25091 | def writesubtree(subm, subp1, subp2): | ||
Martin von Zweigbergk
|
r25185 | sublog = self.dirlog(subm.dir()) | ||
Martin von Zweigbergk
|
r25091 | sublog.add(subm, transaction, link, subp1, subp2, None, None) | ||
m.writesubtrees(m1, m2, writesubtree) | ||||
text = m.dirtext(self._usemanifestv2) | ||||
Martin von Zweigbergk
|
r25221 | # Double-check whether contents are unchanged to one parent | ||
Martin von Zweigbergk
|
r25091 | if text == m1.dirtext(self._usemanifestv2): | ||
n = m1.node() | ||||
elif text == m2.dirtext(self._usemanifestv2): | ||||
n = m2.node() | ||||
else: | ||||
n = self.addrevision(text, transaction, link, m1.node(), m2.node()) | ||||
# Save nodeid so parent manifest can calculate its nodeid | ||||
m.setnode(n) | ||||
return n | ||||