##// END OF EJS Templates
manifest: make manifestlog a storecache...
manifest: make manifestlog a storecache The old @property on manifestlog was broken. It meant that we would always recreate the manifestlog instance, which meant the cache was never hit. Since we'll eventually remove repo.manifest and make manifestlog the only property, let's go ahead and make manifestlog the @storecache property, have manifestlog own the manifest instance, and have repo.manifest refer to it via manifestlog. This means all accesses go through repo.manifestlog, which is now invalidated correctly.

File last commit:

r30219:3c8811ef stable
r30219:3c8811ef stable
Show More
manifest.py
1531 lines | 50.4 KiB | text/x-python | PythonLexer
mpm@selenic.com
Break apart hg.py...
r1089 # manifest.py - manifest revision class for mercurial
#
Thomas Arendsen Hein
Updated copyright notices and add "and others" to "hg version"
r4635 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
mpm@selenic.com
Break apart hg.py...
r1089 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
mpm@selenic.com
Break apart hg.py...
r1089
Gregory Szorc
manifest: use absolute_import
r27502 from __future__ import absolute_import
import array
import heapq
Martin von Zweigbergk
manifestv2: add support for writing new manifest format...
r24573 import os
Gregory Szorc
manifest: use absolute_import
r27502 import struct
from .i18n import _
from . import (
error,
mdiff,
parsers,
revlog,
util,
)
mpm@selenic.com
Break apart hg.py...
r1089
Drew Gottlieb
manifest: add dirs() to manifestdict...
r24322 propertycache = util.propertycache
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
Martin von Zweigbergk
manifestv2: add support for reading new manifest format...
r24572 def _parsev1(data):
Martin von Zweigbergk
manifest: extract method for parsing manifest...
r24524 # This method does a little bit of excessive-looking
# precondition checking. This is so that the behavior of this
# class exactly matches its C counterpart to try and help
# prevent surprise breakage for anyone that develops against
# the pure version.
if data and data[-1] != '\n':
raise ValueError('Manifest did not end in a newline.')
prev = None
for l in data.splitlines():
if prev is not None and prev > l:
raise ValueError('Manifest lines not in sorted order.')
prev = l
f, n = l.split('\0')
if len(n) > 40:
yield f, revlog.bin(n[:40]), n[40:]
else:
yield f, revlog.bin(n), ''
Martin von Zweigbergk
manifestv2: add support for reading new manifest format...
r24572 def _parsev2(data):
metadataend = data.find('\n')
# Just ignore metadata for now
pos = metadataend + 1
prevf = ''
while pos < len(data):
end = data.find('\n', pos + 1) # +1 to skip stem length byte
if end == -1:
raise ValueError('Manifest ended with incomplete file entry.')
stemlen = ord(data[pos])
items = data[pos + 1:end].split('\0')
f = prevf[:stemlen] + items[0]
if prevf > f:
raise ValueError('Manifest entries not in sorted order.')
fl = items[1]
# Just ignore metadata (items[2:] for now)
n = data[end + 1:end + 21]
yield f, n, fl
pos = end + 22
prevf = f
def _parse(data):
    """Return an iterator of (path, node, flags) tuples for a manifest text.

    Text beginning with a NUL byte is handed to _parsev2(); everything
    else goes through _parsev1().
    """
    parser = _parsev2 if data.startswith('\0') else _parsev1
    return iter(parser(data))
Martin von Zweigbergk
manifestv2: add support for writing new manifest format...
r24573 def _text(it, usemanifestv2):
Martin von Zweigbergk
manifest: extract method for creating manifest text...
r24525 """Given an iterator over (path, node, flags) tuples, returns a manifest
text"""
Martin von Zweigbergk
manifestv2: add support for writing new manifest format...
r24573 if usemanifestv2:
return _textv2(it)
else:
return _textv1(it)
def _textv1(it):
Martin von Zweigbergk
manifest: extract method for creating manifest text...
r24525 files = []
lines = []
_hex = revlog.hex
for f, n, fl in it:
files.append(f)
# if this is changed to support newlines in filenames,
# be sure to check the templates/ dir again (especially *-raw.tmpl)
lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
_checkforbidden(files)
return ''.join(lines)
Martin von Zweigbergk
manifestv2: add support for writing new manifest format...
r24573 def _textv2(it):
files = []
lines = ['\0\n']
prevf = ''
for f, n, fl in it:
files.append(f)
stem = os.path.commonprefix([prevf, f])
stemlen = min(len(stem), 255)
lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
prevf = f
_checkforbidden(files)
return ''.join(lines)
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 class lazymanifestiter(object):
def __init__(self, lm):
self.pos = 0
self.lm = lm
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 def __iter__(self):
return self
Augie Fackler
manifest: move parsing functions up in file...
r24223
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 def next(self):
try:
data, pos = self.lm._get(self.pos)
except IndexError:
raise StopIteration
if pos == -1:
self.pos += 1
return data[0]
self.pos += 1
zeropos = data.find('\x00', pos)
return data[pos:zeropos]
Augie Fackler
manifest: do parsing inside manifestdict contstructor...
r24224
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 class lazymanifestiterentries(object):
def __init__(self, lm):
self.lm = lm
self.pos = 0
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
def __iter__(self):
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 return self
def next(self):
try:
data, pos = self.lm._get(self.pos)
except IndexError:
raise StopIteration
if pos == -1:
self.pos += 1
return data
zeropos = data.find('\x00', pos)
hashval = unhexlify(data, self.lm.extrainfo[self.pos],
zeropos + 1, 40)
flags = self.lm._getflags(data, self.pos, zeropos)
self.pos += 1
return (data[pos:zeropos], hashval, flags)
def unhexlify(data, extra, pos, length):
    """Decode ``length`` hex digits of ``data`` starting at offset ``pos``.

    When ``extra`` is non-zero, its low byte is appended as one additional
    trailing character after the decoded bytes.
    """
    # binascii.unhexlify is the portable spelling of the Python 2-only
    # str.decode('hex') codec.
    import binascii
    s = binascii.unhexlify(data[pos:pos + length])
    if extra:
        s += chr(extra & 0xff)
    return s
def _cmp(a, b):
    """Three-way comparison: 1 if a > b, -1 if a < b, otherwise 0."""
    if a > b:
        return 1
    if a < b:
        return -1
    return 0
class _lazymanifest(object):
def __init__(self, data, positions=None, extrainfo=None, extradata=None):
if positions is None:
self.positions = self.findlines(data)
self.extrainfo = [0] * len(self.positions)
self.data = data
self.extradata = []
else:
self.positions = positions[:]
self.extrainfo = extrainfo[:]
self.extradata = extradata[:]
self.data = data
def findlines(self, data):
if not data:
return []
pos = data.find("\n")
if pos == -1 or data[-1] != '\n':
raise ValueError("Manifest did not end in a newline.")
positions = [0]
prev = data[:data.find('\x00')]
while pos < len(data) - 1 and pos != -1:
positions.append(pos + 1)
nexts = data[pos + 1:data.find('\x00', pos + 1)]
if nexts < prev:
raise ValueError("Manifest lines not in sorted order.")
prev = nexts
pos = data.find("\n", pos + 1)
return positions
def _get(self, index):
# get the position encoded in pos:
# positive number is an index in 'data'
# negative number is in extrapieces
pos = self.positions[index]
if pos >= 0:
return self.data, pos
return self.extradata[-pos - 1], -1
def _getkey(self, pos):
if pos >= 0:
return self.data[pos:self.data.find('\x00', pos + 1)]
return self.extradata[-pos - 1][0]
def bsearch(self, key):
first = 0
last = len(self.positions) - 1
while first <= last:
midpoint = (first + last)//2
nextpos = self.positions[midpoint]
candidate = self._getkey(nextpos)
r = _cmp(key, candidate)
if r == 0:
return midpoint
else:
if r < 0:
last = midpoint - 1
else:
first = midpoint + 1
return -1
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 def bsearch2(self, key):
# same as the above, but will always return the position
# done for performance reasons
first = 0
last = len(self.positions) - 1
while first <= last:
midpoint = (first + last)//2
nextpos = self.positions[midpoint]
candidate = self._getkey(nextpos)
r = _cmp(key, candidate)
if r == 0:
return (midpoint, True)
else:
if r < 0:
last = midpoint - 1
else:
first = midpoint + 1
return (first, False)
def __contains__(self, key):
return self.bsearch(key) != -1
def _getflags(self, data, needle, pos):
start = pos + 41
end = data.find("\n", start)
if end == -1:
end = len(data) - 1
if start == end:
return ''
return self.data[start:end]
Martin von Zweigbergk
lazymanifest: fix pure hg iterkeys()...
r24297
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 def __getitem__(self, key):
if not isinstance(key, str):
raise TypeError("getitem: manifest keys must be a string.")
needle = self.bsearch(key)
if needle == -1:
raise KeyError
data, pos = self._get(needle)
if pos == -1:
return (data[1], data[2])
zeropos = data.find('\x00', pos)
assert 0 <= needle <= len(self.positions)
assert len(self.extrainfo) == len(self.positions)
hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
flags = self._getflags(data, needle, zeropos)
return (hashval, flags)
def __delitem__(self, key):
needle, found = self.bsearch2(key)
if not found:
raise KeyError
cur = self.positions[needle]
self.positions = self.positions[:needle] + self.positions[needle + 1:]
self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
if cur >= 0:
self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
def __setitem__(self, key, value):
if not isinstance(key, str):
raise TypeError("setitem: manifest keys must be a string.")
if not isinstance(value, tuple) or len(value) != 2:
raise TypeError("Manifest values must be a tuple of (node, flags).")
hashval = value[0]
if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22:
raise TypeError("node must be a 20-byte string")
flags = value[1]
if len(hashval) == 22:
hashval = hashval[:-1]
if not isinstance(flags, str) or len(flags) > 1:
raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
needle, found = self.bsearch2(key)
if found:
# put the item
pos = self.positions[needle]
if pos < 0:
self.extradata[-pos - 1] = (key, hashval, value[1])
else:
# just don't bother
self.extradata.append((key, hashval, value[1]))
self.positions[needle] = -len(self.extradata)
else:
# not found, put it in with extra positions
self.extradata.append((key, hashval, value[1]))
self.positions = (self.positions[:needle] + [-len(self.extradata)]
+ self.positions[needle:])
self.extrainfo = (self.extrainfo[:needle] + [0] +
self.extrainfo[needle:])
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298
Matt Mackall
Add manifestflags class
r2831 def copy(self):
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 # XXX call _compact like in C?
return _lazymanifest(self.data, self.positions, self.extrainfo,
self.extradata)
def _compact(self):
# hopefully not called TOO often
if len(self.extradata) == 0:
return
l = []
last_cut = 0
i = 0
offset = 0
self.extrainfo = [0] * len(self.positions)
while i < len(self.positions):
if self.positions[i] >= 0:
cur = self.positions[i]
last_cut = cur
while True:
self.positions[i] = offset
i += 1
if i == len(self.positions) or self.positions[i] < 0:
break
offset += self.positions[i] - cur
cur = self.positions[i]
end_cut = self.data.find('\n', cur)
if end_cut != -1:
end_cut += 1
offset += end_cut - cur
l.append(self.data[last_cut:end_cut])
else:
while i < len(self.positions) and self.positions[i] < 0:
cur = self.positions[i]
t = self.extradata[-cur - 1]
l.append(self._pack(t))
self.positions[i] = offset
if len(t[1]) > 20:
self.extrainfo[i] = ord(t[1][21])
offset += len(l[-1])
i += 1
self.data = ''.join(l)
self.extradata = []
def _pack(self, d):
return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n'
def text(self):
self._compact()
return self.data
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
def diff(self, m2, clean=False):
'''Finds changes between the current manifest and m2.'''
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 # XXX think whether efficiency matters here
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 diff = {}
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 for fn, e1, flags in self.iterentries():
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 if fn not in m2:
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 diff[fn] = (e1, flags), (None, '')
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 else:
e2 = m2[fn]
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 if (e1, flags) != e2:
diff[fn] = (e1, flags), e2
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 elif clean:
diff[fn] = None
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 for fn, e2, flags in m2.iterentries():
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 if fn not in self:
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 diff[fn] = (None, ''), (e2, flags)
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
return diff
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 def iterentries(self):
return lazymanifestiterentries(self)
def iterkeys(self):
return lazymanifestiter(self)
def __iter__(self):
return lazymanifestiter(self)
def __len__(self):
return len(self.positions)
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 def filtercopy(self, filterfn):
Maciej Fijalkowski
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
r30042 # XXX should be optimized
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 c = _lazymanifest('')
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 for f, n, fl in self.iterentries():
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 if filterfn(f):
c[f] = n, fl
return c
Augie Fackler
manifest: use custom C implementation of lazymanifest...
r24226 try:
_lazymanifest = parsers.lazymanifest
except AttributeError:
pass
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 class manifestdict(object):
def __init__(self, data=''):
Martin von Zweigbergk
manifestv2: add support for reading new manifest format...
r24572 if data.startswith('\0'):
#_lazymanifest can not parse v2
self._lm = _lazymanifest('')
for f, n, fl in _parsev2(data):
self._lm[f] = n, fl
else:
self._lm = _lazymanifest(data)
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
def __getitem__(self, key):
return self._lm[key][0]
Martin von Zweigbergk
manifest: don't let find() look inside manifestdict...
r24277 def find(self, key):
return self._lm[key]
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 def __len__(self):
return len(self._lm)
def __setitem__(self, key, node):
self._lm[key] = node, self.flags(key, '')
def __contains__(self, key):
return key in self._lm
def __delitem__(self, key):
del self._lm[key]
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 def __iter__(self):
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 return self._lm.__iter__()
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
def iterkeys(self):
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 return self._lm.iterkeys()
def keys(self):
return list(self.iterkeys())
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225
Martin von Zweigbergk
copies: move code into new manifestdict.filesnotin() method...
r24184 def filesnotin(self, m2):
'''Set of files in this manifest that are not in the other'''
Tony Tung
manifest: improve filesnotin performance by using lazymanifest diff...
r29056 diff = self.diff(m2)
files = set(filepath
for filepath, hashflags in diff.iteritems()
if hashflags[1][0] is None)
Martin von Zweigbergk
copies: move code into new manifestdict.filesnotin() method...
r24184 return files
Drew Gottlieb
manifest: add dirs() to manifestdict...
r24322 @propertycache
def _dirs(self):
Drew Gottlieb
util: move dirs() and finddirs() from scmutil to util...
r24635 return util.dirs(self)
Drew Gottlieb
manifest: add dirs() to manifestdict...
r24322
def dirs(self):
return self._dirs
Drew Gottlieb
manifest: add manifestdict.hasdir() method...
r24324 def hasdir(self, dir):
return dir in self._dirs
Martin von Zweigbergk
manifestdict: extract condition for _intersectfiles() and use for walk()...
r24685 def _filesfastpath(self, match):
'''Checks whether we can correctly and quickly iterate over matcher
files instead of over manifest files.'''
files = match.files()
return (len(files) < 100 and (match.isexact() or
Martin von Zweigbergk
manifest: use match.prefix() instead of 'not match.anypats()'...
r25276 (match.prefix() and all(fn in self for fn in files))))
Martin von Zweigbergk
manifestdict: extract condition for _intersectfiles() and use for walk()...
r24685
Drew Gottlieb
manifest: move changectx.walk() to manifests...
r24646 def walk(self, match):
'''Generates matching file names.
Equivalent to manifest.matches(match).iterkeys(), but without creating
an entirely new manifest.
It also reports nonexistent files by marking them bad with match.bad().
'''
Martin von Zweigbergk
manifest.walk: special-case match.always() for speed...
r24683 if match.always():
for f in iter(self):
yield f
return
Drew Gottlieb
manifest: move changectx.walk() to manifests...
r24646 fset = set(match.files())
# avoid the entire walk if we're only looking for specific files
Martin von Zweigbergk
manifestdict: extract condition for _intersectfiles() and use for walk()...
r24685 if self._filesfastpath(match):
Martin von Zweigbergk
manifest.walk: join nested if-conditions...
r24667 for fn in sorted(fset):
yield fn
Martin von Zweigbergk
manifest.walk: use return instead of StopIteration in generator...
r24682 return
Drew Gottlieb
manifest: move changectx.walk() to manifests...
r24646
for fn in self:
if fn in fset:
# specified pattern is the exact name
fset.remove(fn)
if match(fn):
yield fn
# for dirstate.walk, files=['.'] means "walk the whole tree".
# follow that here, too
fset.discard('.')
for fn in sorted(fset):
if not self.hasdir(fn):
match.bad(fn, None)
Martin von Zweigbergk
manifest: add matches() method...
r23305 def matches(self, match):
'''generate a new manifest filtered by the match argument'''
if match.always():
return self.copy()
Martin von Zweigbergk
manifestdict: extract condition for _intersectfiles() and use for walk()...
r24685 if self._filesfastpath(match):
Martin von Zweigbergk
manifestdict: inline _intersectfiles()...
r24666 m = manifestdict()
lm = self._lm
for fn in match.files():
if fn in lm:
m._lm[fn] = lm[fn]
return m
Martin von Zweigbergk
manifest: add matches() method...
r23305
Martin von Zweigbergk
manifestdict: drop empty-string argument when creating empty manifest...
r24700 m = manifestdict()
Martin von Zweigbergk
manifestdict.matches: avoid name 'lm' for a not-lazymanifest
r24664 m._lm = self._lm.filtercopy(match)
return m
Martin von Zweigbergk
manifest: add matches() method...
r23305
Augie Fackler
manifest: add optional recording of clean entries to diff...
r23756 def diff(self, m2, clean=False):
'''Finds changes between the current manifest and m2.
Args:
m2: the manifest to which this manifest should be compared.
clean: if true, include files unchanged between these manifests
with a None value in the returned dictionary.
The result is returned as a dict with filename as key and
values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
nodeid in the current/other manifest and fl1/fl2 is the flag
in the current/other manifest. Where the file does not exist,
the nodeid will be None and the flags will be the empty
string.
'''
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 return self._lm.diff(m2._lm, clean)
def setflag(self, key, flag):
self._lm[key] = self[key], flag
def get(self, key, default=None):
try:
return self._lm[key][0]
except KeyError:
return default
Martin von Zweigbergk
manifest: for diff(), only iterate over files, not flags...
r22965
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 def flags(self, key, default=''):
try:
return self._lm[key][1]
except KeyError:
return default
Martin von Zweigbergk
manifest: for diff(), only iterate over files, not flags...
r22965
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 def copy(self):
Martin von Zweigbergk
manifestdict: drop empty-string argument when creating empty manifest...
r24700 c = manifestdict()
Augie Fackler
manifest: split manifestdict into high-level and low-level logic...
r24225 c._lm = self._lm.copy()
return c
def iteritems(self):
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 return (x[:2] for x in self._lm.iterentries())
Matt Mackall
Add manifestflags class
r2831
Martin von Zweigbergk
verify: check directory manifests...
r28203 def iterentries(self):
return self._lm.iterentries()
Martin von Zweigbergk
manifestv2: add support for writing new manifest format...
r24573 def text(self, usemanifestv2=False):
if usemanifestv2:
return _textv2(self._lm.iterentries())
else:
# use (probably) native version for v1
return self._lm.text()
Augie Fackler
manifest: move checkforbidden to module-level...
r22408
Augie Fackler
manifest: add fastdelta method to manifestdict...
r22931 def fastdelta(self, base, changes):
"""Given a base manifest text as an array.array and a list of changes
relative to that text, compute a delta that can be used by revlog.
"""
delta = []
dstart = None
dend = None
dline = [""]
start = 0
# zero copy representation of base as a buffer
addbuf = util.buffer(base)
Durham Goode
manifest: skip fastdelta if the change is large...
r26871 changes = list(changes)
if len(changes) < 1000:
# start with a readonly loop that finds the offset of
# each line and creates the deltas
for f, todelete in changes:
# bs will either be the index of the item or the insert point
start, end = _msearch(addbuf, f, start)
if not todelete:
h, fl = self._lm[f]
l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
else:
if start == end:
# item we want to delete was not found, error out
raise AssertionError(
_("failed to remove %s from manifest") % f)
l = ""
if dstart is not None and dstart <= start and dend >= start:
if dend < end:
dend = end
if l:
dline.append(l)
else:
if dstart is not None:
delta.append([dstart, dend, "".join(dline)])
dstart = start
Augie Fackler
manifest: add fastdelta method to manifestdict...
r22931 dend = end
Durham Goode
manifest: skip fastdelta if the change is large...
r26871 dline = [l]
Augie Fackler
manifest: add fastdelta method to manifestdict...
r22931
Durham Goode
manifest: skip fastdelta if the change is large...
r26871 if dstart is not None:
delta.append([dstart, dend, "".join(dline)])
# apply the delta to the base, and get a delta for addrevision
deltatext, arraytext = _addlistdelta(base, delta)
else:
# For large changes, it's much cheaper to just build the text and
# diff it.
arraytext = array.array('c', self.text())
deltatext = mdiff.textdiff(base, arraytext)
Augie Fackler
manifest: add fastdelta method to manifestdict...
r22931 return arraytext, deltatext
Augie Fackler
manifest: move _search to module level and rename to _msearch...
def _msearch(m, s, lo=0, hi=None):
    '''Locate the line for name s inside manifest text m.

    Returns a (start, end) tuple.  When s is present, m[start:end] is
    the full line for that name.  When it is absent, start == end and
    both give the offset where such a line would be inserted to keep
    the text sorted.  An empty s yields (lo, lo).

    m should be a buffer or a string
    s is a string'''
    if not s:
        return (lo, lo)
    size = len(m)
    if not hi:
        hi = size

    def skipto(idx, char):
        # index of the first ``char`` at or after idx (size if none)
        while idx < size and m[idx] != char:
            idx += 1
        return idx

    while lo < hi:
        linestart = (lo + hi) // 2
        # back up to the beginning of the line holding the midpoint
        while linestart > 0 and m[linestart - 1] != '\n':
            linestart -= 1
        nameend = skipto(linestart, '\0')
        if m[linestart:nameend] < s:
            # the null is followed by 40 bytes of sha1, so the line's
            # newline cannot come earlier; this is bisect's lo = mid + 1
            lo = skipto(nameend + 40, '\n') + 1
        else:
            # this is bisect's hi = mid
            hi = linestart
    nameend = skipto(lo, '\0')
    if m[lo:nameend] != s:
        return (lo, lo)
    # again skip the 40 bytes of sha1 before looking for the newline
    return (lo, skipto(nameend + 40, '\n') + 1)
Augie Fackler
manifest: mark addlistdelta and checkforbidden as module-private
r22415 def _checkforbidden(l):
Augie Fackler
manifest: move checkforbidden to module-level...
r22408 """Check filenames for illegal characters."""
for f in l:
if '\n' in f or '\r' in f:
raise error.RevlogError(
_("'\\n' and '\\r' disallowed in filenames: %r") % f)
Augie Fackler
manifest: move addlistdelta to module-level...
r22409 # apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
Augie Fackler
manifest: mark addlistdelta and checkforbidden as module-private
r22415 def _addlistdelta(addlist, x):
Augie Fackler
manifest: move addlistdelta to module-level...
r22409 # for large addlist arrays, building a new array is cheaper
# than repeatedly modifying the existing one
currentposition = 0
newaddlist = array.array('c')
for start, end, content in x:
newaddlist += addlist[currentposition:start]
if content:
newaddlist += array.array('c', content)
currentposition = end
newaddlist += addlist[currentposition:]
deltatext = "".join(struct.pack(">lll", start, end, len(content))
+ content for start, end, content in x)
return deltatext, newaddlist
Martin von Zweigbergk
treemanifest: create treemanifest class...
def _splittopdir(f):
    """Split path ``f`` at its first '/' into (topdir, rest).

    The returned directory keeps its trailing slash.  A path without any
    slash yields ('', f).
    """
    head, sep, tail = f.partition('/')
    if not sep:
        return '', f
    return head + sep, tail
Augie Fackler
treemanifest: rework lazy-copying code (issue4840)...
r26402 _noop = lambda s: None
Martin von Zweigbergk
treemanifest: lazily load manifests...
r25222
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 class treemanifest(object):
Martin von Zweigbergk
treemanifest: store directory path in treemanifest nodes...
r24403 def __init__(self, dir='', text=''):
self._dir = dir
Martin von Zweigbergk
treemanifest: store submanifest revlog per directory...
r25091 self._node = revlog.nullid
Augie Fackler
treemanifest: rework lazy-copying code (issue4840)...
r26402 self._loadfunc = _noop
self._copyfunc = _noop
Martin von Zweigbergk
treemanifest: speed up diff by keeping track of dirty nodes...
r25220 self._dirty = False
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 self._dirs = {}
# Using _lazymanifest here is a little slower than plain old dicts
self._files = {}
self._flags = {}
Martin von Zweigbergk
treemanifest: speed up diff by keeping track of dirty nodes...
r25220 if text:
def readsubtree(subdir, subm):
raise AssertionError('treemanifest constructor only accepts '
'flat manifests')
self.parse(text, readsubtree)
self._dirty = True # Mark flat manifest dirty after parsing
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
Martin von Zweigbergk
treemanifest: store directory path in treemanifest nodes...
r24403 def _subpath(self, path):
return self._dir + path
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 def __len__(self):
Martin von Zweigbergk
treemanifest: lazily load manifests...
r25222 self._load()
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 size = len(self._files)
for m in self._dirs.values():
size += m.__len__()
return size
Drew Gottlieb
treemanifest: add treemanifest._isempty()...
r24551 def _isempty(self):
Martin von Zweigbergk
treemanifest: lazily load manifests...
r25222 self._load() # for consistency; already loaded by all callers
Drew Gottlieb
treemanifest: add treemanifest._isempty()...
r24551 return (not self._files and (not self._dirs or
Augie Fackler
cleanup: use __builtins__.all instead of util.all
r25151 all(m._isempty() for m in self._dirs.values())))
Drew Gottlieb
treemanifest: add treemanifest._isempty()...
r24551
Augie Fackler
manifest: add id(self) to treemanifest __repr__...
r26400 def __repr__(self):
return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
Martin von Zweigbergk
treemanifest: lazily load manifests...
r25222 (self._dir, revlog.hex(self._node),
Augie Fackler
treemanifest: rework lazy-copying code (issue4840)...
r26402 bool(self._loadfunc is _noop),
Augie Fackler
manifest: add id(self) to treemanifest __repr__...
r26400 self._dirty, id(self)))
Martin von Zweigbergk
treemanifest: store submanifest revlog per directory...
r25091
def dir(self):
'''The directory that this tree manifest represents, including a
trailing '/'. Empty string for the repo root directory.'''
return self._dir
def node(self):
'''This node of this instance. nullid for unsaved instances. Should
be updated when the instance is read or written from a revlog.
'''
Martin von Zweigbergk
treemanifest: speed up diff by keeping track of dirty nodes...
r25220 assert not self._dirty
Martin von Zweigbergk
treemanifest: store submanifest revlog per directory...
r25091 return self._node
def setnode(self, node):
self._node = node
Martin von Zweigbergk
treemanifest: speed up diff by keeping track of dirty nodes...
r25220 self._dirty = False
Martin von Zweigbergk
treemanifest: store directory path in treemanifest nodes...
r24403
Martin von Zweigbergk
treemanifest: implement iterentries()...
r28206 def iterentries(self):
self._load()
for p, n in sorted(self._dirs.items() + self._files.items()):
if p in self._files:
yield self._subpath(p), n, self._flags.get(p, '')
else:
for x in n.iterentries():
yield x
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 def iteritems(self):
Martin von Zweigbergk
treemanifest: lazily load manifests...
r25222 self._load()
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 for p, n in sorted(self._dirs.items() + self._files.items()):
if p in self._files:
Martin von Zweigbergk
treemanifest: store directory path in treemanifest nodes...
r24403 yield self._subpath(p), n
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 else:
Martin von Zweigbergk
treemanifest: store directory path in treemanifest nodes...
r24403 for f, sn in n.iteritems():
yield f, sn
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
def iterkeys(self):
Martin von Zweigbergk
treemanifest: lazily load manifests...
r25222 self._load()
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 for p in sorted(self._dirs.keys() + self._files.keys()):
if p in self._files:
Martin von Zweigbergk
treemanifest: store directory path in treemanifest nodes...
r24403 yield self._subpath(p)
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401 else:
for f in self._dirs[p].iterkeys():
Martin von Zweigbergk
treemanifest: store directory path in treemanifest nodes...
r24403 yield f
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
def keys(self):
    """Return a list of every file path produced by ``iterkeys()``."""
    paths = []
    paths.extend(self.iterkeys())
    return paths
def __iter__(self):
    """Iterate over file paths; delegates to ``iterkeys()``."""
    path_iter = self.iterkeys()
    return path_iter
def __contains__(self, f):
    """Return whether file path ``f`` is present in this tree.

    ``None`` is never contained. A path with a leading directory component
    is looked up in the matching submanifest.
    """
    if f is None:
        return False
    self._load()
    topdir, rest = _splittopdir(f)
    if not topdir:
        return f in self._files
    if topdir not in self._dirs:
        return False
    return self._dirs[topdir].__contains__(rest)
def get(self, f, default=None):
    """Return the node stored for path ``f``, or ``default`` if absent."""
    self._load()
    topdir, rest = _splittopdir(f)
    if not topdir:
        return self._files.get(f, default)
    if topdir not in self._dirs:
        return default
    return self._dirs[topdir].get(rest, default)
def __getitem__(self, f):
    """Return the node stored for path ``f``.

    A missing file or missing leading directory surfaces as the
    ``KeyError`` raised by the underlying dict lookup.
    """
    self._load()
    topdir, rest = _splittopdir(f)
    if not topdir:
        return self._files[f]
    return self._dirs[topdir].__getitem__(rest)
def flags(self, f):
    """Return the flags string recorded for path ``f`` ('' if none)."""
    self._load()
    topdir, rest = _splittopdir(f)
    if topdir:
        if topdir not in self._dirs:
            return ''
        return self._dirs[topdir].flags(rest)
    # No directory component: a name registered as a directory has no flags.
    if f in self._dirs:
        return ''
    return self._flags.get(f, '')
def find(self, f):
    """Return ``(node, flags)`` for path ``f``.

    Missing entries surface as the ``KeyError`` raised by the dict lookup.
    """
    self._load()
    topdir, rest = _splittopdir(f)
    if not topdir:
        return self._files[f], self._flags.get(f, '')
    return self._dirs[topdir].find(rest)
def __delitem__(self, f):
    """Remove path ``f`` (and its flags) and mark this tree dirty."""
    self._load()
    topdir, rest = _splittopdir(f)
    if not topdir:
        del self._files[f]
        if f in self._flags:
            del self._flags[f]
    else:
        subtree = self._dirs[topdir]
        subtree.__delitem__(rest)
        # Drop the directory entry once nothing is left inside it.
        if subtree._isempty():
            del self._dirs[topdir]
    self._dirty = True
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
def __setitem__(self, f, n):
    """Store node ``n`` for path ``f`` and mark this tree dirty.

    Intermediate directory submanifests are created on demand.
    """
    assert n is not None
    self._load()
    topdir, rest = _splittopdir(f)
    if not topdir:
        # Keep only the first 21 bytes to match manifestdict's behavior.
        self._files[f] = n[:21]
    else:
        if topdir not in self._dirs:
            self._dirs[topdir] = treemanifest(self._subpath(topdir))
        self._dirs[topdir].__setitem__(rest, n)
    self._dirty = True
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
Augie Fackler
treemanifest: rework lazy-copying code (issue4840)...
def _load(self):
    """Run the pending load function, or else the pending copy function.

    Whichever callback runs is replaced by ``_noop`` *before* it is
    invoked, so it executes at most once. At most one of the two
    callbacks runs per call.
    """
    loader = self._loadfunc
    if loader is not _noop:
        self._loadfunc = _noop
        loader(self)
        return
    copier = self._copyfunc
    if copier is not _noop:
        self._copyfunc = _noop
        copier(self)
Martin von Zweigbergk
treemanifest: create treemanifest class...
def setflag(self, f, flags):
    """Set the flags (symlink, executable) for path f."""
    self._load()
    topdir, rest = _splittopdir(f)
    if not topdir:
        self._flags[f] = flags
    else:
        # Create the intermediate directory submanifest on demand.
        if topdir not in self._dirs:
            self._dirs[topdir] = treemanifest(self._subpath(topdir))
        self._dirs[topdir].setflag(rest, flags)
    self._dirty = True
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
def copy(self):
    """Return a copy of this tree, deferring the real work when possible.

    The node and dirty flag are copied immediately. The contents are
    copied right away only when this tree has no pending load function;
    otherwise a copy callback is installed on the new tree.
    """
    dup = treemanifest(self._dir)
    dup._node = self._node
    dup._dirty = self._dirty

    if self._copyfunc is not _noop:
        # This tree is itself a not-yet-materialized copy: share the
        # pending callback rather than forcing it.
        dup._copyfunc = self._copyfunc
        return dup

    def _populate(target):
        self._load()
        for d in self._dirs:
            target._dirs[d] = self._dirs[d].copy()
        target._files = dict.copy(self._files)
        target._flags = dict.copy(self._flags)

    if self._loadfunc is _noop:
        _populate(dup)
    else:
        dup._copyfunc = _populate
    return dup
def filesnotin(self, m2):
    '''Set of files in this manifest that are not in the other'''
    missing = set()

    def _collect(left, right):
        # Subtrees with the same node and no pending edits are skipped
        # without being loaded.
        if left._node == right._node and not (left._dirty or right._dirty):
            return
        left._load()
        right._load()
        for dirname, subleft in left._dirs.iteritems():
            if dirname not in right._dirs:
                # Whole directory is absent on the other side.
                missing.update(subleft.iterkeys())
            else:
                _collect(subleft, right._dirs[dirname])
        for fname in left._files.iterkeys():
            if fname not in right._files:
                missing.add(left._subpath(fname))

    _collect(self, m2)
    return missing
@propertycache
def _alldirs(self):
    """Cached result of ``util.dirs`` computed over this manifest."""
    alldirs = util.dirs(self)
    return alldirs
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
def dirs(self):
    """Return the cached ``_alldirs`` value for this manifest."""
    alldirs = self._alldirs
    return alldirs
def hasdir(self, dir):
    """Return whether directory ``dir`` exists in this tree."""
    self._load()
    topdir, subdir = _splittopdir(dir)
    if not topdir:
        # Directory keys are stored with a trailing slash.
        return (dir + '/') in self._dirs
    if topdir not in self._dirs:
        return False
    return self._dirs[topdir].hasdir(subdir)
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
Drew Gottlieb
manifest: move changectx.walk() to manifests...
def walk(self, match):
    '''Generates matching file names.

    Equivalent to manifest.matches(match).iterkeys(), but without creating
    an entirely new manifest.

    It also reports nonexistent files by marking them bad with match.bad().
    '''
    if match.always():
        for path in iter(self):
            yield path
        return

    # Explicitly requested names that have not been seen yet.
    pending = set(match.files())

    for path in self._walk(match):
        # specified pattern is the exact name
        pending.discard(path)
        yield path

    # for dirstate.walk, files=['.'] means "walk the whole tree".
    # follow that here, too
    pending.discard('.')

    for name in sorted(pending):
        if not self.hasdir(name):
            match.bad(name, None)
Drew Gottlieb
match: remove unnecessary optimization where visitdir() returns 'all'...
def _walk(self, match):
    '''Recursively generates matching file names for walk().'''
    # The matcher is asked about this directory without its trailing
    # slash; the root (empty name) is passed as '.'.
    if not match.visitdir(self._dir[:-1] or '.'):
        return

    # yield this dir's files and walk its submanifests
    self._load()
    names = list(self._dirs)
    names.extend(self._files)
    names.sort()
    for name in names:
        if name not in self._files:
            for path in self._dirs[name]._walk(match):
                yield path
            continue
        fullpath = self._subpath(name)
        if match(fullpath):
            yield fullpath
Martin von Zweigbergk
treemanifest: create treemanifest class...
def matches(self, match):
    '''generate a new manifest filtered by the match argument'''
    if not match.always():
        return self._matches(match)
    # Everything matches: a plain copy is the answer.
    return self.copy()
Drew Gottlieb
match: remove unnecessary optimization where visitdir() returns 'all'...
def _matches(self, match):
    '''recursively generate a new manifest filtered by the match argument.
    '''
    visit = match.visitdir(self._dir[:-1] or '.')
    if visit == 'all':
        # The whole subtree matches; no need to filter entry by entry.
        return self.copy()

    filtered = treemanifest(self._dir)
    if not visit:
        return filtered

    self._load()
    for name in self._files:
        if match(self._subpath(name)):
            filtered._files[name] = self._files[name]
            if name in self._flags:
                filtered._flags[name] = self._flags[name]

    for dirname, subtree in self._dirs.iteritems():
        submatch = subtree._matches(match)
        if not submatch._isempty():
            filtered._dirs[dirname] = submatch

    if not filtered._isempty():
        filtered._dirty = True
    return filtered
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
def diff(self, m2, clean=False):
    '''Finds changes between the current manifest and m2.

    Args:
      m2: the manifest to which this manifest should be compared.
      clean: if true, include files unchanged between these manifests
             with a None value in the returned dictionary.

    The result is returned as a dict with filename as key and
    values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
    nodeid in the current/other manifest and fl1/fl2 is the flag
    in the current/other manifest. Where the file does not exist,
    the nodeid will be None and the flags will be the empty
    string.
    '''
    changes = {}
    # Stand-in for a directory that exists on one side only.
    emptytree = treemanifest()

    def _compare(left, right):
        # Same node and no pending edits on either side: skip the
        # subtree without loading it.
        if left._node == right._node and not (left._dirty or right._dirty):
            return
        left._load()
        right._load()

        for dirname, subleft in left._dirs.iteritems():
            _compare(subleft, right._dirs.get(dirname, emptytree))
        for dirname, subright in right._dirs.iteritems():
            if dirname not in left._dirs:
                _compare(emptytree, subright)

        for fname, node1 in left._files.iteritems():
            flag1 = left._flags.get(fname, '')
            node2 = right._files.get(fname, None)
            flag2 = right._flags.get(fname, '')
            if node1 != node2 or flag1 != flag2:
                changes[left._subpath(fname)] = ((node1, flag1),
                                                 (node2, flag2))
            elif clean:
                changes[left._subpath(fname)] = None

        # Files present only on the other side.
        for fname, node2 in right._files.iteritems():
            if fname in left._files:
                continue
            flag2 = right._flags.get(fname, '')
            changes[right._subpath(fname)] = ((None, ''), (node2, flag2))

    _compare(self, m2)
    return changes
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
Martin von Zweigbergk
treemanifest: speed up commit using dirty flag...
def unmodifiedsince(self, m2):
    """Return True when neither tree is dirty and both have the same node."""
    if self._dirty or m2._dirty:
        return False
    return self._node == m2._node
Martin von Zweigbergk
treemanifest: store submanifest revlog per directory...
def parse(self, text, readsubtree):
    """Populate this tree from manifest ``text``.

    Entries flagged 't' are subdirectories and are obtained through
    ``readsubtree(path, node)``; all other entries are files.
    """
    for name, node, flag in _parse(text):
        if flag == 't':
            dirname = name + '/'
            self._dirs[dirname] = readsubtree(self._subpath(dirname), node)
            continue
        if '/' in name:
            # This is a flat manifest, so use __setitem__ and setflag rather
            # than assigning directly to _files and _flags, so we can
            # assign a path in a subdirectory, and to mark dirty (compared
            # to nullid).
            self[name] = node
            if flag:
                self.setflag(name, flag)
            continue
        # Assigning to _files and _flags avoids marking as dirty,
        # and should be a little faster.
        self._files[name] = node
        if flag:
            self._flags[name] = flag
Martin von Zweigbergk
treemanifest: extract parse method from constructor...
r24781
Martin von Zweigbergk
manifestv2: add support for writing new manifest format...
def text(self, usemanifestv2=False):
    """Get the full data of this manifest as a bytestring."""
    self._load()
    entries = self.iterentries()
    return _text(entries, usemanifestv2)
Martin von Zweigbergk
treemanifest: create treemanifest class...
r24401
Martin von Zweigbergk
treemanifest: store submanifest revlog per directory...
def dirtext(self, usemanifestv2=False):
    """Get the full data of this directory as a bytestring. Make sure that
    any submanifests have been written first, so their nodeids are correct.
    """
    self._load()
    getflags = self.flags
    # Subdirectories are listed without their trailing slash, flagged 't'.
    entries = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
    entries.extend((f, self._files[f], getflags(f)) for f in self._files)
    entries.sort()
    return _text(entries, usemanifestv2)
Martin von Zweigbergk
treemanifest: lazily load manifests...
def read(self, gettext, readsubtree):
    """Arrange for this tree to be parsed lazily.

    Nothing is read here: a load function is installed that, when run,
    parses ``gettext()`` using ``readsubtree`` and then marks the tree
    as not dirty.
    """
    def _deferredparse(tree):
        tree.parse(gettext(), readsubtree)
        tree._dirty = False
    self._loadfunc = _deferredparse
Martin von Zweigbergk
treemanifest: lazily load manifests...
r25222
Martin von Zweigbergk
treemanifest: store submanifest revlog per directory...
def writesubtrees(self, m1, m2, writesubtree):
    """Call ``writesubtree(subm, p1node, p2node)`` for each subdirectory.

    The parent nodes are taken from the same-named subdirectory of ``m1``
    and ``m2``; a parent lacking that directory contributes the node of
    an empty tree.
    """
    self._load() # for consistency; should never have any effect here
    m1._load()
    m2._load()
    emptytree = treemanifest()
    for dirname, subtree in self._dirs.iteritems():
        parent1 = m1._dirs.get(dirname, emptytree)._node
        parent2 = m2._dirs.get(dirname, emptytree)._node
        # If the first parent is null, swap so it comes second.
        if parent1 == revlog.nullid:
            parent1, parent2 = parent2, parent1
        writesubtree(subtree, parent1, parent2)
Durham Goode
manifest: make manifest derive from manifestrevlog...
class manifestrevlog(revlog.revlog):
    '''A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    '''
    def __init__(self, opener, dir='', dirlogcache=None):
        """Open the manifest revlog for ``dir`` ('' means the root).

        ``dirlogcache`` is the dict of directory logs shared with the root
        log; it is only used when ``dir`` is non-empty.
        """
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        usetreemanifest = False
        usemanifestv2 = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get('manifestcachesize', cachesize)
            usetreemanifest = opts.get('treemanifest', usetreemanifest)
            usemanifestv2 = opts.get('manifestv2', usemanifestv2)

        self._treeondisk = usetreemanifest
        self._usemanifestv2 = usemanifestv2

        self._fulltextcache = util.lrucachedict(cachesize)

        indexfile = "00manifest.i"
        if dir:
            assert self._treeondisk, 'opts is %r' % opts
            if not dir.endswith('/'):
                dir = dir + '/'
            indexfile = "meta/" + dir + "00manifest.i"
        self._dir = dir
        # The dirlogcache is kept on the root manifest log
        if dir:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {'': self}

        # NOTE(review): checkambig is enabled only for directory logs
        # (non-empty dir), not for the root log -- confirm this polarity
        # is intended.
        super(manifestrevlog, self).__init__(opener, indexfile,
                                             checkambig=bool(dir))

    @property
    def fulltextcache(self):
        """The LRU cache of full manifest texts, keyed by node."""
        return self._fulltextcache

    def clearcaches(self):
        """Drop the revlog caches, the full-text cache and directory logs."""
        super(manifestrevlog, self).clearcaches()
        self._fulltextcache.clear()
        # Register this log under its own directory. Using '' here for a
        # directory log would file it under the root log's key.
        self._dirlogcache = {self._dir: self}

    def dirlog(self, dir):
        """Return the (cached) manifest revlog for directory ``dir``."""
        if dir:
            assert self._treeondisk
        if dir not in self._dirlogcache:
            self._dirlogcache[dir] = manifestrevlog(self.opener, dir,
                                                    self._dirlogcache)
        return self._dirlogcache[dir]

    def add(self, m, transaction, link, p1, p2, added, removed):
        """Store manifest ``m`` as a new revision and return its node.

        ``added`` and ``removed`` are only consulted on the fast-delta
        path below.
        """
        if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
            and not self._usemanifestv2):
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge([(x, False) for x in added],
                               [(x, True) for x in removed])

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self.rev(p1), deltatext
            text = util.buffer(arraytext)
            n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
        else:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            if self._treeondisk:
                m1 = self.read(p1)
                m2 = self.read(p2)
                n = self._addtree(m, transaction, link, m1, m2)
                arraytext = None
            else:
                text = m.text(self._usemanifestv2)
                n = self.addrevision(text, transaction, link, p1, p2)
                arraytext = array.array('c', text)

        # Tree manifests produce no flat full text, so nothing is cached.
        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2):
        """Write tree manifest ``m`` (subtrees first) and return its node."""
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
            return m.node()
        def writesubtree(subm, subp1, subp2):
            sublog = self.dirlog(subm.dir())
            sublog.add(subm, transaction, link, subp1, subp2, None, None)
        m.writesubtrees(m1, m2, writesubtree)
        text = m.dirtext(self._usemanifestv2)
        # Double-check whether contents are unchanged to one parent
        if text == m1.dirtext(self._usemanifestv2):
            n = m1.node()
        elif text == m2.dirtext(self._usemanifestv2):
            n = m2.node()
        else:
            n = self.addrevision(text, transaction, link, m1.node(), m2.node())
        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n
Durham Goode
manifest: introduce manifestlog and manifestctx classes...
class manifestlog(object):
    """A collection class representing the collection of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""

    def __init__(self, opener, repo):
        """Build the manifest log for ``repo``.

        The 'treemanifest' entry of ``opener.options`` (when present)
        selects whether tree manifest contexts are handed out.
        """
        self._repo = repo

        options = getattr(opener, 'options', None)
        treeinmem = False
        if options is not None:
            treeinmem = options.get('treemanifest', treeinmem)
        self._treeinmem = treeinmem

        oldmanifest = repo._constructmanifest()
        self._oldmanifest = oldmanifest
        self._revlog = oldmanifest

        # We'll separate this into its own cache once oldmanifest is no longer
        # used
        self._mancache = oldmanifest._mancache

    def __getitem__(self, node):
        """Return the manifest context for ``node``.

        A cached context is reused when available; otherwise a new one is
        created and cached (the null node is never cached).
        """
        if node in self._mancache:
            cached = self._mancache[node]
            # The old manifest may put non-ctx manifests in the cache, so skip
            # those since they don't implement the full api.
            if isinstance(cached, (manifestctx, treemanifestctx)):
                return cached

        if self._treeinmem:
            ctx = treemanifestctx(self._revlog, '', node)
        else:
            ctx = manifestctx(self._revlog, node)
        if node != revlog.nullid:
            self._mancache[node] = ctx
        return ctx

    def add(self, m, transaction, link, p1, p2, added, removed):
        """Forward to the underlying revlog's ``add`` and return its result."""
        store = self._revlog
        return store.add(m, transaction, link, p1, p2, added, removed)
Durham Goode
manifest: change manifestctx to not inherit from manifestdict...
class manifestctx(object):
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """
    def __init__(self, revlog, node):
        self._revlog = revlog
        # Parsed manifest; None until read() has been called.
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        #self.p1, self.p2 = revlog.parents(node)
        #rev = revlog.rev(node)
        #self.linkrev = revlog.linkrev(rev)

    def node(self):
        """Return the node this context was created for."""
        return self._node

    def read(self):
        """Return the parsed manifest, reading it on first use.

        The result is cached on the instance. For a non-null node the
        full text is also stored in the revlog's full-text cache.
        """
        # Compare against None explicitly: a truthiness test would treat
        # a cached-but-empty manifest as "not read yet" and re-read it
        # on every call.
        if self._data is None:
            if self._node == revlog.nullid:
                self._data = manifestdict()
            else:
                text = self._revlog.revision(self._node)
                arraytext = array.array('c', text)
                self._revlog.fulltextcache[self._node] = arraytext
                self._data = manifestdict(text)
        return self._data

    def readfast(self):
        """Return a delta against a parent when cheap, else the full manifest.

        The delta form is used only when the revlog's delta parent is a
        real revision that is also one of this revision's parents.
        """
        rl = self._revlog
        r = rl.rev(self._node)
        deltaparent = rl.deltaparent(r)
        if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
            return self.readdelta()
        return self.read()

    def readdelta(self):
        """Return a manifestdict holding the entries that differ from the
        revlog delta parent (entries with no node on this side are omitted).
        """
        # Local is named ``rl`` so it does not shadow the ``revlog`` module.
        rl = self._revlog
        if rl._usemanifestv2:
            # Need to perform a slow delta
            r0 = rl.deltaparent(rl.rev(self._node))
            m0 = manifestctx(rl, rl.node(r0)).read()
            m1 = self.read()
            md = manifestdict()
            for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

        r = rl.rev(self._node)
        d = mdiff.patchtext(rl.revdiff(rl.deltaparent(r), r))
        return manifestdict(d)
Durham Goode
manifest: change manifestctx to not inherit from manifestdict...
class treemanifestctx(object):
    """A single revision of a tree manifest for one directory."""
    def __init__(self, revlog, dir, node):
        # Work with the directory-specific log from here on.
        revlog = revlog.dirlog(dir)
        self._revlog = revlog
        self._dir = dir
        # Parsed manifest; None until read() has been called.
        self._data = None

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
        # we can instantiate treemanifestctx objects for directories we don't
        # have on disk.
        #self.p1, self.p2 = revlog.parents(node)
        #rev = revlog.rev(node)
        #self.linkrev = revlog.linkrev(rev)

    def read(self):
        """Return the treemanifest for this revision, building it on first use.

        The result is cached on the instance.
        """
        # Compare against None explicitly: a truthiness test would treat
        # a cached-but-empty tree as "not read yet" and rebuild it on
        # every call, and evaluating truthiness of the cached tree may
        # itself do work.
        if self._data is None:
            if self._node == revlog.nullid:
                self._data = treemanifest()
            elif self._revlog._treeondisk:
                m = treemanifest(dir=self._dir)
                def gettext():
                    return self._revlog.revision(self._node)
                def readsubtree(dir, subm):
                    return treemanifestctx(self._revlog, dir, subm).read()
                # Only installs the loader; parsing happens later.
                m.read(gettext, readsubtree)
                m.setnode(self._node)
                self._data = m
            else:
                text = self._revlog.revision(self._node)
                arraytext = array.array('c', text)
                self._revlog.fulltextcache[self._node] = arraytext
                self._data = treemanifest(dir=self._dir, text=text)

        return self._data

    def node(self):
        """Return the node this context was created for."""
        return self._node

    def readdelta(self):
        """Return a treemanifest holding the entries that differ from the
        revlog delta parent (entries with no node on this side are omitted).
        """
        # Need to perform a slow delta
        rl = self._revlog
        r0 = rl.deltaparent(rl.rev(self._node))
        m0 = treemanifestctx(rl, self._dir, rl.node(r0)).read()
        m1 = self.read()
        md = treemanifest(dir=self._dir)
        for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
            if n1:
                md[f] = n1
                if fl1:
                    md.setflag(f, fl1)
        return md

    def readfast(self):
        """Return a delta against a parent when cheap, else the full manifest.

        The delta form is used only when the revlog's delta parent is a
        real revision that is also one of this revision's parents.
        """
        rl = self._revlog
        r = rl.rev(self._node)
        deltaparent = rl.deltaparent(r)
        if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
            return self.readdelta()
        return self.read()
Durham Goode
manifest: make manifest derive from manifestrevlog...
r29824 class manifest(manifestrevlog):
Martin von Zweigbergk
treemanifest: cache directory logs and manifests...
def __init__(self, opener, dir='', dirlogcache=None):
    '''The 'dir' and 'dirlogcache' arguments are for internal use by
    manifest.manifest only. External users should create a root manifest
    log with manifest.manifest(opener) and call dirlog() on it.
    '''
    # During normal operations, we expect to deal with not more than four
    # revs at a time (such as during commit --amend). When rebasing large
    # stacks of commits, the number can go up, hence the config knob below.
    mancachesize = 4
    treeinmem = False
    options = getattr(opener, 'options', None)
    if options is not None:
        mancachesize = options.get('manifestcachesize', mancachesize)
        treeinmem = options.get('treemanifest', treeinmem)
    self._mancache = util.lrucachedict(mancachesize)
    self._treeinmem = treeinmem
    super(manifest, self).__init__(opener, dir=dir, dirlogcache=dirlogcache)
Martin von Zweigbergk
treemanifest: add configuration for using treemanifest type...
r24402
def _newmanifest(self, data=''):
    '''Build an in-memory manifest object from 'data'.

    Returns a treemanifest constructed with this log's directory when
    self._treeinmem is set, and a manifestdict otherwise.
    '''
    if not self._treeinmem:
        return manifestdict(data)
    return treemanifest(self._dir, data)
mpm@selenic.com
Break apart hg.py...
r1089
Martin von Zweigbergk
treemanifest: cache directory logs and manifests...
def dirlog(self, dir):
    '''Return the manifest log for directory 'dir', creating it on demand.

    This overrides the base revlog implementation to allow construction
    of 'manifest' types instead of manifestrevlog types. This is only
    needed until we migrate off the 'manifest' type.
    '''
    if dir:
        assert self._treeondisk
    cache = self._dirlogcache
    if dir not in cache:
        # The new log is given the same cache object as this one.
        cache[dir] = manifest(self.opener, dir, cache)
    return cache[dir]
Martin von Zweigbergk
manifestv2: implement slow readdelta() without revdiff...
r24528 def _slowreaddelta(self, node):
r0 = self.deltaparent(self.rev(node))
m0 = self.read(self.node(r0))
m1 = self.read(node)
md = self._newmanifest()
for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
if n1:
md[f] = n1
if fl1:
md.setflag(f, fl1)
return md
Brendan Cully
Abstract manifest block parsing.
def readdelta(self, node):
    '''Return a manifest built from the delta stored for 'node'.

    When manifestv2 is in use or manifests are stored as trees on disk,
    this defers to _slowreaddelta(). Otherwise the revlog delta against
    the delta parent is passed through mdiff.patchtext() and the result
    is used to build a new manifest.
    '''
    if not self._usemanifestv2 and not self._treeondisk:
        rev = self.rev(node)
        patch = self.revdiff(self.deltaparent(rev), rev)
        return self._newmanifest(mdiff.patchtext(patch))
    return self._slowreaddelta(node)
Thomas Arendsen Hein
Whitespace/Tab cleanup
r3223
Martin von Zweigbergk
verify: check directory manifests...
def readshallowdelta(self, node):
    '''Read the delta for 'node' without descending into subdirectories.

    For flat manifests, this is the same as readdelta(). For
    treemanifests, this will read the delta for this revlog's directory,
    without recursively reading subdirectory manifests. Instead, any
    subdirectory entry will be reported as it appears in the manifests,
    i.e. the subdirectory will be reported among files and distinguished
    only by its 't' flag.

    Raises error.Abort for tree manifests when manifestv2 is in use.
    '''
    if not self._treeondisk:
        return self.readdelta(node)
    if self._usemanifestv2:
        raise error.Abort(
            _("readshallowdelta() not implemented for manifestv2"))
    rev = self.rev(node)
    patch = self.revdiff(self.deltaparent(rev), rev)
    # Always a flat manifestdict here, regardless of self._treeinmem.
    return manifestdict(mdiff.patchtext(patch))
Martin von Zweigbergk
changegroup: fix treemanifests on merges...
def readshallowfast(self, node):
    '''Like readfast(), but calls readshallowdelta() instead of
    readdelta().

    Falls back to readshallow() when the delta parent of 'node' is the
    null revision or is not one of the revision's parents.
    '''
    rev = self.rev(node)
    base = self.deltaparent(rev)
    if base == revlog.nullrev or base not in self.parentrevs(rev):
        return self.readshallow(node)
    return self.readshallowdelta(node)
mpm@selenic.com
Break apart hg.py...
def read(self, node):
    '''Return the parsed manifest stored at 'node'.

    The null node yields a fresh empty manifest that is not cached.
    Other results are looked up in, and stored into, self._mancache. For
    manifests that are not stored as trees on disk, the raw text is also
    stored in self.fulltextcache as an array.
    '''
    if node == revlog.nullid:
        return self._newmanifest() # don't upset local cache

    if node in self._mancache:
        entry = self._mancache[node]
        # The cache may hold context objects rather than parsed
        # manifests; those are unwrapped through their own read().
        if isinstance(entry, (manifestctx, treemanifestctx)):
            return entry.read()
        return entry

    if not self._treeondisk:
        text = self.revision(node)
        m = self._newmanifest(text)
        arraytext = array.array('c', text)
        self._mancache[node] = m
        self.fulltextcache[node] = arraytext
        return m

    # Tree manifests are handed callbacks rather than the text itself;
    # when they are invoked is up to m.read(). No full text is cached
    # for them.
    m = self._newmanifest()
    m.read(lambda: self.revision(node),
           lambda dir, subm: self.dirlog(dir).read(subm))
    m.setnode(node)
    self._mancache[node] = m
    return m
mpm@selenic.com
Break apart hg.py...
r1089
Martin von Zweigbergk
changegroup: fix treemanifests on merges...
def readshallow(self, node):
    '''Read only the manifest stored in this directory's log.

    When using flat manifests, this manifest will generally have files
    in subdirectories in it. The result is not cached, as the callers
    generally do not read the same version twice.
    '''
    text = self.revision(node)
    return manifestdict(text)
Vadim Gelfer
fix parsing of tags. make parse errors useful. add new tag tests....
def find(self, node, f):
    '''Look up the entry for a single file in the manifest at 'node'.

    Returns a (node, flags) pair if found, (None, None) if not.
    '''
    m = self.read(node)
    try:
        entry = m.find(f)
    except KeyError:
        # The manifest signals a missing file with KeyError.
        return None, None
    return entry
Gregory Szorc
manifest: implement clearcaches()...
def clearcaches(self):
    '''Clear the base class caches, then empty the parsed-manifest
    cache held in self._mancache.'''
    super(manifest, self).clearcaches()
    self._mancache.clear()