from __future__ import absolute_import

import hashlib
import struct

from mercurial.node import hex, nullid
from mercurial import (
    pycompat,
    util,
)

from . import (
    basepack,
    constants,
    shallowutil,
)

# (filename hash, offset, size, node index offset, node index size)
INDEXFORMAT2 = '!20sQQII'
INDEXENTRYLENGTH2 = struct.calcsize(INDEXFORMAT2)

NODELENGTH = 20

NODEINDEXFORMAT = '!20sQ'
NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT)

# (node, p1, p2, linknode, copyfromlen)
PACKFORMAT = "!20s20s20s20sH"
PACKENTRYLENGTH = 82

ENTRYCOUNTSIZE = 4

INDEXSUFFIX = '.histidx'
PACKSUFFIX = '.histpack'

ANC_NODE = 0
ANC_P1NODE = 1
ANC_P2NODE = 2
ANC_LINKNODE = 3
ANC_COPYFROM = 4

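# Illustrative sketch (not part of the upstream API): how a single revision
# entry is laid out on disk. PACKFORMAT packs four 20-byte nodes plus a
# 2-byte copyfrom length, and the copyfrom path, when present, follows
# immediately after the fixed-width entry, so
# struct.calcsize(PACKFORMAT) == PACKENTRYLENGTH == 82.
def _packentrysketch(node, p1, p2, linknode, copyfrom=''):
    """Hypothetical helper showing the byte layout that add()/_write below
    produce for one history entry."""
    return struct.pack(PACKFORMAT, node, p1, p2, linknode,
                       len(copyfrom)) + copyfrom
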
class historypackstore(basepack.basepackstore):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    def getpack(self, path):
        return historypack(path)

    def getancestors(self, name, node, known=None):
        for pack in self.packs:
            try:
                return pack.getancestors(name, node, known=known)
            except KeyError:
                pass

        for pack in self.refresh():
            try:
                return pack.getancestors(name, node, known=known)
            except KeyError:
                pass

        raise KeyError((name, node))

    def getnodeinfo(self, name, node):
        for pack in self.packs:
            try:
                return pack.getnodeinfo(name, node)
            except KeyError:
                pass

        for pack in self.refresh():
            try:
                return pack.getnodeinfo(name, node)
            except KeyError:
                pass

        raise KeyError((name, node))

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        raise RuntimeError("cannot add to historypackstore (%s:%s)"
                           % (filename, hex(node)))

class historypack(basepack.basepack):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    SUPPORTED_VERSIONS = [2]

    def __init__(self, path):
        super(historypack, self).__init__(path)
        self.INDEXFORMAT = INDEXFORMAT2
        self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2

    def getmissing(self, keys):
        missing = []
        for name, node in keys:
            try:
                self._findnode(name, node)
            except KeyError:
                missing.append((name, node))

        return missing

    def getancestors(self, name, node, known=None):
        """Returns as many ancestors as we're aware of.

        return value: {
            node: (p1, p2, linknode, copyfrom),
            ...
        }
        """
        if known and node in known:
            return []

        ancestors = self._getancestors(name, node, known=known)
        results = {}
        for ancnode, p1, p2, linknode, copyfrom in ancestors:
            results[ancnode] = (p1, p2, linknode, copyfrom)

        if not results:
            raise KeyError((name, node))
        return results

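    # A sketch of expected usage, with hypothetical names, following the
    # docstring above:
    #
    #   ancestors = pack.getancestors(filename, node)
    #   p1, p2, linknode, copyfrom = ancestors[node]
    #
    # Walking the p1/p2 keys through the returned dict reconstructs the known
    # ancestry graph without touching any revision text.
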
    def getnodeinfo(self, name, node):
        # Drop the node from the tuple before returning, since the result should
        # just be (p1, p2, linknode, copyfrom)
        return self._findnode(name, node)[1:]

    def _getancestors(self, name, node, known=None):
        if known is None:
            known = set()
        section = self._findsection(name)
        filename, offset, size, nodeindexoffset, nodeindexsize = section
        pending = set((node,))
        o = 0
        while o < size:
            if not pending:
                break
            entry, copyfrom = self._readentry(offset + o)
            o += PACKENTRYLENGTH
            if copyfrom:
                o += len(copyfrom)

            ancnode = entry[ANC_NODE]
            if ancnode in pending:
                pending.remove(ancnode)
                p1node = entry[ANC_P1NODE]
                p2node = entry[ANC_P2NODE]
                if p1node != nullid and p1node not in known:
                    pending.add(p1node)
                if p2node != nullid and p2node not in known:
                    pending.add(p2node)

                yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)

    def _readentry(self, offset):
        data = self._data
        entry = struct.unpack(PACKFORMAT, data[offset:offset + PACKENTRYLENGTH])
        copyfrom = None
        copyfromlen = entry[ANC_COPYFROM]
        if copyfromlen != 0:
            offset += PACKENTRYLENGTH
            copyfrom = data[offset:offset + copyfromlen]
        return entry, copyfrom

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        raise RuntimeError("cannot add to historypack (%s:%s)" %
                           (filename, hex(node)))

    def _findnode(self, name, node):
        if self.VERSION == 0:
            ancestors = self._getancestors(name, node)
            for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
                if ancnode == node:
                    return (ancnode, p1node, p2node, linknode, copyfrom)
        else:
            section = self._findsection(name)
            nodeindexoffset, nodeindexsize = section[3:]
            entry = self._bisect(node, nodeindexoffset,
                                 nodeindexoffset + nodeindexsize,
                                 NODEINDEXENTRYLENGTH)
            if entry is not None:
                node, offset = struct.unpack(NODEINDEXFORMAT, entry)
                entry, copyfrom = self._readentry(offset)
                # Drop the copyfromlen from the end of entry, and replace it
                # with the copyfrom string.
                return entry[:4] + (copyfrom,)

        raise KeyError("unable to find history for %s:%s" % (name, hex(node)))

    def _findsection(self, name):
        params = self.params
        namehash = hashlib.sha1(name).digest()
        fanoutkey = struct.unpack(params.fanoutstruct,
                                  namehash[:params.fanoutprefix])[0]
        fanout = self._fanouttable

        start = fanout[fanoutkey] + params.indexstart
        indexend = self._indexend

        for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
            end = fanout[i] + params.indexstart
            if end != start:
                break
        else:
            end = indexend

        entry = self._bisect(namehash, start, end, self.INDEXENTRYLENGTH)
        if not entry:
            raise KeyError(name)

        rawentry = struct.unpack(self.INDEXFORMAT, entry)
        x, offset, size, nodeindexoffset, nodeindexsize = rawentry
        rawnamelen = self._index[nodeindexoffset:nodeindexoffset +
                                                 constants.FILENAMESIZE]
        actualnamelen = struct.unpack('!H', rawnamelen)[0]
        nodeindexoffset += constants.FILENAMESIZE
        actualname = self._index[nodeindexoffset:nodeindexoffset +
                                                 actualnamelen]
        if actualname != name:
            raise KeyError("found file name %s when looking for %s" %
                           (actualname, name))
        nodeindexoffset += actualnamelen

        filenamelength = struct.unpack('!H', self._data[offset:offset +
                                                    constants.FILENAMESIZE])[0]
        offset += constants.FILENAMESIZE

        actualname = self._data[offset:offset + filenamelength]
        offset += filenamelength

        if name != actualname:
            raise KeyError("found file name %s when looking for %s" %
                           (actualname, name))

        # Skip entry list size
        offset += ENTRYCOUNTSIZE

        nodelistoffset = offset
        nodelistsize = (size - constants.FILENAMESIZE - filenamelength -
                        ENTRYCOUNTSIZE)
        return (name, nodelistoffset, nodelistsize,
                nodeindexoffset, nodeindexsize)

    def _bisect(self, node, start, end, entrylen):
        # Bisect between start and end to find node
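        # Entries are fixed-width (entrylen bytes) and sorted by their leading
        # 20-byte key, so each probe below is snapped back onto an entry
        # boundary (relative to origstart) before the key comparison.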
        origstart = start
        startnode = self._index[start:start + NODELENGTH]
        endnode = self._index[end:end + NODELENGTH]
        if startnode == node:
            return self._index[start:start + entrylen]
        elif endnode == node:
            return self._index[end:end + entrylen]
        else:
            while start < end - entrylen:
                mid = start + (end - start) // 2
                mid = mid - ((mid - origstart) % entrylen)
                midnode = self._index[mid:mid + NODELENGTH]
                if midnode == node:
                    return self._index[mid:mid + entrylen]
                if node > midnode:
                    start = mid
                elif node < midnode:
                    end = mid
        return None

    def markledger(self, ledger, options=None):
        for filename, node in self:
            ledger.markhistoryentry(self, filename, node)

    def cleanup(self, ledger):
        entries = ledger.sources.get(self, [])
        allkeys = set(self)
        repackedkeys = set((e.filename, e.node) for e in entries if
                           e.historyrepacked)

        if len(allkeys - repackedkeys) == 0:
            if self.path not in ledger.created:
                util.unlinkpath(self.indexpath, ignoremissing=True)
                util.unlinkpath(self.packpath, ignoremissing=True)

    def __iter__(self):
        for f, n, x, x, x, x in self.iterentries():
            yield f, n

    def iterentries(self):
        # Start at 1 to skip the header
        offset = 1
        while offset < self.datasize:
            data = self._data
            # <2 byte len> + <filename>
            filenamelen = struct.unpack('!H', data[offset:offset +
                                                   constants.FILENAMESIZE])[0]
            offset += constants.FILENAMESIZE
            filename = data[offset:offset + filenamelen]
            offset += filenamelen

            revcount = struct.unpack('!I', data[offset:offset +
                                               ENTRYCOUNTSIZE])[0]
            offset += ENTRYCOUNTSIZE

            for i in pycompat.xrange(revcount):
                entry = struct.unpack(PACKFORMAT, data[offset:offset +
                                                       PACKENTRYLENGTH])
                offset += PACKENTRYLENGTH

                copyfrom = data[offset:offset + entry[ANC_COPYFROM]]
                offset += entry[ANC_COPYFROM]

                yield (filename, entry[ANC_NODE], entry[ANC_P1NODE],
                       entry[ANC_P2NODE], entry[ANC_LINKNODE], copyfrom)

                self._pagedin += PACKENTRYLENGTH

            # If we've read a lot of data from the mmap, free some memory.
            self.freememory()

class mutablehistorypack(basepack.mutablebasepack):
    """A class for constructing and serializing a histpack file and index.

    A history pack is a pair of files that contain the revision history for
    various file revisions in Mercurial. It contains only revision history
    (like parent pointers and linknodes), not any revision content
    information.

    It consists of two files, with the following format:

    .histpack
        The pack itself is a series of file revisions with some basic header
        information on each.

        histpack = <version: 1 byte>
                   [<filesection>,...]
        filesection = <filename len: 2 byte unsigned int>
                      <filename>
                      <revision count: 4 byte unsigned int>
                      [<revision>,...]
        revision = <node: 20 byte>
                   <p1node: 20 byte>
                   <p2node: 20 byte>
                   <linknode: 20 byte>
                   <copyfromlen: 2 byte>
                   <copyfrom>

        The revisions within each filesection are stored in topological order
        (newest first). If a given entry has a parent from another file (a copy)
        then p1node is the node from the other file, and copyfrom is the
        filepath of the other file.

    .histidx
        The index file provides a mapping from filename to the file section in
        the histpack. In V1 it also contains sub-indexes for specific nodes
        within each file. It consists of three parts, the fanout, the file
        index and the node indexes.

        The file index is a list of index entries, sorted by filename hash (one
        per file section in the pack). Each entry has:

        - node (The 20 byte hash of the filename)
        - pack entry offset (The location of this file section in the histpack)
        - pack content size (The on-disk length of this file section's pack
          data)
        - node index offset (The location of the file's node index in the index
          file) [1]
        - node index size (the on-disk length of this file's node index) [1]

        The fanout is a quick lookup table to reduce the number of steps for
        bisecting the index. It is a series of 4 byte pointers to positions
        within the index. It has 2^16 entries, which corresponds to hash
        prefixes [0000, 0001,..., FFFE, FFFF]. Example: the pointer in slot
        4F0A points to the index position of the first revision whose node
        starts with 4F0A. This saves log(2^16) bisect steps.

        histidx = <fanouttable>
                  <file count: 8 byte unsigned> [1]
                  <fileindex>
                  <node count: 8 byte unsigned> [1]
                  [<nodeindex>,...] [1]
        fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
        fileindex = [<file index entry>,...]
        fileindexentry = <node: 20 byte>
                         <pack file section offset: 8 byte unsigned int>
                         <pack file section size: 8 byte unsigned int>
                         <node index offset: 4 byte unsigned int> [1]
                         <node index size: 4 byte unsigned int> [1]
        nodeindex = <filename>[<node index entry>,...] [1]
        filename = <filename len : 2 byte unsigned int><filename value> [1]
        nodeindexentry = <node: 20 byte> [1]
                         <pack file node offset: 8 byte unsigned int> [1]

    [1]: new in version 1.
    """
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    SUPPORTED_VERSIONS = [2]

    def __init__(self, ui, packpath, version=2):
        super(mutablehistorypack, self).__init__(ui, packpath, version=version)
        self.files = {}
        self.entrylocations = {}
        self.fileentries = {}

        self.INDEXFORMAT = INDEXFORMAT2
        self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2

        self.NODEINDEXFORMAT = NODEINDEXFORMAT
        self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        copyfrom = copyfrom or ''
        copyfromlen = struct.pack('!H', len(copyfrom))
        self.fileentries.setdefault(filename, []).append((node, p1, p2,
                                                          linknode,
                                                          copyfromlen,
                                                          copyfrom))

    def _write(self):
        for filename in sorted(self.fileentries):
            entries = self.fileentries[filename]
            sectionstart = self.packfp.tell()

            # Write the file section content
            entrymap = dict((e[0], e) for e in entries)
            def parentfunc(node):
                x, p1, p2, x, x, x = entrymap[node]
                parents = []
                if p1 != nullid:
                    parents.append(p1)
                if p2 != nullid:
                    parents.append(p2)
                return parents

            sortednodes = list(reversed(shallowutil.sortnodes(
                (e[0] for e in entries),
                parentfunc)))
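            # shallowutil.sortnodes yields parents before children;
            # reversed() flips that to the newest-first order documented in
            # the class docstring above.
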
            # Write the file section header
            self.writeraw("%s%s%s" % (
                struct.pack('!H', len(filename)),
                filename,
                struct.pack('!I', len(sortednodes)),
            ))

            sectionlen = constants.FILENAMESIZE + len(filename) + 4

            rawstrings = []

            # Record the node locations for the index
            locations = self.entrylocations.setdefault(filename, {})
            offset = sectionstart + sectionlen
            for node in sortednodes:
                locations[node] = offset
                raw = '%s%s%s%s%s%s' % entrymap[node]
                rawstrings.append(raw)
                offset += len(raw)

            rawdata = ''.join(rawstrings)
            sectionlen += len(rawdata)

            self.writeraw(rawdata)

            # Record metadata for the index
            self.files[filename] = (sectionstart, sectionlen)
            node = hashlib.sha1(filename).digest()
            self.entries[node] = node

    def close(self, ledger=None):
        if self._closed:
            return

        self._write()
        return super(mutablehistorypack, self).close(ledger=ledger)

    def createindex(self, nodelocations, indexoffset):
        fileindexformat = self.INDEXFORMAT
        fileindexlength = self.INDEXENTRYLENGTH
        nodeindexformat = self.NODEINDEXFORMAT
        nodeindexlength = self.NODEINDEXENTRYLENGTH

        files = ((hashlib.sha1(filename).digest(), filename, offset, size)
                 for filename, (offset, size) in self.files.iteritems())
        files = sorted(files)

        # node index is after file index size, file index, and node index size
        indexlensize = struct.calcsize('!Q')
        nodeindexoffset = (indexoffset + indexlensize +
                           (len(files) * fileindexlength) + indexlensize)
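        # e.g. with 3 files: nodeindexoffset = indexoffset + 8 + 3 * 44 + 8,
        # since INDEXENTRYLENGTH2 == struct.calcsize('!20sQQII') == 44 and
        # '!Q' is 8 bytes.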

        fileindexentries = []
        nodeindexentries = []
        nodecount = 0
        for namehash, filename, offset, size in files:
            # File section index
            nodelocations = self.entrylocations[filename]

            nodeindexsize = len(nodelocations) * nodeindexlength

            rawentry = struct.pack(fileindexformat, namehash, offset, size,
                                   nodeindexoffset, nodeindexsize)
            # Node index
            nodeindexentries.append(struct.pack(constants.FILENAMESTRUCT,
                                                len(filename)) + filename)
            nodeindexoffset += constants.FILENAMESIZE + len(filename)

            for node, location in sorted(nodelocations.iteritems()):
                nodeindexentries.append(struct.pack(nodeindexformat, node,
                                                    location))
                nodecount += 1

            nodeindexoffset += len(nodelocations) * nodeindexlength

            fileindexentries.append(rawentry)

        nodecountraw = struct.pack('!Q', nodecount)
        return (''.join(fileindexentries) + nodecountraw +
                ''.join(nodeindexentries))