revlog: use bytes() to ensure text from _chunks is a reasonable type
Augie Fackler
r31357:c233cbda default
@@ -1,2101 +1,2101 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, text) -> newtext, bool
          - (write) f(self, text) -> newtext, bool
          - (raw) f(self, text) -> bool
    The boolean returned by these transforms is used to determine whether
    'newtext' can be used for hash integrity checking.

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
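
# Illustrative sketch (not part of the original module): registering a
# processor on REVIDX_EXTSTORED might look like the following, where the
# helper names (_fetchfromstore, _puttostore) are made up and each hook
# returns the transformed text plus a bool saying whether that text can
# be used for hash verification:
#
#   def _readext(self, text):
#       return _fetchfromstore(text), False   # stored text is a pointer
#   def _writeext(self, text):
#       return _puttostore(text), False
#   def _rawext(self, text):
#       return False  # raw pointer text never matches the nodeid hash
#   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))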

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return long(long(offset) << 16 | type)
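
# In other words (illustrative): the first index field packs a 48-bit file
# offset above a 16-bit flag field, and the helpers round-trip it:
#
#   >>> q = offset_type(4096, REVIDX_ISCENSORED)
#   >>> getoffset(q), gettype(q)
#   (4096, 32768)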

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
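
# Equivalent one-shot computation (sketch, for illustration only): since
# nullid is twenty NUL bytes it always sorts first, so both branches above
# reduce to SHA-1 over the two parent nodeids in sorted byte order followed
# by the revision text:
#
#   >>> hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest() \
#   ...     == hash(text, p1, p2)
#   True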

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
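
# Sanity check on the layout above (illustrative): four 4-byte fields plus
# three 20-byte nodeids give a fixed 76-byte record, which is exactly what
# struct reports for the format string:
#
#   >>> struct.calcsize(">4l20s20s20s")
#   76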

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"
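
# Again for illustration: the Q field (48-bit offset + 16-bit flags), six
# 4-byte int fields, the 20-byte nodeid, and 12 pad bytes make each RevlogNG
# index record exactly 64 bytes:
#
#   >>> struct.calcsize(">Qiiiiii20s12x")
#   64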

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p
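
# Note on the rev == 0 splice above (sketch): the first four bytes of the
# first index entry double as the revlog version header, so reading it back
# is just
#
#   >>> struct.unpack(versionformat, indexdata[:4])[0]
#
# which is what revlog.__init__ does below when it opens the index file.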

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
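
    # For example (illustrative): revs(1, 4) iterates 1, 2, 3, 4 while
    # revs(4, 1) iterates 4, 3, 2, 1 -- the stop bound is inclusive in both
    # directions because of the step adjustment above.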

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
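
    # Sketch of the two walk modes above: without generaldelta the chain is
    # the consecutive run base..rev (each rev deltas against rev - 1); with
    # generaldelta each entry's e[3] names its delta parent explicitly, so
    # e.g. a rev based on 7, itself based on 2, yields chain [2, 7, rev].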

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
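
    # Worked example (illustrative): for a linear history 0 <- 1 <- 2, each
    # rev is first marked 1 and then unmarked when seen as a parent, leaving
    # only ishead[2] set, so _headrevs() returns [2]. A branch point keeps
    # both of its children marked.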

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
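
    # A worked example of the cache-window arithmetic above (illustrative
    # numbers; 65536 is the usual default chunk cache size, not a
    # requirement). With cachesize = 65536, offset = 70000, length = 100:
    #   realoffset = 70000 & ~65535 = 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536
    # so bytes [65536, 131072) are read and cached, and the requested 100
    # bytes come back as a util.buffer slice at 70000 - 65536 = 4464.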

    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)
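
    # Sketch of the inline adjustment above. In an inline revlog, each
    # revision's data directly follows its index entry (self._io.size bytes,
    # 64 for the v1 format), so revision r's data sits (r + 1) entries past
    # its nominal offset. For example, with startrev = 3, a nominal start of
    # 1000, and 64-byte entries, the real file offset is
    # 1000 + (3 + 1) * 64 = 1256.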

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        decomp = self.decompress
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = bytes(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text, validatehash = self._processflags(text, self.flags(rev), 'read',
                                                raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        self._cache = (node, rev, text)
        return text
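
    # A sketch of the delta-chain reconstruction performed by revision()
    # above (chain contents depend on how the revlog was written): for a
    # chain [5, 8, 9] ending at rev 9, _chunks() yields the fulltext of
    # rev 5 followed by the deltas of revs 8 and 9, and mdiff.patches()
    # folds those deltas in order into the fulltext of rev 9. When the
    # cached revision terminates the chain early (stopped is True), the
    # cached text stands in for the base and every returned chunk is a
    # delta.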

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation ") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
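
    # A minimal sketch of a flag processor triple as consumed above. The
    # transforms below are hypothetical; a real processor must end up in the
    # module-level _flagprocessors mapping under a flag listed in
    # REVIDX_FLAGS_ORDER (how it gets registered there is outside this
    # method):
    #
    #   def _readb64(self, text):
    #       return text.decode('base64'), True   # decoded text is hashable
    #   def _writeb64(self, text):
    #       return text.encode('base64'), False  # stored form, don't hash
    #   def _rawb64(self, text):
    #       return False                         # raw text never validates
    #
    #   _flagprocessors[REVIDX_EXTSTORED] = (_readb64, _writeb64, _rawb64)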

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        newtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if newtext != text:
            cachedelta = None
            text = newtext

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(text, node, p1=p1, p2=p2)

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)
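
    # How the header-byte dispatch above pairs with compress() (a sketch;
    # the exact bytes depend on the configured engine, zlib assumed here):
    #   compress('a' * 100)  -> ('', 'x\x9c...')   'x' routes to zlib
    #   compress('\0binary') -> ('', '\0binary')   '\0' is returned verbatim
    #   compress('short')    -> ('u', 'short')     'u' is stripped via buffer
    # Any other header byte falls through to the compression-engine lookup
    # keyed on that byte (e.g. zstd registers its own revlog header).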

    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True
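
    # A worked example of the bounds above (numbers are illustrative): for a
    # 1,000-byte fulltext, a candidate delta is rejected if reading its chain
    # would span more than 4,000 bytes of stored data (dist), if the delta
    # itself exceeds 1,000 bytes (l), if the chain's deltas total more than
    # 2,000 bytes (compresseddeltalen), or if the chain would grow beyond
    # self._maxchainlen, when one is configured.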

    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False, raw=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        - raw is optional; if set to True, it indicates the revision data is to
          be treated by _processflags() as raw. It is usually set by changegroup
          generation and debug commands.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=raw)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

1751 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1751 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1752 """
1752 """
1753 add a delta group
1753 add a delta group
1754
1754
1755 given a set of deltas, add them to the revision log. the
1755 given a set of deltas, add them to the revision log. the
1756 first delta is against its parent, which should be in our
1756 first delta is against its parent, which should be in our
1757 log, the rest are against the previous delta.
1757 log, the rest are against the previous delta.
1758
1758
1759 If ``addrevisioncb`` is defined, it will be called with arguments of
1759 If ``addrevisioncb`` is defined, it will be called with arguments of
1760 this revlog and the node that was added.
1760 this revlog and the node that was added.
1761 """
1761 """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
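        # flush() forces data buffered in the open file handles out to disk;
        # it is handed to _peek_iscensored() below so that an implementation
        # which needs to read back freshly appended revisions can do so.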
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb),
                                          raw=True)

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False
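
    # Both censorship hooks above are conservative no-op defaults at this
    # level; subclasses that can actually carry censored revisions (e.g.
    # filelog) are expected to override them.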

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
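        #
        # Illustrative walk (hypothetical numbers, linear history): with
        # revs 0..4 carrying linkrevs [0, 1, 2, 3, 4] and minlink=3, the
        # loop pops revs 4 and 3 (both linkrevs are >= minlink), at which
        # point futurelargelinkrevs is empty and strippoint settles at 3
        # with no broken linkrevs.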
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

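        # The slice stops at -1 rather than the end because the index keeps
        # a sentinel nullid entry in its final slot, which must survive the
        # truncation.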
        del self.index[rev:-1]

    def checksize(self):
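        """Check that the on-disk file sizes are consistent with the index.

        Returns a (dd, di) pair of byte discrepancies: dd for the data
        file and di for the index file. Both are 0 when everything is
        consistent.
        """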
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = set(['always', 'samerevs', 'never'])

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        When left at ``None``, the destination revlog's existing setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
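                # The raw index entry is a tuple whose fields, as consumed
                # here, are: entry[0] packs the data offset with the flags
                # in its low 16 bits, entry[4] is the linkrev, entry[5] and
                # entry[6] are the parent revision numbers, and entry[7] is
                # the node.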
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                text = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    text = self.revision(rev)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, text, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd