##// END OF EJS Templates
revlog: drop an unused variable assignment...
Matt Harbison -
r44431:bdb35716 default
parent child Browse files
Show More
@@ -1,2981 +1,2979
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .pycompat import getattr
38 from .pycompat import getattr
39 from .revlogutils.constants import (
39 from .revlogutils.constants import (
40 FLAG_GENERALDELTA,
40 FLAG_GENERALDELTA,
41 FLAG_INLINE_DATA,
41 FLAG_INLINE_DATA,
42 REVLOGV0,
42 REVLOGV0,
43 REVLOGV1,
43 REVLOGV1,
44 REVLOGV1_FLAGS,
44 REVLOGV1_FLAGS,
45 REVLOGV2,
45 REVLOGV2,
46 REVLOGV2_FLAGS,
46 REVLOGV2_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
48 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_FORMAT,
49 REVLOG_DEFAULT_VERSION,
49 REVLOG_DEFAULT_VERSION,
50 )
50 )
51 from .revlogutils.flagutil import (
51 from .revlogutils.flagutil import (
52 REVIDX_DEFAULT_FLAGS,
52 REVIDX_DEFAULT_FLAGS,
53 REVIDX_ELLIPSIS,
53 REVIDX_ELLIPSIS,
54 REVIDX_EXTSTORED,
54 REVIDX_EXTSTORED,
55 REVIDX_FLAGS_ORDER,
55 REVIDX_FLAGS_ORDER,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_SIDEDATA,
58 REVIDX_SIDEDATA,
59 )
59 )
60 from .thirdparty import attr
60 from .thirdparty import attr
61 from . import (
61 from . import (
62 ancestor,
62 ancestor,
63 dagop,
63 dagop,
64 error,
64 error,
65 mdiff,
65 mdiff,
66 policy,
66 policy,
67 pycompat,
67 pycompat,
68 revlogutils,
68 revlogutils,
69 templatefilters,
69 templatefilters,
70 util,
70 util,
71 )
71 )
72 from .interfaces import (
72 from .interfaces import (
73 repository,
73 repository,
74 util as interfaceutil,
74 util as interfaceutil,
75 )
75 )
76 from .revlogutils import (
76 from .revlogutils import (
77 deltas as deltautil,
77 deltas as deltautil,
78 flagutil,
78 flagutil,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
87 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
88 REVLOGV0
88 REVLOGV0
89 REVLOGV1
89 REVLOGV1
90 REVLOGV2
90 REVLOGV2
91 FLAG_INLINE_DATA
91 FLAG_INLINE_DATA
92 FLAG_GENERALDELTA
92 FLAG_GENERALDELTA
93 REVLOG_DEFAULT_FLAGS
93 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FORMAT
94 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_VERSION
95 REVLOG_DEFAULT_VERSION
96 REVLOGV1_FLAGS
96 REVLOGV1_FLAGS
97 REVLOGV2_FLAGS
97 REVLOGV2_FLAGS
98 REVIDX_ISCENSORED
98 REVIDX_ISCENSORED
99 REVIDX_ELLIPSIS
99 REVIDX_ELLIPSIS
100 REVIDX_SIDEDATA
100 REVIDX_SIDEDATA
101 REVIDX_EXTSTORED
101 REVIDX_EXTSTORED
102 REVIDX_DEFAULT_FLAGS
102 REVIDX_DEFAULT_FLAGS
103 REVIDX_FLAGS_ORDER
103 REVIDX_FLAGS_ORDER
104 REVIDX_RAWTEXT_CHANGING_FLAGS
104 REVIDX_RAWTEXT_CHANGING_FLAGS
105
105
106 parsers = policy.importmod('parsers')
106 parsers = policy.importmod('parsers')
107 rustancestor = policy.importrust('ancestor')
107 rustancestor = policy.importrust('ancestor')
108 rustdagop = policy.importrust('dagop')
108 rustdagop = policy.importrust('dagop')
109
109
110 # Aliased for performance.
110 # Aliased for performance.
111 _zlibdecompress = zlib.decompress
111 _zlibdecompress = zlib.decompress
112
112
113 # max size of revlog with inline data
113 # max size of revlog with inline data
114 _maxinline = 131072
114 _maxinline = 131072
115 _chunksize = 1048576
115 _chunksize = 1048576
116
116
117 # Flag processors for REVIDX_ELLIPSIS.
117 # Flag processors for REVIDX_ELLIPSIS.
118 def ellipsisreadprocessor(rl, text):
118 def ellipsisreadprocessor(rl, text):
119 return text, False, {}
119 return text, False, {}
120
120
121
121
122 def ellipsiswriteprocessor(rl, text, sidedata):
122 def ellipsiswriteprocessor(rl, text, sidedata):
123 return text, False
123 return text, False
124
124
125
125
126 def ellipsisrawprocessor(rl, text):
126 def ellipsisrawprocessor(rl, text):
127 return False
127 return False
128
128
129
129
130 ellipsisprocessor = (
130 ellipsisprocessor = (
131 ellipsisreadprocessor,
131 ellipsisreadprocessor,
132 ellipsiswriteprocessor,
132 ellipsiswriteprocessor,
133 ellipsisrawprocessor,
133 ellipsisrawprocessor,
134 )
134 )
135
135
136
136
137 def getoffset(q):
137 def getoffset(q):
138 return int(q >> 16)
138 return int(q >> 16)
139
139
140
140
141 def gettype(q):
141 def gettype(q):
142 return int(q & 0xFFFF)
142 return int(q & 0xFFFF)
143
143
144
144
145 def offset_type(offset, type):
145 def offset_type(offset, type):
146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 raise ValueError(b'unknown revlog index flags')
147 raise ValueError(b'unknown revlog index flags')
148 return int(int(offset) << 16 | type)
148 return int(int(offset) << 16 | type)
149
149
150
150
151 def _verify_revision(rl, skipflags, state, node):
151 def _verify_revision(rl, skipflags, state, node):
152 """Verify the integrity of the given revlog ``node`` while providing a hook
152 """Verify the integrity of the given revlog ``node`` while providing a hook
153 point for extensions to influence the operation."""
153 point for extensions to influence the operation."""
154 if skipflags:
154 if skipflags:
155 state[b'skipread'].add(node)
155 state[b'skipread'].add(node)
156 else:
156 else:
157 # Side-effect: read content and verify hash.
157 # Side-effect: read content and verify hash.
158 rl.revision(node)
158 rl.revision(node)
159
159
160
160
161 @attr.s(slots=True, frozen=True)
161 @attr.s(slots=True, frozen=True)
162 class _revisioninfo(object):
162 class _revisioninfo(object):
163 """Information about a revision that allows building its fulltext
163 """Information about a revision that allows building its fulltext
164 node: expected hash of the revision
164 node: expected hash of the revision
165 p1, p2: parent revs of the revision
165 p1, p2: parent revs of the revision
166 btext: built text cache consisting of a one-element list
166 btext: built text cache consisting of a one-element list
167 cachedelta: (baserev, uncompressed_delta) or None
167 cachedelta: (baserev, uncompressed_delta) or None
168 flags: flags associated to the revision storage
168 flags: flags associated to the revision storage
169
169
170 One of btext[0] or cachedelta must be set.
170 One of btext[0] or cachedelta must be set.
171 """
171 """
172
172
173 node = attr.ib()
173 node = attr.ib()
174 p1 = attr.ib()
174 p1 = attr.ib()
175 p2 = attr.ib()
175 p2 = attr.ib()
176 btext = attr.ib()
176 btext = attr.ib()
177 textlen = attr.ib()
177 textlen = attr.ib()
178 cachedelta = attr.ib()
178 cachedelta = attr.ib()
179 flags = attr.ib()
179 flags = attr.ib()
180
180
181
181
182 @interfaceutil.implementer(repository.irevisiondelta)
182 @interfaceutil.implementer(repository.irevisiondelta)
183 @attr.s(slots=True)
183 @attr.s(slots=True)
184 class revlogrevisiondelta(object):
184 class revlogrevisiondelta(object):
185 node = attr.ib()
185 node = attr.ib()
186 p1node = attr.ib()
186 p1node = attr.ib()
187 p2node = attr.ib()
187 p2node = attr.ib()
188 basenode = attr.ib()
188 basenode = attr.ib()
189 flags = attr.ib()
189 flags = attr.ib()
190 baserevisionsize = attr.ib()
190 baserevisionsize = attr.ib()
191 revision = attr.ib()
191 revision = attr.ib()
192 delta = attr.ib()
192 delta = attr.ib()
193 linknode = attr.ib(default=None)
193 linknode = attr.ib(default=None)
194
194
195
195
196 @interfaceutil.implementer(repository.iverifyproblem)
196 @interfaceutil.implementer(repository.iverifyproblem)
197 @attr.s(frozen=True)
197 @attr.s(frozen=True)
198 class revlogproblem(object):
198 class revlogproblem(object):
199 warning = attr.ib(default=None)
199 warning = attr.ib(default=None)
200 error = attr.ib(default=None)
200 error = attr.ib(default=None)
201 node = attr.ib(default=None)
201 node = attr.ib(default=None)
202
202
203
203
204 # index v0:
204 # index v0:
205 # 4 bytes: offset
205 # 4 bytes: offset
206 # 4 bytes: compressed length
206 # 4 bytes: compressed length
207 # 4 bytes: base rev
207 # 4 bytes: base rev
208 # 4 bytes: link rev
208 # 4 bytes: link rev
209 # 20 bytes: parent 1 nodeid
209 # 20 bytes: parent 1 nodeid
210 # 20 bytes: parent 2 nodeid
210 # 20 bytes: parent 2 nodeid
211 # 20 bytes: nodeid
211 # 20 bytes: nodeid
212 indexformatv0 = struct.Struct(b">4l20s20s20s")
212 indexformatv0 = struct.Struct(b">4l20s20s20s")
213 indexformatv0_pack = indexformatv0.pack
213 indexformatv0_pack = indexformatv0.pack
214 indexformatv0_unpack = indexformatv0.unpack
214 indexformatv0_unpack = indexformatv0.unpack
215
215
216
216
217 class revlogoldindex(list):
217 class revlogoldindex(list):
218 @property
218 @property
219 def nodemap(self):
219 def nodemap(self):
220 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
220 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
221 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
221 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
222 return self._nodemap
222 return self._nodemap
223
223
224 @util.propertycache
224 @util.propertycache
225 def _nodemap(self):
225 def _nodemap(self):
226 nodemap = revlogutils.NodeMap({nullid: nullrev})
226 nodemap = revlogutils.NodeMap({nullid: nullrev})
227 for r in range(0, len(self)):
227 for r in range(0, len(self)):
228 n = self[r][7]
228 n = self[r][7]
229 nodemap[n] = r
229 nodemap[n] = r
230 return nodemap
230 return nodemap
231
231
232 def has_node(self, node):
232 def has_node(self, node):
233 """return True if the node exist in the index"""
233 """return True if the node exist in the index"""
234 return node in self._nodemap
234 return node in self._nodemap
235
235
236 def rev(self, node):
236 def rev(self, node):
237 """return a revision for a node
237 """return a revision for a node
238
238
239 If the node is unknown, raise a RevlogError"""
239 If the node is unknown, raise a RevlogError"""
240 return self._nodemap[node]
240 return self._nodemap[node]
241
241
242 def get_rev(self, node):
242 def get_rev(self, node):
243 """return a revision for a node
243 """return a revision for a node
244
244
245 If the node is unknown, return None"""
245 If the node is unknown, return None"""
246 return self._nodemap.get(node)
246 return self._nodemap.get(node)
247
247
248 def append(self, tup):
248 def append(self, tup):
249 self._nodemap[tup[7]] = len(self)
249 self._nodemap[tup[7]] = len(self)
250 super(revlogoldindex, self).append(tup)
250 super(revlogoldindex, self).append(tup)
251
251
252 def __delitem__(self, i):
252 def __delitem__(self, i):
253 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
253 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
254 raise ValueError(b"deleting slices only supports a:-1 with step 1")
254 raise ValueError(b"deleting slices only supports a:-1 with step 1")
255 for r in pycompat.xrange(i.start, len(self)):
255 for r in pycompat.xrange(i.start, len(self)):
256 del self._nodemap[self[r][7]]
256 del self._nodemap[self[r][7]]
257 super(revlogoldindex, self).__delitem__(i)
257 super(revlogoldindex, self).__delitem__(i)
258
258
259 def clearcaches(self):
259 def clearcaches(self):
260 self.__dict__.pop('_nodemap', None)
260 self.__dict__.pop('_nodemap', None)
261
261
262 def __getitem__(self, i):
262 def __getitem__(self, i):
263 if i == -1:
263 if i == -1:
264 return (0, 0, 0, -1, -1, -1, -1, nullid)
264 return (0, 0, 0, -1, -1, -1, -1, nullid)
265 return list.__getitem__(self, i)
265 return list.__getitem__(self, i)
266
266
267
267
268 class revlogoldio(object):
268 class revlogoldio(object):
269 def __init__(self):
269 def __init__(self):
270 self.size = indexformatv0.size
270 self.size = indexformatv0.size
271
271
272 def parseindex(self, data, inline):
272 def parseindex(self, data, inline):
273 s = self.size
273 s = self.size
274 index = []
274 index = []
275 nodemap = revlogutils.NodeMap({nullid: nullrev})
275 nodemap = revlogutils.NodeMap({nullid: nullrev})
276 n = off = 0
276 n = off = 0
277 l = len(data)
277 l = len(data)
278 while off + s <= l:
278 while off + s <= l:
279 cur = data[off : off + s]
279 cur = data[off : off + s]
280 off += s
280 off += s
281 e = indexformatv0_unpack(cur)
281 e = indexformatv0_unpack(cur)
282 # transform to revlogv1 format
282 # transform to revlogv1 format
283 e2 = (
283 e2 = (
284 offset_type(e[0], 0),
284 offset_type(e[0], 0),
285 e[1],
285 e[1],
286 -1,
286 -1,
287 e[2],
287 e[2],
288 e[3],
288 e[3],
289 nodemap.get(e[4], nullrev),
289 nodemap.get(e[4], nullrev),
290 nodemap.get(e[5], nullrev),
290 nodemap.get(e[5], nullrev),
291 e[6],
291 e[6],
292 )
292 )
293 index.append(e2)
293 index.append(e2)
294 nodemap[e[6]] = n
294 nodemap[e[6]] = n
295 n += 1
295 n += 1
296
296
297 index = revlogoldindex(index)
297 index = revlogoldindex(index)
298 return index, None
298 return index, None
299
299
300 def packentry(self, entry, node, version, rev):
300 def packentry(self, entry, node, version, rev):
301 if gettype(entry[0]):
301 if gettype(entry[0]):
302 raise error.RevlogError(
302 raise error.RevlogError(
303 _(b'index entry flags need revlog version 1')
303 _(b'index entry flags need revlog version 1')
304 )
304 )
305 e2 = (
305 e2 = (
306 getoffset(entry[0]),
306 getoffset(entry[0]),
307 entry[1],
307 entry[1],
308 entry[3],
308 entry[3],
309 entry[4],
309 entry[4],
310 node(entry[5]),
310 node(entry[5]),
311 node(entry[6]),
311 node(entry[6]),
312 entry[7],
312 entry[7],
313 )
313 )
314 return indexformatv0_pack(*e2)
314 return indexformatv0_pack(*e2)
315
315
316
316
317 # index ng:
317 # index ng:
318 # 6 bytes: offset
318 # 6 bytes: offset
319 # 2 bytes: flags
319 # 2 bytes: flags
320 # 4 bytes: compressed length
320 # 4 bytes: compressed length
321 # 4 bytes: uncompressed length
321 # 4 bytes: uncompressed length
322 # 4 bytes: base rev
322 # 4 bytes: base rev
323 # 4 bytes: link rev
323 # 4 bytes: link rev
324 # 4 bytes: parent 1 rev
324 # 4 bytes: parent 1 rev
325 # 4 bytes: parent 2 rev
325 # 4 bytes: parent 2 rev
326 # 32 bytes: nodeid
326 # 32 bytes: nodeid
327 indexformatng = struct.Struct(b">Qiiiiii20s12x")
327 indexformatng = struct.Struct(b">Qiiiiii20s12x")
328 indexformatng_pack = indexformatng.pack
328 indexformatng_pack = indexformatng.pack
329 versionformat = struct.Struct(b">I")
329 versionformat = struct.Struct(b">I")
330 versionformat_pack = versionformat.pack
330 versionformat_pack = versionformat.pack
331 versionformat_unpack = versionformat.unpack
331 versionformat_unpack = versionformat.unpack
332
332
333 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
333 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
334 # signed integer)
334 # signed integer)
335 _maxentrysize = 0x7FFFFFFF
335 _maxentrysize = 0x7FFFFFFF
336
336
337
337
338 class revlogio(object):
338 class revlogio(object):
339 def __init__(self):
339 def __init__(self):
340 self.size = indexformatng.size
340 self.size = indexformatng.size
341
341
342 def parseindex(self, data, inline):
342 def parseindex(self, data, inline):
343 # call the C implementation to parse the index data
343 # call the C implementation to parse the index data
344 index, cache = parsers.parse_index2(data, inline)
344 index, cache = parsers.parse_index2(data, inline)
345 return index, cache
345 return index, cache
346
346
347 def packentry(self, entry, node, version, rev):
347 def packentry(self, entry, node, version, rev):
348 p = indexformatng_pack(*entry)
348 p = indexformatng_pack(*entry)
349 if rev == 0:
349 if rev == 0:
350 p = versionformat_pack(version) + p[4:]
350 p = versionformat_pack(version) + p[4:]
351 return p
351 return p
352
352
353
353
354 class revlog(object):
354 class revlog(object):
355 """
355 """
356 the underlying revision storage object
356 the underlying revision storage object
357
357
358 A revlog consists of two parts, an index and the revision data.
358 A revlog consists of two parts, an index and the revision data.
359
359
360 The index is a file with a fixed record size containing
360 The index is a file with a fixed record size containing
361 information on each revision, including its nodeid (hash), the
361 information on each revision, including its nodeid (hash), the
362 nodeids of its parents, the position and offset of its data within
362 nodeids of its parents, the position and offset of its data within
363 the data file, and the revision it's based on. Finally, each entry
363 the data file, and the revision it's based on. Finally, each entry
364 contains a linkrev entry that can serve as a pointer to external
364 contains a linkrev entry that can serve as a pointer to external
365 data.
365 data.
366
366
367 The revision data itself is a linear collection of data chunks.
367 The revision data itself is a linear collection of data chunks.
368 Each chunk represents a revision and is usually represented as a
368 Each chunk represents a revision and is usually represented as a
369 delta against the previous chunk. To bound lookup time, runs of
369 delta against the previous chunk. To bound lookup time, runs of
370 deltas are limited to about 2 times the length of the original
370 deltas are limited to about 2 times the length of the original
371 version data. This makes retrieval of a version proportional to
371 version data. This makes retrieval of a version proportional to
372 its size, or O(1) relative to the number of revisions.
372 its size, or O(1) relative to the number of revisions.
373
373
374 Both pieces of the revlog are written to in an append-only
374 Both pieces of the revlog are written to in an append-only
375 fashion, which means we never need to rewrite a file to insert or
375 fashion, which means we never need to rewrite a file to insert or
376 remove data, and can use some simple techniques to avoid the need
376 remove data, and can use some simple techniques to avoid the need
377 for locking while reading.
377 for locking while reading.
378
378
379 If checkambig, indexfile is opened with checkambig=True at
379 If checkambig, indexfile is opened with checkambig=True at
380 writing, to avoid file stat ambiguity.
380 writing, to avoid file stat ambiguity.
381
381
382 If mmaplargeindex is True, and an mmapindexthreshold is set, the
382 If mmaplargeindex is True, and an mmapindexthreshold is set, the
383 index will be mmapped rather than read if it is larger than the
383 index will be mmapped rather than read if it is larger than the
384 configured threshold.
384 configured threshold.
385
385
386 If censorable is True, the revlog can have censored revisions.
386 If censorable is True, the revlog can have censored revisions.
387
387
388 If `upperboundcomp` is not None, this is the expected maximal gain from
388 If `upperboundcomp` is not None, this is the expected maximal gain from
389 compression for the data content.
389 compression for the data content.
390 """
390 """
391
391
392 _flagserrorclass = error.RevlogError
392 _flagserrorclass = error.RevlogError
393
393
394 def __init__(
394 def __init__(
395 self,
395 self,
396 opener,
396 opener,
397 indexfile,
397 indexfile,
398 datafile=None,
398 datafile=None,
399 checkambig=False,
399 checkambig=False,
400 mmaplargeindex=False,
400 mmaplargeindex=False,
401 censorable=False,
401 censorable=False,
402 upperboundcomp=None,
402 upperboundcomp=None,
403 ):
403 ):
404 """
404 """
405 create a revlog object
405 create a revlog object
406
406
407 opener is a function that abstracts the file opening operation
407 opener is a function that abstracts the file opening operation
408 and can be used to implement COW semantics or the like.
408 and can be used to implement COW semantics or the like.
409
409
410 """
410 """
411 self.upperboundcomp = upperboundcomp
411 self.upperboundcomp = upperboundcomp
412 self.indexfile = indexfile
412 self.indexfile = indexfile
413 self.datafile = datafile or (indexfile[:-2] + b".d")
413 self.datafile = datafile or (indexfile[:-2] + b".d")
414 self.opener = opener
414 self.opener = opener
415 # When True, indexfile is opened with checkambig=True at writing, to
415 # When True, indexfile is opened with checkambig=True at writing, to
416 # avoid file stat ambiguity.
416 # avoid file stat ambiguity.
417 self._checkambig = checkambig
417 self._checkambig = checkambig
418 self._mmaplargeindex = mmaplargeindex
418 self._mmaplargeindex = mmaplargeindex
419 self._censorable = censorable
419 self._censorable = censorable
420 # 3-tuple of (node, rev, text) for a raw revision.
420 # 3-tuple of (node, rev, text) for a raw revision.
421 self._revisioncache = None
421 self._revisioncache = None
422 # Maps rev to chain base rev.
422 # Maps rev to chain base rev.
423 self._chainbasecache = util.lrucachedict(100)
423 self._chainbasecache = util.lrucachedict(100)
424 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
424 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
425 self._chunkcache = (0, b'')
425 self._chunkcache = (0, b'')
426 # How much data to read and cache into the raw revlog data cache.
426 # How much data to read and cache into the raw revlog data cache.
427 self._chunkcachesize = 65536
427 self._chunkcachesize = 65536
428 self._maxchainlen = None
428 self._maxchainlen = None
429 self._deltabothparents = True
429 self._deltabothparents = True
430 self.index = None
430 self.index = None
431 # Mapping of partial identifiers to full nodes.
431 # Mapping of partial identifiers to full nodes.
432 self._pcache = {}
432 self._pcache = {}
433 # Mapping of revision integer to full node.
433 # Mapping of revision integer to full node.
434 self._compengine = b'zlib'
434 self._compengine = b'zlib'
435 self._compengineopts = {}
435 self._compengineopts = {}
436 self._maxdeltachainspan = -1
436 self._maxdeltachainspan = -1
437 self._withsparseread = False
437 self._withsparseread = False
438 self._sparserevlog = False
438 self._sparserevlog = False
439 self._srdensitythreshold = 0.50
439 self._srdensitythreshold = 0.50
440 self._srmingapsize = 262144
440 self._srmingapsize = 262144
441
441
442 # Make copy of flag processors so each revlog instance can support
442 # Make copy of flag processors so each revlog instance can support
443 # custom flags.
443 # custom flags.
444 self._flagprocessors = dict(flagutil.flagprocessors)
444 self._flagprocessors = dict(flagutil.flagprocessors)
445
445
446 # 2-tuple of file handles being used for active writing.
446 # 2-tuple of file handles being used for active writing.
447 self._writinghandles = None
447 self._writinghandles = None
448
448
449 self._loadindex()
449 self._loadindex()
450
450
451 def _loadindex(self):
451 def _loadindex(self):
452 mmapindexthreshold = None
452 mmapindexthreshold = None
453 opts = self.opener.options
453 opts = self.opener.options
454
454
455 if b'revlogv2' in opts:
455 if b'revlogv2' in opts:
456 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
456 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
457 elif b'revlogv1' in opts:
457 elif b'revlogv1' in opts:
458 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
458 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
459 if b'generaldelta' in opts:
459 if b'generaldelta' in opts:
460 newversionflags |= FLAG_GENERALDELTA
460 newversionflags |= FLAG_GENERALDELTA
461 elif b'revlogv0' in self.opener.options:
461 elif b'revlogv0' in self.opener.options:
462 newversionflags = REVLOGV0
462 newversionflags = REVLOGV0
463 else:
463 else:
464 newversionflags = REVLOG_DEFAULT_VERSION
464 newversionflags = REVLOG_DEFAULT_VERSION
465
465
466 if b'chunkcachesize' in opts:
466 if b'chunkcachesize' in opts:
467 self._chunkcachesize = opts[b'chunkcachesize']
467 self._chunkcachesize = opts[b'chunkcachesize']
468 if b'maxchainlen' in opts:
468 if b'maxchainlen' in opts:
469 self._maxchainlen = opts[b'maxchainlen']
469 self._maxchainlen = opts[b'maxchainlen']
470 if b'deltabothparents' in opts:
470 if b'deltabothparents' in opts:
471 self._deltabothparents = opts[b'deltabothparents']
471 self._deltabothparents = opts[b'deltabothparents']
472 self._lazydelta = bool(opts.get(b'lazydelta', True))
472 self._lazydelta = bool(opts.get(b'lazydelta', True))
473 self._lazydeltabase = False
473 self._lazydeltabase = False
474 if self._lazydelta:
474 if self._lazydelta:
475 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
475 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
476 if b'compengine' in opts:
476 if b'compengine' in opts:
477 self._compengine = opts[b'compengine']
477 self._compengine = opts[b'compengine']
478 if b'zlib.level' in opts:
478 if b'zlib.level' in opts:
479 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
479 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
480 if b'zstd.level' in opts:
480 if b'zstd.level' in opts:
481 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
481 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
482 if b'maxdeltachainspan' in opts:
482 if b'maxdeltachainspan' in opts:
483 self._maxdeltachainspan = opts[b'maxdeltachainspan']
483 self._maxdeltachainspan = opts[b'maxdeltachainspan']
484 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
484 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
485 mmapindexthreshold = opts[b'mmapindexthreshold']
485 mmapindexthreshold = opts[b'mmapindexthreshold']
486 self.hassidedata = bool(opts.get(b'side-data', False))
486 self.hassidedata = bool(opts.get(b'side-data', False))
487 if self.hassidedata:
487 if self.hassidedata:
488 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
488 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
489 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
489 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
490 withsparseread = bool(opts.get(b'with-sparse-read', False))
490 withsparseread = bool(opts.get(b'with-sparse-read', False))
491 # sparse-revlog forces sparse-read
491 # sparse-revlog forces sparse-read
492 self._withsparseread = self._sparserevlog or withsparseread
492 self._withsparseread = self._sparserevlog or withsparseread
493 if b'sparse-read-density-threshold' in opts:
493 if b'sparse-read-density-threshold' in opts:
494 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
494 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
495 if b'sparse-read-min-gap-size' in opts:
495 if b'sparse-read-min-gap-size' in opts:
496 self._srmingapsize = opts[b'sparse-read-min-gap-size']
496 self._srmingapsize = opts[b'sparse-read-min-gap-size']
497 if opts.get(b'enableellipsis'):
497 if opts.get(b'enableellipsis'):
498 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
498 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
499
499
500 # revlog v0 doesn't have flag processors
500 # revlog v0 doesn't have flag processors
501 for flag, processor in pycompat.iteritems(
501 for flag, processor in pycompat.iteritems(
502 opts.get(b'flagprocessors', {})
502 opts.get(b'flagprocessors', {})
503 ):
503 ):
504 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
504 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
505
505
506 if self._chunkcachesize <= 0:
506 if self._chunkcachesize <= 0:
507 raise error.RevlogError(
507 raise error.RevlogError(
508 _(b'revlog chunk cache size %r is not greater than 0')
508 _(b'revlog chunk cache size %r is not greater than 0')
509 % self._chunkcachesize
509 % self._chunkcachesize
510 )
510 )
511 elif self._chunkcachesize & (self._chunkcachesize - 1):
511 elif self._chunkcachesize & (self._chunkcachesize - 1):
512 raise error.RevlogError(
512 raise error.RevlogError(
513 _(b'revlog chunk cache size %r is not a power of 2')
513 _(b'revlog chunk cache size %r is not a power of 2')
514 % self._chunkcachesize
514 % self._chunkcachesize
515 )
515 )
516
516
517 indexdata = b''
517 indexdata = b''
518 self._initempty = True
518 self._initempty = True
519 try:
519 try:
520 with self._indexfp() as f:
520 with self._indexfp() as f:
521 if (
521 if (
522 mmapindexthreshold is not None
522 mmapindexthreshold is not None
523 and self.opener.fstat(f).st_size >= mmapindexthreshold
523 and self.opener.fstat(f).st_size >= mmapindexthreshold
524 ):
524 ):
525 # TODO: should .close() to release resources without
525 # TODO: should .close() to release resources without
526 # relying on Python GC
526 # relying on Python GC
527 indexdata = util.buffer(util.mmapread(f))
527 indexdata = util.buffer(util.mmapread(f))
528 else:
528 else:
529 indexdata = f.read()
529 indexdata = f.read()
530 if len(indexdata) > 0:
530 if len(indexdata) > 0:
531 versionflags = versionformat_unpack(indexdata[:4])[0]
531 versionflags = versionformat_unpack(indexdata[:4])[0]
532 self._initempty = False
532 self._initempty = False
533 else:
533 else:
534 versionflags = newversionflags
534 versionflags = newversionflags
535 except IOError as inst:
535 except IOError as inst:
536 if inst.errno != errno.ENOENT:
536 if inst.errno != errno.ENOENT:
537 raise
537 raise
538
538
539 versionflags = newversionflags
539 versionflags = newversionflags
540
540
541 self.version = versionflags
541 self.version = versionflags
542
542
543 flags = versionflags & ~0xFFFF
543 flags = versionflags & ~0xFFFF
544 fmt = versionflags & 0xFFFF
544 fmt = versionflags & 0xFFFF
545
545
546 if fmt == REVLOGV0:
546 if fmt == REVLOGV0:
547 if flags:
547 if flags:
548 raise error.RevlogError(
548 raise error.RevlogError(
549 _(b'unknown flags (%#04x) in version %d revlog %s')
549 _(b'unknown flags (%#04x) in version %d revlog %s')
550 % (flags >> 16, fmt, self.indexfile)
550 % (flags >> 16, fmt, self.indexfile)
551 )
551 )
552
552
553 self._inline = False
553 self._inline = False
554 self._generaldelta = False
554 self._generaldelta = False
555
555
556 elif fmt == REVLOGV1:
556 elif fmt == REVLOGV1:
557 if flags & ~REVLOGV1_FLAGS:
557 if flags & ~REVLOGV1_FLAGS:
558 raise error.RevlogError(
558 raise error.RevlogError(
559 _(b'unknown flags (%#04x) in version %d revlog %s')
559 _(b'unknown flags (%#04x) in version %d revlog %s')
560 % (flags >> 16, fmt, self.indexfile)
560 % (flags >> 16, fmt, self.indexfile)
561 )
561 )
562
562
563 self._inline = versionflags & FLAG_INLINE_DATA
563 self._inline = versionflags & FLAG_INLINE_DATA
564 self._generaldelta = versionflags & FLAG_GENERALDELTA
564 self._generaldelta = versionflags & FLAG_GENERALDELTA
565
565
566 elif fmt == REVLOGV2:
566 elif fmt == REVLOGV2:
567 if flags & ~REVLOGV2_FLAGS:
567 if flags & ~REVLOGV2_FLAGS:
568 raise error.RevlogError(
568 raise error.RevlogError(
569 _(b'unknown flags (%#04x) in version %d revlog %s')
569 _(b'unknown flags (%#04x) in version %d revlog %s')
570 % (flags >> 16, fmt, self.indexfile)
570 % (flags >> 16, fmt, self.indexfile)
571 )
571 )
572
572
573 self._inline = versionflags & FLAG_INLINE_DATA
573 self._inline = versionflags & FLAG_INLINE_DATA
574 # generaldelta implied by version 2 revlogs.
574 # generaldelta implied by version 2 revlogs.
575 self._generaldelta = True
575 self._generaldelta = True
576
576
577 else:
577 else:
578 raise error.RevlogError(
578 raise error.RevlogError(
579 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
579 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
580 )
580 )
581 # sparse-revlog can't be on without general-delta (issue6056)
581 # sparse-revlog can't be on without general-delta (issue6056)
582 if not self._generaldelta:
582 if not self._generaldelta:
583 self._sparserevlog = False
583 self._sparserevlog = False
584
584
585 self._storedeltachains = True
585 self._storedeltachains = True
586
586
587 self._io = revlogio()
587 self._io = revlogio()
588 if self.version == REVLOGV0:
588 if self.version == REVLOGV0:
589 self._io = revlogoldio()
589 self._io = revlogoldio()
590 try:
590 try:
591 d = self._io.parseindex(indexdata, self._inline)
591 d = self._io.parseindex(indexdata, self._inline)
592 except (ValueError, IndexError):
592 except (ValueError, IndexError):
593 raise error.RevlogError(
593 raise error.RevlogError(
594 _(b"index %s is corrupted") % self.indexfile
594 _(b"index %s is corrupted") % self.indexfile
595 )
595 )
596 self.index, self._chunkcache = d
596 self.index, self._chunkcache = d
597 if not self._chunkcache:
597 if not self._chunkcache:
598 self._chunkclear()
598 self._chunkclear()
599 # revnum -> (chain-length, sum-delta-length)
599 # revnum -> (chain-length, sum-delta-length)
600 self._chaininfocache = {}
600 self._chaininfocache = {}
601 # revlog header -> revlog compressor
601 # revlog header -> revlog compressor
602 self._decompressors = {}
602 self._decompressors = {}
603
603
604 @util.propertycache
604 @util.propertycache
605 def _compressor(self):
605 def _compressor(self):
606 engine = util.compengines[self._compengine]
606 engine = util.compengines[self._compengine]
607 return engine.revlogcompressor(self._compengineopts)
607 return engine.revlogcompressor(self._compengineopts)
608
608
609 def _indexfp(self, mode=b'r'):
609 def _indexfp(self, mode=b'r'):
610 """file object for the revlog's index file"""
610 """file object for the revlog's index file"""
611 args = {'mode': mode}
611 args = {'mode': mode}
612 if mode != b'r':
612 if mode != b'r':
613 args['checkambig'] = self._checkambig
613 args['checkambig'] = self._checkambig
614 if mode == b'w':
614 if mode == b'w':
615 args['atomictemp'] = True
615 args['atomictemp'] = True
616 return self.opener(self.indexfile, **args)
616 return self.opener(self.indexfile, **args)
617
617
618 def _datafp(self, mode=b'r'):
618 def _datafp(self, mode=b'r'):
619 """file object for the revlog's data file"""
619 """file object for the revlog's data file"""
620 return self.opener(self.datafile, mode=mode)
620 return self.opener(self.datafile, mode=mode)
621
621
622 @contextlib.contextmanager
622 @contextlib.contextmanager
623 def _datareadfp(self, existingfp=None):
623 def _datareadfp(self, existingfp=None):
624 """file object suitable to read data"""
624 """file object suitable to read data"""
625 # Use explicit file handle, if given.
625 # Use explicit file handle, if given.
626 if existingfp is not None:
626 if existingfp is not None:
627 yield existingfp
627 yield existingfp
628
628
629 # Use a file handle being actively used for writes, if available.
629 # Use a file handle being actively used for writes, if available.
630 # There is some danger to doing this because reads will seek the
630 # There is some danger to doing this because reads will seek the
631 # file. However, _writeentry() performs a SEEK_END before all writes,
631 # file. However, _writeentry() performs a SEEK_END before all writes,
632 # so we should be safe.
632 # so we should be safe.
633 elif self._writinghandles:
633 elif self._writinghandles:
634 if self._inline:
634 if self._inline:
635 yield self._writinghandles[0]
635 yield self._writinghandles[0]
636 else:
636 else:
637 yield self._writinghandles[1]
637 yield self._writinghandles[1]
638
638
639 # Otherwise open a new file handle.
639 # Otherwise open a new file handle.
640 else:
640 else:
641 if self._inline:
641 if self._inline:
642 func = self._indexfp
642 func = self._indexfp
643 else:
643 else:
644 func = self._datafp
644 func = self._datafp
645 with func() as fp:
645 with func() as fp:
646 yield fp
646 yield fp
647
647
648 def tiprev(self):
648 def tiprev(self):
649 return len(self.index) - 1
649 return len(self.index) - 1
650
650
651 def tip(self):
651 def tip(self):
652 return self.node(self.tiprev())
652 return self.node(self.tiprev())
653
653
654 def __contains__(self, rev):
654 def __contains__(self, rev):
655 return 0 <= rev < len(self)
655 return 0 <= rev < len(self)
656
656
657 def __len__(self):
657 def __len__(self):
658 return len(self.index)
658 return len(self.index)
659
659
660 def __iter__(self):
660 def __iter__(self):
661 return iter(pycompat.xrange(len(self)))
661 return iter(pycompat.xrange(len(self)))
662
662
663 def revs(self, start=0, stop=None):
663 def revs(self, start=0, stop=None):
664 """iterate over all rev in this revlog (from start to stop)"""
664 """iterate over all rev in this revlog (from start to stop)"""
665 return storageutil.iterrevs(len(self), start=start, stop=stop)
665 return storageutil.iterrevs(len(self), start=start, stop=stop)
666
666
667 @property
667 @property
668 def nodemap(self):
668 def nodemap(self):
669 msg = (
669 msg = (
670 b"revlog.nodemap is deprecated, "
670 b"revlog.nodemap is deprecated, "
671 b"use revlog.index.[has_node|rev|get_rev]"
671 b"use revlog.index.[has_node|rev|get_rev]"
672 )
672 )
673 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
673 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
674 return self.index.nodemap
674 return self.index.nodemap
675
675
676 @property
676 @property
677 def _nodecache(self):
677 def _nodecache(self):
678 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
678 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
679 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
679 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
680 return self.index.nodemap
680 return self.index.nodemap
681
681
682 def hasnode(self, node):
682 def hasnode(self, node):
683 try:
683 try:
684 self.rev(node)
684 self.rev(node)
685 return True
685 return True
686 except KeyError:
686 except KeyError:
687 return False
687 return False
688
688
689 def candelta(self, baserev, rev):
689 def candelta(self, baserev, rev):
690 """whether two revisions (baserev, rev) can be delta-ed or not"""
690 """whether two revisions (baserev, rev) can be delta-ed or not"""
691 # Disable delta if either rev requires a content-changing flag
691 # Disable delta if either rev requires a content-changing flag
692 # processor (ex. LFS). This is because such flag processor can alter
692 # processor (ex. LFS). This is because such flag processor can alter
693 # the rawtext content that the delta will be based on, and two clients
693 # the rawtext content that the delta will be based on, and two clients
694 # could have a same revlog node with different flags (i.e. different
694 # could have a same revlog node with different flags (i.e. different
695 # rawtext contents) and the delta could be incompatible.
695 # rawtext contents) and the delta could be incompatible.
696 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
696 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
697 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
697 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
698 ):
698 ):
699 return False
699 return False
700 return True
700 return True
701
701
702 def clearcaches(self):
702 def clearcaches(self):
703 self._revisioncache = None
703 self._revisioncache = None
704 self._chainbasecache.clear()
704 self._chainbasecache.clear()
705 self._chunkcache = (0, b'')
705 self._chunkcache = (0, b'')
706 self._pcache = {}
706 self._pcache = {}
707 self.index.clearcaches()
707 self.index.clearcaches()
708
708
709 def rev(self, node):
709 def rev(self, node):
710 try:
710 try:
711 return self.index.rev(node)
711 return self.index.rev(node)
712 except TypeError:
712 except TypeError:
713 raise
713 raise
714 except error.RevlogError:
714 except error.RevlogError:
715 # parsers.c radix tree lookup failed
715 # parsers.c radix tree lookup failed
716 if node == wdirid or node in wdirfilenodeids:
716 if node == wdirid or node in wdirfilenodeids:
717 raise error.WdirUnsupported
717 raise error.WdirUnsupported
718 raise error.LookupError(node, self.indexfile, _(b'no node'))
718 raise error.LookupError(node, self.indexfile, _(b'no node'))
719
719
720 # Accessors for index entries.
720 # Accessors for index entries.
721
721
722 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
722 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
723 # are flags.
723 # are flags.
724 def start(self, rev):
724 def start(self, rev):
725 return int(self.index[rev][0] >> 16)
725 return int(self.index[rev][0] >> 16)
726
726
727 def flags(self, rev):
727 def flags(self, rev):
728 return self.index[rev][0] & 0xFFFF
728 return self.index[rev][0] & 0xFFFF
729
729
730 def length(self, rev):
730 def length(self, rev):
731 return self.index[rev][1]
731 return self.index[rev][1]
732
732
733 def rawsize(self, rev):
733 def rawsize(self, rev):
734 """return the length of the uncompressed text for a given revision"""
734 """return the length of the uncompressed text for a given revision"""
735 l = self.index[rev][2]
735 l = self.index[rev][2]
736 if l >= 0:
736 if l >= 0:
737 return l
737 return l
738
738
739 t = self.rawdata(rev)
739 t = self.rawdata(rev)
740 return len(t)
740 return len(t)
741
741
742 def size(self, rev):
742 def size(self, rev):
743 """length of non-raw text (processed by a "read" flag processor)"""
743 """length of non-raw text (processed by a "read" flag processor)"""
744 # fast path: if no "read" flag processor could change the content,
744 # fast path: if no "read" flag processor could change the content,
745 # size is rawsize. note: ELLIPSIS is known to not change the content.
745 # size is rawsize. note: ELLIPSIS is known to not change the content.
746 flags = self.flags(rev)
746 flags = self.flags(rev)
747 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
747 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
748 return self.rawsize(rev)
748 return self.rawsize(rev)
749
749
750 return len(self.revision(rev, raw=False))
750 return len(self.revision(rev, raw=False))
751
751
752 def chainbase(self, rev):
752 def chainbase(self, rev):
753 base = self._chainbasecache.get(rev)
753 base = self._chainbasecache.get(rev)
754 if base is not None:
754 if base is not None:
755 return base
755 return base
756
756
757 index = self.index
757 index = self.index
758 iterrev = rev
758 iterrev = rev
759 base = index[iterrev][3]
759 base = index[iterrev][3]
760 while base != iterrev:
760 while base != iterrev:
761 iterrev = base
761 iterrev = base
762 base = index[iterrev][3]
762 base = index[iterrev][3]
763
763
764 self._chainbasecache[rev] = base
764 self._chainbasecache[rev] = base
765 return base
765 return base
766
766
767 def linkrev(self, rev):
767 def linkrev(self, rev):
768 return self.index[rev][4]
768 return self.index[rev][4]
769
769
770 def parentrevs(self, rev):
770 def parentrevs(self, rev):
771 try:
771 try:
772 entry = self.index[rev]
772 entry = self.index[rev]
773 except IndexError:
773 except IndexError:
774 if rev == wdirrev:
774 if rev == wdirrev:
775 raise error.WdirUnsupported
775 raise error.WdirUnsupported
776 raise
776 raise
777
777
778 return entry[5], entry[6]
778 return entry[5], entry[6]
779
779
780 # fast parentrevs(rev) where rev isn't filtered
780 # fast parentrevs(rev) where rev isn't filtered
781 _uncheckedparentrevs = parentrevs
781 _uncheckedparentrevs = parentrevs
782
782
783 def node(self, rev):
783 def node(self, rev):
784 try:
784 try:
785 return self.index[rev][7]
785 return self.index[rev][7]
786 except IndexError:
786 except IndexError:
787 if rev == wdirrev:
787 if rev == wdirrev:
788 raise error.WdirUnsupported
788 raise error.WdirUnsupported
789 raise
789 raise
790
790
791 # Derived from index values.
791 # Derived from index values.
792
792
793 def end(self, rev):
793 def end(self, rev):
794 return self.start(rev) + self.length(rev)
794 return self.start(rev) + self.length(rev)
795
795
796 def parents(self, node):
796 def parents(self, node):
797 i = self.index
797 i = self.index
798 d = i[self.rev(node)]
798 d = i[self.rev(node)]
799 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
799 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
800
800
801 def chainlen(self, rev):
801 def chainlen(self, rev):
802 return self._chaininfo(rev)[0]
802 return self._chaininfo(rev)[0]
803
803
804 def _chaininfo(self, rev):
804 def _chaininfo(self, rev):
805 chaininfocache = self._chaininfocache
805 chaininfocache = self._chaininfocache
806 if rev in chaininfocache:
806 if rev in chaininfocache:
807 return chaininfocache[rev]
807 return chaininfocache[rev]
808 index = self.index
808 index = self.index
809 generaldelta = self._generaldelta
809 generaldelta = self._generaldelta
810 iterrev = rev
810 iterrev = rev
811 e = index[iterrev]
811 e = index[iterrev]
812 clen = 0
812 clen = 0
813 compresseddeltalen = 0
813 compresseddeltalen = 0
814 while iterrev != e[3]:
814 while iterrev != e[3]:
815 clen += 1
815 clen += 1
816 compresseddeltalen += e[1]
816 compresseddeltalen += e[1]
817 if generaldelta:
817 if generaldelta:
818 iterrev = e[3]
818 iterrev = e[3]
819 else:
819 else:
820 iterrev -= 1
820 iterrev -= 1
821 if iterrev in chaininfocache:
821 if iterrev in chaininfocache:
822 t = chaininfocache[iterrev]
822 t = chaininfocache[iterrev]
823 clen += t[0]
823 clen += t[0]
824 compresseddeltalen += t[1]
824 compresseddeltalen += t[1]
825 break
825 break
826 e = index[iterrev]
826 e = index[iterrev]
827 else:
827 else:
828 # Add text length of base since decompressing that also takes
828 # Add text length of base since decompressing that also takes
829 # work. For cache hits the length is already included.
829 # work. For cache hits the length is already included.
830 compresseddeltalen += e[1]
830 compresseddeltalen += e[1]
831 r = (clen, compresseddeltalen)
831 r = (clen, compresseddeltalen)
832 chaininfocache[rev] = r
832 chaininfocache[rev] = r
833 return r
833 return r
834
834
835 def _deltachain(self, rev, stoprev=None):
835 def _deltachain(self, rev, stoprev=None):
836 """Obtain the delta chain for a revision.
836 """Obtain the delta chain for a revision.
837
837
838 ``stoprev`` specifies a revision to stop at. If not specified, we
838 ``stoprev`` specifies a revision to stop at. If not specified, we
839 stop at the base of the chain.
839 stop at the base of the chain.
840
840
841 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
841 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
842 revs in ascending order and ``stopped`` is a bool indicating whether
842 revs in ascending order and ``stopped`` is a bool indicating whether
843 ``stoprev`` was hit.
843 ``stoprev`` was hit.
844 """
844 """
845 # Try C implementation.
845 # Try C implementation.
846 try:
846 try:
847 return self.index.deltachain(rev, stoprev, self._generaldelta)
847 return self.index.deltachain(rev, stoprev, self._generaldelta)
848 except AttributeError:
848 except AttributeError:
849 pass
849 pass
850
850
851 chain = []
851 chain = []
852
852
853 # Alias to prevent attribute lookup in tight loop.
853 # Alias to prevent attribute lookup in tight loop.
854 index = self.index
854 index = self.index
855 generaldelta = self._generaldelta
855 generaldelta = self._generaldelta
856
856
857 iterrev = rev
857 iterrev = rev
858 e = index[iterrev]
858 e = index[iterrev]
859 while iterrev != e[3] and iterrev != stoprev:
859 while iterrev != e[3] and iterrev != stoprev:
860 chain.append(iterrev)
860 chain.append(iterrev)
861 if generaldelta:
861 if generaldelta:
862 iterrev = e[3]
862 iterrev = e[3]
863 else:
863 else:
864 iterrev -= 1
864 iterrev -= 1
865 e = index[iterrev]
865 e = index[iterrev]
866
866
867 if iterrev == stoprev:
867 if iterrev == stoprev:
868 stopped = True
868 stopped = True
869 else:
869 else:
870 chain.append(iterrev)
870 chain.append(iterrev)
871 stopped = False
871 stopped = False
872
872
873 chain.reverse()
873 chain.reverse()
874 return chain, stopped
874 return chain, stopped
875
875
876 def ancestors(self, revs, stoprev=0, inclusive=False):
876 def ancestors(self, revs, stoprev=0, inclusive=False):
877 """Generate the ancestors of 'revs' in reverse revision order.
877 """Generate the ancestors of 'revs' in reverse revision order.
878 Does not generate revs lower than stoprev.
878 Does not generate revs lower than stoprev.
879
879
880 See the documentation for ancestor.lazyancestors for more details."""
880 See the documentation for ancestor.lazyancestors for more details."""
881
881
882 # first, make sure start revisions aren't filtered
882 # first, make sure start revisions aren't filtered
883 revs = list(revs)
883 revs = list(revs)
884 checkrev = self.node
884 checkrev = self.node
885 for r in revs:
885 for r in revs:
886 checkrev(r)
886 checkrev(r)
887 # and we're sure ancestors aren't filtered as well
887 # and we're sure ancestors aren't filtered as well
888
888
889 if rustancestor is not None:
889 if rustancestor is not None:
890 lazyancestors = rustancestor.LazyAncestors
890 lazyancestors = rustancestor.LazyAncestors
891 arg = self.index
891 arg = self.index
892 elif util.safehasattr(parsers, b'rustlazyancestors'):
892 elif util.safehasattr(parsers, b'rustlazyancestors'):
893 lazyancestors = ancestor.rustlazyancestors
893 lazyancestors = ancestor.rustlazyancestors
894 arg = self.index
894 arg = self.index
895 else:
895 else:
896 lazyancestors = ancestor.lazyancestors
896 lazyancestors = ancestor.lazyancestors
897 arg = self._uncheckedparentrevs
897 arg = self._uncheckedparentrevs
898 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
898 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
899
899
900 def descendants(self, revs):
900 def descendants(self, revs):
901 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
901 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
902
902
903 def findcommonmissing(self, common=None, heads=None):
903 def findcommonmissing(self, common=None, heads=None):
904 """Return a tuple of the ancestors of common and the ancestors of heads
904 """Return a tuple of the ancestors of common and the ancestors of heads
905 that are not ancestors of common. In revset terminology, we return the
905 that are not ancestors of common. In revset terminology, we return the
906 tuple:
906 tuple:
907
907
908 ::common, (::heads) - (::common)
908 ::common, (::heads) - (::common)
909
909
910 The list is sorted by revision number, meaning it is
910 The list is sorted by revision number, meaning it is
911 topologically sorted.
911 topologically sorted.
912
912
913 'heads' and 'common' are both lists of node IDs. If heads is
913 'heads' and 'common' are both lists of node IDs. If heads is
914 not supplied, uses all of the revlog's heads. If common is not
914 not supplied, uses all of the revlog's heads. If common is not
915 supplied, uses nullid."""
915 supplied, uses nullid."""
916 if common is None:
916 if common is None:
917 common = [nullid]
917 common = [nullid]
918 if heads is None:
918 if heads is None:
919 heads = self.heads()
919 heads = self.heads()
920
920
921 common = [self.rev(n) for n in common]
921 common = [self.rev(n) for n in common]
922 heads = [self.rev(n) for n in heads]
922 heads = [self.rev(n) for n in heads]
923
923
924 # we want the ancestors, but inclusive
924 # we want the ancestors, but inclusive
925 class lazyset(object):
925 class lazyset(object):
926 def __init__(self, lazyvalues):
926 def __init__(self, lazyvalues):
927 self.addedvalues = set()
927 self.addedvalues = set()
928 self.lazyvalues = lazyvalues
928 self.lazyvalues = lazyvalues
929
929
930 def __contains__(self, value):
930 def __contains__(self, value):
931 return value in self.addedvalues or value in self.lazyvalues
931 return value in self.addedvalues or value in self.lazyvalues
932
932
933 def __iter__(self):
933 def __iter__(self):
934 added = self.addedvalues
934 added = self.addedvalues
935 for r in added:
935 for r in added:
936 yield r
936 yield r
937 for r in self.lazyvalues:
937 for r in self.lazyvalues:
938 if not r in added:
938 if not r in added:
939 yield r
939 yield r
940
940
941 def add(self, value):
941 def add(self, value):
942 self.addedvalues.add(value)
942 self.addedvalues.add(value)
943
943
944 def update(self, values):
944 def update(self, values):
945 self.addedvalues.update(values)
945 self.addedvalues.update(values)
946
946
947 has = lazyset(self.ancestors(common))
947 has = lazyset(self.ancestors(common))
948 has.add(nullrev)
948 has.add(nullrev)
949 has.update(common)
949 has.update(common)
950
950
951 # take all ancestors from heads that aren't in has
951 # take all ancestors from heads that aren't in has
952 missing = set()
952 missing = set()
953 visit = collections.deque(r for r in heads if r not in has)
953 visit = collections.deque(r for r in heads if r not in has)
954 while visit:
954 while visit:
955 r = visit.popleft()
955 r = visit.popleft()
956 if r in missing:
956 if r in missing:
957 continue
957 continue
958 else:
958 else:
959 missing.add(r)
959 missing.add(r)
960 for p in self.parentrevs(r):
960 for p in self.parentrevs(r):
961 if p not in has:
961 if p not in has:
962 visit.append(p)
962 visit.append(p)
963 missing = list(missing)
963 missing = list(missing)
964 missing.sort()
964 missing.sort()
965 return has, [self.node(miss) for miss in missing]
965 return has, [self.node(miss) for miss in missing]
966
966
967 def incrementalmissingrevs(self, common=None):
967 def incrementalmissingrevs(self, common=None):
968 """Return an object that can be used to incrementally compute the
968 """Return an object that can be used to incrementally compute the
969 revision numbers of the ancestors of arbitrary sets that are not
969 revision numbers of the ancestors of arbitrary sets that are not
970 ancestors of common. This is an ancestor.incrementalmissingancestors
970 ancestors of common. This is an ancestor.incrementalmissingancestors
971 object.
971 object.
972
972
973 'common' is a list of revision numbers. If common is not supplied, uses
973 'common' is a list of revision numbers. If common is not supplied, uses
974 nullrev.
974 nullrev.
975 """
975 """
976 if common is None:
976 if common is None:
977 common = [nullrev]
977 common = [nullrev]
978
978
979 if rustancestor is not None:
979 if rustancestor is not None:
980 return rustancestor.MissingAncestors(self.index, common)
980 return rustancestor.MissingAncestors(self.index, common)
981 return ancestor.incrementalmissingancestors(self.parentrevs, common)
981 return ancestor.incrementalmissingancestors(self.parentrevs, common)
982
982
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

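    # Worked example (hypothetical revlog contents): in a linear history
    # 0 <- 1 <- 2, findmissing(common=[node(1)], heads=[node(2)]) returns
    # [node(2)]: revision 2 is an ancestor of the head but not of common,
    # while 0 and 1 are ancestors of common and are therefore excluded.
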
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

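    # Worked example (hypothetical revlog contents): for a linear history
    # 0 <- 1 <- 2 <- 3, nodesbetween([node(1)], [node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): the topological path
    # itself, the roots actually reached, and the heads actually reached.
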
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

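    # Usage sketch (illustrative): on a history with two branches 0 <- 1 and
    # 0 <- 2, rl.heads() returns the nodes of revisions 1 and 2, while
    # rl.heads(start=node(1)) returns only the node of revision 1, since
    # only heads descending from the start node are kept.
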
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

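    # Note on the `a > b` early exit above: revision numbers are assigned in
    # topological order, so an ancestor can never carry a higher revision
    # number than its descendant; e.g. isancestorrev(5, 3) is False without
    # walking the graph at all.
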
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

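    # Sketch (hypothetical linear history 0 <- 1 <- 2): calling
    # reachableroots(1, heads=[2], roots=[1]) yields [1], the root reachable
    # from the heads; with includepath=True it yields [1, 2], every revision
    # on the 1::2 path, which is the form isancestorrev() above relies on.
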
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

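    # The accepted identifier forms, by example (illustrative values):
    #
    #   rl._match(0)        -> node of revision 0 (integer rev)
    #   rl._match(b'0')     -> node of revision 0 (stringified rev)
    #   rl._match(node20)   -> node20 itself, if that 20-byte binary id is
    #                          known; a 40-char hex id is also accepted
    #
    # Anything that matches none of these falls through and returns None.
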
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

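    # Usage sketch (illustrative): if only one stored node's hex form starts
    # with b'1f', rl.shortest(thatnode) returns b'1f'; a longer prefix is
    # returned when minlength demands one, or when the short form would be
    # ambiguous with another node or with the all-'f' wdir pseudo-id.
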
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

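    # Worked example of the window arithmetic above (assuming a chunk cache
    # size of 65536): a request for offset=70000, length=100 rounds down to
    # realoffset=65536 and up to reallength=65536, so one aligned 64KiB read
    # covers the requested bytes plus their neighborhood for future hits.
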
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

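    # Why the `_inline` adjustment above: in an inline revlog the index
    # entries and data chunks are interleaved in a single file, so the data
    # for revision r starts (r + 1) index-entry sizes beyond its nominal
    # offset in a separate-data layout.
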
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

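    # Example (illustrative): if the index entry for rev 7 records base 5,
    # then with general delta deltaparent(7) == 5; without it deltas always
    # chain to the previous revision, so deltaparent(7) == 6. A base equal
    # to the revision itself marks a full snapshot and reports nullrev.
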
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

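    # Usage sketch (illustrative): rl.revision(node) returns the fully
    # processed text, with flag transforms applied, while rl.rawdata(node)
    # below returns the bytes as stored; the raw=True path here survives
    # only for backward compatibility and warns before taking that route.
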
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

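    # Reconstruction sketch (illustrative): for a delta chain
    # [base, d1, d2] from _deltachain(), the result is roughly
    # mdiff.patches(bytes(chunks[0]), chunks[1:]); when the cached revision
    # truncates the chain, the cached rawtext stands in for the base and
    # only the remaining deltas are applied on top of it.
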
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

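    # Illustrative sketch of what hashrevisionsha1 is understood to do (an
    # assumption about storageutil, kept here for orientation): SHA-1 over
    # the two parent nodes in a canonical (sorted) order followed by the
    # text, roughly:
    #
    #   s = hashlib.sha1(p1)
    #   s.update(p2)
    #   s.update(text)
    #   node = s.digest()
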
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

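    # Illustrative usage sketch; ``rl``, ``tr``, ``linkrev`` and the parent
    # nodes are assumed names. Callers normally let addrevision() compute
    # the node itself:
    #
    #   node = rl.addrevision(text, tr, linkrev, p1node, p2node)
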
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

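    # Illustrative sketch of the compress() contract: the return value is a
    # (header, data) pair and callers concatenate both parts when writing a
    # chunk. An empty header means the data already carries its own marker
    # (the compressor's header or a leading b'\0'); b'u' explicitly flags
    # uncompressed data (``rl`` is an assumed name):
    #
    #   header, data = rl.compress(b'some revision text')
    #   chunk = header + data
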
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

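    # Illustrative round-trip sketch (``rl`` is an assumed name): the first
    # byte routes the chunk, so compress() and decompress() compose:
    #
    #   header, data = rl.compress(rawtext)
    #   assert bytes(rl.decompress(header + data)) == rawtext
    #
    # b'x' is the zlib header, b'u' marks uncompressed data, and b'\0' marks
    # raw data that needs no unwrapping.
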
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

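    # Illustrative sketch of the two on-disk layouts produced by the two
    # branches above. A split revlog keeps index and data in separate files;
    # an inline revlog interleaves each index entry with its chunk:
    #
    #   split:  .i -> entry, entry, ...      .d -> chunk, chunk, ...
    #   inline: .i -> entry, chunk, entry, chunk, ...
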
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

        return nodes

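    # Illustrative sketch: each item yielded by ``deltas`` is the 7-tuple
    # unpacked in the loop above,
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # so a caller could feed a group roughly as (``cl`` being a changelog
    # used for the link mapping, an assumed name):
    #
    #   rl.addgroup(deltaiter, lambda linknode: cl.rev(linknode), tr)
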
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()

        del self.index[rev:-1]

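    # Illustrative sketch: strip() is the destructive half of
    # getstrippoint(); callers typically inspect the broken linkrevs first,
    # then truncate (``rl`` and ``tr`` are assumed names):
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   if rev < len(rl):
    #       rl.strip(minlink, tr)
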
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

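    # Illustrative sketch of interpreting the result: for a split revlog, a
    # positive ``dd`` means the data file extends past the recorded end of
    # the last revision, and a positive ``di`` means the index holds bytes
    # beyond a whole number of entries (e.g. an interrupted write):
    #
    #   dd, di = rl.checksize()
    #   assert (dd, di) == (0, 0), 'revlog has trailing garbage'
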
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When None, the destination revlog's current setting is
        used.

        If not None, `sidedatacompanion` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

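    # Illustrative usage sketch (``repo``, ``srcrl`` and ``destrl`` are
    # assumed names):
    #
    #   def companion(srcrevlog, rev):
    #       # keep all sidedata unchanged for every revision
    #       return (False, set(), {})
    #
    #   with repo.transaction(b'clone') as tr:
    #       srcrl.clone(tr, destrl, deltareuse=revlog.DELTAREUSENEVER,
    #                   sidedatacompanion=companion)
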
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

2765 def censorrevision(self, tr, censornode, tombstone=b''):
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

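    # --- Editorial sketch (not part of revlog.py) ---------------------------
    # A minimal, hedged example of how a caller might drive censorrevision(),
    # loosely modelled on the censor extension. The `repo` and `flog` names
    # are assumptions for illustration: a repo-like object whose transaction()
    # yields a context-manager transaction, and a filelog-like revlog created
    # with censorable=True.
    def _censorrevision_sketch(repo, flog, badnode):
        # The transaction ensures the rewritten revlog only replaces the
        # original on close, per the copy-then-swap strategy above. The
        # tombstone must be no longer than the data it replaces.
        with repo.transaction(b'censor') as tr:
            flog.censorrevision(tr, badnode, tombstone=b'redacted')
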
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

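    # --- Editorial sketch (not part of revlog.py) ---------------------------
    # A hedged example of consuming the generator above. ``revlogproblem``
    # instances expose optional ``error``, ``warning`` and ``node`` attributes;
    # the ``state`` keys mirror the ones verifyintegrity() reads and writes.
    # REVLOGV1 is assumed here purely for illustration.
    def _verifyintegrity_sketch(rl):
        state = {
            b'expectedversion': REVLOGV1,  # format the verifier expects
            b'erroroncensored': False,  # report censored data as skips only
        }
        for problem in rl.verifyintegrity(state):
            print(problem.error or problem.warning)
        # Nodes that failed to unpack were accumulated by the generator.
        return state[b'skipread']
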
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
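
    # --- Editorial sketch (not part of revlog.py) ---------------------------
    # A hedged example of querying storageinfo(). Each datum is computed only
    # when its flag is requested, which matters because trackedsize walks
    # every revision and storedsize stats every file backing the revlog.
    def _storageinfo_sketch(rl):
        info = rl.storageinfo(revisionscount=True, storedsize=True)
        # e.g. {b'revisionscount': 42, b'storedsize': 16384}
        return info[b'revisionscount'], info[b'storedsize']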