nodemap: only use persistent nodemap for non-inlined revlog...
marmoute - r44791:daad3aac default
@@ -1,2995 +1,2996 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


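# Illustrative sketch (editor's note, not part of the upstream file): the
# three helpers above pack a byte offset and a 16-bit flag field into a
# single integer and take it apart again.  Assuming a flag value that is
# covered by REVIDX_KNOWN_FLAGS:
#
#   >>> packed = offset_type(1024, 0)
#   >>> packed == 1024 << 16
#   True
#   >>> getoffset(packed), gettype(packed)
#   (1024, 0)

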
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack


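# Illustrative sketch (editor's note, not part of the upstream file): a v0
# index entry is a fixed 76-byte record (four 4-byte ints plus three
# 20-byte nodeids), so any record-sized slice of the index file unpacks
# directly with the helper defined just above:
#
#   >>> indexformatv0.size
#   76
#   >>> offset, clen, base, link, p1, p2, node = indexformatv0_unpack(record)
#
# where ``record`` is a hypothetical 76-byte slice and p1/p2/node are raw
# 20-byte node ids.

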
class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


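# Editor's note (an inference from the code above, not an upstream comment):
# for rev 0, packentry() overwrites the first 4 bytes of the 64-byte entry
# (the high half of the 6-byte offset field, which is always zero for the
# first revision) with the version/flags header.  That is why _loadindex()
# below can read the format with ``versionformat_unpack(indexdata[:4])``
# without any separate header record, and why readers can treat the offset
# of rev 0 as 0.

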
class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = indexfile[:-2] + b".n"

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

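        # Editor's note (not an upstream comment): ``x & (x - 1) == 0``
        # holds exactly when x is a power of two (for x > 0, which the
        # check above guarantees), e.g. 65536 & 65535 == 0 while
        # 65537 & 65536 != 0.
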
        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif rustrevlog is not None and self.opener.options.get(b'rust.index'):
            self._io = rustrevlogio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self.index.clearcaches()

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

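    # Illustrative sketch (editor's note, not part of the upstream file):
    # with generaldelta, each entry's base (e[3]) names the revision its
    # delta applies to, and a revision whose base is itself stores a full
    # snapshot.  Given, say, rev 0 a snapshot, rev 1 based on 0 and rev 2
    # based on 1, the walk above yields (``rl`` being a hypothetical revlog):
    #
    #   >>> rl._deltachain(2)
    #   ([0, 1, 2], False)
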
890 def ancestors(self, revs, stoprev=0, inclusive=False):
890 def ancestors(self, revs, stoprev=0, inclusive=False):
891 """Generate the ancestors of 'revs' in reverse revision order.
891 """Generate the ancestors of 'revs' in reverse revision order.
892 Does not generate revs lower than stoprev.
892 Does not generate revs lower than stoprev.
893
893
894 See the documentation for ancestor.lazyancestors for more details."""
894 See the documentation for ancestor.lazyancestors for more details."""
895
895
896 # first, make sure start revisions aren't filtered
896 # first, make sure start revisions aren't filtered
897 revs = list(revs)
897 revs = list(revs)
898 checkrev = self.node
898 checkrev = self.node
899 for r in revs:
899 for r in revs:
900 checkrev(r)
900 checkrev(r)
901 # and we're sure ancestors aren't filtered as well
901 # and we're sure ancestors aren't filtered as well
902
902
903 if rustancestor is not None:
903 if rustancestor is not None:
904 lazyancestors = rustancestor.LazyAncestors
904 lazyancestors = rustancestor.LazyAncestors
905 arg = self.index
905 arg = self.index
906 elif util.safehasattr(parsers, b'rustlazyancestors'):
906 elif util.safehasattr(parsers, b'rustlazyancestors'):
907 lazyancestors = ancestor.rustlazyancestors
907 lazyancestors = ancestor.rustlazyancestors
908 arg = self.index
908 arg = self.index
909 else:
909 else:
910 lazyancestors = ancestor.lazyancestors
910 lazyancestors = ancestor.lazyancestors
911 arg = self._uncheckedparentrevs
911 arg = self._uncheckedparentrevs
912 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
912 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
913
913
914 def descendants(self, revs):
914 def descendants(self, revs):
915 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
915 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
916
916
917 def findcommonmissing(self, common=None, heads=None):
917 def findcommonmissing(self, common=None, heads=None):
918 """Return a tuple of the ancestors of common and the ancestors of heads
918 """Return a tuple of the ancestors of common and the ancestors of heads
919 that are not ancestors of common. In revset terminology, we return the
919 that are not ancestors of common. In revset terminology, we return the
920 tuple:
920 tuple:
921
921
922 ::common, (::heads) - (::common)
922 ::common, (::heads) - (::common)
923
923
924 The list is sorted by revision number, meaning it is
924 The list is sorted by revision number, meaning it is
925 topologically sorted.
925 topologically sorted.
926
926
927 'heads' and 'common' are both lists of node IDs. If heads is
927 'heads' and 'common' are both lists of node IDs. If heads is
928 not supplied, uses all of the revlog's heads. If common is not
928 not supplied, uses all of the revlog's heads. If common is not
929 supplied, uses nullid."""
929 supplied, uses nullid."""
930 if common is None:
930 if common is None:
931 common = [nullid]
931 common = [nullid]
932 if heads is None:
932 if heads is None:
933 heads = self.heads()
933 heads = self.heads()
934
934
935 common = [self.rev(n) for n in common]
935 common = [self.rev(n) for n in common]
936 heads = [self.rev(n) for n in heads]
936 heads = [self.rev(n) for n in heads]
937
937
938 # we want the ancestors, but inclusive
938 # we want the ancestors, but inclusive
939 class lazyset(object):
939 class lazyset(object):
940 def __init__(self, lazyvalues):
940 def __init__(self, lazyvalues):
941 self.addedvalues = set()
941 self.addedvalues = set()
942 self.lazyvalues = lazyvalues
942 self.lazyvalues = lazyvalues
943
943
944 def __contains__(self, value):
944 def __contains__(self, value):
945 return value in self.addedvalues or value in self.lazyvalues
945 return value in self.addedvalues or value in self.lazyvalues
946
946
947 def __iter__(self):
947 def __iter__(self):
948 added = self.addedvalues
948 added = self.addedvalues
949 for r in added:
949 for r in added:
950 yield r
950 yield r
951 for r in self.lazyvalues:
951 for r in self.lazyvalues:
952 if not r in added:
952 if not r in added:
953 yield r
953 yield r
954
954
955 def add(self, value):
955 def add(self, value):
956 self.addedvalues.add(value)
956 self.addedvalues.add(value)
957
957
958 def update(self, values):
958 def update(self, values):
959 self.addedvalues.update(values)
959 self.addedvalues.update(values)
960
960
961 has = lazyset(self.ancestors(common))
961 has = lazyset(self.ancestors(common))
962 has.add(nullrev)
962 has.add(nullrev)
963 has.update(common)
963 has.update(common)
964
964
965 # take all ancestors from heads that aren't in has
965 # take all ancestors from heads that aren't in has
966 missing = set()
966 missing = set()
967 visit = collections.deque(r for r in heads if r not in has)
967 visit = collections.deque(r for r in heads if r not in has)
968 while visit:
968 while visit:
969 r = visit.popleft()
969 r = visit.popleft()
970 if r in missing:
970 if r in missing:
971 continue
971 continue
972 else:
972 else:
973 missing.add(r)
973 missing.add(r)
974 for p in self.parentrevs(r):
974 for p in self.parentrevs(r):
975 if p not in has:
975 if p not in has:
976 visit.append(p)
976 visit.append(p)
977 missing = list(missing)
977 missing = list(missing)
978 missing.sort()
978 missing.sort()
979 return has, [self.node(miss) for miss in missing]
979 return has, [self.node(miss) for miss in missing]
980
980
981 def incrementalmissingrevs(self, common=None):
981 def incrementalmissingrevs(self, common=None):
982 """Return an object that can be used to incrementally compute the
982 """Return an object that can be used to incrementally compute the
983 revision numbers of the ancestors of arbitrary sets that are not
983 revision numbers of the ancestors of arbitrary sets that are not
984 ancestors of common. This is an ancestor.incrementalmissingancestors
984 ancestors of common. This is an ancestor.incrementalmissingancestors
985 object.
985 object.
986
986
987 'common' is a list of revision numbers. If common is not supplied, uses
987 'common' is a list of revision numbers. If common is not supplied, uses
988 nullrev.
988 nullrev.
989 """
989 """
990 if common is None:
990 if common is None:
991 common = [nullrev]
991 common = [nullrev]
992
992
993 if rustancestor is not None:
993 if rustancestor is not None:
994 return rustancestor.MissingAncestors(self.index, common)
994 return rustancestor.MissingAncestors(self.index, common)
995 return ancestor.incrementalmissingancestors(self.parentrevs, common)
995 return ancestor.incrementalmissingancestors(self.parentrevs, common)
996
996
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

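    # Illustrative note: findmissing is the node-level twin of
    # findmissingrevs. A sketch of the relationship (hypothetical `rl`,
    # `cnodes`, `hnodes`):
    #
    #   set(rl.findmissing(cnodes, hnodes)) == {
    #       rl.node(r)
    #       for r in rl.findmissingrevs(
    #           [rl.rev(n) for n in cnodes], [rl.rev(n) for n in hnodes]
    #       )
    #   }
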
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

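    # Illustrative sketch (hypothetical `rl`, with node r an ancestor of
    # node h): nodesbetween([r], [h]) yields the revset `r::h` in
    # topological order, plus the endpoints that were actually reachable:
    #
    #   nodes, outroots, outheads = rl.nodesbetween([r], [h])
    #   assert nodes[0] == r and nodes[-1] == h
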
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

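    # Minimal sketch of the marking pass above, over plain parent pairs
    # (hypothetical data for a linear history 0 <- 1 <- 2; the extra list
    # slot absorbs nullrev (-1) exactly like the `count + 1` sizing does):
    #
    #   parentrevs = [(-1, -1), (0, -1), (1, -1)]
    #   ishead = [0] * 4
    #   for r, (p1, p2) in enumerate(parentrevs):
    #       ishead[r] = 1
    #       ishead[p1] = ishead[p2] = 0
    #   assert [r for r, v in enumerate(ishead[:3]) if v] == [2]
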
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

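    # Note on the `a > b` early return above: revision numbers are assigned
    # in topological order, so an ancestor can never have a larger rev than
    # its descendant. The final reachableroots(a, [b], [a]) call then simply
    # asks whether `a` is a root of the set `a::b`, i.e. whether a path of
    # parent links leads from b back to a.
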
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

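    # Illustrative sketch of the lookup order above (hypothetical `rl` and
    # an existing 20-byte `node`):
    #
    #   rl.lookup(0) == rl.node(0)        # int rev, via _match
    #   rl.lookup(b'0') == rl.node(0)     # stringified rev, via _match
    #   rl.lookup(node) == node           # binary nodeid, via _match
    #   rl.lookup(hex(node)[:6]) == node  # unique hex prefix, via
    #                                     # _partialmatch (assuming the
    #                                     # prefix is not also a valid
    #                                     # in-range integer rev)
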
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

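    # Illustrative round trip (hypothetical `rl` and existing `node`): the
    # prefix returned above resolves back to the node and is never a prefix
    # of the all-'f' working-directory id:
    #
    #   prefix = rl.shortest(node)
    #   assert rl._partialmatch(prefix) == node
    #   assert not wdirhex.startswith(prefix)
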
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

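    # Worked example of the window rounding above (assuming the default
    # chunk cache size of 65536; the mask arithmetic requires cachesize to
    # be a power of two):
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                             # 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536   # 65536
    #
    # so the bytes [65536, 131072) are read and cached around the request.
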
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

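    # Worked example of the inline correction above (assuming the 64-byte
    # version-1 index entry size): inline revlogs interleave index entries
    # and revision data, so the data of rev 2 sits after the three index
    # entries for revs 0..2, hence for startrev = 2:
    #
    #   start += (2 + 1) * 64
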
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

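    # Summary of the classification above for sparse revlogs (pure-Python
    # fallback): a rev stored as a full text (base == rev or base == nullrev)
    # is a snapshot; a rev whose delta base is one of its parents is a plain
    # delta; any other rev is an intermediate snapshot exactly when its base
    # is itself a snapshot, hence the recursion on `base`.
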
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

1869 def rawdata(self, nodeorrev, _df=None):
1869 def rawdata(self, nodeorrev, _df=None):
1870 """return an uncompressed raw data of a given node or revision number.
1870 """return an uncompressed raw data of a given node or revision number.
1871
1871
1872 _df - an existing file handle to read from. (internal-only)
1872 _df - an existing file handle to read from. (internal-only)
1873 """
1873 """
1874 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1874 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1875
1875
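    # Hedged usage sketch (``rl`` and ``node`` are assumed to exist, they are
    # not defined here): ``rawdata()`` skips flag processing entirely, while
    # ``_revisiondata()`` runs the flag processors and also returns sidedata.
    #
    #   raw = rl.rawdata(node)                    # stored bytes, undecoded
    #   text, sidedata = rl._revisiondata(node)   # processed text + sidedata
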
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

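    # For orientation only: a hedged, rough sketch of what the default
    # storageutil.hashrevisionsha1() computes (the canonical logic lives in
    # mercurial/utils/storageutil.py) -- SHA-1 over the sorted parent nodes
    # followed by the text:
    #
    #   import hashlib
    #   def sketch_hash(text, p1, p2):
    #       a, b = sorted([p1, p2])
    #       return hashlib.sha1(a + b + text).digest()
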
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

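    # On-disk effect, as a hedged sketch: an inline revlog interleaves index
    # entries and revision data in a single ``.i`` file; after conversion the
    # data lives in a separate ``.d`` file, which ``files()`` reflects:
    #
    #   rl.files()   # inline:        [b'path/to/file.i']
    #   rl.files()   # after convert: [b'path/to/file.i', b'path/to/file.d']
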
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

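    # Minimal usage sketch (``rl``, ``tr``, ``linkrev``, ``p1`` and ``p2`` are
    # assumed to exist; both parents may be nullid for a root revision):
    #
    #   node = rl.addrevision(b'new file content', tr, linkrev, p1, p2)
    #   assert rl.index.has_node(node)
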
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

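    # Summary of the (header, payload) convention above, which decompress()
    # relies on (illustrative, derived from the branches of compress()):
    #
    #   (b'', b'')       # empty input, stored as-is
    #   (b'', b'...')    # engine-compressed; the engine's header leads the payload
    #   (b'', b'\0...')  # raw data already starting with NUL, stored as-is
    #   (b'u', b'...')   # incompressible data, marked uncompressed with b'u'
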
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

        return nodes

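    # Shape of one ``deltas`` element, as unpacked in the loop above, plus a
    # hedged callback sketch (``deltas``, ``linkmapper`` and ``tr`` assumed):
    #
    #   # (node, p1, p2, linknode, deltabase, delta, flags)
    #   def cb(rl, node):
    #       pass  # e.g. record the node that was just added
    #   nodes = rl.addgroup(deltas, linkmapper, tr, addrevisioncb=cb)
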
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()

        del self.index[rev:-1]

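    # Hedged usage sketch: callers usually probe getstrippoint() first to see
    # what a strip would affect (``rl``, ``minlink`` and ``tr`` assumed):
    #
    #   strippoint, broken = rl.getstrippoint(minlink)
    #   if strippoint < len(rl):
    #       rl.strip(minlink, tr)   # truncates every rev >= strippoint
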
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

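    # Interpreting the result (hedged sketch): (0, 0) means the on-disk files
    # match what the index expects; anything else flags extra/missing bytes.
    #
    #   dd, di = rl.checksize()
    #   healthy = (dd, di) == (0, 0)
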
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

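    # Accepted ``nodesorder`` values, per the guard above (the glosses are a
    # hedged reading of the storage interface, not normative):
    #
    #   rl.emitrevisions(nodes, nodesorder=b'nodes')    # order nodes were given
    #   rl.emitrevisions(nodes, nodesorder=b'storage')  # revlog storage order
    #   rl.emitrevisions(nodes, nodesorder=b'linear')   # linearized DAG order
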
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

        (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

        (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

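    # Hedged usage sketch: force every delta to be recomputed while copying
    # (``src``, ``dst`` and ``tr`` are assumed revlogs and transaction):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
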
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned
            # below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                    state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
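

# Editor's sketch (illustrative, not part of revlog.py): one way a caller
# might use ``storageinfo`` above. ``rl`` is assumed to be any revlog
# instance; the helper name is hypothetical.
def _example_storage_summary(rl):
    info = rl.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    # trackedsize sums the uncompressed size (rawsize) of every revision,
    # while storedsize measures the bytes actually used on disk by the
    # index and data files.
    return (
        info[b'revisionscount'],
        info[b'trackedsize'],
        info[b'storedsize'],
    )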
@@ -1,202 +1,204 b''
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct

from .. import (
    error,
    node as nodemod,
    pycompat,
)


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


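# Editor's sketch (illustrative, not part of this module): NodeMap acts
# like a plain dict, except that a missing key raises RevlogError rather
# than KeyError.
def _example_nodemap_lookup():
    nm = NodeMap({b'\x12' * 20: 0})
    assert nm[b'\x12' * 20] == 0
    try:
        nm[b'\x34' * 20]
    except error.RevlogError:
        pass  # unknown nodes surface as a revlog error, not a KeyError

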
def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog.nodemap_file is None:
        return None
    return revlog.opener.tryread(revlog.nodemap_file)


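# Editor's sketch (illustrative, not part of this module): slicing the
# persisted bytes back into fixed-size blocks. ``rl`` is assumed to be a
# revlog whose nodemap_file is set; S_BLOCK is defined further down.
def _example_read_blocks(rl):
    data = persisted_data(rl)
    if not data:
        return []
    return [
        S_BLOCK.unpack_from(data, offset)
        for offset in range(0, len(data), S_BLOCK.size)
    ]

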
def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed on the transaction side to persist a
    nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog
    callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))


def _persist_nodemap(tr, revlog):
    """Write nodemap data on disk for a given revlog"""
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)
    data = persistent_data(revlog.index)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    with revlog.opener(revlog.nodemap_file, b'w') as f:
        f.write(data)
    # EXP-TODO: if the transaction aborts, we should remove the new data and
    # reinstall the old one. (This will be simpler when the file format gets
    # a bit more advanced)


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide a
# performance improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a trie using 4-bit addresses and 16-entry
# blocks. Each revision can be addressed using its node's shortest unique
# prefix.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32-bit integers, big endian, matching S_BLOCK below). Each entry
# can be one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value < -1  -> a revision value: rev = -(value+2)
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.


def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)


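# Editor's sketch (illustrative, not part of this module): the encoding
# in action. With REV_OFFSET = 2, revision 0 is stored as -2, revision 5
# as -7, and applying the function twice gives back the original value.
def _example_transform_rev():
    assert _transform_rev(0) == -2
    assert _transform_rev(5) == -7
    for rev in (0, 1, 5, 100):
        assert _transform_rev(_transform_rev(rev)) == rev

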
def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores the revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = {}
    for rev in range(len(index)):
        hex = nodemod.hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, hex)
    return root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding a revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = nodemod.hex(index[entry][7])
        other_rev = entry
        new = {}
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)


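# Editor's sketch (illustrative, not part of this module): building a
# trie from a two-entry toy index. Only slot 7 (the node) of each index
# entry matters for this illustration.
def _example_build_trie():
    fake_nodes = [b'\x12' + b'\x00' * 19, b'\x13' + b'\x00' * 19]
    fake_index = [(None,) * 7 + (node,) for node in fake_nodes]
    trie = _build_trie(fake_index)
    # Both nodes share the first hex digit '1', so the root points to a
    # sub-block in which '2' resolves to rev 0 and '3' to rev 1.
    assert trie == {1: {2: 0, 3: 1}}
    return trie

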
def _persist_trie(root):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for the nodemap trie structure"""
    block_map = {}
    chunks = []
    for tn in _walk_trie(root):
        block_map[id(tn)] = len(chunks)
        chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Child blocks are always yielded before their parent block.
    """
    for (_, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Child blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)
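

# Editor's sketch (illustrative, not part of this module): end-to-end
# serialization of the toy index used above. The output is a sequence of
# fixed-size blocks, and the root block comes last because _walk_trie
# yields children before their parent.
def _example_persist():
    fake_nodes = [b'\x12' + b'\x00' * 19, b'\x13' + b'\x00' * 19]
    fake_index = [(None,) * 7 + (node,) for node in fake_nodes]
    data = persistent_data(fake_index)
    assert len(data) % S_BLOCK.size == 0
    root = S_BLOCK.unpack(data[-S_BLOCK.size :])
    # Entry 1 of the root is >= 0: the index of the child block holding
    # the revisions whose nodes start with hex digit '1'.
    assert root[1] == 0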