revlog: improve documentation of the entry tuple...
marmoute - r48020:02a44635 default
@@ -1,3267 +1,3319 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
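
# An illustrative sketch of what `offset_type` packs (not part of the
# module's API): the offset lands in the high bits and the flags in the low
# 16 bits, so the two values round-trip with the shift/mask used by the
# index accessors further down.
#
#     packed = offset_type(4096, REVIDX_ISCENSORED)
#     assert packed >> 16 == 4096                  # recover the offset
#     assert packed & 0xFFFF == REVIDX_ISCENSORED  # recover the flags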


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
170 """Information about a revision that allows building its fulltext
170 """Information about a revision that allows building its fulltext
171 node: expected hash of the revision
171 node: expected hash of the revision
172 p1, p2: parent revs of the revision
172 p1, p2: parent revs of the revision
173 btext: built text cache consisting of a one-element list
173 btext: built text cache consisting of a one-element list
174 cachedelta: (baserev, uncompressed_delta) or None
174 cachedelta: (baserev, uncompressed_delta) or None
175 flags: flags associated to the revision storage
175 flags: flags associated to the revision storage
176
176
177 One of btext[0] or cachedelta must be set.
177 One of btext[0] or cachedelta must be set.
178 """
178 """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
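
    # A hedged sketch of how the delta path could fill this in; the values
    # and the `expected_node`/`delta_text` names are purely illustrative:
    #
    #     info = _revisioninfo(
    #         node=expected_node,   # hash the rebuilt fulltext must match
    #         p1=5, p2=nullrev,     # parent revision numbers
    #         btext=[None],         # fulltext not built yet...
    #         textlen=42,
    #         cachedelta=(5, delta_text),  # ...so a delta on rev 5 is given
    #         flags=0,
    #     )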


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries",
    tuple objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items
    (sometimes not having them at all), but the tuple will always be the
    same. New fields are usually added at the end to avoid breaking existing
    code that relies on the existing order. The fields are defined as
    follows:

    [0] offset:
            The byte index of the start of the revision data chunk.
            That value is shifted up by 16 bits. Use "offset = field >> 16"
            to retrieve it.

        flags:
            A flag field that carries special information or changes the
            behavior of the revision (see `REVIDX_*` constants for details).
            The flag field only occupies the lower 16 bits of this field;
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
            The size, in bytes, of the chunk on disk.

    [2] uncompressed length:
            The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
            Either the base of the revision delta chain (without general
            delta), or the base of the delta (stored in the data chunk)
            with general delta.

    [4] link rev:
            Changelog revision number of the changeset introducing this
            revision.

    [5] parent 1 rev:
            Revision number of the first parent.

    [6] parent 2 rev:
            Revision number of the second parent.

    [7] node id:
            The node id of the current revision.

    [8] sidedata offset:
            The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
            The size, in bytes, of the revision's side-data chunk.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance measurement code might
        not set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
379 """process options (from above/config) to setup associated default revlog mode
431 """process options (from above/config) to setup associated default revlog mode
380
432
381 These values might be affected when actually reading on disk information.
433 These values might be affected when actually reading on disk information.
382
434
383 The relevant values are returned for use in _loadindex().
435 The relevant values are returned for use in _loadindex().
384
436
385 * newversionflags:
437 * newversionflags:
386 version header to use if we need to create a new revlog
438 version header to use if we need to create a new revlog
387
439
388 * mmapindexthreshold:
440 * mmapindexthreshold:
389 minimal index size for start to use mmap
441 minimal index size for start to use mmap
390
442
391 * force_nodemap:
443 * force_nodemap:
392 force the usage of a "development" version of the nodemap code
444 force the usage of a "development" version of the nodemap code
393 """
445 """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
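        # `x & (x - 1)` clears the lowest set bit, so the expression below is
        # zero (false) exactly when the size is a power of two, e.g.
        # 65536 & 65535 == 0 while 65537 & 65536 != 0.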
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
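        # For instance (a sketch using the constants imported above), a v1
        # inline revlog with general delta has
        # header == REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
        # (0x00030001), which splits into _format_flags == 0x00030000 and
        # _format_version == REVLOGV1.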

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
627 """The public facing "ID" of the revlog that we use in message"""
679 """The public facing "ID" of the revlog that we use in message"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
714 """iterate over all rev in this revlog (from start to stop)"""
766 """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]
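
    # For example: if the stored entry has parents (nullrev, 12), i.e. p1
    # null but p2 set, the swap above makes parentrevs() report (12, nullrev),
    # so callers can rely on a real parent coming first (revision numbers
    # are illustrative).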

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == nullrev:
881 return i[d[6]][7], i[d[5]][7]
933 return i[d[6]][7], i[d[5]][7]
882 else:
934 else:
883 return i[d[5]][7], i[d[6]][7]
935 return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
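
    # Worked example (made-up revisions, with eN short for index[N]): if
    # rev 4 is stored as a delta on top of rev 2, and rev 2 as a delta on
    # top of the full snapshot rev 0, the loop above walks 4 -> 2 and
    # leaves via the ``else`` clause at 0:
    #
    #   clen == 2                                    # two deltas in the chain
    #   compresseddeltalen == e4[1] + e2[1] + e0[1]  # deltas plus base text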

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
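
    # Usage sketch with the same hypothetical 0 <- 2 <- 4 chain as above:
    # ``self._deltachain(4)`` returns ([0, 2, 4], False), while
    # ``self._deltachain(4, stoprev=2)`` returns ([4], True) because the
    # walk stops before including ``stoprev`` itself.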

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
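
    # Hedged usage sketch (placeholder nodes): discovery code can use this
    # family of methods to compute exactly what one side is missing, e.g.:
    #
    #   missing = rl.findmissing(common=[commonnode], heads=[remotehead])
    #
    # Because the result is topologically sorted, the revisions can be
    # applied in order without ever referencing an unknown parent.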

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
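
    # Worked example on a made-up DAG with parents 0 <- 1, 1 <- 2, 1 <- 3:
    # each rev marks itself as a candidate head and clears its parents, so
    # rev 1 clears slot 0 while revs 2 and 3 both clear slot 1, leaving
    # ``ishead == [0, 0, 1, 1, 0]`` (the extra slot absorbs nullrev writes)
    # and the head revisions are [2, 3].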

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
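
    # The ``a > b`` early return above relies on revlogs being append-only:
    # a revision is always written after its parents, so revision numbers
    # never decrease along ancestry. Hence isancestorrev(7, 3) can return
    # False without walking the graph at all.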

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
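
    # Forms accepted by _match(), shown with placeholder values:
    #
    #   self._match(5)           # integer revision number
    #   self._match(b'-1')       # str(rev); negative indexes from the end
    #   self._match(binnode)     # binary nodeid of the configured length
    #   self._match(hex(node))   # full-length hex nodeid
    #
    # Anything else (such as a short hex prefix) falls through and returns
    # None; lookup() then retries via _partialmatch().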

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full-length hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
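
    # Worked example of the window arithmetic above, assuming the default
    # 64 KiB chunk cache size: a request for 100 bytes at offset 70000 is
    # widened to an aligned window:
    #
    #   realoffset = 70000 & ~65535                            == 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  == 65536
    #
    # so a single aligned 64 KiB read serves this request and its neighbors.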

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)
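
    # For inline revlogs, index entries and data chunks are interleaved in
    # one file, so each revision shifts physical positions by one
    # entry_size. E.g. with a 64-byte entry size and startrev == 2, a
    # logical data offset of 300 maps to 300 + 3 * 64 == 492 bytes into the
    # combined file.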

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
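
    # Behavior sketch: if rev 5 is a full snapshot, index[5][3] == 5 and
    # deltaparent(5) == nullrev. Otherwise, with general delta the stored
    # base (say index[5][3] == 2) is the delta parent, while without it
    # deltas always chain to the previous revision, so deltaparent(5) == 4.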

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1
1799
1851
1800 def revdiff(self, rev1, rev2):
1852 def revdiff(self, rev1, rev2):
1801 """return or calculate a delta between two revisions
1853 """return or calculate a delta between two revisions
1802
1854
1803 The delta calculated is in binary form and is intended to be written to
1855 The delta calculated is in binary form and is intended to be written to
1804 revlog data directly. So this function needs raw revision data.
1856 revlog data directly. So this function needs raw revision data.
1805 """
1857 """
1806 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1858 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1807 return bytes(self._chunk(rev2))
1859 return bytes(self._chunk(rev2))
1808
1860
1809 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1861 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1810
1862
1811 def _processflags(self, text, flags, operation, raw=False):
1863 def _processflags(self, text, flags, operation, raw=False):
1812 """deprecated entry point to access flag processors"""
1864 """deprecated entry point to access flag processors"""
1813 msg = b'_processflag(...) use the specialized variant'
1865 msg = b'_processflag(...) use the specialized variant'
1814 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1866 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1815 if raw:
1867 if raw:
1816 return text, flagutil.processflagsraw(self, text, flags)
1868 return text, flagutil.processflagsraw(self, text, flags)
1817 elif operation == b'read':
1869 elif operation == b'read':
1818 return flagutil.processflagsread(self, text, flags)
1870 return flagutil.processflagsread(self, text, flags)
1819 else: # write operation
1871 else: # write operation
1820 return flagutil.processflagswrite(self, text, flags)
1872 return flagutil.processflagswrite(self, text, flags)
1821
1873
1822 def revision(self, nodeorrev, _df=None, raw=False):
1874 def revision(self, nodeorrev, _df=None, raw=False):
1823 """return an uncompressed revision of a given node or revision
1875 """return an uncompressed revision of a given node or revision
1824 number.
1876 number.
1825
1877
1826 _df - an existing file handle to read from. (internal-only)
1878 _df - an existing file handle to read from. (internal-only)
1827 raw - an optional argument specifying if the revision data is to be
1879 raw - an optional argument specifying if the revision data is to be
1828 treated as raw data when applying flag transforms. 'raw' should be set
1880 treated as raw data when applying flag transforms. 'raw' should be set
1829 to True when generating changegroups or in debug commands.
1881 to True when generating changegroups or in debug commands.
1830 """
1882 """
1831 if raw:
1883 if raw:
1832 msg = (
1884 msg = (
1833 b'revlog.revision(..., raw=True) is deprecated, '
1885 b'revlog.revision(..., raw=True) is deprecated, '
1834 b'use revlog.rawdata(...)'
1886 b'use revlog.rawdata(...)'
1835 )
1887 )
1836 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1888 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1837 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1889 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1838
1890
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

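    # Minimal sketch of the reconstruction performed above (assumed
    # example, not from the original source): fetch the chain's chunks and
    # fold the deltas over the base with ``mdiff.patches``::
    #
    #   chain, _stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   text = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # ``rl`` is a hypothetical open revlog; the real code also handles the
    # cached-base fast path and frees memory early.
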
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

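    # Worked example for the inline adjustment above (assumed numbers, not
    # from the original source): in an inline revlog, index entries and
    # data interleave in the ``.i`` file, so the stored offset must be
    # shifted past the ``(1 + rev)`` index entries preceding the data::
    #
    #   entry_size = 64        # illustrative per-entry size
    #   rev = 3
    #   stored_offset = 1024   # offset as recorded in the index
    #   real_offset = stored_offset + entry_size * (1 + rev)  # -> 1280
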
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

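    # Minimal sketch of the SHA-1 node scheme (a simplified assumption;
    # ``storageutil.hashrevisionsha1`` is the authoritative version)::
    #
    #   import hashlib
    #
    #   def hashrevision(text, p1, p2):
    #       a, b = sorted((p1, p2))  # parent order must not matter
    #       return hashlib.sha1(a + b + text).digest()
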
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

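    # Illustrative usage (assumed example, not from the original source):
    # every mutation must happen inside ``_writing`` so that file handles
    # and transaction bookkeeping are in place::
    #
    #   with rl._writing(tr):
    #       rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)
    #
    # ``rl`` and ``tr`` are hypothetical revlog/transaction objects;
    # ``addrawrevision()`` below wraps exactly this pattern.
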
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

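    # Illustrative usage (assumed example, not from the original source)::
    #
    #   with repo.transaction(b'example') as tr:
    #       rev = rl.addrevision(b'file content\n', tr, linkrev, p1, p2)
    #
    # ``repo``, ``rl``, ``linkrev``, ``p1`` and ``p2`` are hypothetical
    # names; the node is computed as ``hash(text, p1, p2)`` when not
    # supplied.
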
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

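    # Worked example of the header convention (assumed values, not from
    # the original source): ``compress()`` returns a ``(header, data)``
    # pair where an empty header means the payload identifies itself::
    #
    #   compress(b'')          # -> (b'', b'')           empty chunk
    #   compress(b'aaaa...')   # -> (b'', b'x\x9c...')   zlib, self-tagged
    #   compress(b'random')    # -> (b'u', b'random')    stored, 'u' tag
    #
    # ``decompress()`` below routes on that first byte: b'x' -> zlib,
    # b'u' -> stored (strip the tag), b'\0' -> raw inline data.
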
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

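    # Illustrative reading of the index entry built above (the field order
    # comes from the tuple construction; the slot comments are an assumed
    # summary)::
    #
    #   e = (
    #       offset_type(offset, flags),  # [0] data offset packed with flags
    #       deltainfo.deltalen,          # [1] length of the data on disk
    #       textlen,                     # [2] uncompressed rawtext length
    #       deltainfo.base,              # [3] delta base revision
    #       link,                        # [4] linkrev
    #       p1r, p2r,                    # [5], [6] parent revisions
    #       node,                        # [7] node id
    #       sidedata_offset,             # [8] sidedata offset (0 if empty)
    #       len(serialized_sidedata),    # [9] sidedata length
    #   )
    #
    # ``deltaparent()`` and ``_sidedata()`` above read slots [3], [8] and
    # [9] back out of ``self.index[rev]``.
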
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information, since sidedata can be rewritten
        to the end of the data file within a transaction. You can then have
        cases where, for example, rev `n` does not have sidedata while rev
        `n - 1` does, leading to `n - 1`'s sidedata being written after `n`'s
        data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

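    # Illustrative shape of one ``deltas`` item consumed above (the field
    # names mirror the unpacking; the annotations are assumptions)::
    #
    #   data = (
    #       node,       # node id of the new revision
    #       p1, p2,     # parent node ids
    #       linknode,   # node the linkrev is derived from via linkmapper
    #       deltabase,  # node the delta applies against
    #       delta,      # binary delta (or full replacement patch)
    #       flags,      # REVIDX_* flags, 0 for the defaults
    #       sidedata,   # sidedata dict, possibly empty
    #   )
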
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

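    # Worked example (assumed numbers, not from the original source): for
    # a split revlog with 64-byte index entries, stripping at rev 10
    # truncates the ``.i`` file to ``10 * 64 = 640`` bytes and the ``.d``
    # file to ``self.start(10)``; in the inline case both interleave in
    # ``.i``, so the cut point is ``start(10) + 10 * 64`` instead.
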
2699 def checksize(self):
2751 def checksize(self):
2700 """Check size of index and data files
2752 """Check size of index and data files
2701
2753
2702 return a (dd, di) tuple.
2754 return a (dd, di) tuple.
2703 - dd: extra bytes for the "data" file
2755 - dd: extra bytes for the "data" file
2704 - di: extra bytes for the "index" file
2756 - di: extra bytes for the "index" file
2705
2757
2706 A healthy revlog will return (0, 0).
2758 A healthy revlog will return (0, 0).
2707 """
2759 """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'
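            # (A non-generaldelta revlog stores each delta against the
            # previous revision in storage order, so emitting in storage
            # order maximizes the chance that existing deltas can be
            # reused.)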

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can
        have the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the
          destination revlog would not select the same revisions for the
          delta. This is the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the
          same revisions for the delta. This mode strikes a balance between
          speed and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of
          execution. This mode can be used to recompute deltas (e.g. if the
          diff/delta algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick
          in, e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance
        between two extremes. Deltas will be reused if they are
        appropriate. But if the delta could choose a better revision, it
        will do so. This means if you are converting a non-generaldelta
        revlog to a generaldelta revlog, deltas will be recomputed if the
        delta's parent isn't a parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. If unset, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
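            # (Raw index entry layout, v1/v2 tuple, for the fields used
            # below: entry[0] packs offset and flags, with the flags in the
            # low 16 bits; entry[4] is the linkrev; entry[5]/entry[6] are
            # the parent revisions; entry[7] is the node id.)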
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
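        # Example (hypothetical usage sketch): replace the stored content
        # of `badnode` with a tombstone, inside an open transaction:
        #
        #   with repo.transaction(b'censor') as tr:
        #       rl.censorrevision(tr, badnode, tombstone=b'removed')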
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
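        # packmeta wraps the tombstone in the b'\1\n'-delimited metadata
        # header (the filelog metadata convention), so readers can
        # recognize the replacement text as censored metadata.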

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring
        # is to create a new revlog, copy all revisions to it, then replace
        # the revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
3171 def storageinfo(
3223 def storageinfo(
3172 self,
3224 self,
3173 exclusivefiles=False,
3225 exclusivefiles=False,
3174 sharedfiles=False,
3226 sharedfiles=False,
3175 revisionscount=False,
3227 revisionscount=False,
3176 trackedsize=False,
3228 trackedsize=False,
3177 storedsize=False,
3229 storedsize=False,
3178 ):
3230 ):
3179 d = {}
3231 d = {}
3180
3232
3181 if exclusivefiles:
3233 if exclusivefiles:
3182 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3234 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3183 if not self._inline:
3235 if not self._inline:
3184 d[b'exclusivefiles'].append((self.opener, self._datafile))
3236 d[b'exclusivefiles'].append((self.opener, self._datafile))
3185
3237
3186 if sharedfiles:
3238 if sharedfiles:
3187 d[b'sharedfiles'] = []
3239 d[b'sharedfiles'] = []
3188
3240
3189 if revisionscount:
3241 if revisionscount:
3190 d[b'revisionscount'] = len(self)
3242 d[b'revisionscount'] = len(self)
3191
3243
3192 if trackedsize:
3244 if trackedsize:
3193 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3245 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3194
3246
3195 if storedsize:
3247 if storedsize:
3196 d[b'storedsize'] = sum(
3248 d[b'storedsize'] = sum(
3197 self.opener.stat(path).st_size for path in self.files()
3249 self.opener.stat(path).st_size for path in self.files()
3198 )
3250 )
3199
3251
3200 return d
3252 return d
3201
3253
3202 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3254 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3203 if not self.hassidedata:
3255 if not self.hassidedata:
3204 return
3256 return
3205 # revlog formats with sidedata support does not support inline
3257 # revlog formats with sidedata support does not support inline
3206 assert not self._inline
3258 assert not self._inline
3207 if not helpers[1] and not helpers[2]:
3259 if not helpers[1] and not helpers[2]:
3208 # Nothing to generate or remove
3260 # Nothing to generate or remove
3209 return
3261 return
3210
3262
3211 new_entries = []
3263 new_entries = []
3212 # append the new sidedata
3264 # append the new sidedata
3213 with self._writing(transaction):
3265 with self._writing(transaction):
3214 ifh, dfh = self._writinghandles
3266 ifh, dfh = self._writinghandles
3215 if self._docket is not None:
3267 if self._docket is not None:
3216 dfh.seek(self._docket.data_end, os.SEEK_SET)
3268 dfh.seek(self._docket.data_end, os.SEEK_SET)
3217 else:
3269 else:
3218 dfh.seek(0, os.SEEK_END)
3270 dfh.seek(0, os.SEEK_END)
3219
3271
3220 current_offset = dfh.tell()
3272 current_offset = dfh.tell()
3221 for rev in range(startrev, endrev + 1):
3273 for rev in range(startrev, endrev + 1):
3222 entry = self.index[rev]
3274 entry = self.index[rev]
3223 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3275 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3224 store=self,
3276 store=self,
3225 sidedata_helpers=helpers,
3277 sidedata_helpers=helpers,
3226 sidedata={},
3278 sidedata={},
3227 rev=rev,
3279 rev=rev,
3228 )
3280 )
3229
3281
3230 serialized_sidedata = sidedatautil.serialize_sidedata(
3282 serialized_sidedata = sidedatautil.serialize_sidedata(
3231 new_sidedata
3283 new_sidedata
3232 )
3284 )
3233 if entry[8] != 0 or entry[9] != 0:
3285 if entry[8] != 0 or entry[9] != 0:
3234 # rewriting entries that already have sidedata is not
3286 # rewriting entries that already have sidedata is not
3235 # supported yet, because it introduces garbage data in the
3287 # supported yet, because it introduces garbage data in the
3236 # revlog.
3288 # revlog.
3237 msg = b"rewriting existing sidedata is not supported yet"
3289 msg = b"rewriting existing sidedata is not supported yet"
3238 raise error.Abort(msg)
3290 raise error.Abort(msg)
3239
3291
3240 # Apply (potential) flags to add and to remove after running
3292 # Apply (potential) flags to add and to remove after running
3241 # the sidedata helpers
3293 # the sidedata helpers
3242 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3294 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3243 entry_update = (
3295 entry_update = (
3244 current_offset,
3296 current_offset,
3245 len(serialized_sidedata),
3297 len(serialized_sidedata),
3246 new_offset_flags,
3298 new_offset_flags,
3247 )
3299 )
3248
3300
3249 # the sidedata computation might have move the file cursors around
3301 # the sidedata computation might have move the file cursors around
3250 dfh.seek(current_offset, os.SEEK_SET)
3302 dfh.seek(current_offset, os.SEEK_SET)
3251 dfh.write(serialized_sidedata)
3303 dfh.write(serialized_sidedata)
3252 new_entries.append(entry_update)
3304 new_entries.append(entry_update)
3253 current_offset += len(serialized_sidedata)
3305 current_offset += len(serialized_sidedata)
3254 if self._docket is not None:
3306 if self._docket is not None:
3255 self._docket.data_end = dfh.tell()
3307 self._docket.data_end = dfh.tell()
3256
3308
3257 # rewrite the new index entries
3309 # rewrite the new index entries
3258 ifh.seek(startrev * self.index.entry_size)
3310 ifh.seek(startrev * self.index.entry_size)
3259 for i, e in enumerate(new_entries):
3311 for i, e in enumerate(new_entries):
3260 rev = startrev + i
3312 rev = startrev + i
3261 self.index.replace_sidedata_info(rev, *e)
3313 self.index.replace_sidedata_info(rev, *e)
3262 packed = self.index.entry_binary(rev)
3314 packed = self.index.entry_binary(rev)
3263 if rev == 0 and self._docket is None:
3315 if rev == 0 and self._docket is None:
3264 header = self._format_flags | self._format_version
3316 header = self._format_flags | self._format_version
3265 header = self.index.pack_header(header)
3317 header = self.index.pack_header(header)
3266 packed = header + packed
3318 packed = header + packed
3267 ifh.write(packed)
3319 ifh.write(packed)