revlog: move entry documentation alongside new related constants...
marmoute - r48185:7a0ec25d default
@@ -1,3463 +1,3405 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    censor,
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
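
`offset_type` packs a byte offset and a 16-bit flag field into a single integer, which is what gets stored as field [0] of an index entry. A minimal sketch of the round trip, with hypothetical values (the shift and mask mirror `start()` and `flags()` further down):

```python
offset, flags = 4096, 0  # hypothetical data offset and flag field
field = (offset << 16) | flags  # what offset_type() computes
assert field >> 16 == offset  # how start() recovers the offset
assert field & 0xFFFF == flags  # how flags() recovers the flags
```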


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

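As a hypothetical illustration of the invariant in the docstring (either `btext[0]` or `cachedelta` must be set), code adding a revision might build the record like this:

```python
info = _revisioninfo(
    node=b'\x00' * 20,  # expected sha1 hash (placeholder value)
    p1=1,
    p2=nullrev,  # no second parent
    btext=[None],  # one-element list used as a fulltext cache slot
    textlen=42,
    cachedelta=(0, b'<uncompressed delta>'),  # (baserev, delta)
    flags=0,
)
```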

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

PARTIAL_READ_MSG = _(
    b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
)

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

-
-    Internal details
-    ----------------
-
-    A large part of the revlog logic deals with revisions' "index entries", tuple
-    objects that contain the same "items" whatever the revlog version.
-    Different versions will have different ways of storing these items (sometimes
-    not having them at all), but the tuple will always be the same. New fields
-    are usually added at the end to avoid breaking existing code that relies
-    on the existing order. The fields are defined as follows:
-
-    [0] offset:
-        The byte index of the start of revision data chunk.
-        That value is shifted up by 16 bits. Use "offset = field >> 16" to
-        retrieve it.
-
-        flags:
-            A flag field that carries special information or changes the behavior
-            of the revision. (see `REVIDX_*` constants for details)
-            The flag field only occupies the first 16 bits of this field,
-            use "flags = field & 0xFFFF" to retrieve the value.
-
-    [1] compressed length:
-        The size, in bytes, of the chunk on disk
-
-    [2] uncompressed length:
-        The size, in bytes, of the full revision once reconstructed.
-
-    [3] base rev:
-        Either the base of the revision delta chain (without general
-        delta), or the base of the delta (stored in the data chunk)
-        with general delta.
-
-    [4] link rev:
-        Changelog revision number of the changeset introducing this
-        revision.
-
-    [5] parent 1 rev:
-        Revision number of the first parent
-
-    [6] parent 2 rev:
-        Revision number of the second parent
-
-    [7] node id:
-        The node id of the current revision
-
-    [8] sidedata offset:
-        The byte index of the start of the revision's side-data chunk.
-
-    [9] sidedata chunk length:
-        The size, in bytes, of the revision's side-data chunk.
-
-    [10] data compression mode:
-        Two bits that detail the way the data chunk is compressed on disk.
-        (see "COMP_MODE_*" constants for details). For revlog version 0 and
-        1 this will always be COMP_MODE_INLINE.
-
-    [11] side-data compression mode:
-        Two bits that detail the way the sidedata chunk is compressed on disk.
-        (see "COMP_MODE_*" constants for details)
+    See mercurial/revlogutils/constants.py for details about the content of an
+    index entry.
    """
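
Concretely, the layout documented above (now moved to revlogutils/constants.py) means an entry can be unpacked by position; a sketch, where `rl` stands for any revlog instance and `rev` for a valid revision number:

```python
e = rl.index[rev]
data_offset = e[0] >> 16  # [0] high bits: start of the data chunk
rev_flags = e[0] & 0xFFFF  # [0] low 16 bits: REVIDX_* flags
comp_len, raw_len = e[1], e[2]  # on-disk size, reconstructed size
base_rev, link_rev = e[3], e[4]  # delta base, changelog revision
p1_rev, p2_rev, node_id = e[5], e[6], e[7]  # parents and node id
sd_offset, sd_len = e[8], e[9]  # side-data location and size
```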

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but test, debug, or performance measurement code might not
        set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
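
The same read-or-mmap-or-empty pattern, reduced to a standalone sketch using only the stdlib (the names here are made up for illustration, not Mercurial APIs):

```python
import mmap
import os

def read_maybe_mmap(path, mmap_threshold=None):
    try:
        with open(path, 'rb') as fp:
            size = os.fstat(fp.fileno()).st_size
            if (
                mmap_threshold is not None
                and size >= mmap_threshold
                and size > 0  # mmap cannot map an empty file
            ):
                # map read-only; the mapping stays valid after fp closes
                return mmap.mmap(fp.fileno(), size, access=mmap.ACCESS_READ)
            return fp.read()
    except FileNotFoundError:
        return b''
```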

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

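The flag/version split computed at the top of `_loadindex` can be checked against the v1 constants imported above; a small illustration (in revlogutils.constants, REVLOGV1 is 1 and FLAG_INLINE_DATA is 1 << 16):

```python
header = REVLOGV1 | FLAG_INLINE_DATA  # header of a new inline v1 revlog
assert header & 0xFFFF == REVLOGV1  # becomes self._format_version
assert header & ~0xFFFF == FLAG_INLINE_DATA  # becomes self._format_flags
```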
715 @util.propertycache
657 @util.propertycache
716 def revlog_kind(self):
658 def revlog_kind(self):
717 return self.target[0]
659 return self.target[0]
718
660
719 @util.propertycache
661 @util.propertycache
720 def display_id(self):
662 def display_id(self):
721 """The public facing "ID" of the revlog that we use in message"""
663 """The public facing "ID" of the revlog that we use in message"""
722 # Maybe we should build a user facing representation of
664 # Maybe we should build a user facing representation of
723 # revlog.target instead of using `self.radix`
665 # revlog.target instead of using `self.radix`
724 return self.radix
666 return self.radix
725
667
726 def _get_decompressor(self, t):
668 def _get_decompressor(self, t):
727 try:
669 try:
728 compressor = self._decompressors[t]
670 compressor = self._decompressors[t]
729 except KeyError:
671 except KeyError:
730 try:
672 try:
731 engine = util.compengines.forrevlogheader(t)
673 engine = util.compengines.forrevlogheader(t)
732 compressor = engine.revlogcompressor(self._compengineopts)
674 compressor = engine.revlogcompressor(self._compengineopts)
733 self._decompressors[t] = compressor
675 self._decompressors[t] = compressor
734 except KeyError:
676 except KeyError:
735 raise error.RevlogError(
677 raise error.RevlogError(
736 _(b'unknown compression type %s') % binascii.hexlify(t)
678 _(b'unknown compression type %s') % binascii.hexlify(t)
737 )
679 )
738 return compressor
680 return compressor
739
681
740 @util.propertycache
682 @util.propertycache
741 def _compressor(self):
683 def _compressor(self):
742 engine = util.compengines[self._compengine]
684 engine = util.compengines[self._compengine]
743 return engine.revlogcompressor(self._compengineopts)
685 return engine.revlogcompressor(self._compengineopts)
744
686
745 @util.propertycache
687 @util.propertycache
746 def _decompressor(self):
688 def _decompressor(self):
747 """the default decompressor"""
689 """the default decompressor"""
748 if self._docket is None:
690 if self._docket is None:
749 return None
691 return None
750 t = self._docket.default_compression_header
692 t = self._docket.default_compression_header
751 c = self._get_decompressor(t)
693 c = self._get_decompressor(t)
752 return c.decompress
694 return c.decompress
753
695
754 def _indexfp(self):
696 def _indexfp(self):
755 """file object for the revlog's index file"""
697 """file object for the revlog's index file"""
756 return self.opener(self._indexfile, mode=b"r")
698 return self.opener(self._indexfile, mode=b"r")
757
699
758 def __index_write_fp(self):
700 def __index_write_fp(self):
759 # You should not use this directly and use `_writing` instead
701 # You should not use this directly and use `_writing` instead
760 try:
702 try:
761 f = self.opener(
703 f = self.opener(
762 self._indexfile, mode=b"r+", checkambig=self._checkambig
704 self._indexfile, mode=b"r+", checkambig=self._checkambig
763 )
705 )
764 if self._docket is None:
706 if self._docket is None:
765 f.seek(0, os.SEEK_END)
707 f.seek(0, os.SEEK_END)
766 else:
708 else:
767 f.seek(self._docket.index_end, os.SEEK_SET)
709 f.seek(self._docket.index_end, os.SEEK_SET)
768 return f
710 return f
769 except IOError as inst:
711 except IOError as inst:
770 if inst.errno != errno.ENOENT:
712 if inst.errno != errno.ENOENT:
771 raise
713 raise
772 return self.opener(
714 return self.opener(
773 self._indexfile, mode=b"w+", checkambig=self._checkambig
715 self._indexfile, mode=b"w+", checkambig=self._checkambig
774 )
716 )
775
717
776 def __index_new_fp(self):
718 def __index_new_fp(self):
777 # You should not use this unless you are upgrading from inline revlog
719 # You should not use this unless you are upgrading from inline revlog
778 return self.opener(
720 return self.opener(
779 self._indexfile,
721 self._indexfile,
780 mode=b"w",
722 mode=b"w",
781 checkambig=self._checkambig,
723 checkambig=self._checkambig,
782 atomictemp=True,
724 atomictemp=True,
783 )
725 )
784
726
785 def _datafp(self, mode=b'r'):
727 def _datafp(self, mode=b'r'):
786 """file object for the revlog's data file"""
728 """file object for the revlog's data file"""
787 return self.opener(self._datafile, mode=mode)
729 return self.opener(self._datafile, mode=mode)
788
730
789 @contextlib.contextmanager
731 @contextlib.contextmanager
790 def _datareadfp(self, existingfp=None):
732 def _datareadfp(self, existingfp=None):
791 """file object suitable to read data"""
733 """file object suitable to read data"""
792 # Use explicit file handle, if given.
734 # Use explicit file handle, if given.
793 if existingfp is not None:
735 if existingfp is not None:
794 yield existingfp
736 yield existingfp
795
737
796 # Use a file handle being actively used for writes, if available.
738 # Use a file handle being actively used for writes, if available.
797 # There is some danger to doing this because reads will seek the
739 # There is some danger to doing this because reads will seek the
798 # file. However, _writeentry() performs a SEEK_END before all writes,
740 # file. However, _writeentry() performs a SEEK_END before all writes,
799 # so we should be safe.
741 # so we should be safe.
800 elif self._writinghandles:
742 elif self._writinghandles:
801 if self._inline:
743 if self._inline:
802 yield self._writinghandles[0]
744 yield self._writinghandles[0]
803 else:
745 else:
804 yield self._writinghandles[1]
746 yield self._writinghandles[1]
805
747
806 # Otherwise open a new file handle.
748 # Otherwise open a new file handle.
807 else:
749 else:
808 if self._inline:
750 if self._inline:
809 func = self._indexfp
751 func = self._indexfp
810 else:
752 else:
811 func = self._datafp
753 func = self._datafp
812 with func() as fp:
754 with func() as fp:
813 yield fp
755 yield fp
814
756
815 @contextlib.contextmanager
757 @contextlib.contextmanager
816 def _sidedatareadfp(self):
758 def _sidedatareadfp(self):
817 """file object suitable to read sidedata"""
759 """file object suitable to read sidedata"""
818 if self._writinghandles:
760 if self._writinghandles:
819 yield self._writinghandles[2]
761 yield self._writinghandles[2]
820 else:
762 else:
821 with self.opener(self._sidedatafile) as fp:
763 with self.opener(self._sidedatafile) as fp:
822 yield fp
764 yield fp
823
765
824 def tiprev(self):
766 def tiprev(self):
825 return len(self.index) - 1
767 return len(self.index) - 1
826
768
827 def tip(self):
769 def tip(self):
828 return self.node(self.tiprev())
770 return self.node(self.tiprev())
829
771
830 def __contains__(self, rev):
772 def __contains__(self, rev):
831 return 0 <= rev < len(self)
773 return 0 <= rev < len(self)
832
774
833 def __len__(self):
775 def __len__(self):
834 return len(self.index)
776 return len(self.index)
835
777
836 def __iter__(self):
778 def __iter__(self):
837 return iter(pycompat.xrange(len(self)))
779 return iter(pycompat.xrange(len(self)))
838
780
839 def revs(self, start=0, stop=None):
781 def revs(self, start=0, stop=None):
840 """iterate over all rev in this revlog (from start to stop)"""
782 """iterate over all rev in this revlog (from start to stop)"""
841 return storageutil.iterrevs(len(self), start=start, stop=stop)
783 return storageutil.iterrevs(len(self), start=start, stop=stop)
842
784
843 @property
785 @property
844 def nodemap(self):
786 def nodemap(self):
845 msg = (
787 msg = (
846 b"revlog.nodemap is deprecated, "
788 b"revlog.nodemap is deprecated, "
847 b"use revlog.index.[has_node|rev|get_rev]"
789 b"use revlog.index.[has_node|rev|get_rev]"
848 )
790 )
849 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
791 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
850 return self.index.nodemap
792 return self.index.nodemap
851
793
852 @property
794 @property
853 def _nodecache(self):
795 def _nodecache(self):
854 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
796 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
855 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
797 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
856 return self.index.nodemap
798 return self.index.nodemap
857
799
858 def hasnode(self, node):
800 def hasnode(self, node):
859 try:
801 try:
860 self.rev(node)
802 self.rev(node)
861 return True
803 return True
862 except KeyError:
804 except KeyError:
863 return False
805 return False
864
806
865 def candelta(self, baserev, rev):
807 def candelta(self, baserev, rev):
866 """whether two revisions (baserev, rev) can be delta-ed or not"""
808 """whether two revisions (baserev, rev) can be delta-ed or not"""
867 # Disable delta if either rev requires a content-changing flag
809 # Disable delta if either rev requires a content-changing flag
868 # processor (ex. LFS). This is because such flag processor can alter
810 # processor (ex. LFS). This is because such flag processor can alter
869 # the rawtext content that the delta will be based on, and two clients
811 # the rawtext content that the delta will be based on, and two clients
870 # could have a same revlog node with different flags (i.e. different
812 # could have a same revlog node with different flags (i.e. different
871 # rawtext contents) and the delta could be incompatible.
813 # rawtext contents) and the delta could be incompatible.
872 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
814 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
873 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
815 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
874 ):
816 ):
875 return False
817 return False
876 return True
818 return True
877
819
878 def update_caches(self, transaction):
820 def update_caches(self, transaction):
879 if self._nodemap_file is not None:
821 if self._nodemap_file is not None:
880 if transaction is None:
822 if transaction is None:
881 nodemaputil.update_persistent_nodemap(self)
823 nodemaputil.update_persistent_nodemap(self)
882 else:
824 else:
883 nodemaputil.setup_persistent_nodemap(transaction, self)
825 nodemaputil.setup_persistent_nodemap(transaction, self)
884
826
885 def clearcaches(self):
827 def clearcaches(self):
886 self._revisioncache = None
828 self._revisioncache = None
887 self._chainbasecache.clear()
829 self._chainbasecache.clear()
888 self._chunkcache = (0, b'')
830 self._chunkcache = (0, b'')
889 self._pcache = {}
831 self._pcache = {}
890 self._nodemap_docket = None
832 self._nodemap_docket = None
891 self.index.clearcaches()
833 self.index.clearcaches()
892 # The python code is the one responsible for validating the docket, we
834 # The python code is the one responsible for validating the docket, we
893 # end up having to refresh it here.
835 # end up having to refresh it here.
894 use_nodemap = (
836 use_nodemap = (
895 not self._inline
837 not self._inline
896 and self._nodemap_file is not None
838 and self._nodemap_file is not None
897 and util.safehasattr(self.index, 'update_nodemap_data')
839 and util.safehasattr(self.index, 'update_nodemap_data')
898 )
840 )
899 if use_nodemap:
841 if use_nodemap:
900 nodemap_data = nodemaputil.persisted_data(self)
842 nodemap_data = nodemaputil.persisted_data(self)
901 if nodemap_data is not None:
843 if nodemap_data is not None:
902 self._nodemap_docket = nodemap_data[0]
844 self._nodemap_docket = nodemap_data[0]
903 self.index.update_nodemap_data(*nodemap_data)
845 self.index.update_nodemap_data(*nodemap_data)
904
846
905 def rev(self, node):
847 def rev(self, node):
906 try:
848 try:
907 return self.index.rev(node)
849 return self.index.rev(node)
908 except TypeError:
850 except TypeError:
909 raise
851 raise
910 except error.RevlogError:
852 except error.RevlogError:
911 # parsers.c radix tree lookup failed
853 # parsers.c radix tree lookup failed
912 if (
854 if (
913 node == self.nodeconstants.wdirid
855 node == self.nodeconstants.wdirid
914 or node in self.nodeconstants.wdirfilenodeids
856 or node in self.nodeconstants.wdirfilenodeids
915 ):
857 ):
916 raise error.WdirUnsupported
858 raise error.WdirUnsupported
917 raise error.LookupError(node, self.display_id, _(b'no node'))
859 raise error.LookupError(node, self.display_id, _(b'no node'))
918
860
919 # Accessors for index entries.
861 # Accessors for index entries.
920
862
921 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
863 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
922 # are flags.
864 # are flags.
923 def start(self, rev):
865 def start(self, rev):
924 return int(self.index[rev][0] >> 16)
866 return int(self.index[rev][0] >> 16)
925
867
926 def sidedata_cut_off(self, rev):
868 def sidedata_cut_off(self, rev):
927 sd_cut_off = self.index[rev][8]
869 sd_cut_off = self.index[rev][8]
928 if sd_cut_off != 0:
870 if sd_cut_off != 0:
929 return sd_cut_off
871 return sd_cut_off
930 # This is some annoying dance, because entries without sidedata
872 # This is some annoying dance, because entries without sidedata
931 # currently use 0 as their ofsset. (instead of previous-offset +
873 # currently use 0 as their ofsset. (instead of previous-offset +
932 # previous-size)
874 # previous-size)
933 #
875 #
934 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
876 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
935 # In the meantime, we need this.
877 # In the meantime, we need this.
936 while 0 <= rev:
878 while 0 <= rev:
937 e = self.index[rev]
879 e = self.index[rev]
938 if e[9] != 0:
880 if e[9] != 0:
939 return e[8] + e[9]
881 return e[8] + e[9]
940 rev -= 1
882 rev -= 1
941 return 0
883 return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

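        # Follow entry[3] (the delta base) until reaching a revision that
        # is its own base, i.e. one stored as a full snapshot.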
        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
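        # If p1 is null but p2 is not, swap them so that callers never see
        # a null parent in the first slot.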
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
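            """Set-like view combining eagerly added values with a lazily
            iterated base iterable (here, an ancestor generator)."""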
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
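        # The extra trailing slot absorbs writes indexed by nullrev (-1),
        # so clearing a null parent below cannot clobber a real revision.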
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
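        # A parent always has a lower revision number than its child, so a
        # revision numbered higher than b can never be an ancestor of b.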
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
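                # e.g. b"-1" resolves to len(self) - 1, the tip, mirroring
                # Python's negative indexing.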
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
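        # (only when the new data is contiguous with what is cached and the
        # combined size stays under _chunksize)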
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
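        # cachesize is validated elsewhere to be a power of two, so the
        # masks above round the window to aligned boundaries. E.g.
        # (illustrative numbers): cachesize 65536, offset 70000, length 10
        # gives realoffset 65536 and reallength 65536, i.e. one aligned
        # 64 KiB window covering the request.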
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                filename = self._indexfile if self._inline else self._datafile
                got = len(d) - startoffset
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)
            return util.buffer(d, startoffset, length)

        if len(d) < length:
            filename = self._indexfile if self._inline else self._datafile
            # here offset == realoffset, so everything read counts toward
            # the requested segment
            got = len(d)
            m = PARTIAL_READ_MSG % (filename, length, offset, got)
            raise error.RevlogError(m)

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
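            # In an inline revlog, index entries and revision data are
            # interleaved in one file, so each revision's data is shifted
            # by the (rev + 1) index entries that precede it.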
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
1832 compression_mode = self.index[rev][10]
1774 compression_mode = self.index[rev][10]
1833 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1775 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1834 if compression_mode == COMP_MODE_PLAIN:
1776 if compression_mode == COMP_MODE_PLAIN:
1835 return data
1777 return data
1836 elif compression_mode == COMP_MODE_DEFAULT:
1778 elif compression_mode == COMP_MODE_DEFAULT:
1837 return self._decompressor(data)
1779 return self._decompressor(data)
1838 elif compression_mode == COMP_MODE_INLINE:
1780 elif compression_mode == COMP_MODE_INLINE:
1839 return self.decompress(data)
1781 return self.decompress(data)
1840 else:
1782 else:
1841 msg = 'unknown compression mode %d'
1783 msg = 'unknown compression mode %d'
1842 msg %= compression_mode
1784 msg %= compression_mode
1843 raise error.RevlogError(msg)
1785 raise error.RevlogError(msg)
1844
1786
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

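    # Reading sketch (hypothetical revlog ``rl`` and revision ``rev``):
    # ``nullrev`` means the revision is stored as a full text rather than a
    # delta; with general delta the stored base field *is* the delta parent,
    # while the legacy layout always deltas against the previous revision.
    #
    #   p = rl.deltaparent(rev)
    #   if p == nullrev:
    #       raw = rl.rawdata(rev)  # full text, no chain to walk
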
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

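    # Worked example (assumed sparse-revlog chain, hypothetical revisions):
    # a revision deltaing against one of its parents is an ordinary delta; a
    # revision deltaing against an unrelated base is a snapshot only if that
    # base is itself (transitively) a snapshot.
    #
    #   rev 10: full text               -> snapshot (base == rev)
    #   rev 11: delta vs p1 == 10       -> not a snapshot
    #   rev 12: delta vs 10, p1 == 11   -> snapshot iff issnapshot(10)
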
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

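    # Reconstruction sketch (assumed names, stripped of the caching logic
    # above): a raw text is the first full text of the delta chain patched
    # with every following delta.
    #
    #   chain, _stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
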
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        # XXX this needs caching, as we do for data
        with self._sidedatareadfp() as sdf:
            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                filename = self._sidedatafile
                end = self._docket.sidedata_end
                offset = sidedata_offset
                length = sidedata_size
                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                raise error.RevlogError(m)

            sdf.seek(sidedata_offset, os.SEEK_SET)
            comp_segment = sdf.read(sidedata_size)

            if len(comp_segment) < sidedata_size:
                filename = self._sidedatafile
                length = sidedata_size
                offset = sidedata_offset
                got = len(comp_segment)
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

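    # Usage sketch (hypothetical revlog ``rl`` and node ``n``): ``revision``
    # returns the text after flag processors ran and the hash was checked,
    # while ``rawdata`` returns the stored bytes untouched.
    #
    #   text = rl.revision(n)  # processed text (flag transforms applied)
    #   raw = rl.rawdata(n)    # exactly the bytes the revlog stores
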
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

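    # Sketch of the default scheme (see storageutil.hashrevisionsha1, shown
    # here only as an assumption-labeled illustration): the node is the
    # SHA-1 of both parent nodes, lowest first, followed by the text.
    #
    #   import hashlib
    #   a, b = sorted([p1, p2])
    #   node = hashlib.sha1(a + b + text).digest()
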
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # expose all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

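    # Usage sketch (hypothetical revlog ``rl``, transaction ``tr``, linkrev
    # ``linkrev`` and parent nodes ``p1``/``p2``): ``addrevision`` runs the
    # flag processors, computes the node when needed, then delegates here
    # inside a ``_writing`` context, returning the new revision number.
    #
    #   rev = rl.addrevision(b'new file content', tr, linkrev, p1, p2)
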
    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

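    # Header convention sketch (assumed revlog ``rl`` and chunk ``chunk``):
    # ``compress`` returns a (header, data) pair. An empty header means the
    # engine embedded its own marker (e.g. b'x' for zlib), b'u' flags data
    # stored verbatim, and data already starting with b'\0' needs no marker.
    #
    #   h, packed = rl.compress(chunk)
    #   stored = h + packed             # the bytes written to the revlog
    #   restored = rl.decompress(stored)  # dispatches on the first byte
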
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

2763 def addgroup(
2705 def addgroup(
2764 self,
2706 self,
2765 deltas,
2707 deltas,
2766 linkmapper,
2708 linkmapper,
2767 transaction,
2709 transaction,
2768 alwayscache=False,
2710 alwayscache=False,
2769 addrevisioncb=None,
2711 addrevisioncb=None,
2770 duplicaterevisioncb=None,
2712 duplicaterevisioncb=None,
2771 ):
2713 ):
2772 """
2714 """
2773 add a delta group
2715 add a delta group
2774
2716
2775 Given a set of deltas, add them to the revision log. The
2717 Given a set of deltas, add them to the revision log. The
2776 first delta is against its parent, which should be in our
2718 first delta is against its parent, which should be in our
2777 log; the rest are against the previous delta.
2719 log; the rest are against the previous delta.
2778
2720
2779 If ``addrevisioncb`` is defined, it will be called with arguments of
2721 If ``addrevisioncb`` is defined, it will be called with arguments of
2780 this revlog and the node that was added.
2722 this revlog and the node that was added.
2781 """
2723 """
2782
2724
2783 if self._adding_group:
2725 if self._adding_group:
2784 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2726 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2785
2727
2786 self._adding_group = True
2728 self._adding_group = True
2787 empty = True
2729 empty = True
2788 try:
2730 try:
2789 with self._writing(transaction):
2731 with self._writing(transaction):
2790 deltacomputer = deltautil.deltacomputer(self)
2732 deltacomputer = deltautil.deltacomputer(self)
2791 # loop through our set of deltas
2733 # loop through our set of deltas
2792 for data in deltas:
2734 for data in deltas:
2793 (
2735 (
2794 node,
2736 node,
2795 p1,
2737 p1,
2796 p2,
2738 p2,
2797 linknode,
2739 linknode,
2798 deltabase,
2740 deltabase,
2799 delta,
2741 delta,
2800 flags,
2742 flags,
2801 sidedata,
2743 sidedata,
2802 ) = data
2744 ) = data
2803 link = linkmapper(linknode)
2745 link = linkmapper(linknode)
2804 flags = flags or REVIDX_DEFAULT_FLAGS
2746 flags = flags or REVIDX_DEFAULT_FLAGS
2805
2747
2806 rev = self.index.get_rev(node)
2748 rev = self.index.get_rev(node)
2807 if rev is not None:
2749 if rev is not None:
2808 # this can happen if two branches make the same change
2750 # this can happen if two branches make the same change
2809 self._nodeduplicatecallback(transaction, rev)
2751 self._nodeduplicatecallback(transaction, rev)
2810 if duplicaterevisioncb:
2752 if duplicaterevisioncb:
2811 duplicaterevisioncb(self, rev)
2753 duplicaterevisioncb(self, rev)
2812 empty = False
2754 empty = False
2813 continue
2755 continue
2814
2756
2815 for p in (p1, p2):
2757 for p in (p1, p2):
2816 if not self.index.has_node(p):
2758 if not self.index.has_node(p):
2817 raise error.LookupError(
2759 raise error.LookupError(
2818 p, self.radix, _(b'unknown parent')
2760 p, self.radix, _(b'unknown parent')
2819 )
2761 )
2820
2762
2821 if not self.index.has_node(deltabase):
2763 if not self.index.has_node(deltabase):
2822 raise error.LookupError(
2764 raise error.LookupError(
2823 deltabase, self.display_id, _(b'unknown delta base')
2765 deltabase, self.display_id, _(b'unknown delta base')
2824 )
2766 )
2825
2767
2826 baserev = self.rev(deltabase)
2768 baserev = self.rev(deltabase)
2827
2769
2828 if baserev != nullrev and self.iscensored(baserev):
2770 if baserev != nullrev and self.iscensored(baserev):
2829 # if base is censored, delta must be full replacement in a
2771 # if base is censored, delta must be full replacement in a
2830 # single patch operation
2772 # single patch operation
2831 hlen = struct.calcsize(b">lll")
2773 hlen = struct.calcsize(b">lll")
2832 oldlen = self.rawsize(baserev)
2774 oldlen = self.rawsize(baserev)
2833 newlen = len(delta) - hlen
2775 newlen = len(delta) - hlen
2834 if delta[:hlen] != mdiff.replacediffheader(
2776 if delta[:hlen] != mdiff.replacediffheader(
2835 oldlen, newlen
2777 oldlen, newlen
2836 ):
2778 ):
2837 raise error.CensoredBaseError(
2779 raise error.CensoredBaseError(
2838 self.display_id, self.node(baserev)
2780 self.display_id, self.node(baserev)
2839 )
2781 )
2840
2782
2841 if not flags and self._peek_iscensored(baserev, delta):
2783 if not flags and self._peek_iscensored(baserev, delta):
2842 flags |= REVIDX_ISCENSORED
2784 flags |= REVIDX_ISCENSORED
2843
2785
2844 # We assume consumers of addrevisioncb will want to retrieve
2786 # We assume consumers of addrevisioncb will want to retrieve
2845 # the added revision, which will require a call to
2787 # the added revision, which will require a call to
2846 # revision(). revision() will fast path if there is a cache
2788 # revision(). revision() will fast path if there is a cache
2847 # hit. So, we tell _addrevision() to always cache in this case.
2789 # hit. So, we tell _addrevision() to always cache in this case.
2848 # We're only using addgroup() in the context of changegroup
2790 # We're only using addgroup() in the context of changegroup
2849 # generation so the revision data can always be handled as raw
2791 # generation so the revision data can always be handled as raw
2850 # by the flagprocessor.
2792 # by the flagprocessor.
2851 rev = self._addrevision(
2793 rev = self._addrevision(
2852 node,
2794 node,
2853 None,
2795 None,
2854 transaction,
2796 transaction,
2855 link,
2797 link,
2856 p1,
2798 p1,
2857 p2,
2799 p2,
2858 flags,
2800 flags,
2859 (baserev, delta),
2801 (baserev, delta),
2860 alwayscache=alwayscache,
2802 alwayscache=alwayscache,
2861 deltacomputer=deltacomputer,
2803 deltacomputer=deltacomputer,
2862 sidedata=sidedata,
2804 sidedata=sidedata,
2863 )
2805 )
2864
2806
2865 if addrevisioncb:
2807 if addrevisioncb:
2866 addrevisioncb(self, rev)
2808 addrevisioncb(self, rev)
2867 empty = False
2809 empty = False
2868 finally:
2810 finally:
2869 self._adding_group = False
2811 self._adding_group = False
2870 return not empty
2812 return not empty
2871
2813
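For reference, each element of ``deltas`` consumed by ``addgroup()`` above is the 8-tuple unpacked in the loop; a hypothetical caller-side sketch (names `rl`, `cl`, and `tr` are assumptions) looks like:

deltas = [
    # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    (node, p1, p2, linknode, deltabase, delta, 0, {}),
]
# `cl.rev` plays the linkmapper role, translating each linknode into a
# changelog revision number before the entry is written.
added = rl.addgroup(deltas, cl.rev, tr)  # True if the group was non-empty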
2872 def iscensored(self, rev):
2814 def iscensored(self, rev):
2873 """Check if a file revision is censored."""
2815 """Check if a file revision is censored."""
2874 if not self._censorable:
2816 if not self._censorable:
2875 return False
2817 return False
2876
2818
2877 return self.flags(rev) & REVIDX_ISCENSORED
2819 return self.flags(rev) & REVIDX_ISCENSORED
2878
2820
2879 def _peek_iscensored(self, baserev, delta):
2821 def _peek_iscensored(self, baserev, delta):
2880 """Quickly check if a delta produces a censored revision."""
2822 """Quickly check if a delta produces a censored revision."""
2881 if not self._censorable:
2823 if not self._censorable:
2882 return False
2824 return False
2883
2825
2884 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2826 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2885
2827
2886 def getstrippoint(self, minlink):
2828 def getstrippoint(self, minlink):
2887 """find the minimum rev that must be stripped to strip the linkrev
2829 """find the minimum rev that must be stripped to strip the linkrev
2888
2830
2889 Returns a tuple containing the minimum rev and a set of all revs that
2831 Returns a tuple containing the minimum rev and a set of all revs that
2890 have linkrevs that will be broken by this strip.
2832 have linkrevs that will be broken by this strip.
2891 """
2833 """
2892 return storageutil.resolvestripinfo(
2834 return storageutil.resolvestripinfo(
2893 minlink,
2835 minlink,
2894 len(self) - 1,
2836 len(self) - 1,
2895 self.headrevs(),
2837 self.headrevs(),
2896 self.linkrev,
2838 self.linkrev,
2897 self.parentrevs,
2839 self.parentrevs,
2898 )
2840 )
2899
2841
2900 def strip(self, minlink, transaction):
2842 def strip(self, minlink, transaction):
2901 """truncate the revlog on the first revision with a linkrev >= minlink
2843 """truncate the revlog on the first revision with a linkrev >= minlink
2902
2844
2903 This function is called when we're stripping revision minlink and
2845 This function is called when we're stripping revision minlink and
2904 its descendants from the repository.
2846 its descendants from the repository.
2905
2847
2906 We have to remove all revisions with linkrev >= minlink, because
2848 We have to remove all revisions with linkrev >= minlink, because
2907 the equivalent changelog revisions will be renumbered after the
2849 the equivalent changelog revisions will be renumbered after the
2908 strip.
2850 strip.
2909
2851
2910 So we truncate the revlog on the first of these revisions, and
2852 So we truncate the revlog on the first of these revisions, and
2911 trust that the caller has saved the revisions that shouldn't be
2853 trust that the caller has saved the revisions that shouldn't be
2912 removed and that it'll re-add them after this truncation.
2854 removed and that it'll re-add them after this truncation.
2913 """
2855 """
2914 if len(self) == 0:
2856 if len(self) == 0:
2915 return
2857 return
2916
2858
2917 rev, _ = self.getstrippoint(minlink)
2859 rev, _ = self.getstrippoint(minlink)
2918 if rev == len(self):
2860 if rev == len(self):
2919 return
2861 return
2920
2862
2921 # first truncate the files on disk
2863 # first truncate the files on disk
2922 data_end = self.start(rev)
2864 data_end = self.start(rev)
2923 if not self._inline:
2865 if not self._inline:
2924 transaction.add(self._datafile, data_end)
2866 transaction.add(self._datafile, data_end)
2925 end = rev * self.index.entry_size
2867 end = rev * self.index.entry_size
2926 else:
2868 else:
2927 end = data_end + (rev * self.index.entry_size)
2869 end = data_end + (rev * self.index.entry_size)
2928
2870
2929 if self._sidedatafile:
2871 if self._sidedatafile:
2930 sidedata_end = self.sidedata_cut_off(rev)
2872 sidedata_end = self.sidedata_cut_off(rev)
2931 transaction.add(self._sidedatafile, sidedata_end)
2873 transaction.add(self._sidedatafile, sidedata_end)
2932
2874
2933 transaction.add(self._indexfile, end)
2875 transaction.add(self._indexfile, end)
2934 if self._docket is not None:
2876 if self._docket is not None:
2935 # XXX we could leverage the docket while stripping. However, it is
2877 # XXX we could leverage the docket while stripping. However, it is
2936 # not powerful enough at the time of this comment
2878 # not powerful enough at the time of this comment
2937 self._docket.index_end = end
2879 self._docket.index_end = end
2938 self._docket.data_end = data_end
2880 self._docket.data_end = data_end
2939 self._docket.sidedata_end = sidedata_end
2881 self._docket.sidedata_end = sidedata_end
2940 self._docket.write(transaction, stripping=True)
2882 self._docket.write(transaction, stripping=True)
2941
2883
2942 # then reset internal state in memory to forget those revisions
2884 # then reset internal state in memory to forget those revisions
2943 self._revisioncache = None
2885 self._revisioncache = None
2944 self._chaininfocache = util.lrucachedict(500)
2886 self._chaininfocache = util.lrucachedict(500)
2945 self._chunkclear()
2887 self._chunkclear()
2946
2888
2947 del self.index[rev:-1]
2889 del self.index[rev:-1]
2948
2890
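A hedged usage sketch tying ``getstrippoint()`` and ``strip()`` together (caller names hypothetical):

# `broken` holds the revs whose linkrevs this strip would invalidate;
# the caller is expected to save and re-add them afterwards.
rev, broken = rl.getstrippoint(minlink)
if rev < len(rl):
    rl.strip(minlink, tr)  # truncates index/data files from `rev` onward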
2949 def checksize(self):
2891 def checksize(self):
2950 """Check size of index and data files
2892 """Check size of index and data files
2951
2893
2952 return a (dd, di) tuple.
2894 return a (dd, di) tuple.
2953 - dd: extra bytes for the "data" file
2895 - dd: extra bytes for the "data" file
2954 - di: extra bytes for the "index" file
2896 - di: extra bytes for the "index" file
2955
2897
2956 A healthy revlog will return (0, 0).
2898 A healthy revlog will return (0, 0).
2957 """
2899 """
2958 expected = 0
2900 expected = 0
2959 if len(self):
2901 if len(self):
2960 expected = max(0, self.end(len(self) - 1))
2902 expected = max(0, self.end(len(self) - 1))
2961
2903
2962 try:
2904 try:
2963 with self._datafp() as f:
2905 with self._datafp() as f:
2964 f.seek(0, io.SEEK_END)
2906 f.seek(0, io.SEEK_END)
2965 actual = f.tell()
2907 actual = f.tell()
2966 dd = actual - expected
2908 dd = actual - expected
2967 except IOError as inst:
2909 except IOError as inst:
2968 if inst.errno != errno.ENOENT:
2910 if inst.errno != errno.ENOENT:
2969 raise
2911 raise
2970 dd = 0
2912 dd = 0
2971
2913
2972 try:
2914 try:
2973 f = self.opener(self._indexfile)
2915 f = self.opener(self._indexfile)
2974 f.seek(0, io.SEEK_END)
2916 f.seek(0, io.SEEK_END)
2975 actual = f.tell()
2917 actual = f.tell()
2976 f.close()
2918 f.close()
2977 s = self.index.entry_size
2919 s = self.index.entry_size
2978 i = max(0, actual // s)
2920 i = max(0, actual // s)
2979 di = actual - (i * s)
2921 di = actual - (i * s)
2980 if self._inline:
2922 if self._inline:
2981 databytes = 0
2923 databytes = 0
2982 for r in self:
2924 for r in self:
2983 databytes += max(0, self.length(r))
2925 databytes += max(0, self.length(r))
2984 dd = 0
2926 dd = 0
2985 di = actual - len(self) * s - databytes
2927 di = actual - len(self) * s - databytes
2986 except IOError as inst:
2928 except IOError as inst:
2987 if inst.errno != errno.ENOENT:
2929 if inst.errno != errno.ENOENT:
2988 raise
2930 raise
2989 di = 0
2931 di = 0
2990
2932
2991 return (dd, di)
2933 return (dd, di)
2992
2934
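Usage sketch for the tuple documented above (assuming a revlog `rl`):

dd, di = rl.checksize()
# (0, 0) is the healthy case; anything else means trailing or missing
# bytes in the data or index file respectively.
assert (dd, di) == (0, 0), (dd, di)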
2993 def files(self):
2935 def files(self):
2994 res = [self._indexfile]
2936 res = [self._indexfile]
2995 if not self._inline:
2937 if not self._inline:
2996 res.append(self._datafile)
2938 res.append(self._datafile)
2997 return res
2939 return res
2998
2940
2999 def emitrevisions(
2941 def emitrevisions(
3000 self,
2942 self,
3001 nodes,
2943 nodes,
3002 nodesorder=None,
2944 nodesorder=None,
3003 revisiondata=False,
2945 revisiondata=False,
3004 assumehaveparentrevisions=False,
2946 assumehaveparentrevisions=False,
3005 deltamode=repository.CG_DELTAMODE_STD,
2947 deltamode=repository.CG_DELTAMODE_STD,
3006 sidedata_helpers=None,
2948 sidedata_helpers=None,
3007 ):
2949 ):
3008 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2950 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3009 raise error.ProgrammingError(
2951 raise error.ProgrammingError(
3010 b'unhandled value for nodesorder: %s' % nodesorder
2952 b'unhandled value for nodesorder: %s' % nodesorder
3011 )
2953 )
3012
2954
3013 if nodesorder is None and not self._generaldelta:
2955 if nodesorder is None and not self._generaldelta:
3014 nodesorder = b'storage'
2956 nodesorder = b'storage'
3015
2957
3016 if (
2958 if (
3017 not self._storedeltachains
2959 not self._storedeltachains
3018 and deltamode != repository.CG_DELTAMODE_PREV
2960 and deltamode != repository.CG_DELTAMODE_PREV
3019 ):
2961 ):
3020 deltamode = repository.CG_DELTAMODE_FULL
2962 deltamode = repository.CG_DELTAMODE_FULL
3021
2963
3022 return storageutil.emitrevisions(
2964 return storageutil.emitrevisions(
3023 self,
2965 self,
3024 nodes,
2966 nodes,
3025 nodesorder,
2967 nodesorder,
3026 revlogrevisiondelta,
2968 revlogrevisiondelta,
3027 deltaparentfn=self.deltaparent,
2969 deltaparentfn=self.deltaparent,
3028 candeltafn=self.candelta,
2970 candeltafn=self.candelta,
3029 rawsizefn=self.rawsize,
2971 rawsizefn=self.rawsize,
3030 revdifffn=self.revdiff,
2972 revdifffn=self.revdiff,
3031 flagsfn=self.flags,
2973 flagsfn=self.flags,
3032 deltamode=deltamode,
2974 deltamode=deltamode,
3033 revisiondata=revisiondata,
2975 revisiondata=revisiondata,
3034 assumehaveparentrevisions=assumehaveparentrevisions,
2976 assumehaveparentrevisions=assumehaveparentrevisions,
3035 sidedata_helpers=sidedata_helpers,
2977 sidedata_helpers=sidedata_helpers,
3036 )
2978 )
3037
2979
3038 DELTAREUSEALWAYS = b'always'
2980 DELTAREUSEALWAYS = b'always'
3039 DELTAREUSESAMEREVS = b'samerevs'
2981 DELTAREUSESAMEREVS = b'samerevs'
3040 DELTAREUSENEVER = b'never'
2982 DELTAREUSENEVER = b'never'
3041
2983
3042 DELTAREUSEFULLADD = b'fulladd'
2984 DELTAREUSEFULLADD = b'fulladd'
3043
2985
3044 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2986 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3045
2987
3046 def clone(
2988 def clone(
3047 self,
2989 self,
3048 tr,
2990 tr,
3049 destrevlog,
2991 destrevlog,
3050 addrevisioncb=None,
2992 addrevisioncb=None,
3051 deltareuse=DELTAREUSESAMEREVS,
2993 deltareuse=DELTAREUSESAMEREVS,
3052 forcedeltabothparents=None,
2994 forcedeltabothparents=None,
3053 sidedata_helpers=None,
2995 sidedata_helpers=None,
3054 ):
2996 ):
3055 """Copy this revlog to another, possibly with format changes.
2997 """Copy this revlog to another, possibly with format changes.
3056
2998
3057 The destination revlog will contain the same revisions and nodes.
2999 The destination revlog will contain the same revisions and nodes.
3058 However, it may not be bit-for-bit identical due to e.g. delta encoding
3000 However, it may not be bit-for-bit identical due to e.g. delta encoding
3059 differences.
3001 differences.
3060
3002
3061 The ``deltareuse`` argument controls how deltas from the existing revlog
3003 The ``deltareuse`` argument controls how deltas from the existing revlog
3062 are preserved in the destination revlog. The argument can have the
3004 are preserved in the destination revlog. The argument can have the
3063 following values:
3005 following values:
3064
3006
3065 DELTAREUSEALWAYS
3007 DELTAREUSEALWAYS
3066 Deltas will always be reused (if possible), even if the destination
3008 Deltas will always be reused (if possible), even if the destination
3067 revlog would not select the same revisions for the delta. This is the
3009 revlog would not select the same revisions for the delta. This is the
3068 fastest mode of operation.
3010 fastest mode of operation.
3069 DELTAREUSESAMEREVS
3011 DELTAREUSESAMEREVS
3070 Deltas will be reused if the destination revlog would pick the same
3012 Deltas will be reused if the destination revlog would pick the same
3071 revisions for the delta. This mode strikes a balance between speed
3013 revisions for the delta. This mode strikes a balance between speed
3072 and optimization.
3014 and optimization.
3073 DELTAREUSENEVER
3015 DELTAREUSENEVER
3074 Deltas will never be reused. This is the slowest mode of execution.
3016 Deltas will never be reused. This is the slowest mode of execution.
3075 This mode can be used to recompute deltas (e.g. if the diff/delta
3017 This mode can be used to recompute deltas (e.g. if the diff/delta
3076 algorithm changes).
3018 algorithm changes).
3077 DELTAREUSEFULLADD
3019 DELTAREUSEFULLADD
3078 Revisions will be re-added as if they were new content. This is
3020 Revisions will be re-added as if they were new content. This is
3079 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3021 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3080 e.g. large file detection and handling.
3022 e.g. large file detection and handling.
3081
3023
3082 Delta computation can be slow, so the choice of delta reuse policy can
3024 Delta computation can be slow, so the choice of delta reuse policy can
3083 significantly affect run time.
3025 significantly affect run time.
3084
3026
3085 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3027 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3086 two extremes. Deltas will be reused if they are appropriate. But if the
3028 two extremes. Deltas will be reused if they are appropriate. But if the
3087 delta could choose a better revision, it will do so. This means if you
3029 delta could choose a better revision, it will do so. This means if you
3088 are converting a non-generaldelta revlog to a generaldelta revlog,
3030 are converting a non-generaldelta revlog to a generaldelta revlog,
3089 deltas will be recomputed if the delta's parent isn't a parent of the
3031 deltas will be recomputed if the delta's parent isn't a parent of the
3090 revision.
3032 revision.
3091
3033
3092 In addition to the delta policy, the ``forcedeltabothparents``
3034 In addition to the delta policy, the ``forcedeltabothparents``
3093 argument controls whether to force compute deltas against both parents
3035 argument controls whether to force compute deltas against both parents
3094 for merges. If unset, the destination revlog's current setting is kept.
3036 for merges. If unset, the destination revlog's current setting is kept.
3095
3037
3096 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3038 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3097 `sidedata_helpers`.
3039 `sidedata_helpers`.
3098 """
3040 """
3099 if deltareuse not in self.DELTAREUSEALL:
3041 if deltareuse not in self.DELTAREUSEALL:
3100 raise ValueError(
3042 raise ValueError(
3101 _(b'value for deltareuse invalid: %s') % deltareuse
3043 _(b'value for deltareuse invalid: %s') % deltareuse
3102 )
3044 )
3103
3045
3104 if len(destrevlog):
3046 if len(destrevlog):
3105 raise ValueError(_(b'destination revlog is not empty'))
3047 raise ValueError(_(b'destination revlog is not empty'))
3106
3048
3107 if getattr(self, 'filteredrevs', None):
3049 if getattr(self, 'filteredrevs', None):
3108 raise ValueError(_(b'source revlog has filtered revisions'))
3050 raise ValueError(_(b'source revlog has filtered revisions'))
3109 if getattr(destrevlog, 'filteredrevs', None):
3051 if getattr(destrevlog, 'filteredrevs', None):
3110 raise ValueError(_(b'destination revlog has filtered revisions'))
3052 raise ValueError(_(b'destination revlog has filtered revisions'))
3111
3053
3112 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3054 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3113 # if possible.
3055 # if possible.
3114 oldlazydelta = destrevlog._lazydelta
3056 oldlazydelta = destrevlog._lazydelta
3115 oldlazydeltabase = destrevlog._lazydeltabase
3057 oldlazydeltabase = destrevlog._lazydeltabase
3116 oldamd = destrevlog._deltabothparents
3058 oldamd = destrevlog._deltabothparents
3117
3059
3118 try:
3060 try:
3119 if deltareuse == self.DELTAREUSEALWAYS:
3061 if deltareuse == self.DELTAREUSEALWAYS:
3120 destrevlog._lazydeltabase = True
3062 destrevlog._lazydeltabase = True
3121 destrevlog._lazydelta = True
3063 destrevlog._lazydelta = True
3122 elif deltareuse == self.DELTAREUSESAMEREVS:
3064 elif deltareuse == self.DELTAREUSESAMEREVS:
3123 destrevlog._lazydeltabase = False
3065 destrevlog._lazydeltabase = False
3124 destrevlog._lazydelta = True
3066 destrevlog._lazydelta = True
3125 elif deltareuse == self.DELTAREUSENEVER:
3067 elif deltareuse == self.DELTAREUSENEVER:
3126 destrevlog._lazydeltabase = False
3068 destrevlog._lazydeltabase = False
3127 destrevlog._lazydelta = False
3069 destrevlog._lazydelta = False
3128
3070
3129 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3071 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3130
3072
3131 self._clone(
3073 self._clone(
3132 tr,
3074 tr,
3133 destrevlog,
3075 destrevlog,
3134 addrevisioncb,
3076 addrevisioncb,
3135 deltareuse,
3077 deltareuse,
3136 forcedeltabothparents,
3078 forcedeltabothparents,
3137 sidedata_helpers,
3079 sidedata_helpers,
3138 )
3080 )
3139
3081
3140 finally:
3082 finally:
3141 destrevlog._lazydelta = oldlazydelta
3083 destrevlog._lazydelta = oldlazydelta
3142 destrevlog._lazydeltabase = oldlazydeltabase
3084 destrevlog._lazydeltabase = oldlazydeltabase
3143 destrevlog._deltabothparents = oldamd
3085 destrevlog._deltabothparents = oldamd
3144
3086
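As a hedged illustration of the policies documented above, forcing deltas to be recomputed during a format conversion might look like this (names `repo`, `src`, and `dst` are assumptions):

with repo.transaction(b'clone-revlog') as tr:
    # DELTAREUSENEVER: slowest, but every delta is recomputed with the
    # destination revlog's own policy (e.g. after enabling generaldelta).
    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)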
3145 def _clone(
3087 def _clone(
3146 self,
3088 self,
3147 tr,
3089 tr,
3148 destrevlog,
3090 destrevlog,
3149 addrevisioncb,
3091 addrevisioncb,
3150 deltareuse,
3092 deltareuse,
3151 forcedeltabothparents,
3093 forcedeltabothparents,
3152 sidedata_helpers,
3094 sidedata_helpers,
3153 ):
3095 ):
3154 """perform the core duty of `revlog.clone` after parameter processing"""
3096 """perform the core duty of `revlog.clone` after parameter processing"""
3155 deltacomputer = deltautil.deltacomputer(destrevlog)
3097 deltacomputer = deltautil.deltacomputer(destrevlog)
3156 index = self.index
3098 index = self.index
3157 for rev in self:
3099 for rev in self:
3158 entry = index[rev]
3100 entry = index[rev]
3159
3101
3160 # Some classes override linkrev to take filtered revs into
3102 # Some classes override linkrev to take filtered revs into
3161 # account. Use raw entry from index.
3103 # account. Use raw entry from index.
3162 flags = entry[0] & 0xFFFF
3104 flags = entry[0] & 0xFFFF
3163 linkrev = entry[4]
3105 linkrev = entry[4]
3164 p1 = index[entry[5]][7]
3106 p1 = index[entry[5]][7]
3165 p2 = index[entry[6]][7]
3107 p2 = index[entry[6]][7]
3166 node = entry[7]
3108 node = entry[7]
3167
3109
3168 # (Possibly) reuse the delta from the revlog if allowed and
3110 # (Possibly) reuse the delta from the revlog if allowed and
3169 # the revlog chunk is a delta.
3111 # the revlog chunk is a delta.
3170 cachedelta = None
3112 cachedelta = None
3171 rawtext = None
3113 rawtext = None
3172 if deltareuse == self.DELTAREUSEFULLADD:
3114 if deltareuse == self.DELTAREUSEFULLADD:
3173 text = self._revisiondata(rev)
3115 text = self._revisiondata(rev)
3174 sidedata = self.sidedata(rev)
3116 sidedata = self.sidedata(rev)
3175
3117
3176 if sidedata_helpers is not None:
3118 if sidedata_helpers is not None:
3177 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3119 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3178 self, sidedata_helpers, sidedata, rev
3120 self, sidedata_helpers, sidedata, rev
3179 )
3121 )
3180 flags = flags | new_flags[0] & ~new_flags[1]
3122 flags = flags | new_flags[0] & ~new_flags[1]
3181
3123
3182 destrevlog.addrevision(
3124 destrevlog.addrevision(
3183 text,
3125 text,
3184 tr,
3126 tr,
3185 linkrev,
3127 linkrev,
3186 p1,
3128 p1,
3187 p2,
3129 p2,
3188 cachedelta=cachedelta,
3130 cachedelta=cachedelta,
3189 node=node,
3131 node=node,
3190 flags=flags,
3132 flags=flags,
3191 deltacomputer=deltacomputer,
3133 deltacomputer=deltacomputer,
3192 sidedata=sidedata,
3134 sidedata=sidedata,
3193 )
3135 )
3194 else:
3136 else:
3195 if destrevlog._lazydelta:
3137 if destrevlog._lazydelta:
3196 dp = self.deltaparent(rev)
3138 dp = self.deltaparent(rev)
3197 if dp != nullrev:
3139 if dp != nullrev:
3198 cachedelta = (dp, bytes(self._chunk(rev)))
3140 cachedelta = (dp, bytes(self._chunk(rev)))
3199
3141
3200 sidedata = None
3142 sidedata = None
3201 if not cachedelta:
3143 if not cachedelta:
3202 rawtext = self._revisiondata(rev)
3144 rawtext = self._revisiondata(rev)
3203 sidedata = self.sidedata(rev)
3145 sidedata = self.sidedata(rev)
3204 if sidedata is None:
3146 if sidedata is None:
3205 sidedata = self.sidedata(rev)
3147 sidedata = self.sidedata(rev)
3206
3148
3207 if sidedata_helpers is not None:
3149 if sidedata_helpers is not None:
3208 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3150 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3209 self, sidedata_helpers, sidedata, rev
3151 self, sidedata_helpers, sidedata, rev
3210 )
3152 )
3211 flags = flags | new_flags[0] & ~new_flags[1]
3153 flags = flags | new_flags[0] & ~new_flags[1]
3212
3154
3213 with destrevlog._writing(tr):
3155 with destrevlog._writing(tr):
3214 destrevlog._addrevision(
3156 destrevlog._addrevision(
3215 node,
3157 node,
3216 rawtext,
3158 rawtext,
3217 tr,
3159 tr,
3218 linkrev,
3160 linkrev,
3219 p1,
3161 p1,
3220 p2,
3162 p2,
3221 flags,
3163 flags,
3222 cachedelta,
3164 cachedelta,
3223 deltacomputer=deltacomputer,
3165 deltacomputer=deltacomputer,
3224 sidedata=sidedata,
3166 sidedata=sidedata,
3225 )
3167 )
3226
3168
3227 if addrevisioncb:
3169 if addrevisioncb:
3228 addrevisioncb(self, rev, node)
3170 addrevisioncb(self, rev, node)
3229
3171
3230 def censorrevision(self, tr, censornode, tombstone=b''):
3172 def censorrevision(self, tr, censornode, tombstone=b''):
3231 if self._format_version == REVLOGV0:
3173 if self._format_version == REVLOGV0:
3232 raise error.RevlogError(
3174 raise error.RevlogError(
3233 _(b'cannot censor with version %d revlogs')
3175 _(b'cannot censor with version %d revlogs')
3234 % self._format_version
3176 % self._format_version
3235 )
3177 )
3236 elif self._format_version == REVLOGV1:
3178 elif self._format_version == REVLOGV1:
3237 censor.v1_censor(self, tr, censornode, tombstone)
3179 censor.v1_censor(self, tr, censornode, tombstone)
3238 else:
3180 else:
3239 # revlog v2
3181 # revlog v2
3240 raise error.RevlogError(
3182 raise error.RevlogError(
3241 _(b'cannot censor with version %d revlogs')
3183 _(b'cannot censor with version %d revlogs')
3242 % self._format_version
3184 % self._format_version
3243 )
3185 )
3244
3186
3245 def verifyintegrity(self, state):
3187 def verifyintegrity(self, state):
3246 """Verifies the integrity of the revlog.
3188 """Verifies the integrity of the revlog.
3247
3189
3248 Yields ``revlogproblem`` instances describing problems that are
3190 Yields ``revlogproblem`` instances describing problems that are
3249 found.
3191 found.
3250 """
3192 """
3251 dd, di = self.checksize()
3193 dd, di = self.checksize()
3252 if dd:
3194 if dd:
3253 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3195 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3254 if di:
3196 if di:
3255 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3197 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3256
3198
3257 version = self._format_version
3199 version = self._format_version
3258
3200
3259 # The verifier tells us what version revlog we should be.
3201 # The verifier tells us what version revlog we should be.
3260 if version != state[b'expectedversion']:
3202 if version != state[b'expectedversion']:
3261 yield revlogproblem(
3203 yield revlogproblem(
3262 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3204 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3263 % (self.display_id, version, state[b'expectedversion'])
3205 % (self.display_id, version, state[b'expectedversion'])
3264 )
3206 )
3265
3207
3266 state[b'skipread'] = set()
3208 state[b'skipread'] = set()
3267 state[b'safe_renamed'] = set()
3209 state[b'safe_renamed'] = set()
3268
3210
3269 for rev in self:
3211 for rev in self:
3270 node = self.node(rev)
3212 node = self.node(rev)
3271
3213
3272 # Verify contents. 4 cases to care about:
3214 # Verify contents. 4 cases to care about:
3273 #
3215 #
3274 # common: the most common case
3216 # common: the most common case
3275 # rename: with a rename
3217 # rename: with a rename
3276 # meta: file content starts with b'\1\n', the metadata
3218 # meta: file content starts with b'\1\n', the metadata
3277 # header defined in filelog.py, but without a rename
3219 # header defined in filelog.py, but without a rename
3278 # ext: content stored externally
3220 # ext: content stored externally
3279 #
3221 #
3280 # More formally, their differences are shown below:
3222 # More formally, their differences are shown below:
3281 #
3223 #
3282 # | common | rename | meta | ext
3224 # | common | rename | meta | ext
3283 # -------------------------------------------------------
3225 # -------------------------------------------------------
3284 # flags() | 0 | 0 | 0 | not 0
3226 # flags() | 0 | 0 | 0 | not 0
3285 # renamed() | False | True | False | ?
3227 # renamed() | False | True | False | ?
3286 # rawtext[0:2]=='\1\n'| False | True | True | ?
3228 # rawtext[0:2]=='\1\n'| False | True | True | ?
3287 #
3229 #
3288 # "rawtext" means the raw text stored in revlog data, which
3230 # "rawtext" means the raw text stored in revlog data, which
3289 # could be retrieved by "rawdata(rev)". "text"
3231 # could be retrieved by "rawdata(rev)". "text"
3290 # mentioned below is "revision(rev)".
3232 # mentioned below is "revision(rev)".
3291 #
3233 #
3292 # There are 3 different lengths stored physically:
3234 # There are 3 different lengths stored physically:
3293 # 1. L1: rawsize, stored in revlog index
3235 # 1. L1: rawsize, stored in revlog index
3294 # 2. L2: len(rawtext), stored in revlog data
3236 # 2. L2: len(rawtext), stored in revlog data
3295 # 3. L3: len(text), stored in revlog data if flags==0, or
3237 # 3. L3: len(text), stored in revlog data if flags==0, or
3296 # possibly somewhere else if flags!=0
3238 # possibly somewhere else if flags!=0
3297 #
3239 #
3298 # L1 should be equal to L2. L3 could be different from them.
3240 # L1 should be equal to L2. L3 could be different from them.
3299 # "text" may or may not affect commit hash depending on flag
3241 # "text" may or may not affect commit hash depending on flag
3300 # processors (see flagutil.addflagprocessor).
3242 # processors (see flagutil.addflagprocessor).
3301 #
3243 #
3302 # | common | rename | meta | ext
3244 # | common | rename | meta | ext
3303 # -------------------------------------------------
3245 # -------------------------------------------------
3304 # rawsize() | L1 | L1 | L1 | L1
3246 # rawsize() | L1 | L1 | L1 | L1
3305 # size() | L1 | L2-LM | L1(*) | L1 (?)
3247 # size() | L1 | L2-LM | L1(*) | L1 (?)
3306 # len(rawtext) | L2 | L2 | L2 | L2
3248 # len(rawtext) | L2 | L2 | L2 | L2
3307 # len(text) | L2 | L2 | L2 | L3
3249 # len(text) | L2 | L2 | L2 | L3
3308 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3250 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3309 #
3251 #
3310 # LM: length of metadata, depending on rawtext
3252 # LM: length of metadata, depending on rawtext
3311 # (*): not ideal, see comment in filelog.size
3253 # (*): not ideal, see comment in filelog.size
3312 # (?): could be "- len(meta)" if the resolved content has
3254 # (?): could be "- len(meta)" if the resolved content has
3313 # rename metadata
3255 # rename metadata
3314 #
3256 #
3315 # Checks needed to be done:
3257 # Checks needed to be done:
3316 # 1. length check: L1 == L2, in all cases.
3258 # 1. length check: L1 == L2, in all cases.
3317 # 2. hash check: depending on flag processor, we may need to
3259 # 2. hash check: depending on flag processor, we may need to
3318 # use either "text" (external), or "rawtext" (in revlog).
3260 # use either "text" (external), or "rawtext" (in revlog).
3319
3261
3320 try:
3262 try:
3321 skipflags = state.get(b'skipflags', 0)
3263 skipflags = state.get(b'skipflags', 0)
3322 if skipflags:
3264 if skipflags:
3323 skipflags &= self.flags(rev)
3265 skipflags &= self.flags(rev)
3324
3266
3325 _verify_revision(self, skipflags, state, node)
3267 _verify_revision(self, skipflags, state, node)
3326
3268
3327 l1 = self.rawsize(rev)
3269 l1 = self.rawsize(rev)
3328 l2 = len(self.rawdata(node))
3270 l2 = len(self.rawdata(node))
3329
3271
3330 if l1 != l2:
3272 if l1 != l2:
3331 yield revlogproblem(
3273 yield revlogproblem(
3332 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3274 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3333 node=node,
3275 node=node,
3334 )
3276 )
3335
3277
3336 except error.CensoredNodeError:
3278 except error.CensoredNodeError:
3337 if state[b'erroroncensored']:
3279 if state[b'erroroncensored']:
3338 yield revlogproblem(
3280 yield revlogproblem(
3339 error=_(b'censored file data'), node=node
3281 error=_(b'censored file data'), node=node
3340 )
3282 )
3341 state[b'skipread'].add(node)
3283 state[b'skipread'].add(node)
3342 except Exception as e:
3284 except Exception as e:
3343 yield revlogproblem(
3285 yield revlogproblem(
3344 error=_(b'unpacking %s: %s')
3286 error=_(b'unpacking %s: %s')
3345 % (short(node), stringutil.forcebytestr(e)),
3287 % (short(node), stringutil.forcebytestr(e)),
3346 node=node,
3288 node=node,
3347 )
3289 )
3348 state[b'skipread'].add(node)
3290 state[b'skipread'].add(node)
3349
3291
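A consumer-side sketch (hypothetical `ui` and `state`): problems are yielded rather than raised, so verification can report several issues in one pass.

for problem in rl.verifyintegrity(state):
    # each revlogproblem carries at most one of `warning` or `error`,
    # plus the offending node when one is known.
    if problem.error:
        ui.warn(problem.error + b'\n')
    elif problem.warning:
        ui.warn(problem.warning + b'\n')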
3350 def storageinfo(
3292 def storageinfo(
3351 self,
3293 self,
3352 exclusivefiles=False,
3294 exclusivefiles=False,
3353 sharedfiles=False,
3295 sharedfiles=False,
3354 revisionscount=False,
3296 revisionscount=False,
3355 trackedsize=False,
3297 trackedsize=False,
3356 storedsize=False,
3298 storedsize=False,
3357 ):
3299 ):
3358 d = {}
3300 d = {}
3359
3301
3360 if exclusivefiles:
3302 if exclusivefiles:
3361 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3303 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3362 if not self._inline:
3304 if not self._inline:
3363 d[b'exclusivefiles'].append((self.opener, self._datafile))
3305 d[b'exclusivefiles'].append((self.opener, self._datafile))
3364
3306
3365 if sharedfiles:
3307 if sharedfiles:
3366 d[b'sharedfiles'] = []
3308 d[b'sharedfiles'] = []
3367
3309
3368 if revisionscount:
3310 if revisionscount:
3369 d[b'revisionscount'] = len(self)
3311 d[b'revisionscount'] = len(self)
3370
3312
3371 if trackedsize:
3313 if trackedsize:
3372 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3314 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3373
3315
3374 if storedsize:
3316 if storedsize:
3375 d[b'storedsize'] = sum(
3317 d[b'storedsize'] = sum(
3376 self.opener.stat(path).st_size for path in self.files()
3318 self.opener.stat(path).st_size for path in self.files()
3377 )
3319 )
3378
3320
3379 return d
3321 return d
3380
3322
3381 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3323 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3382 if not self.hassidedata:
3324 if not self.hassidedata:
3383 return
3325 return
3384 # revlog formats with sidedata support do not support inline
3326 # revlog formats with sidedata support do not support inline
3385 assert not self._inline
3327 assert not self._inline
3386 if not helpers[1] and not helpers[2]:
3328 if not helpers[1] and not helpers[2]:
3387 # Nothing to generate or remove
3329 # Nothing to generate or remove
3388 return
3330 return
3389
3331
3390 new_entries = []
3332 new_entries = []
3391 # append the new sidedata
3333 # append the new sidedata
3392 with self._writing(transaction):
3334 with self._writing(transaction):
3393 ifh, dfh, sdfh = self._writinghandles
3335 ifh, dfh, sdfh = self._writinghandles
3394 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3336 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3395
3337
3396 current_offset = sdfh.tell()
3338 current_offset = sdfh.tell()
3397 for rev in range(startrev, endrev + 1):
3339 for rev in range(startrev, endrev + 1):
3398 entry = self.index[rev]
3340 entry = self.index[rev]
3399 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3341 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3400 store=self,
3342 store=self,
3401 sidedata_helpers=helpers,
3343 sidedata_helpers=helpers,
3402 sidedata={},
3344 sidedata={},
3403 rev=rev,
3345 rev=rev,
3404 )
3346 )
3405
3347
3406 serialized_sidedata = sidedatautil.serialize_sidedata(
3348 serialized_sidedata = sidedatautil.serialize_sidedata(
3407 new_sidedata
3349 new_sidedata
3408 )
3350 )
3409
3351
3410 sidedata_compression_mode = COMP_MODE_INLINE
3352 sidedata_compression_mode = COMP_MODE_INLINE
3411 if serialized_sidedata and self.hassidedata:
3353 if serialized_sidedata and self.hassidedata:
3412 sidedata_compression_mode = COMP_MODE_PLAIN
3354 sidedata_compression_mode = COMP_MODE_PLAIN
3413 h, comp_sidedata = self.compress(serialized_sidedata)
3355 h, comp_sidedata = self.compress(serialized_sidedata)
3414 if (
3356 if (
3415 h != b'u'
3357 h != b'u'
3416 and comp_sidedata[0] != b'\0'
3358 and comp_sidedata[0] != b'\0'
3417 and len(comp_sidedata) < len(serialized_sidedata)
3359 and len(comp_sidedata) < len(serialized_sidedata)
3418 ):
3360 ):
3419 assert not h
3361 assert not h
3420 if (
3362 if (
3421 comp_sidedata[0]
3363 comp_sidedata[0]
3422 == self._docket.default_compression_header
3364 == self._docket.default_compression_header
3423 ):
3365 ):
3424 sidedata_compression_mode = COMP_MODE_DEFAULT
3366 sidedata_compression_mode = COMP_MODE_DEFAULT
3425 serialized_sidedata = comp_sidedata
3367 serialized_sidedata = comp_sidedata
3426 else:
3368 else:
3427 sidedata_compression_mode = COMP_MODE_INLINE
3369 sidedata_compression_mode = COMP_MODE_INLINE
3428 serialized_sidedata = comp_sidedata
3370 serialized_sidedata = comp_sidedata
3429 if entry[8] != 0 or entry[9] != 0:
3371 if entry[8] != 0 or entry[9] != 0:
3430 # rewriting entries that already have sidedata is not
3372 # rewriting entries that already have sidedata is not
3431 # supported yet, because it introduces garbage data in the
3373 # supported yet, because it introduces garbage data in the
3432 # revlog.
3374 # revlog.
3433 msg = b"rewriting existing sidedata is not supported yet"
3375 msg = b"rewriting existing sidedata is not supported yet"
3434 raise error.Abort(msg)
3376 raise error.Abort(msg)
3435
3377
3436 # Apply (potential) flags to add and to remove after running
3378 # Apply (potential) flags to add and to remove after running
3437 # the sidedata helpers
3379 # the sidedata helpers
3438 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3380 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3439 entry_update = (
3381 entry_update = (
3440 current_offset,
3382 current_offset,
3441 len(serialized_sidedata),
3383 len(serialized_sidedata),
3442 new_offset_flags,
3384 new_offset_flags,
3443 sidedata_compression_mode,
3385 sidedata_compression_mode,
3444 )
3386 )
3445
3387
3446 # the sidedata computation might have moved the file cursors around
3388 # the sidedata computation might have moved the file cursors around
3447 sdfh.seek(current_offset, os.SEEK_SET)
3389 sdfh.seek(current_offset, os.SEEK_SET)
3448 sdfh.write(serialized_sidedata)
3390 sdfh.write(serialized_sidedata)
3449 new_entries.append(entry_update)
3391 new_entries.append(entry_update)
3450 current_offset += len(serialized_sidedata)
3392 current_offset += len(serialized_sidedata)
3451 self._docket.sidedata_end = sdfh.tell()
3393 self._docket.sidedata_end = sdfh.tell()
3452
3394
3453 # rewrite the new index entries
3395 # rewrite the new index entries
3454 ifh.seek(startrev * self.index.entry_size)
3396 ifh.seek(startrev * self.index.entry_size)
3455 for i, e in enumerate(new_entries):
3397 for i, e in enumerate(new_entries):
3456 rev = startrev + i
3398 rev = startrev + i
3457 self.index.replace_sidedata_info(rev, *e)
3399 self.index.replace_sidedata_info(rev, *e)
3458 packed = self.index.entry_binary(rev)
3400 packed = self.index.entry_binary(rev)
3459 if rev == 0 and self._docket is None:
3401 if rev == 0 and self._docket is None:
3460 header = self._format_flags | self._format_version
3402 header = self._format_flags | self._format_version
3461 header = self.index.pack_header(header)
3403 header = self.index.pack_header(header)
3462 packed = header + packed
3404 packed = header + packed
3463 ifh.write(packed)
3405 ifh.write(packed)
@@ -1,206 +1,281 b''
1 # revlogdeltas.py - constants used for revlog logic.
1 # revlogdeltas.py - constants used for revlog logic.
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import struct
12 import struct
13
13
14 from ..interfaces import repository
14 from ..interfaces import repository
15
15
16 ### Internal utility constants
16 ### Internal utility constants
17
17
18 KIND_CHANGELOG = 1001 # over 256 so it cannot be confused with a byte value
18 KIND_CHANGELOG = 1001 # over 256 so it cannot be confused with a byte value
19 KIND_MANIFESTLOG = 1002
19 KIND_MANIFESTLOG = 1002
20 KIND_FILELOG = 1003
20 KIND_FILELOG = 1003
21 KIND_OTHER = 1004
21 KIND_OTHER = 1004
22
22
23 ALL_KINDS = {
23 ALL_KINDS = {
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_OTHER,
27 KIND_OTHER,
28 }
28 }
29
29
30 ### Index entry key
31 #
32 #
33 # Internal details
34 # ----------------
35 #
36 # A large part of the revlog logic deals with revisions' "index entries", tuple
37 # objects that contain the same "items" whatever the revlog version.
38 # Different versions will have different ways of storing these items (sometimes
39 # not having them at all), but the tuple will always be the same. New fields
40 # are usually added at the end to avoid breaking existing code that relies
41 # on the existing order. The fields are defined as follows:
42
43 # [0] offset:
44 # The byte index of the start of the revision's data chunk.
45 # That value is shifted up by 16 bits. Use "offset = field >> 16" to
46 # retrieve it.
47 #
48 # flags:
49 # A flag field that carries special information or changes the behavior
50 # of the revision. (see `REVIDX_*` constants for details)
51 # The flag field only occupies the first 16 bits of this field,
52 # use "flags = field & 0xFFFF" to retrieve the value.
53 ENTRY_DATA_OFFSET = 0
54
55 # [1] compressed length:
56 # The size, in bytes, of the chunk on disk
57 ENTRY_DATA_COMPRESSED_LENGTH = 1
58
59 # [2] uncompressed length:
60 # The size, in bytes, of the full revision once reconstructed.
61 ENTRY_DATA_UNCOMPRESSED_LENGTH = 2
62
63 # [3] base rev:
64 # Either the base of the revision delta chain (without general
65 # delta), or the base of the delta (stored in the data chunk)
66 # with general delta.
67 ENTRY_DELTA_BASE = 3
68
69 # [4] link rev:
70 # Changelog revision number of the changeset introducing this
71 # revision.
72 ENTRY_LINK_REV = 4
73
74 # [5] parent 1 rev:
75 # Revision number of the first parent
76 ENTRY_PARENT_1 = 5
77
78 # [6] parent 2 rev:
79 # Revision number of the second parent
80 ENTRY_PARENT_2 = 6
81
82 # [7] node id:
83 # The node id of the current revision
84 ENTRY_NODE_ID = 7
85
86 # [8] sidedata offset:
87 # The byte index of the start of the revision's side-data chunk.
88 ENTRY_SIDEDATA_OFFSET = 8
89
90 # [9] sidedata chunk length:
91 # The size, in bytes, of the revision's side-data chunk.
92 ENTRY_SIDEDATA_COMPRESSED_LENGTH = 9
93
94 # [10] data compression mode:
95 # two bits that detail the way the data chunk is compressed on disk.
96 # (see "COMP_MODE_*" constants for details). For revlog version 0 and
97 # 1 this will always be COMP_MODE_INLINE.
98 ENTRY_DATA_COMPRESSION_MODE = 10
99
100 # [11] side-data compression mode:
101 # two bits that detail the way the sidedata chunk is compressed on disk.
102 # (see "COMP_MODE_*" constants for details)
103 ENTRY_SIDEDATA_COMPRESSION_MODE = 11
104
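A small sketch of what these constants buy over bare tuple indexes, following the unpacking rules stated above (`revlog` and `rev` assumed):

entry = revlog.index[rev]
field = entry[ENTRY_DATA_OFFSET]
offset = field >> 16    # byte position of the revision's data chunk
flags = field & 0xFFFF  # REVIDX_* flags
base = entry[ENTRY_DELTA_BASE]
p1, p2 = entry[ENTRY_PARENT_1], entry[ENTRY_PARENT_2]
node = entry[ENTRY_NODE_ID]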
30 ### main revlog header
105 ### main revlog header
31
106
32 # We cannot rely on Struct.format: it is inconsistent for python <=3.6 versus above
107 # We cannot rely on Struct.format: it is inconsistent for python <=3.6 versus above
33 INDEX_HEADER_FMT = b">I"
108 INDEX_HEADER_FMT = b">I"
34 INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT)
109 INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT)
35
110
36 ## revlog version
111 ## revlog version
37 REVLOGV0 = 0
112 REVLOGV0 = 0
38 REVLOGV1 = 1
113 REVLOGV1 = 1
39 # Dummy value until file format is finalized.
114 # Dummy value until file format is finalized.
40 REVLOGV2 = 0xDEAD
115 REVLOGV2 = 0xDEAD
41 # Dummy value until file format is finalized.
116 # Dummy value until file format is finalized.
42 CHANGELOGV2 = 0xD34D
117 CHANGELOGV2 = 0xD34D
43
118
44 ## global revlog header flags
119 ## global revlog header flags
45 # Shared across v1 and v2.
120 # Shared across v1 and v2.
46 FLAG_INLINE_DATA = 1 << 16
121 FLAG_INLINE_DATA = 1 << 16
47 # Only used by v1, implied by v2.
122 # Only used by v1, implied by v2.
48 FLAG_GENERALDELTA = 1 << 17
123 FLAG_GENERALDELTA = 1 << 17
49 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
124 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
50 REVLOG_DEFAULT_FORMAT = REVLOGV1
125 REVLOG_DEFAULT_FORMAT = REVLOGV1
51 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
126 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
52 REVLOGV0_FLAGS = 0
127 REVLOGV0_FLAGS = 0
53 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
128 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
54 REVLOGV2_FLAGS = FLAG_INLINE_DATA
129 REVLOGV2_FLAGS = FLAG_INLINE_DATA
55 CHANGELOGV2_FLAGS = 0
130 CHANGELOGV2_FLAGS = 0
56
131
57 ### individual entry
132 ### individual entry
58
133
59 ## index v0:
134 ## index v0:
60 # 4 bytes: offset
135 # 4 bytes: offset
61 # 4 bytes: compressed length
136 # 4 bytes: compressed length
62 # 4 bytes: base rev
137 # 4 bytes: base rev
63 # 4 bytes: link rev
138 # 4 bytes: link rev
64 # 20 bytes: parent 1 nodeid
139 # 20 bytes: parent 1 nodeid
65 # 20 bytes: parent 2 nodeid
140 # 20 bytes: parent 2 nodeid
66 # 20 bytes: nodeid
141 # 20 bytes: nodeid
67 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
142 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
68
143
69 ## index v1
144 ## index v1
70 # 6 bytes: offset
145 # 6 bytes: offset
71 # 2 bytes: flags
146 # 2 bytes: flags
72 # 4 bytes: compressed length
147 # 4 bytes: compressed length
73 # 4 bytes: uncompressed length
148 # 4 bytes: uncompressed length
74 # 4 bytes: base rev
149 # 4 bytes: base rev
75 # 4 bytes: link rev
150 # 4 bytes: link rev
76 # 4 bytes: parent 1 rev
151 # 4 bytes: parent 1 rev
77 # 4 bytes: parent 2 rev
152 # 4 bytes: parent 2 rev
78 # 32 bytes: nodeid
153 # 32 bytes: nodeid
79 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
154 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
80 assert INDEX_ENTRY_V1.size == 32 * 2
155 assert INDEX_ENTRY_V1.size == 32 * 2
81
156
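For illustration, unpacking one raw 64-byte v1 entry with this Struct yields the eight index fields in the order listed above (a sketch; the `data` buffer is assumed):

(
    offset_flags,  # 6-byte offset << 16, ORed with the 2-byte flags
    comp_len,
    uncomp_len,
    base_rev,
    link_rev,
    p1_rev,
    p2_rev,
    node,          # 20-byte nodeid; the 12 trailing pad bytes are skipped
) = INDEX_ENTRY_V1.unpack(data)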
82 # 6 bytes: offset
157 # 6 bytes: offset
83 # 2 bytes: flags
158 # 2 bytes: flags
84 # 4 bytes: compressed length
159 # 4 bytes: compressed length
85 # 4 bytes: uncompressed length
160 # 4 bytes: uncompressed length
86 # 4 bytes: base rev
161 # 4 bytes: base rev
87 # 4 bytes: link rev
162 # 4 bytes: link rev
88 # 4 bytes: parent 1 rev
163 # 4 bytes: parent 1 rev
89 # 4 bytes: parent 2 rev
164 # 4 bytes: parent 2 rev
90 # 32 bytes: nodeid
165 # 32 bytes: nodeid
91 # 8 bytes: sidedata offset
166 # 8 bytes: sidedata offset
92 # 4 bytes: sidedata compressed length
167 # 4 bytes: sidedata compressed length
93 # 1 byte: compression mode (2 lower bits are data_compression_mode)
168 # 1 byte: compression mode (2 lower bits are data_compression_mode)
94 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
169 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
95 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
170 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
96 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
171 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
97
172
98 # 6 bytes: offset
173 # 6 bytes: offset
99 # 2 bytes: flags
174 # 2 bytes: flags
100 # 4 bytes: compressed length
175 # 4 bytes: compressed length
101 # 4 bytes: uncompressed length
176 # 4 bytes: uncompressed length
102 # 4 bytes: parent 1 rev
177 # 4 bytes: parent 1 rev
103 # 4 bytes: parent 2 rev
178 # 4 bytes: parent 2 rev
104 # 32 bytes: nodeid
179 # 32 bytes: nodeid
105 # 8 bytes: sidedata offset
180 # 8 bytes: sidedata offset
106 # 4 bytes: sidedata compressed length
181 # 4 bytes: sidedata compressed length
107 # 1 byte: compression mode (2 lower bits are data_compression_mode)
182 # 1 byte: compression mode (2 lower bits are data_compression_mode)
108 # 27 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
183 # 27 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
109 INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiB27x")
184 INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiB27x")
110 assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_CL_V2.size
185 assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_CL_V2.size
111
186
112 # revlog index flags
187 # revlog index flags
113
188
114 # For historical reasons, revlog's internal flags were exposed via the
189 # For historical reasons, revlog's internal flags were exposed via the
115 # wire protocol and are even exposed in parts of the storage APIs.
190 # wire protocol and are even exposed in parts of the storage APIs.
116
191
117 # revision has censor metadata, must be verified
192 # revision has censor metadata, must be verified
118 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
193 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
119 # revision hash does not match data (narrowhg)
194 # revision hash does not match data (narrowhg)
120 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
195 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
121 # revision data is stored externally
196 # revision data is stored externally
122 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
197 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
123 # revision changes files in a way that could affect copy tracing.
198 # revision changes files in a way that could affect copy tracing.
124 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
199 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
125 REVIDX_DEFAULT_FLAGS = 0
200 REVIDX_DEFAULT_FLAGS = 0
126 # stable order in which flags need to be processed and their processors applied
201 # stable order in which flags need to be processed and their processors applied
127 REVIDX_FLAGS_ORDER = [
202 REVIDX_FLAGS_ORDER = [
128 REVIDX_ISCENSORED,
203 REVIDX_ISCENSORED,
129 REVIDX_ELLIPSIS,
204 REVIDX_ELLIPSIS,
130 REVIDX_EXTSTORED,
205 REVIDX_EXTSTORED,
131 REVIDX_HASCOPIESINFO,
206 REVIDX_HASCOPIESINFO,
132 ]
207 ]
133
208
134 # bitmask for flags that could cause rawdata content change
209 # bitmask for flags that could cause rawdata content change
135 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
210 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
136
211
137 ## chunk compression mode constants:
212 ## chunk compression mode constants:
138 # These constants are used in revlog version >=2 to denote the compression used
213 # These constants are used in revlog version >=2 to denote the compression used
139 # for a chunk.
214 # for a chunk.
140
215
141 # The chunk uses no compression; the data stored on disk can be used directly
216 # The chunk uses no compression; the data stored on disk can be used directly
142 # as the chunk value, without any header information prefixed.
217 # as the chunk value, without any header information prefixed.
143 COMP_MODE_PLAIN = 0
218 COMP_MODE_PLAIN = 0
144
219
145 # The chunk uses the "default compression" for the revlog (usually defined
220 # The chunk uses the "default compression" for the revlog (usually defined
146 # in the revlog docket). A header is still used.
221 # in the revlog docket). A header is still used.
147 #
222 #
148 # XXX: keeping a header is probably not useful and we should probably drop it.
223 # XXX: keeping a header is probably not useful and we should probably drop it.
149 #
224 #
150 # XXX: The value of allow mixed type of compression in the revlog is unclear
225 # XXX: The value of allow mixed type of compression in the revlog is unclear
151 # and we should consider making PLAIN/DEFAULT the only available mode for
226 # and we should consider making PLAIN/DEFAULT the only available mode for
152 # revlog v2, disallowing INLINE mode.
227 # revlog v2, disallowing INLINE mode.
153 COMP_MODE_DEFAULT = 1
228 COMP_MODE_DEFAULT = 1
154
229
155 # Chunk use a compression mode stored "inline" at the start of the chunk
230 # Chunk use a compression mode stored "inline" at the start of the chunk
156 # itself. This is the mode always used for revlog version "0" and "1"
231 # itself. This is the mode always used for revlog version "0" and "1"
157 COMP_MODE_INLINE = 2
232 COMP_MODE_INLINE = 2
158
233
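# Illustrative sketch (not part of the module): how a reader might dispatch
# on these three modes. `default_decompress` and `inline_decompress` are
# hypothetical stand-ins for the revlog's configured default engine and the
# header-driven decompression used by v0/v1; the real code paths live in the
# revlog implementation itself.
def _resolve_chunk(comp_mode, data, default_decompress, inline_decompress):
    if comp_mode == COMP_MODE_PLAIN:
        # raw bytes, usable as-is, nothing to strip
        return data
    elif comp_mode == COMP_MODE_DEFAULT:
        # compressed with the revlog-wide default engine (see the docket)
        return default_decompress(data)
    elif comp_mode == COMP_MODE_INLINE:
        # the chunk's own leading header identifies the engine (v0/v1 style)
        return inline_decompress(data)
    raise ValueError('unknown compression mode: %d' % comp_mode)
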
SUPPORTED_FLAGS = {
    REVLOGV0: REVLOGV0_FLAGS,
    REVLOGV1: REVLOGV1_FLAGS,
    REVLOGV2: REVLOGV2_FLAGS,
    CHANGELOGV2: CHANGELOGV2_FLAGS,
}

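# Illustrative sketch (not part of the module): rejecting header flags that
# the given revlog version does not support. This is a hypothetical
# simplification of the validation an opener would perform.
def _check_header_flags(version, flags):
    supported = SUPPORTED_FLAGS.get(version)
    if supported is None:
        raise ValueError('unknown revlog version: %d' % version)
    unknown = flags & ~supported
    if unknown:
        raise ValueError(
            'unsupported flags 0x%x for revlog version %d' % (unknown, version)
        )
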
_no = lambda flags: False
_yes = lambda flags: True


def _from_flag(flag):
    return lambda flags: bool(flags & flag)


FEATURES_BY_VERSION = {
    REVLOGV0: {
        b'inline': _no,
        b'generaldelta': _no,
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV1: {
        b'inline': _from_flag(FLAG_INLINE_DATA),
        b'generaldelta': _from_flag(FLAG_GENERALDELTA),
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV2: {
        # The point of inline-revlog is to reduce the number of files used in
        # the store. Using a docket defeats this purpose, so we need other
        # means to reduce the number of files for revlogv2.
        b'inline': _no,
        b'generaldelta': _yes,
        b'sidedata': True,
        b'docket': True,
    },
    CHANGELOGV2: {
        b'inline': _no,
        # General delta is useless for the changelog since we don't do any
        # delta there.
        b'generaldelta': _no,
        b'sidedata': True,
        b'docket': True,
    },
}


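# Illustrative sketch (not part of the module): querying the feature table.
# The b'inline' and b'generaldelta' entries are callables taking the header
# flags, while b'sidedata' and b'docket' are plain booleans; the helper name
# below is hypothetical.
def _version_features(version, header_flags):
    features = FEATURES_BY_VERSION[version]
    return {
        'inline': features[b'inline'](header_flags),
        'generaldelta': features[b'generaldelta'](header_flags),
        'sidedata': features[b'sidedata'],
        'docket': features[b'docket'],
    }

# For example, a v1 header with FLAG_INLINE_DATA set is inline but has none
# of the other features:
assert _version_features(REVLOGV1, FLAG_INLINE_DATA) == {
    'inline': True,
    'generaldelta': False,
    'sidedata': False,
    'docket': False,
}
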
SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000