revlog: implement a "default compression" mode...
marmoute - r48029:ff9fd710 default
@@ -1,3364 +1,3384 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
+   COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


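# Editorial illustration (not part of the original module): a minimal,
# hypothetical helper showing the pack/unpack round trip performed by
# ``offset_type`` above and mirrored by the ``start``/``flags`` accessors
# on the revlog class below.
def _example_offset_type_roundtrip():
    packed = offset_type(1024, 0)  # 0 trivially satisfies the known-flags check
    # The byte offset lives in the high bits ...
    assert packed >> 16 == 1024
    # ... and the 16 low bits carry the REVIDX_* storage flags.
    assert packed & 0xFFFF == 0

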
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries",
    tuple objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items
    (sometimes not having them at all), but the tuple will always be the same.
    New fields are usually added at the end to avoid breaking existing code
    that relies on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of the revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
            A flag field that carries special information or changes the
            behavior of the revision. (see `REVIDX_*` constants for details)
            The flag field only occupies the first 16 bits of this field,
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    """

350
351
351 def __init__(
352 def __init__(
352 self,
353 self,
353 opener,
354 opener,
354 target,
355 target,
355 radix,
356 radix,
356 postfix=None, # only exist for `tmpcensored` now
357 postfix=None, # only exist for `tmpcensored` now
357 checkambig=False,
358 checkambig=False,
358 mmaplargeindex=False,
359 mmaplargeindex=False,
359 censorable=False,
360 censorable=False,
360 upperboundcomp=None,
361 upperboundcomp=None,
361 persistentnodemap=False,
362 persistentnodemap=False,
362 concurrencychecker=None,
363 concurrencychecker=None,
363 trypending=False,
364 trypending=False,
364 ):
365 ):
365 """
366 """
366 create a revlog object
367 create a revlog object
367
368
368 opener is a function that abstracts the file opening operation
369 opener is a function that abstracts the file opening operation
369 and can be used to implement COW semantics or the like.
370 and can be used to implement COW semantics or the like.
370
371
371 `target`: a (KIND, ID) tuple that identify the content stored in
372 `target`: a (KIND, ID) tuple that identify the content stored in
372 this revlog. It help the rest of the code to understand what the revlog
373 this revlog. It help the rest of the code to understand what the revlog
373 is about without having to resort to heuristic and index filename
374 is about without having to resort to heuristic and index filename
374 analysis. Note: that this must be reliably be set by normal code, but
375 analysis. Note: that this must be reliably be set by normal code, but
375 that test, debug, or performance measurement code might not set this to
376 that test, debug, or performance measurement code might not set this to
376 accurate value.
377 accurate value.
377 """
378 """
378 self.upperboundcomp = upperboundcomp
379 self.upperboundcomp = upperboundcomp
379
380
380 self.radix = radix
381 self.radix = radix
381
382
382 self._docket_file = None
383 self._docket_file = None
383 self._indexfile = None
384 self._indexfile = None
384 self._datafile = None
385 self._datafile = None
385 self._nodemap_file = None
386 self._nodemap_file = None
386 self.postfix = postfix
387 self.postfix = postfix
387 self._trypending = trypending
388 self._trypending = trypending
388 self.opener = opener
389 self.opener = opener
389 if persistentnodemap:
390 if persistentnodemap:
390 self._nodemap_file = nodemaputil.get_nodemap_file(self)
391 self._nodemap_file = nodemaputil.get_nodemap_file(self)
391
392
392 assert target[0] in ALL_KINDS
393 assert target[0] in ALL_KINDS
393 assert len(target) == 2
394 assert len(target) == 2
394 self.target = target
395 self.target = target
395 # When True, indexfile is opened with checkambig=True at writing, to
396 # When True, indexfile is opened with checkambig=True at writing, to
396 # avoid file stat ambiguity.
397 # avoid file stat ambiguity.
397 self._checkambig = checkambig
398 self._checkambig = checkambig
398 self._mmaplargeindex = mmaplargeindex
399 self._mmaplargeindex = mmaplargeindex
399 self._censorable = censorable
400 self._censorable = censorable
400 # 3-tuple of (node, rev, text) for a raw revision.
401 # 3-tuple of (node, rev, text) for a raw revision.
401 self._revisioncache = None
402 self._revisioncache = None
402 # Maps rev to chain base rev.
403 # Maps rev to chain base rev.
403 self._chainbasecache = util.lrucachedict(100)
404 self._chainbasecache = util.lrucachedict(100)
404 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
405 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
405 self._chunkcache = (0, b'')
406 self._chunkcache = (0, b'')
406 # How much data to read and cache into the raw revlog data cache.
407 # How much data to read and cache into the raw revlog data cache.
407 self._chunkcachesize = 65536
408 self._chunkcachesize = 65536
408 self._maxchainlen = None
409 self._maxchainlen = None
409 self._deltabothparents = True
410 self._deltabothparents = True
410 self.index = None
411 self.index = None
411 self._docket = None
412 self._docket = None
412 self._nodemap_docket = None
413 self._nodemap_docket = None
413 # Mapping of partial identifiers to full nodes.
414 # Mapping of partial identifiers to full nodes.
414 self._pcache = {}
415 self._pcache = {}
415 # Mapping of revision integer to full node.
416 # Mapping of revision integer to full node.
416 self._compengine = b'zlib'
417 self._compengine = b'zlib'
417 self._compengineopts = {}
418 self._compengineopts = {}
418 self._maxdeltachainspan = -1
419 self._maxdeltachainspan = -1
419 self._withsparseread = False
420 self._withsparseread = False
420 self._sparserevlog = False
421 self._sparserevlog = False
421 self.hassidedata = False
422 self.hassidedata = False
422 self._srdensitythreshold = 0.50
423 self._srdensitythreshold = 0.50
423 self._srmingapsize = 262144
424 self._srmingapsize = 262144
424
425
425 # Make copy of flag processors so each revlog instance can support
426 # Make copy of flag processors so each revlog instance can support
426 # custom flags.
427 # custom flags.
427 self._flagprocessors = dict(flagutil.flagprocessors)
428 self._flagprocessors = dict(flagutil.flagprocessors)
428
429
429 # 2-tuple of file handles being used for active writing.
430 # 2-tuple of file handles being used for active writing.
430 self._writinghandles = None
431 self._writinghandles = None
431 # prevent nesting of addgroup
432 # prevent nesting of addgroup
432 self._adding_group = None
433 self._adding_group = None
433
434
434 self._loadindex()
435 self._loadindex()
435
436
436 self._concurrencychecker = concurrencychecker
437 self._concurrencychecker = concurrencychecker
437
438
438 def _init_opts(self):
439 def _init_opts(self):
439 """process options (from above/config) to setup associated default revlog mode
440 """process options (from above/config) to setup associated default revlog mode
440
441
441 These values might be affected when actually reading on disk information.
442 These values might be affected when actually reading on disk information.
442
443
443 The relevant values are returned for use in _loadindex().
444 The relevant values are returned for use in _loadindex().
444
445
445 * newversionflags:
446 * newversionflags:
446 version header to use if we need to create a new revlog
447 version header to use if we need to create a new revlog
447
448
448 * mmapindexthreshold:
449 * mmapindexthreshold:
449 minimal index size for start to use mmap
450 minimal index size for start to use mmap
450
451
451 * force_nodemap:
452 * force_nodemap:
452 force the usage of a "development" version of the nodemap code
453 force the usage of a "development" version of the nodemap code
453 """
454 """
454 mmapindexthreshold = None
455 mmapindexthreshold = None
455 opts = self.opener.options
456 opts = self.opener.options
456
457
457 if b'revlogv2' in opts:
458 if b'revlogv2' in opts:
458 new_header = REVLOGV2 | FLAG_INLINE_DATA
459 new_header = REVLOGV2 | FLAG_INLINE_DATA
459 elif b'revlogv1' in opts:
460 elif b'revlogv1' in opts:
460 new_header = REVLOGV1 | FLAG_INLINE_DATA
461 new_header = REVLOGV1 | FLAG_INLINE_DATA
461 if b'generaldelta' in opts:
462 if b'generaldelta' in opts:
462 new_header |= FLAG_GENERALDELTA
463 new_header |= FLAG_GENERALDELTA
463 elif b'revlogv0' in self.opener.options:
464 elif b'revlogv0' in self.opener.options:
464 new_header = REVLOGV0
465 new_header = REVLOGV0
465 else:
466 else:
466 new_header = REVLOG_DEFAULT_VERSION
467 new_header = REVLOG_DEFAULT_VERSION
467
468
468 if b'chunkcachesize' in opts:
469 if b'chunkcachesize' in opts:
469 self._chunkcachesize = opts[b'chunkcachesize']
470 self._chunkcachesize = opts[b'chunkcachesize']
470 if b'maxchainlen' in opts:
471 if b'maxchainlen' in opts:
471 self._maxchainlen = opts[b'maxchainlen']
472 self._maxchainlen = opts[b'maxchainlen']
472 if b'deltabothparents' in opts:
473 if b'deltabothparents' in opts:
473 self._deltabothparents = opts[b'deltabothparents']
474 self._deltabothparents = opts[b'deltabothparents']
474 self._lazydelta = bool(opts.get(b'lazydelta', True))
475 self._lazydelta = bool(opts.get(b'lazydelta', True))
475 self._lazydeltabase = False
476 self._lazydeltabase = False
476 if self._lazydelta:
477 if self._lazydelta:
477 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
478 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
478 if b'compengine' in opts:
479 if b'compengine' in opts:
479 self._compengine = opts[b'compengine']
480 self._compengine = opts[b'compengine']
480 if b'zlib.level' in opts:
481 if b'zlib.level' in opts:
481 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
482 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
482 if b'zstd.level' in opts:
483 if b'zstd.level' in opts:
483 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
484 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
484 if b'maxdeltachainspan' in opts:
485 if b'maxdeltachainspan' in opts:
485 self._maxdeltachainspan = opts[b'maxdeltachainspan']
486 self._maxdeltachainspan = opts[b'maxdeltachainspan']
486 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
487 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
487 mmapindexthreshold = opts[b'mmapindexthreshold']
488 mmapindexthreshold = opts[b'mmapindexthreshold']
488 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
489 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
489 withsparseread = bool(opts.get(b'with-sparse-read', False))
490 withsparseread = bool(opts.get(b'with-sparse-read', False))
490 # sparse-revlog forces sparse-read
491 # sparse-revlog forces sparse-read
491 self._withsparseread = self._sparserevlog or withsparseread
492 self._withsparseread = self._sparserevlog or withsparseread
492 if b'sparse-read-density-threshold' in opts:
493 if b'sparse-read-density-threshold' in opts:
493 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
494 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
494 if b'sparse-read-min-gap-size' in opts:
495 if b'sparse-read-min-gap-size' in opts:
495 self._srmingapsize = opts[b'sparse-read-min-gap-size']
496 self._srmingapsize = opts[b'sparse-read-min-gap-size']
496 if opts.get(b'enableellipsis'):
497 if opts.get(b'enableellipsis'):
497 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
498 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
498
499
499 # revlog v0 doesn't have flag processors
500 # revlog v0 doesn't have flag processors
500 for flag, processor in pycompat.iteritems(
501 for flag, processor in pycompat.iteritems(
501 opts.get(b'flagprocessors', {})
502 opts.get(b'flagprocessors', {})
502 ):
503 ):
503 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
504 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
504
505
505 if self._chunkcachesize <= 0:
506 if self._chunkcachesize <= 0:
506 raise error.RevlogError(
507 raise error.RevlogError(
507 _(b'revlog chunk cache size %r is not greater than 0')
508 _(b'revlog chunk cache size %r is not greater than 0')
508 % self._chunkcachesize
509 % self._chunkcachesize
509 )
510 )
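        # Editorial note (not in the original): a power of two has exactly one
        # bit set, so ``x & (x - 1) == 0`` only for such values; e.g.
        # 65536 & 65535 == 0, while 65537 & 65536 != 0.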
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

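        # Editorial note (not in the original): the index starts with a
        # big-endian u32 whose low 16 bits hold the revlog version and whose
        # high 16 bits hold feature flags; e.g. a v1 inline general-delta
        # revlog presumably starts with
        # 0x00030001 == REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA.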
        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

+    @util.propertycache
+    def _decompressor(self):
+        """the default decompressor"""
+        if self._docket is None:
+            return None
+        t = self._docket.default_compression_header
+        c = self._get_decompressor(t)
+        return c.decompress

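    # Editorial sketch (not part of the original source): with a docket,
    # chunks stored with COMP_MODE_DEFAULT omit their per-chunk compression
    # header and rely on the docket-wide default instead.  Conceptually,
    # chunk decoding dispatches on the index's compression-mode field:
    #
    #     if comp_mode == COMP_MODE_PLAIN:
    #         text = chunk                      # stored as-is
    #     elif comp_mode == COMP_MODE_DEFAULT:
    #         text = self._decompressor(chunk)  # default engine, no header
    #     else:  # COMP_MODE_INLINE
    #         text = self.decompress(chunk)     # engine named in the chunk
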
    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

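    # Editorial usage note (not in the original): callers typically read as
    #
    #     with self._datareadfp() as fp:
    #         fp.seek(offset)
    #         data = fp.read(length)
    #
    # which transparently reuses an active write handle when one exists.
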
    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
929 if entry[5] == nullrev:
939 if entry[5] == nullrev:
930 return entry[6], entry[5]
940 return entry[6], entry[5]
931 else:
941 else:
932 return entry[5], entry[6]
942 return entry[5], entry[6]
933
943
934 # fast parentrevs(rev) where rev isn't filtered
944 # fast parentrevs(rev) where rev isn't filtered
935 _uncheckedparentrevs = parentrevs
945 _uncheckedparentrevs = parentrevs
936
946
937 def node(self, rev):
947 def node(self, rev):
938 try:
948 try:
939 return self.index[rev][7]
949 return self.index[rev][7]
940 except IndexError:
950 except IndexError:
941 if rev == wdirrev:
951 if rev == wdirrev:
942 raise error.WdirUnsupported
952 raise error.WdirUnsupported
943 raise
953 raise
944
954
945 # Derived from index values.
955 # Derived from index values.
946
956
947 def end(self, rev):
957 def end(self, rev):
948 return self.start(rev) + self.length(rev)
958 return self.start(rev) + self.length(rev)
949
959
950 def parents(self, node):
960 def parents(self, node):
951 i = self.index
961 i = self.index
952 d = i[self.rev(node)]
962 d = i[self.rev(node)]
953 # inline node() to avoid function call overhead
963 # inline node() to avoid function call overhead
954 if d[5] == self.nullid:
964 if d[5] == self.nullid:
955 return i[d[6]][7], i[d[5]][7]
965 return i[d[6]][7], i[d[5]][7]
956 else:
966 else:
957 return i[d[5]][7], i[d[6]][7]
967 return i[d[5]][7], i[d[6]][7]
958
968
959 def chainlen(self, rev):
969 def chainlen(self, rev):
960 return self._chaininfo(rev)[0]
970 return self._chaininfo(rev)[0]
961
971
962 def _chaininfo(self, rev):
972 def _chaininfo(self, rev):
963 chaininfocache = self._chaininfocache
973 chaininfocache = self._chaininfocache
964 if rev in chaininfocache:
974 if rev in chaininfocache:
965 return chaininfocache[rev]
975 return chaininfocache[rev]
966 index = self.index
976 index = self.index
967 generaldelta = self._generaldelta
977 generaldelta = self._generaldelta
968 iterrev = rev
978 iterrev = rev
969 e = index[iterrev]
979 e = index[iterrev]
970 clen = 0
980 clen = 0
971 compresseddeltalen = 0
981 compresseddeltalen = 0
972 while iterrev != e[3]:
982 while iterrev != e[3]:
973 clen += 1
983 clen += 1
974 compresseddeltalen += e[1]
984 compresseddeltalen += e[1]
975 if generaldelta:
985 if generaldelta:
976 iterrev = e[3]
986 iterrev = e[3]
977 else:
987 else:
978 iterrev -= 1
988 iterrev -= 1
979 if iterrev in chaininfocache:
989 if iterrev in chaininfocache:
980 t = chaininfocache[iterrev]
990 t = chaininfocache[iterrev]
981 clen += t[0]
991 clen += t[0]
982 compresseddeltalen += t[1]
992 compresseddeltalen += t[1]
983 break
993 break
984 e = index[iterrev]
994 e = index[iterrev]
985 else:
995 else:
986 # Add text length of base since decompressing that also takes
996 # Add text length of base since decompressing that also takes
987 # work. For cache hits the length is already included.
997 # work. For cache hits the length is already included.
988 compresseddeltalen += e[1]
998 compresseddeltalen += e[1]
989 r = (clen, compresseddeltalen)
999 r = (clen, compresseddeltalen)
990 chaininfocache[rev] = r
1000 chaininfocache[rev] = r
991 return r
1001 return r
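
    # Illustrative sketch (assuming a revlog ``rl`` where revision ``r``
    # deltas against ``b`` and ``b`` is a full snapshot):
    #
    #   clen, compressed = rl._chaininfo(r)
    #   # clen == 1 (one delta to apply on top of the base)
    #   # compressed == stored size of r's delta + stored size of b's chunk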

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
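
    # Illustrative sketch (assuming a general-delta revlog ``rl`` where
    # revision 5 deltas against 3 and 3 is a full snapshot):
    #
    #   rl._deltachain(5)             # -> ([3, 5], False)
    #   rl._deltachain(5, stoprev=3)  # -> ([5], True)
    #
    # ``revision()`` replays such a chain from its base to rebuild the text.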

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
1188 def nodesbetween(self, roots=None, heads=None):
1198 def nodesbetween(self, roots=None, heads=None):
1189 """Return a topological path from 'roots' to 'heads'.
1199 """Return a topological path from 'roots' to 'heads'.
1190
1200
1191 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1201 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1192 topologically sorted list of all nodes N that satisfy both of
1202 topologically sorted list of all nodes N that satisfy both of
1193 these constraints:
1203 these constraints:
1194
1204
1195 1. N is a descendant of some node in 'roots'
1205 1. N is a descendant of some node in 'roots'
1196 2. N is an ancestor of some node in 'heads'
1206 2. N is an ancestor of some node in 'heads'
1197
1207
1198 Every node is considered to be both a descendant and an ancestor
1208 Every node is considered to be both a descendant and an ancestor
1199 of itself, so every reachable node in 'roots' and 'heads' will be
1209 of itself, so every reachable node in 'roots' and 'heads' will be
1200 included in 'nodes'.
1210 included in 'nodes'.
1201
1211
1202 'outroots' is the list of reachable nodes in 'roots', i.e., the
1212 'outroots' is the list of reachable nodes in 'roots', i.e., the
1203 subset of 'roots' that is returned in 'nodes'. Likewise,
1213 subset of 'roots' that is returned in 'nodes'. Likewise,
1204 'outheads' is the subset of 'heads' that is also in 'nodes'.
1214 'outheads' is the subset of 'heads' that is also in 'nodes'.
1205
1215
1206 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1216 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1207 unspecified, uses nullid as the only root. If 'heads' is
1217 unspecified, uses nullid as the only root. If 'heads' is
1208 unspecified, uses list of all of the revlog's heads."""
1218 unspecified, uses list of all of the revlog's heads."""
1209 nonodes = ([], [], [])
1219 nonodes = ([], [], [])
1210 if roots is not None:
1220 if roots is not None:
1211 roots = list(roots)
1221 roots = list(roots)
1212 if not roots:
1222 if not roots:
1213 return nonodes
1223 return nonodes
1214 lowestrev = min([self.rev(n) for n in roots])
1224 lowestrev = min([self.rev(n) for n in roots])
1215 else:
1225 else:
1216 roots = [self.nullid] # Everybody's a descendant of nullid
1226 roots = [self.nullid] # Everybody's a descendant of nullid
1217 lowestrev = nullrev
1227 lowestrev = nullrev
1218 if (lowestrev == nullrev) and (heads is None):
1228 if (lowestrev == nullrev) and (heads is None):
1219 # We want _all_ the nodes!
1229 # We want _all_ the nodes!
1220 return (
1230 return (
1221 [self.node(r) for r in self],
1231 [self.node(r) for r in self],
1222 [self.nullid],
1232 [self.nullid],
1223 list(self.heads()),
1233 list(self.heads()),
1224 )
1234 )
1225 if heads is None:
1235 if heads is None:
1226 # All nodes are ancestors, so the latest ancestor is the last
1236 # All nodes are ancestors, so the latest ancestor is the last
1227 # node.
1237 # node.
1228 highestrev = len(self) - 1
1238 highestrev = len(self) - 1
1229 # Set ancestors to None to signal that every node is an ancestor.
1239 # Set ancestors to None to signal that every node is an ancestor.
1230 ancestors = None
1240 ancestors = None
1231 # Set heads to an empty dictionary for later discovery of heads
1241 # Set heads to an empty dictionary for later discovery of heads
1232 heads = {}
1242 heads = {}
1233 else:
1243 else:
1234 heads = list(heads)
1244 heads = list(heads)
1235 if not heads:
1245 if not heads:
1236 return nonodes
1246 return nonodes
1237 ancestors = set()
1247 ancestors = set()
1238 # Turn heads into a dictionary so we can remove 'fake' heads.
1248 # Turn heads into a dictionary so we can remove 'fake' heads.
1239 # Also, later we will be using it to filter out the heads we can't
1249 # Also, later we will be using it to filter out the heads we can't
1240 # find from roots.
1250 # find from roots.
1241 heads = dict.fromkeys(heads, False)
1251 heads = dict.fromkeys(heads, False)
1242 # Start at the top and keep marking parents until we're done.
1252 # Start at the top and keep marking parents until we're done.
1243 nodestotag = set(heads)
1253 nodestotag = set(heads)
1244 # Remember where the top was so we can use it as a limit later.
1254 # Remember where the top was so we can use it as a limit later.
1245 highestrev = max([self.rev(n) for n in nodestotag])
1255 highestrev = max([self.rev(n) for n in nodestotag])
1246 while nodestotag:
1256 while nodestotag:
1247 # grab a node to tag
1257 # grab a node to tag
1248 n = nodestotag.pop()
1258 n = nodestotag.pop()
1249 # Never tag nullid
1259 # Never tag nullid
1250 if n == self.nullid:
1260 if n == self.nullid:
1251 continue
1261 continue
1252 # A node's revision number represents its place in a
1262 # A node's revision number represents its place in a
1253 # topologically sorted list of nodes.
1263 # topologically sorted list of nodes.
1254 r = self.rev(n)
1264 r = self.rev(n)
1255 if r >= lowestrev:
1265 if r >= lowestrev:
1256 if n not in ancestors:
1266 if n not in ancestors:
1257 # If we are possibly a descendant of one of the roots
1267 # If we are possibly a descendant of one of the roots
1258 # and we haven't already been marked as an ancestor
1268 # and we haven't already been marked as an ancestor
1259 ancestors.add(n) # Mark as ancestor
1269 ancestors.add(n) # Mark as ancestor
1260 # Add non-nullid parents to list of nodes to tag.
1270 # Add non-nullid parents to list of nodes to tag.
1261 nodestotag.update(
1271 nodestotag.update(
1262 [p for p in self.parents(n) if p != self.nullid]
1272 [p for p in self.parents(n) if p != self.nullid]
1263 )
1273 )
1264 elif n in heads: # We've seen it before, is it a fake head?
1274 elif n in heads: # We've seen it before, is it a fake head?
1265 # So it is, real heads should not be the ancestors of
1275 # So it is, real heads should not be the ancestors of
1266 # any other heads.
1276 # any other heads.
1267 heads.pop(n)
1277 heads.pop(n)
1268 if not ancestors:
1278 if not ancestors:
1269 return nonodes
1279 return nonodes
1270 # Now that we have our set of ancestors, we want to remove any
1280 # Now that we have our set of ancestors, we want to remove any
1271 # roots that are not ancestors.
1281 # roots that are not ancestors.
1272
1282
1273 # If one of the roots was nullid, everything is included anyway.
1283 # If one of the roots was nullid, everything is included anyway.
1274 if lowestrev > nullrev:
1284 if lowestrev > nullrev:
1275 # But, since we weren't, let's recompute the lowest rev to not
1285 # But, since we weren't, let's recompute the lowest rev to not
1276 # include roots that aren't ancestors.
1286 # include roots that aren't ancestors.
1277
1287
1278 # Filter out roots that aren't ancestors of heads
1288 # Filter out roots that aren't ancestors of heads
1279 roots = [root for root in roots if root in ancestors]
1289 roots = [root for root in roots if root in ancestors]
1280 # Recompute the lowest revision
1290 # Recompute the lowest revision
1281 if roots:
1291 if roots:
1282 lowestrev = min([self.rev(root) for root in roots])
1292 lowestrev = min([self.rev(root) for root in roots])
1283 else:
1293 else:
1284 # No more roots? Return empty list
1294 # No more roots? Return empty list
1285 return nonodes
1295 return nonodes
1286 else:
1296 else:
1287 # We are descending from nullid, and don't need to care about
1297 # We are descending from nullid, and don't need to care about
1288 # any other roots.
1298 # any other roots.
1289 lowestrev = nullrev
1299 lowestrev = nullrev
1290 roots = [self.nullid]
1300 roots = [self.nullid]
1291 # Transform our roots list into a set.
1301 # Transform our roots list into a set.
1292 descendants = set(roots)
1302 descendants = set(roots)
1293 # Also, keep the original roots so we can filter out roots that aren't
1303 # Also, keep the original roots so we can filter out roots that aren't
1294 # 'real' roots (i.e. are descended from other roots).
1304 # 'real' roots (i.e. are descended from other roots).
1295 roots = descendants.copy()
1305 roots = descendants.copy()
1296 # Our topologically sorted list of output nodes.
1306 # Our topologically sorted list of output nodes.
1297 orderedout = []
1307 orderedout = []
1298 # Don't start at nullid since we don't want nullid in our output list,
1308 # Don't start at nullid since we don't want nullid in our output list,
1299 # and if nullid shows up in descendants, empty parents will look like
1309 # and if nullid shows up in descendants, empty parents will look like
1300 # they're descendants.
1310 # they're descendants.
1301 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1311 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1302 n = self.node(r)
1312 n = self.node(r)
1303 isdescendant = False
1313 isdescendant = False
1304 if lowestrev == nullrev: # Everybody is a descendant of nullid
1314 if lowestrev == nullrev: # Everybody is a descendant of nullid
1305 isdescendant = True
1315 isdescendant = True
1306 elif n in descendants:
1316 elif n in descendants:
1307 # n is already a descendant
1317 # n is already a descendant
1308 isdescendant = True
1318 isdescendant = True
1309 # This check only needs to be done here because all the roots
1319 # This check only needs to be done here because all the roots
1310 # will start being marked is descendants before the loop.
1320 # will start being marked is descendants before the loop.
1311 if n in roots:
1321 if n in roots:
1312 # If n was a root, check if it's a 'real' root.
1322 # If n was a root, check if it's a 'real' root.
1313 p = tuple(self.parents(n))
1323 p = tuple(self.parents(n))
1314 # If any of its parents are descendants, it's not a root.
1324 # If any of its parents are descendants, it's not a root.
1315 if (p[0] in descendants) or (p[1] in descendants):
1325 if (p[0] in descendants) or (p[1] in descendants):
1316 roots.remove(n)
1326 roots.remove(n)
1317 else:
1327 else:
1318 p = tuple(self.parents(n))
1328 p = tuple(self.parents(n))
1319 # A node is a descendant if either of its parents are
1329 # A node is a descendant if either of its parents are
1320 # descendants. (We seeded the dependents list with the roots
1330 # descendants. (We seeded the dependents list with the roots
1321 # up there, remember?)
1331 # up there, remember?)
1322 if (p[0] in descendants) or (p[1] in descendants):
1332 if (p[0] in descendants) or (p[1] in descendants):
1323 descendants.add(n)
1333 descendants.add(n)
1324 isdescendant = True
1334 isdescendant = True
1325 if isdescendant and ((ancestors is None) or (n in ancestors)):
1335 if isdescendant and ((ancestors is None) or (n in ancestors)):
1326 # Only include nodes that are both descendants and ancestors.
1336 # Only include nodes that are both descendants and ancestors.
1327 orderedout.append(n)
1337 orderedout.append(n)
1328 if (ancestors is not None) and (n in heads):
1338 if (ancestors is not None) and (n in heads):
1329 # We're trying to figure out which heads are reachable
1339 # We're trying to figure out which heads are reachable
1330 # from roots.
1340 # from roots.
1331 # Mark this head as having been reached
1341 # Mark this head as having been reached
1332 heads[n] = True
1342 heads[n] = True
1333 elif ancestors is None:
1343 elif ancestors is None:
1334 # Otherwise, we're trying to discover the heads.
1344 # Otherwise, we're trying to discover the heads.
1335 # Assume this is a head because if it isn't, the next step
1345 # Assume this is a head because if it isn't, the next step
1336 # will eventually remove it.
1346 # will eventually remove it.
1337 heads[n] = True
1347 heads[n] = True
1338 # But, obviously its parents aren't.
1348 # But, obviously its parents aren't.
1339 for p in self.parents(n):
1349 for p in self.parents(n):
1340 heads.pop(p, None)
1350 heads.pop(p, None)
1341 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1351 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1342 roots = list(roots)
1352 roots = list(roots)
1343 assert orderedout
1353 assert orderedout
1344 assert roots
1354 assert roots
1345 assert heads
1355 assert heads
1346 return (orderedout, roots, heads)
1356 return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
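
    # Illustrative sketch (assuming a revlog ``rl`` whose tip is revision 2
    # with binary node ``n``): ``_match`` only resolves exact identifiers;
    # partial prefixes fall through to ``_partialmatch`` below:
    #
    #   rl._match(2)        # -> n (revision number)
    #   rl._match(b'2')     # -> n (str(rev))
    #   rl._match(b'-1')    # -> n (negative revs count back from the end)
    #   rl._match(n)        # -> n (full binary node)
    #   rl._match(hex(n))   # -> n (full 40-character hex node)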

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
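
    # Illustrative sketch (assuming ``b'1a'`` prefixes exactly one node
    # ``n`` in ``rl``):
    #
    #   rl._partialmatch(b'1a')  # -> n, and caches it in self._pcache
    #   rl._partialmatch(b'ff')  # may raise WdirUnsupported: any prefix of
    #                            # 'ffff...' is ambiguous with the wdir id
    #   rl._partialmatch(b'zz')  # -> None; bin() raises TypeError on
    #                            # non-hex input, which is swallowed above
    #
    # a prefix shared by several real nodes raises
    # AmbiguousPrefixLookupError instead.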

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data
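
    # Illustrative sketch (assuming an empty cache): the cache only grows
    # when writes are exactly contiguous, otherwise it is replaced:
    #
    #   rl._cachesegment(0, b'abcd')   # cache is (0, b'abcd')
    #   rl._cachesegment(4, b'efgh')   # contiguous -> (0, b'abcdefgh')
    #   rl._cachesegment(100, b'xy')   # gap -> cache resets to (100, b'xy')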

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
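
    # Worked example of the alignment above (assuming the default
    # ``cachesize = 65536``, a power of two):
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                      # 65536, rounded down
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536
    #              # = 131072 - 65536 = 65536, rounded up
    #
    # so one aligned 64KiB window is read and cached, and the caller gets
    # the 100-byte slice at startoffset = 70000 - 65536 = 4464.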

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)
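
    # Illustrative sketch of the inline adjustment above: in an inline
    # revlog, index entries and data chunks are interleaved in one file, so
    # revision ``rev``'s data sits after ``rev + 1`` index entries.
    # Assuming the 64-byte v1 entry size:
    #
    #   rev 0 data starts at 1 * 64
    #   rev 1 data starts at 2 * 64 + length(0)
    #
    # which is the ``(startrev + 1) * entry_size`` shift applied above.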

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
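
    # The three modes dispatched above, in brief: COMP_MODE_PLAIN stores
    # the chunk as-is; COMP_MODE_INLINE keeps the historical format where
    # the first byte of the chunk names the compressor (e.g. b'u' for
    # uncompressed, b'x' for zlib) and ``decompress()`` dispatches on it;
    # the new COMP_MODE_DEFAULT stores data compressed with the revlog's
    # default engine, so no per-chunk marker is needed and
    # ``self._decompressor`` can be applied directly:
    #
    #   if self.index[rev][10] == COMP_MODE_DEFAULT:
    #       text = self._decompressor(raw_chunk)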

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l
1849
1865
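    # The speedup of `_chunks` over repeated `_chunk()` calls comes from
    # reading one contiguous segment per slice and then carving zero-copy
    # buffers out of it, so only decompression, not I/O, is paid per
    # revision. The per-revision slicing boils down to:
    #
    #     c = buffer(data, chunkstart - offset, chunklength)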
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

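    # In a sparse revlog, "snapshot" generalizes "full text": a revision is a
    # snapshot when its delta does not apply against one of its parents. A
    # hypothetical chain, for illustration only:
    #
    #     rev 0: full text                      -> snapshot (base == rev)
    #     rev 5: delta against rev 0, which is  -> intermediate snapshot if
    #            not p1 or p2 of rev 5             rev 0 is itself one
    #     rev 6: delta against its parent rev 5 -> not a snapshot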
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

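    # The fast path above matters: when rev2 is already stored as a delta
    # against rev1, the stored chunk *is* the requested diff and neither
    # revision needs to be reconstructed. A sketch of the invariant, assuming
    # an open revlog `rl` (illustrative names):
    #
    #     delta = rl.revdiff(rev1, rev2)
    #     assert rl.rawdata(rev2) == mdiff.patch(rl.rawdata(rev1), delta)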
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

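    # End-to-end shape of a read through `_revisiondata`, assuming an open
    # revlog `rl` (a sketch; error handling elided):
    #
    #     rev = rl.rev(node)  # node -> revision number
    #     text, sidedata = rl._revisiondata(rev)
    #
    # Internally: `_rawtext()` rebuilds the raw bytes from the delta chain,
    # flag processors transform them if any flag is set, and `checkhash()`
    # verifies the result against the node when required.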
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

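    # Reconstruction above happens in three steps: (1) walk the index to
    # collect the delta chain, stopping early when the cached revision can
    # serve as a base, (2) fetch and decompress every chunk of the chain in a
    # single batched `_chunks()` call, (3) fold the deltas over the base:
    #
    #     rawtext = mdiff.patches(basetext, bins)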
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

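    # The default implementation hashes the sorted parent nodes followed by
    # the text. Conceptually (a sketch of storageutil.hashrevisionsha1):
    #
    #     s = hashlib.sha1(min(p1, p2))
    #     s.update(max(p1, p2))
    #     s.update(text)
    #     node = s.digest()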
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional, reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

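    # Typical usage of the `_writing` context manager above (a sketch): all
    # writes must happen inside it so that file handles, transaction entries
    # and the docket are set up and torn down consistently:
    #
    #     with self._writing(transaction):
    #         self._addrevision(node, rawtext, transaction, ...)
    #
    # Re-entrant uses are cheap: when handles already exist, it just yields.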
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

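    # The (header, data) pair returned by `compress` encodes three cases:
    #
    #     (b'', compressed)  # the engine embeds its own header in the data
    #     (b'', data)        # stored plain; safe because data starts with \0
    #     (b'u', data)       # uncompressed, behind an explicit 'u' marker
    #
    # `decompress` below dispatches on that first byte accordingly.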
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

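    # First-byte routing used by `decompress`:
    #
    #     b'x'  -> zlib-compressed chunk (the historical default)
    #     b'\0' -> plain chunk, returned as-is
    #     b'u'  -> uncompressed chunk behind a one-byte marker (stripped)
    #     other -> resolved through `self._get_decompressor`, e.g. zstd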
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare them uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

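        # Summary of the selection above, which is the heart of the "default
        # compression" mode: with a docket (revlog-v2), a chunk compressed by
        # the default engine drops its per-chunk header and is recorded in
        # the index as COMP_MODE_DEFAULT; chunks stored verbatim become
        # COMP_MODE_PLAIN; everything else keeps its inline header and is
        # recorded as COMP_MODE_INLINE, the only mode older formats support.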
        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this O(1), revlog v2 needs a docket
        file to store that information: since sidedata can be rewritten to the
        end of the data file within a transaction, you can have cases where, for
        example, rev `n` does not have sidedata while rev `n - 1` does, leading
        to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

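    # On-disk effect of the two branches above (a sketch):
    #
    #     split revlog:  .d: [chunk][sidedata]         .i: [entry]
    #     inline revlog: .i: [entry][chunk][sidedata]  (single file)
    #
    # which is why the inline branch shifts `offset` by
    # `curr * self.index.entry_size` before registering the index file with
    # the transaction.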
2615 def addgroup(
2635 def addgroup(
2616 self,
2636 self,
2617 deltas,
2637 deltas,
2618 linkmapper,
2638 linkmapper,
2619 transaction,
2639 transaction,
2620 alwayscache=False,
2640 alwayscache=False,
2621 addrevisioncb=None,
2641 addrevisioncb=None,
2622 duplicaterevisioncb=None,
2642 duplicaterevisioncb=None,
2623 ):
2643 ):
2624 """
2644 """
2625 add a delta group
2645 add a delta group
2626
2646
2627 given a set of deltas, add them to the revision log. the
2647 given a set of deltas, add them to the revision log. the
2628 first delta is against its parent, which should be in our
2648 first delta is against its parent, which should be in our
2629 log, the rest are against the previous delta.
2649 log, the rest are against the previous delta.
2630
2650
2631 If ``addrevisioncb`` is defined, it will be called with arguments of
2651 If ``addrevisioncb`` is defined, it will be called with arguments of
2632 this revlog and the node that was added.
2652 this revlog and the node that was added.
2633 """
2653 """
2634
2654
2635 if self._adding_group:
2655 if self._adding_group:
2636 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2656 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2637
2657
2638 self._adding_group = True
2658 self._adding_group = True
2639 empty = True
2659 empty = True
2640 try:
2660 try:
2641 with self._writing(transaction):
2661 with self._writing(transaction):
2642 deltacomputer = deltautil.deltacomputer(self)
2662 deltacomputer = deltautil.deltacomputer(self)
2643 # loop through our set of deltas
2663 # loop through our set of deltas
2644 for data in deltas:
2664 for data in deltas:
2645 (
2665 (
2646 node,
2666 node,
2647 p1,
2667 p1,
2648 p2,
2668 p2,
2649 linknode,
2669 linknode,
2650 deltabase,
2670 deltabase,
2651 delta,
2671 delta,
2652 flags,
2672 flags,
2653 sidedata,
2673 sidedata,
2654 ) = data
2674 ) = data
2655 link = linkmapper(linknode)
2675 link = linkmapper(linknode)
2656 flags = flags or REVIDX_DEFAULT_FLAGS
2676 flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(
                        oldlen, newlen
                    ):
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False
        finally:
            self._adding_group = False
        return not empty
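
Reviewer note: the censored-base check above relies on the bdiff hunk encoding, where each patch hunk starts with three big-endian int32s (start, end, new length). A full replacement therefore has one fixed header, which this standalone sketch reproduces; mdiff.replacediffheader is assumed to pack exactly this.

import struct

def replacediffheader(oldlen, newlen):
    # assumed equivalent of mdiff.replacediffheader: a single hunk
    # replacing bytes [0, oldlen) with newlen new bytes
    return struct.pack(b">lll", 0, oldlen, newlen)

delta = replacediffheader(120, 11) + b"<censored>\n"
hlen = struct.calcsize(b">lll")
assert len(delta) - hlen == 11  # matches the newlen computation above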

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

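To make the truncation arithmetic above concrete, here is a toy computation with assumed numbers (a 64-byte v1 index entry; `rev` is the first revision to discard):

entry_size = 64     # assumed index entry size (revlog v1)
rev = 10            # hypothetical strip point
data_end = 8192     # hypothetical start offset of `rev` in the data file

# split revlog: the index file holds only fixed-width entries
end_split = rev * entry_size              # 640

# inline revlog: index entries and chunk data share one file
end_inline = data_end + rev * entry_size  # 8832
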
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

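A healthy revlog yields (0, 0); non-zero values mean trailing bytes a writer left behind, e.g. after an interrupted transaction. A minimal consumer, mirroring the checks in verifyintegrity() further down:

def report_sizes(dd, di):
    # interpret the (dd, di) tuple returned by checksize()
    if dd:
        print('data length off by %d bytes' % dd)
    if di:
        print('index contains %d extra bytes' % di)

report_sizes(0, 0)   # healthy revlog: prints nothing
report_sizes(0, 13)  # index contains 13 extra bytes
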
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

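A hedged usage sketch, based only on the signature above; `rl` (a revlog) and `wanted` (an iterable of nodes) are assumed to exist, and the consumer is hypothetical:

for delta in rl.emitrevisions(
    wanted,
    nodesorder=b'storage',                  # emit in on-disk order
    revisiondata=True,                      # include revision payloads
    deltamode=repository.CG_DELTAMODE_STD,
):
    consume(delta)                          # hypothetical consumer
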
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When unset, the destination revlog's current
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

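A hedged sketch of a caller forcing delta recomputation during a copy; `src`, `dst`, and an open transaction `tr` are assumed to exist:

src.clone(
    tr,
    dst,
    deltareuse=src.DELTAREUSENEVER,  # recompute every delta
    forcedeltabothparents=None,      # keep the destination's setting
)
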
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

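The expression `flags = flags | new_flags[0] & ~new_flags[1]` used twice above deserves a note: `&` binds tighter than `|`, so the removal mask only filters the flags being added and never clears flags already set. A standalone demonstration with toy values:

FLAG_A = 1 << 0
FLAG_B = 1 << 1

flags = FLAG_A
new_flags = (FLAG_A | FLAG_B, FLAG_A)  # (flags to add, flags to remove)
flags = flags | new_flags[0] & ~new_flags[1]
# parsed as: flags | ((FLAG_A | FLAG_B) & ~FLAG_A)
assert flags == FLAG_A | FLAG_B  # the pre-existing FLAG_A survives
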
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

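The tombstone built by storageutil.packmeta above is a filelog metadata envelope: the payload is wrapped in b'\x01\n' markers, the same header tested by the rawtext[0:2]=='\1\n' row in verifyintegrity() below. A hedged, standalone re-implementation for illustration only:

def packmeta(meta, text):
    # assumed equivalent of storageutil.packmeta
    metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in sorted(meta))
    return b'\x01\n' + metatext + b'\x01\n' + text

tombstone = packmeta({b'censored': b'removed by admin'}, b'')
assert tombstone.startswith(b'\x01\n')
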
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

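A hedged driver sketch showing the `state` keys this method reads and writes; `rl` is an assumed revlog instance and the version value is illustrative:

state = {
    b'expectedversion': 1,     # format the verifier expects
    b'erroroncensored': True,  # report censored data as an error
    b'skipflags': 0,           # do not skip any flag processing
}
for problem in rl.verifyintegrity(state):
    print(problem.warning or problem.error)
# unreadable nodes are accumulated in state[b'skipread']
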
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
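
Each `entry_update` above records where a revision's rewritten sidedata lives. Toy arithmetic with assumed numbers:

current_offset = 4096                # hypothetical dfh.tell() result
serialized_sidedata = b'\x00' * 24   # hypothetical serialized block

# (sidedata offset, sidedata length, offset/flags field)
entry_update = (current_offset, len(serialized_sidedata), 0)
current_offset += len(serialized_sidedata)  # next block appends after it
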
@@ -1,169 +1,179 @@
# revlogdeltas.py - constants used for revlog logic.
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import absolute_import

import struct

from ..interfaces import repository

### Internal utility constants

KIND_CHANGELOG = 1001  # over 256 so it is not comparable with a bytes value
KIND_MANIFESTLOG = 1002
KIND_FILELOG = 1003
KIND_OTHER = 1004

ALL_KINDS = {
    KIND_CHANGELOG,
    KIND_MANIFESTLOG,
    KIND_FILELOG,
    KIND_OTHER,
}

### main revlog header

INDEX_HEADER = struct.Struct(b">I")

## revlog version
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
REVLOGV2 = 0xDEAD

## global revlog header flags
# Shared across v1 and v2.
FLAG_INLINE_DATA = 1 << 16
# Only used by v1, implied by v2.
FLAG_GENERALDELTA = 1 << 17
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV0_FLAGS = 0
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = FLAG_INLINE_DATA

### individual entry

## index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")

## index v1
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
assert INDEX_ENTRY_V1.size == 32 * 2

#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
#  1 byte:  compression mode (2 lower bits are data_compression_mode)
# 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size

# revlog index flags

# For historical reasons, revlog's internal flags were exposed via the
# wire protocol and are even exposed in parts of the storage APIs.

# revision has censor metadata, must be verified
REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
# revision hash does not match data (narrowhg)
REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
# revision data is stored externally
REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
# revision changes files in a way that could affect copy tracing.
REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_HASCOPIESINFO,
]

# bitmark for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

## chunk compression mode constants:
# These constants are used in revlog version >=2 to denote the compression
# used for a chunk.

# Chunks use no compression; the data stored on disk can be used directly as
# the chunk value, without any header information prefixed.
COMP_MODE_PLAIN = 0

+# Chunks use the "default compression" for the revlog (usually defined in the
+# revlog docket). A header is still used.
+#
+# XXX: keeping a header is probably not useful and we should probably drop it.
+#
+# XXX: the value of allowing mixed types of compression in the revlog is
+# unclear and we should consider making PLAIN/DEFAULT the only available
+# modes for revlog v2, disallowing INLINE mode.
+COMP_MODE_DEFAULT = 1
+
# Chunks use a compression mode stored "inline" at the start of the chunk
# itself. This is the mode always used for revlog versions "0" and "1".
COMP_MODE_INLINE = 2

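With COMP_MODE_DEFAULT slotted between the existing values, the two low bits of the v2 index's one-byte compression field (see INDEX_ENTRY_V2 above) can hold any of the three modes. A hypothetical reader could dispatch like this; the function name is illustrative:

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

def data_compression_mode(mode_byte):
    return mode_byte & 0b11  # the 2 lower bits are data_compression_mode

assert data_compression_mode(0b0101) == COMP_MODE_DEFAULT
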
SUPPORTED_FLAGS = {
    REVLOGV0: REVLOGV0_FLAGS,
    REVLOGV1: REVLOGV1_FLAGS,
    REVLOGV2: REVLOGV2_FLAGS,
}

_no = lambda flags: False
_yes = lambda flags: True


def _from_flag(flag):
    return lambda flags: bool(flags & flag)


FEATURES_BY_VERSION = {
    REVLOGV0: {
        b'inline': _no,
        b'generaldelta': _no,
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV1: {
        b'inline': _from_flag(FLAG_INLINE_DATA),
        b'generaldelta': _from_flag(FLAG_GENERALDELTA),
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV2: {
        # The point of inline-revlog is to reduce the number of files used in
        # the store. Using a docket defeats this purpose, so we need other
        # means to reduce the number of files for revlogv2.
        b'inline': _no,
        b'generaldelta': _yes,
        b'sidedata': True,
        b'docket': True,
    },
}


SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000

@@ -1,167 +1,179 @@
# docket - code related to revlog "docket"
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

### Revlog docket file
#
# The revlog is stored on disk using multiple files:
#
# * a small docket file, containing metadata and a pointer,
#
# * an index file, containing fixed width information about revisions,
#
# * a data file, containing variable width data for these revisions,

from __future__ import absolute_import

import struct

from .. import (
    error,
+    util,
)

from . import (
    constants,
)

# Docket format
#
# * 4 bytes: revlog version
#           |   This is mandatory as docket must be compatible with the
#           |   previous revlog index header.
# * 8 bytes: size of index-data
# * 8 bytes: pending size of index-data
# * 8 bytes: size of data
# * 8 bytes: pending size of data
-S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL')
+# * 1 byte:  default compression header
+S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc')

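A standalone round-trip of the extended docket header; the trailing 'c' is the one-byte default compression header this change adds. b'x' is used as an assumed zlib marker, for illustration only:

import struct

S_HEADER = struct.Struct('>I' + 'LLLLc')  # INDEX_HEADER.format is '>I'

raw = S_HEADER.pack(0xDEAD, 96, 96, 128, 128, b'x')
assert S_HEADER.unpack(raw)[5] == b'x'  # compression byte round-trips

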
class RevlogDocket(object):
    """metadata associated with revlog"""

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
+        default_compression_header=None,
    ):
        self._version_header = version_header
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        # these asserts should be True as long as we have a single index filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
+        self.default_compression_header = default_compression_header

    def index_filepath(self):
        """file path to the current index file associated to this docket"""
        # very simplistic version at first
        return b"%s.idx" % self._radix

    @property
    def index_end(self):
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        if new_size != self._index_end:
            self._index_end = new_size
            self._dirty = True

    @property
    def data_end(self):
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        if new_size != self._data_end:
            self._data_end = new_size
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modifications to disk, if any

        This makes the new content visible to all processes"""
        if not self._dirty:
            return False
        else:
            if self._read_only:
                msg = b'writing read-only docket: %s'
                msg %= self._path
                raise error.ProgrammingError(msg)
            if not stripping:
                # XXX we could leverage the docket while stripping. However
                # it is not powerful enough at the time of this comment
                transaction.addbackup(self._path, location=b'store')
            with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                f.write(self._serialize(pending=pending))
            # if pending, we still need to write the final data eventually
            self._dirty = pending
            return True

    def _serialize(self, pending=False):
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end

        # this assert should be True as long as we have a single index filename
        assert official_data_end <= self._data_end
        data = (
            self._version_header,
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
+            self.default_compression_header,
        )
        return S_HEADER.pack(*data)


def default_docket(revlog, version_header):
    """given a revlog version header, return a new docket for that revlog"""
    if (version_header & 0xFFFF) != constants.REVLOGV2:
        return None
-    docket = RevlogDocket(revlog, version_header=version_header)
+    comp = util.compengines[revlog._compengine].revlogheader()
+    docket = RevlogDocket(
+        revlog,
+        version_header=version_header,
+        default_compression_header=comp,
+    )
    docket._dirty = True
    return docket


def parse_docket(revlog, data, use_pending=False):
    """given some docket data return a docket object for the given revlog"""
    header = S_HEADER.unpack(data[: S_HEADER.size])
    version_header = header[0]
    index_size = header[1]
    pending_index_size = header[2]
    data_size = header[3]
    pending_data_size = header[4]
+    default_compression_header = header[5]
    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
+        default_compression_header=default_compression_header,
    )
    return docket