revlog: factor the logic to determine the delta compression out...
marmoute
r48245:c6844912 default
@@ -1,3306 +1,3294 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    censor,
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes warnings
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        that test, debug, or performance measurement code might not set it to
        an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
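        # The power-of-2 test above uses the usual bit trick: for x > 0,
        # ``x & (x - 1)`` clears the lowest set bit, so the result is zero
        # exactly when x has a single bit set. With the 65536 default:
        #   65536 & 65535 == 0  -> accepted
        #   65537 & 65536 != 0  -> rejected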
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content, with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
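    # For illustration: ``self._get_data(self._indexfile, mmapindexthreshold)``
    # yields the whole index file, mmapped (wrapped in a util.buffer) once the
    # file size reaches the threshold, read into memory otherwise, and b''
    # when the file does not exist yet.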

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF
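            # For example, a version 1 inline header of 0x00010001 splits
            # into _format_flags == 0x00010000 (FLAG_INLINE_DATA) and
            # _format_version == 0x0001 (REVLOGV1).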

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
648 """The public facing "ID" of the revlog that we use in message"""
648 """The public facing "ID" of the revlog that we use in message"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
838 while 0 <= rev:
838 while 0 <= rev:
839 e = self.index[rev]
839 e = self.index[rev]
840 if e[9] != 0:
840 if e[9] != 0:
841 return e[8] + e[9]
841 return e[8] + e[9]
842 rev -= 1
842 rev -= 1
843 return 0
843 return 0
844
844
845 def flags(self, rev):
845 def flags(self, rev):
846 return self.index[rev][0] & 0xFFFF
846 return self.index[rev][0] & 0xFFFF
847
847
848 def length(self, rev):
848 def length(self, rev):
849 return self.index[rev][1]
849 return self.index[rev][1]
850
850
851 def sidedata_length(self, rev):
851 def sidedata_length(self, rev):
852 if not self.hassidedata:
852 if not self.hassidedata:
853 return 0
853 return 0
854 return self.index[rev][9]
854 return self.index[rev][9]
855
855
856 def rawsize(self, rev):
856 def rawsize(self, rev):
857 """return the length of the uncompressed text for a given revision"""
857 """return the length of the uncompressed text for a given revision"""
858 l = self.index[rev][2]
858 l = self.index[rev][2]
859 if l >= 0:
859 if l >= 0:
860 return l
860 return l
861
861
862 t = self.rawdata(rev)
862 t = self.rawdata(rev)
863 return len(t)
863 return len(t)
864
864
865 def size(self, rev):
865 def size(self, rev):
866 """length of non-raw text (processed by a "read" flag processor)"""
866 """length of non-raw text (processed by a "read" flag processor)"""
867 # fast path: if no "read" flag processor could change the content,
867 # fast path: if no "read" flag processor could change the content,
868 # size is rawsize. note: ELLIPSIS is known to not change the content.
868 # size is rawsize. note: ELLIPSIS is known to not change the content.
869 flags = self.flags(rev)
869 flags = self.flags(rev)
870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
871 return self.rawsize(rev)
871 return self.rawsize(rev)
872
872
873 return len(self.revision(rev, raw=False))
873 return len(self.revision(rev, raw=False))
874
874
875 def chainbase(self, rev):
875 def chainbase(self, rev):
876 base = self._chainbasecache.get(rev)
876 base = self._chainbasecache.get(rev)
877 if base is not None:
877 if base is not None:
878 return base
878 return base
879
879
880 index = self.index
880 index = self.index
881 iterrev = rev
881 iterrev = rev
882 base = index[iterrev][3]
882 base = index[iterrev][3]
883 while base != iterrev:
883 while base != iterrev:
884 iterrev = base
884 iterrev = base
885 base = index[iterrev][3]
885 base = index[iterrev][3]
886
886
887 self._chainbasecache[rev] = base
887 self._chainbasecache[rev] = base
888 return base
888 return base
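    # For instance, with general delta and index bases
    # ``index[7][3] == 5``, ``index[5][3] == 2`` and ``index[2][3] == 2``,
    # ``chainbase(7) == 2``: the walk follows the base field until it hits
    # a revision that is its own base, i.e. a full snapshot.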

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]
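    # For example, if the index stores parents ``(nullrev, 5)`` for a rev,
    # ``parentrevs(rev)`` returns ``(5, nullrev)``: a null p1 is swapped to
    # the second slot so callers always see the real parent first.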

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == nullrev:
926 return i[d[6]][7], i[d[5]][7]
926 return i[d[6]][7], i[d[5]][7]
927 else:
927 else:
928 return i[d[5]][7], i[d[6]][7]
928 return i[d[5]][7], i[d[6]][7]
929
929
930 def chainlen(self, rev):
930 def chainlen(self, rev):
931 return self._chaininfo(rev)[0]
931 return self._chaininfo(rev)[0]
932
932
933 def _chaininfo(self, rev):
933 def _chaininfo(self, rev):
934 chaininfocache = self._chaininfocache
934 chaininfocache = self._chaininfocache
935 if rev in chaininfocache:
935 if rev in chaininfocache:
936 return chaininfocache[rev]
936 return chaininfocache[rev]
937 index = self.index
937 index = self.index
938 generaldelta = self._generaldelta
938 generaldelta = self._generaldelta
939 iterrev = rev
939 iterrev = rev
940 e = index[iterrev]
940 e = index[iterrev]
941 clen = 0
941 clen = 0
942 compresseddeltalen = 0
942 compresseddeltalen = 0
943 while iterrev != e[3]:
943 while iterrev != e[3]:
944 clen += 1
944 clen += 1
945 compresseddeltalen += e[1]
945 compresseddeltalen += e[1]
946 if generaldelta:
946 if generaldelta:
947 iterrev = e[3]
947 iterrev = e[3]
948 else:
948 else:
949 iterrev -= 1
949 iterrev -= 1
950 if iterrev in chaininfocache:
950 if iterrev in chaininfocache:
951 t = chaininfocache[iterrev]
951 t = chaininfocache[iterrev]
952 clen += t[0]
952 clen += t[0]
953 compresseddeltalen += t[1]
953 compresseddeltalen += t[1]
954 break
954 break
955 e = index[iterrev]
955 e = index[iterrev]
956 else:
956 else:
957 # Add text length of base since decompressing that also takes
957 # Add text length of base since decompressing that also takes
958 # work. For cache hits the length is already included.
958 # work. For cache hits the length is already included.
959 compresseddeltalen += e[1]
959 compresseddeltalen += e[1]
960 r = (clen, compresseddeltalen)
960 r = (clen, compresseddeltalen)
961 chaininfocache[rev] = r
961 chaininfocache[rev] = r
962 return r
962 return r
963
963
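    # A sketch (not part of the original module) of what chainlen() reports:
    # walking the delta bases by hand yields the same count, e.g. for an
    # open revlog `rl`:
    #
    #     n, r = 0, rev
    #     while rl.index[r][3] != r:  # entry[3] is the delta base
    #         r = rl.index[r][3] if rl._generaldelta else r - 1
    #         n += 1
    #     assert n == rl.chainlen(rev)
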
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

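    # Usage sketch (assuming `rl` is a revlog): restoring a revision applies
    # every delta in the returned chain, oldest first:
    #
    #     chain, stopped = rl._deltachain(rev)
    #     # when not stopped, chain[0] is the base the chain builds on and
    #     # chain[-1] == rev
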
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

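    # In revset terms (a sketch with hypothetical nodes): in a linear
    # history where node2 precedes node4,
    #
    #     has, missing = rl.findcommonmissing([node2], [node4])
    #     # `has` answers "is this rev an ancestor of common?" membership
    #     # tests lazily; `missing` is (::node4) - (::node2) as sorted nodes.
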
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

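    # Sketch of the incremental API shared by the two methods above
    # (`rl`, `c1`, `h1`, `h2` are hypothetical):
    #
    #     inc = rl.incrementalmissingrevs(common=[c1])
    #     missing = inc.missingancestors([h1, h2])  # (::heads) - (::common)
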
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

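    # Example (a sketch with hypothetical nodes): for a linear history
    # a -> b -> c, nodesbetween(roots=[a], heads=[c]) returns
    # ([a, b, c], [a], [c]).
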
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

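    # Note on the pure-Python fallback above: `ishead` has one extra slot so
    # that writes through a nullrev (-1) parent land in the final cell, which
    # is never reported since only real revisions are ever marked as heads.
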
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

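    # Why `a > b` can short-circuit to False above: revision numbers are
    # assigned in topological order, so an ancestor always has a smaller
    # revision number than any of its descendants.
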
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

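    # Resolution sketch (hypothetical prefix): a unique hex prefix resolves
    # to the full binary node and None means no match at all, e.g.
    #
    #     rl._partialmatch(b'1ab2')
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError, and a prefix
    # made only of 'f's may raise WdirUnsupported since it could denote the
    # working directory pseudo-node.
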
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

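    # Example (a sketch): shortest(node) might return b'1ab2' when four hex
    # digits already identify the node uniquely; the result is never shorter
    # than `minlength` and never a string of 'f's that could mean wdir.
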
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

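    # Offset math for inline revlogs (restating the branch above): index and
    # data are interleaved in a single file, so a chunk whose data offset is
    # `start` for revision `rev` actually lives at
    # start + (rev + 1) * entry_size, skipping the index entries before it.
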
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that
            # case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

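    # Design note: reading one contiguous segment covering firstrev..lastrev
    # and slicing per-revision buffers out of it needs far fewer reads than
    # one _chunk() call per revision; with sparse-read enabled, slicechunk()
    # also splits the revisions into dense groups so large gaps between them
    # are never read.
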
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

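    # In sparse-revlog terms a snapshot is a revision stored independently of
    # its parents: either in full (base == rev or base == nullrev) or as a
    # delta against another snapshot, which is what the recursion above
    # checks.
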
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

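    # Usage sketch (illustrative): reading a revision back, where `rl` is an
    # open revlog:
    #
    #   node = rl.node(0)          # node id of revision 0
    #   text = rl.revision(node)   # flag-processed revision text
    #   raw = rl.rawdata(node)     # raw bytes exactly as stored
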
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

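    # What _rawtext() does, in miniature (illustrative, ignoring the cache):
    # fetch the chunks of the delta chain and fold the deltas over the base:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
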
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

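    # Index entry fields used above (as read by this code): entry[8] is the
    # sidedata offset, entry[9] the stored (possibly compressed) sidedata
    # length, and entry[11] its compression mode.
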
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

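    # Illustrative sketch of the default node hash computed by
    # storageutil.hashrevisionsha1 (hedged; see that helper for the
    # authoritative version): SHA-1 over the two parent nodes in sorted
    # order followed by the text, so parent order does not matter:
    #
    #   import hashlib
    #   def node_sha1(text, p1, p2):
    #       a, b = sorted([p1, p2])
    #       return hashlib.sha1(a + b + text).digest()
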
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant with revlog-v2, which is never inline, so we never
            # reach this code.

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant with revlog-v2, which is never inline, so we
                # never reach this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

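    # Usage sketch: every write path runs inside this context manager, which
    # opens the index/data/sidedata handles once and closes them at the end
    # (this is what addrawrevision() below does):
    #
    #   with rl._writing(transaction):
    #       rl._addrevision(node, rawtext, transaction, link, p1, p2,
    #                       flags, cachedelta)
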
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

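    # Usage sketch: compress() returns a (header, data) pair whose
    # concatenation is exactly what decompress() expects back:
    #
    #   h, d = rl.compress(b'some revision data')
    #   assert bytes(rl.decompress(h + d)) == b'some revision data'
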
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

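    # Header byte dispatch used above, summarized:
    #   b'x'   -> zlib-compressed chunk
    #   b'\0'  -> plain chunk stored as-is (data happens to start with NUL)
    #   b'u'   -> uncompressed chunk behind an explicit one-byte marker
    #   other  -> resolved to a configured compression engine (e.g. zstd)
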
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
-            h, d = deltainfo.data
-            if not h and not d:
-                # not data to store at all... declare them uncompressed
-                compression_mode = COMP_MODE_PLAIN
-            elif not h:
-                t = d[0:1]
-                if t == b'\0':
-                    compression_mode = COMP_MODE_PLAIN
-                elif t == self._docket.default_compression_header:
-                    compression_mode = COMP_MODE_DEFAULT
-            elif h == b'u':
-                # we have a more efficient way to declare uncompressed
-                h = b''
-                compression_mode = COMP_MODE_PLAIN
-                deltainfo = deltautil.drop_u_compression(deltainfo)
+            default_comp = self._docket.default_compression_header
+            r = deltautil.delta_compression(default_comp, deltainfo)
+            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

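    # Sketch of the helper the hunk above now delegates to (assumed shape,
    # mirroring the inline logic it replaces; the authoritative version
    # lives in revlogutils/deltas.py as deltautil.delta_compression):
    #
    #   def delta_compression(default_compression_header, deltainfo):
    #       """return (COMP_MODE_*, deltainfo) for a delta about to be stored"""
    #       h, d = deltainfo.data
    #       compression_mode = COMP_MODE_INLINE
    #       if not h and not d:
    #           # no data to store at all... declare it uncompressed
    #           compression_mode = COMP_MODE_PLAIN
    #       elif not h:
    #           t = d[0:1]
    #           if t == b'\0':
    #               compression_mode = COMP_MODE_PLAIN
    #           elif t == default_compression_header:
    #               compression_mode = COMP_MODE_DEFAULT
    #       elif h == b'u':
    #           # uncompressed data is declared more cheaply via the index
    #           deltainfo = drop_u_compression(deltainfo)
    #           compression_mode = COMP_MODE_PLAIN
    #       return compression_mode, deltainfo
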
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

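    # On-disk layout written above, summarized: inline revlogs interleave
    # index entries and data chunks in the .i file; split revlogs keep them
    # in separate files:
    #
    #   inline:  .i = [entry 0][chunk 0][entry 1][chunk 1] ...
    #   split:   .i = [entry 0][entry 1] ...    .d = [chunk 0][chunk 1] ...
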
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

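    # Illustrative shape of one element of `deltas` consumed above:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # e.g. feeding a single changegroup-style entry whose delta applies
    # against `deltabase` (names hypothetical):
    #
    #   deltas = [(node, p1, p2, cl_node, base_node, delta_bytes, 0, {})]
    #   rl.addgroup(deltas, linkmapper, transaction)
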
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

2721 def getstrippoint(self, minlink):
2709 def getstrippoint(self, minlink):
2722 """find the minimum rev that must be stripped to strip the linkrev
2710 """find the minimum rev that must be stripped to strip the linkrev
2723
2711
2724 Returns a tuple containing the minimum rev and a set of all revs that
2712 Returns a tuple containing the minimum rev and a set of all revs that
2725 have linkrevs that will be broken by this strip.
2713 have linkrevs that will be broken by this strip.
2726 """
2714 """
2727 return storageutil.resolvestripinfo(
2715 return storageutil.resolvestripinfo(
2728 minlink,
2716 minlink,
2729 len(self) - 1,
2717 len(self) - 1,
2730 self.headrevs(),
2718 self.headrevs(),
2731 self.linkrev,
2719 self.linkrev,
2732 self.parentrevs,
2720 self.parentrevs,
2733 )
2721 )
2734
2722
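    # Example (hypothetical filelog ``fl`` whose revs 0..3 carry linkrevs
    # 2, 5, 3, 7): stripping everything linked to changelog rev 4 or later
    # gives:
    #
    #   rev, broken = fl.getstrippoint(4)
    #   # rev == 1: revs 1 and 3 have linkrev >= 4, so truncation starts
    #   # at rev 1.
    #   # broken == {2}: rev 2 has linkrev 3 < 4 but sits above the
    #   # truncation point; the caller must save and re-add it.
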
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it
            # is not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

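    # Example (hypothetical numbers): if the index expects 4096 bytes of
    # revision data but the data file was truncated to 4000 bytes,
    # checksize() returns (-96, 0); verifyintegrity() below surfaces such
    # values as "data length off by ..." / "index contains ... extra bytes".
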
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            if self._docket.data_end:
                res.append(self._datafile)
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. When unset, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

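    # Minimal usage sketch (``src``, ``dest`` and transaction ``tr`` are
    # assumed to exist): recompute every delta while copying, e.g. after a
    # change of delta algorithm:
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
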
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                        | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()               | 0      | 0      | 0     | not 0
            #  renamed()             | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            #  -------------------------------------------------
            #  rawsize()    | L1     | L1     | L1    | L1
            #  size()       | L1     | L2-LM  | L1(*) | L1 (?)
            #  len(rawtext) | L2     | L2     | L2    | L2
            #  len(text)    | L2     | L2     | L2    | L3
            #  len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            #  LM:  length of metadata, depending on rawtext
            #  (*): not ideal, see comment in filelog.size
            #  (?): could be "- len(meta)" if the resolved content has
            #       rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

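    # Example (hypothetical revlog ``rl``): ``trackedsize`` sums the
    # uncompressed size of every revision, while ``storedsize`` is the
    # on-disk footprint of the files returned by files(), so their ratio
    # gives a rough compression estimate:
    #
    #   info = rl.storageinfo(trackedsize=True, storedsize=True)
    #   ratio = info[b'storedsize'] / float(info[b'trackedsize'])
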
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
@@ -1,1115 +1,1143 @@
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import absolute_import

import collections
import struct

# import stuff from node for others to import from revlog
from ..node import nullrev
from ..i18n import _
from ..pycompat import getattr

 from .constants import (
+    COMP_MODE_DEFAULT,
+    COMP_MODE_INLINE,
+    COMP_MODE_PLAIN,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )

from ..thirdparty import attr

from .. import (
    error,
    mdiff,
    util,
)

from . import flagutil

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2
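# Worked example (illustrative only): with LIMIT_DELTA2TEXT = 2, a revision
# whose full text is 1000 bytes may end a delta chain carrying at most
# 2000 bytes of delta data; beyond that ratio, storing a snapshot is
# considered cheaper than replaying the chain.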


class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        if rev == nullrev:
            return True
        return rev in self._snapshot


def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be yielded individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    ):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk


def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk


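# The density criterion applied below is chainpayload / deltachainspan. As an
# illustrative example (numbers invented): revisions carrying 40 bytes of
# payload spread over a 100-byte span have density 0.4, below the default 0.5
# threshold, so the largest gaps are carved out until each remaining group is
# dense enough to be worth a single contiguous read.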
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps; they are sorted below so the largest can be popped
    # first
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping the largest remaining gap shrinks the amount of data to
        # read, which raises the density estimate
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk


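# Editor's note: the helper below is an illustrative sketch, not part of the
# original module. It reproduces the density bookkeeping used above on plain
# (start, length) pairs, with no revlog object required.
def _example_density(segments):
    """Return payload/span density for sorted (start, length) pairs.

    >>> _example_density([(0, 5), (5, 5)])
    1.0
    >>> _example_density([(0, 5), (15, 5)])
    0.5
    """
    payload = sum(length for _start, length in segments)
    first_start = segments[0][0]
    last_start, last_length = segments[-1]
    span = (last_start + last_length) - first_start
    return payload / float(span) if span else 1.0

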
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (
            endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
        ):
            endidx -= 1

    return revs[startidx:endidx]


def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])


def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext


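# Editor's note: the helper below is an illustrative sketch, not part of the
# original module. It builds the "full replacement" delta that _textfromdelta
# special-cases above: a single hunk spanning the whole base text, which can
# be applied without ever decoding the base revision.
def _example_full_replacement_delta(baselen, newtext):
    # the header encodes: replace bytes [0, baselen) of the base with newtext
    return mdiff.replacediffheader(baselen, len(newtext)) + newtext

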
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()


def drop_u_compression(delta):
    """turn a "u" (no-compression) delta into no-compression without header

    This is useful for revlog formats that have a better compression method.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )


def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional passes
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issues related to the span of data. In theory, it would be
    # possible to build a pathological revlog where the delta pattern would
    # lead to too many reads. However, this does not happen in practice at
    # all. So we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of the deltas gets larger than K * the target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of deltas in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The limit
    # exists to prevent endless chains of intermediate deltas from being
    # created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if the new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True


# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500


def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top-level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revisions we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta bases that will never produce a good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # cases; since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already has too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None


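# Editor's note: the helper below is an illustrative sketch, not part of the
# original module. It shows the send()-based protocol that _candidategroups
# implements: the caller pulls a group, evaluates its revisions, and sends
# back the chosen base (or None) to steer which group comes next.
# `pick_best` is a stand-in for the caller's own scoring logic.
def _example_drive_candidategroups(revlog, textlen, p1, p2, pick_best):
    groups = _candidategroups(revlog, textlen, p1, p2, None)
    chosen = None
    candidates = next(groups)
    while candidates is not None:
        best = pick_best(candidates)  # a revision number, or None
        if best is not None:
            chosen = best
        # feeding the choice back refines the next group of candidates
        candidates = groups.send(best)
    return chosen

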
def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip"""
    if util.safehasattr(revlog.index, b'findsnapshots'):
        revlog.index.findsnapshots(cache, start_rev)
    else:
        deltaparent = revlog.deltaparent
        issnapshot = revlog.issnapshot
        for rev in revlog.revs(start_rev):
            if issnapshot(rev):
                cache[deltaparent(rev)].append(rev)


def _refinedgroups(revlog, p1, p2, cachedelta):
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling source-delta reuse is useful
    # when we want to make sure we recompute "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None


def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower-level function focuses on emitting deltas that are
    theoretically interesting, without looking into any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazily tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains as a
        # base for a new intermediate snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by an "unrelated" revision increases the odds
        # that we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however we
        # stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High-level ones are more
        # likely to result in small deltas
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, a child of the base we consider, would
            # be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # reuse, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chance
                # of being a valid base since our ancestors had to create a
                # new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for
        # an intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revision instead of starting our own. Without such reuse,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approaches failed; try against prev to hopefully save us a
        # fulltext.
        yield (prev,)


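# Editor's note: the helper below is an illustrative sketch, not part of the
# original module. It shows the "snapshot level" bookkeeping used above: only
# the leading run of snapshots in a delta chain qualifies, and a snapshot's
# position in that run is its level.
def _example_snapshot_levels(chain, issnapshot):
    """Map snapshot level -> revision for the leading snapshots of a chain.

    >>> _example_snapshot_levels([0, 3, 7, 9], {0, 3}.__contains__)
    {0: 0, 1: 3}
    """
    levels = {}
    for idx, rev in enumerate(chain):
        if not issnapshot(rev):
            break
        levels[idx] = rev
    return levels

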
class deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        # snapshotdepth must be neither None nor 0 (a 0-level snapshot is a
        # full snapshot)
        if revlog.upperboundcomp is not None and snapshotdepth:
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if snapshotlimit < lowestrealisticdeltalen:
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo):
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revisions that cannot be used
        as a delta base. Use this to recompute deltas suitable in a censor or
        strip context.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        if excluded_bases is None:
            excluded_bases = set()

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs to be tested,
        # also to avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(
            self.revlog, revinfo.textlen, p1r, p2r, cachedelta
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if candidaterev in excluded_bases:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if candidatedelta is not None:
                    if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                        nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo


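# Editor's note: the function below is an illustrative sketch, not part of
# the original module. It shows how a revlog writer drives deltacomputer;
# `revlog`, `revinfo` and `fh` are assumed to be set up by the caller, as
# revlog._addrevision does.
def _example_find_delta(revlog, revinfo, fh):
    dc = deltacomputer(revlog)
    info = dc.finddeltainfo(revinfo, fh)
    # info is always usable: a delta when an acceptable base exists,
    # otherwise a full snapshot of the revision
    return info.base, info.data

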
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare it uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
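

# Editor's note: the function below is an illustrative sketch, not part of
# the original module. It shows how a revlog v2+ writer might dispatch on the
# mode returned by delta_compression. The b'x' default is an assumption,
# standing in for the revlog's default (zlib) compression header.
def _example_dispatch_compression(deltainfo, default_header=b'x'):
    mode, deltainfo = delta_compression(default_header, deltainfo)
    header, data = deltainfo.data
    if mode == COMP_MODE_PLAIN:
        # stored verbatim; readers will not attempt any decompression
        return mode, header + data
    if mode == COMP_MODE_DEFAULT:
        # stored as produced by the default engine; the mode itself tells
        # readers which decompressor to use, so no per-chunk header is kept
        return mode, data
    # COMP_MODE_INLINE: keep the self-describing header + payload
    return mode, header + data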