revlog: use rust rank computation if available...
pacien
r49710:6ea9ead5 default
@@ -1,3307 +1,3310 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes warnings
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but test, debug, or performance measurement code might not
        set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
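        # A power of two has exactly one bit set, so `x & (x - 1)` clears
        # that bit and is zero only for powers of two.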
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return the empty string."""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

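        # The version header packs the format version in its low 16 bits
        # and the feature flags in the high bits; split them apart below.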
        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta is implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
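    # For instance, given entry = self.index[rev], the data offset is
    # entry[0] >> 16 and the flag bits are entry[0] & 0xFFFF, as the two
    # accessors below implement.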
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
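        # Scan backwards for the most recent entry that actually carries
        # sidedata (e[9] is its length) and use its end offset.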
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
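        # Walk the delta chain: entry[3] is the delta base, and a revision
        # that is its own base marks the start of the chain.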
884 iterrev = rev
884 iterrev = rev
885 base = index[iterrev][3]
885 base = index[iterrev][3]
886 while base != iterrev:
886 while base != iterrev:
887 iterrev = base
887 iterrev = base
888 base = index[iterrev][3]
888 base = index[iterrev][3]
889
889
890 self._chainbasecache[rev] = base
890 self._chainbasecache[rev] = base
891 return base
891 return base
892
892
893 def linkrev(self, rev):
893 def linkrev(self, rev):
894 return self.index[rev][4]
894 return self.index[rev][4]
895
895
896 def parentrevs(self, rev):
896 def parentrevs(self, rev):
897 try:
897 try:
898 entry = self.index[rev]
898 entry = self.index[rev]
899 except IndexError:
899 except IndexError:
900 if rev == wdirrev:
900 if rev == wdirrev:
901 raise error.WdirUnsupported
901 raise error.WdirUnsupported
902 raise
902 raise
903
903
904 return entry[5], entry[6]
904 return entry[5], entry[6]
905
905
906 # fast parentrevs(rev) where rev isn't filtered
906 # fast parentrevs(rev) where rev isn't filtered
907 _uncheckedparentrevs = parentrevs
907 _uncheckedparentrevs = parentrevs
908
908
909 def node(self, rev):
909 def node(self, rev):
910 try:
910 try:
911 return self.index[rev][7]
911 return self.index[rev][7]
912 except IndexError:
912 except IndexError:
913 if rev == wdirrev:
913 if rev == wdirrev:
914 raise error.WdirUnsupported
914 raise error.WdirUnsupported
915 raise
915 raise
916
916
917 # Derived from index values.
917 # Derived from index values.
918
918
919 def end(self, rev):
919 def end(self, rev):
920 return self.start(rev) + self.length(rev)
920 return self.start(rev) + self.length(rev)
921
921
922 def parents(self, node):
922 def parents(self, node):
923 i = self.index
923 i = self.index
924 d = i[self.rev(node)]
924 d = i[self.rev(node)]
925 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
925 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
926
926
927 def chainlen(self, rev):
927 def chainlen(self, rev):
928 return self._chaininfo(rev)[0]
928 return self._chaininfo(rev)[0]
929
929
    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

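    # Illustrative sketch of the pure-Python walk above, assuming a toy
    # ``index`` where entry[3] is the delta base and general delta is
    # enabled (illustrative only, not used by this module):
    #
    #     def toy_deltachain(index, rev):
    #         chain = []
    #         while rev != index[rev][3]:  # a snapshot is its own base
    #             chain.append(rev)
    #             rev = index[rev][3]  # jump to the delta base
    #         chain.append(rev)
    #         chain.reverse()  # base first, requested revision last
    #         return chain
    #
    # For index entries whose base field maps 0 -> 0, 1 -> 0 and 2 -> 1,
    # the chain for revision 2 is [0, 1, 2]: restore snapshot 0, then
    # apply the deltas of revisions 1 and 2 to rebuild revision 2.
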
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

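    # Illustrative sketch of the walk above on a toy graph, where
    # ``parents`` maps rev -> (p1, p2), -1 plays the role of nullrev, and
    # ``has`` already contains -1 plus every ancestor of 'common':
    #
    #     def toy_missing(parents, has, heads):
    #         missing = set()
    #         visit = [r for r in heads if r not in has]
    #         while visit:
    #             r = visit.pop()
    #             if r in missing:
    #                 continue
    #             missing.add(r)
    #             visit.extend(p for p in parents[r] if p not in has)
    #         return sorted(missing)
    #
    # Sorting by revision number is enough for a topological order because
    # revlogs are append-only: parents always have smaller numbers.
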
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

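    # Illustrative example (hypothetical nodes): on a linear history
    # 0 <- 1 <- 2 <- 3, nodesbetween([node(1)], [node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): the topological path
    # from the root to the head, the roots actually used, and the heads
    # actually reached.
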
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

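    # Illustrative example for the fallback above: every revision starts
    # flagged as a potential head and each revision clears the flags of
    # its parents. On a toy graph where revisions 2 and 3 both have
    # parent 1 (and 1 has parent 0), only the childless revisions keep
    # their flag, so _headrevs() would report [2, 3].
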
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

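    # Illustrative sketch of what the reachableroots call above decides:
    # with roots=[a] and heads=[b], the result is non-empty exactly when
    # walking b's ancestry reaches a. A toy equivalent over a ``parents``
    # mapping (exploiting that ancestors always have smaller revision
    # numbers, which lets the walk prune anything below a):
    #
    #     def toy_isancestorrev(parents, a, b):
    #         stack = [b]
    #         while stack:
    #             r = stack.pop()
    #             if r == a:
    #                 return True
    #             stack.extend(p for p in parents[r] if p >= a)
    #         return False
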
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # ignore wdirfilenodeids here, as they should always be full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

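    # Illustrative walk-through of the fallback loop above (hypothetical
    # values): for hexnode b"d4e5f6..." and minlength=1, isvalid() is
    # probed with b"d", b"d4", b"d4e", ... and the first unambiguous
    # prefix wins; disambiguate() then only lengthens it if the prefix
    # happens to be all 'f' characters and could be confused with the
    # working directory pseudo-node.
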
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

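    # Hedged sketch of why this works (an assumption about what
    # storageutil.hashrevisionsha1 computes, shown for illustration
    # only): a node is the SHA-1 of the sorted parent nodes followed by
    # the revision text, so recomputing the hash compares content
    # without reading the stored revision back:
    #
    #     import hashlib
    #
    #     def toy_hashrevision(text, p1, p2):
    #         a, b = sorted((p1, p2))
    #         s = hashlib.sha1(a + b)
    #         s.update(text)
    #         return s.digest()
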
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

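    # Illustrative arithmetic for the inline adjustment above
    # (hypothetical numbers): with entry_size == 64 and startrev == 2
    # whose logical data offset is 100, the index entries for revisions
    # 0, 1 and 2 are interleaved before that data in the .i file, so the
    # physical read starts at 100 + (2 + 1) * 64 == 292.
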
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

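    # Illustrative example: with general delta enabled, entry[3] names
    # the delta parent directly, so revision 7 may be stored as a delta
    # against, say, revision 3. Without it, deltas always chain against
    # the immediately preceding revision, so the delta parent of 7 is 6,
    # unless 7 is a full snapshot (its own base), reported as nullrev.
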
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

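    # Illustrative reading of the pure fallback above on toy index
    # fields (base, p1, p2), with -1 standing in for nullrev:
    #
    #     rev 0: (0, -1, -1)   base == rev: full snapshot
    #     rev 1: (0, 0, -1)    base == p1: a regular delta
    #     rev 2: (0, 1, -1)    base is not a parent, so rev 2 is a
    #                          snapshot iff its base (rev 0) is -> True
    #
    # i.e. intermediate snapshots are deltas stored against another
    # snapshot rather than against a parent.
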
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

1821 if raw and validated:
1821 if raw and validated:
1822 # if we don't want to process the raw text and that raw
1822 # if we don't want to process the raw text and that raw
1823 # text is cached, we can exit early.
1823 # text is cached, we can exit early.
1824 return rawtext
1824 return rawtext
1825 if rev is None:
1825 if rev is None:
1826 rev = self.rev(node)
1826 rev = self.rev(node)
1827 # the revlog's flag for this revision
1827 # the revlog's flag for this revision
1828 # (usually alter its state or content)
1828 # (usually alter its state or content)
1829 flags = self.flags(rev)
1829 flags = self.flags(rev)
1830
1830
1831 if validated and flags == REVIDX_DEFAULT_FLAGS:
1831 if validated and flags == REVIDX_DEFAULT_FLAGS:
1832 # no extra flags set, no flag processor runs, text = rawtext
1832 # no extra flags set, no flag processor runs, text = rawtext
1833 return rawtext
1833 return rawtext
1834
1834
1835 if raw:
1835 if raw:
1836 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1836 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1837 text = rawtext
1837 text = rawtext
1838 else:
1838 else:
1839 r = flagutil.processflagsread(self, rawtext, flags)
1839 r = flagutil.processflagsread(self, rawtext, flags)
1840 text, validatehash = r
1840 text, validatehash = r
1841 if validatehash:
1841 if validatehash:
1842 self.checkhash(text, node, rev=rev)
1842 self.checkhash(text, node, rev=rev)
1843 if not validated:
1843 if not validated:
1844 self._revisioncache = (node, rev, rawtext)
1844 self._revisioncache = (node, rev, rawtext)
1845
1845
1846 return text
1846 return text
1847
1847
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

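Reconstruction is a plain fold over the delta chain: the first chunk is a full text and each following chunk is a binary delta applied on top. A toy sketch of the same fold, one delta at a time (equivalent to the single mdiff.patches(basetext, bins) call above):

from mercurial import mdiff

def fold_chain(full_text, deltas):
    text = full_text
    for d in deltas:
        # apply each mdiff-format delta onto the accumulated text
        text = mdiff.patches(text, [d])
    return text
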
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

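The three COMP_MODE_* branches differ only in where the compression header lives: PLAIN stores bytes verbatim, DEFAULT defers to the docket-wide default engine, and INLINE prefixes each chunk with its own engine marker. A toy dispatch mirroring those branches, with string modes and plain zlib standing in for the revlog's machinery (purely illustrative):

import zlib

def decode_segment(mode, data, default_decompress=zlib.decompress):
    if mode == 'plain':                  # like COMP_MODE_PLAIN: stored verbatim
        return data
    if mode == 'default':                # like COMP_MODE_DEFAULT: docket-level engine
        return default_decompress(data)
    if mode == 'inline':                 # like COMP_MODE_INLINE: first byte names
        raise NotImplementedError        # the engine (see decompress() below)
    raise ValueError('unknown compression mode %r' % mode)
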
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

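For the default SHA-1 scheme, a node is the digest of the two parent nodes, sorted so that parent order never changes identity, followed by the revision text. A sketch of what storageutil.hashrevisionsha1 computes, under that assumption:

import hashlib

def node_sha1(text, p1, p2):
    a, b = sorted([p1, p2])  # parent order must not affect the node id
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()        # 20-byte node id
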
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = None
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant with revlog-v2, which is never inline and never
            # reaches this code.

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if (
                        trindex is None
                        and troffset
                        <= self.start(r) + r * self.index.entry_size
                    ):
                        trindex = r
                new_dfh.flush()

            if trindex is None:
                trindex = 0

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

                tr.replace(self._indexfile, trindex * self.index.entry_size)
                nodemaputil.setup_persistent_nodemap(tr, self)
                self._segmentfile = randomaccessfile.randomaccessfile(
                    self.opener,
                    self._datafile,
                    self._chunkcachesize,
                )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant with revlog-v2, which is never inline and
                # never reaches this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()

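The conversion trigger is a plain size check against the tip's end offset; everything after it is the mechanical split of one combined file into separate index and data files. A sketch of the trigger alone (`rl` and `maxinline` are illustrative stand-ins for the revlog instance and the module-level _maxinline cap):

def needs_split(rl, maxinline):
    tiprev = len(rl) - 1
    total_size = rl.start(tiprev) + rl.length(tiprev)
    return rl._inline and total_size >= maxinline
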
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

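All mutation funnels through this context manager, which guarantees the index, data, and sidedata handles exist for the duration of the write and that docket offsets are flushed on the way out. A sketch of the intended call pattern (`rl` and `tr` are illustrative):

with rl._writing(tr):       # opens ifh/dfh/sdfh and registers them with tr
    rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)
# on exit: docket written (if any), all handles closed, index file last
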
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method so that the changelog can implement its own
        transaction logic on top of it.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing methods (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

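In the common case the caller passes only text, transaction, linkrev, and parents, and gets the new revision number back (or the existing one, if the node is already stored). A hypothetical call, with every name illustrative:

rev = rl.addrevision(b'file content\n', tr, linkrev, p1node, p2node)
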
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

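The first byte is the entire routing protocol: zlib streams happen to begin with 0x78 (b'x'), b'\0' marks data stored verbatim whose first byte is already NUL, and b'u' is an explicit uncompressed marker stripped before returning. A standalone sketch of the same dispatch using plain zlib:

import zlib

def decode_chunk(data):
    t = data[:1]
    if t == b'x':      # zlib stream (first byte of zlib output is 0x78)
        return zlib.decompress(data)
    if t == b'\0':     # raw bytes that legitimately start with NUL
        return data
    if t == b'u':      # explicit 'uncompressed' marker, strip it
        return data[1:]
    raise ValueError('unknown chunk header %r' % t)
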
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # we need the rawtext size, before it is changed by flag
            # processors; this is the non-raw size. Use revlog explicitly to
            # avoid filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way we can
            # easily detect empty sidedata, and it will be no different from
            # sidedata we add manually.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._format_version == CHANGELOGV2:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

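The rank used by changelog-v2 is the size of a revision's ancestor set, itself included: a root has rank 1, a linear child has 1 + rank(parent), and for merges the code above now defers to the Rust implementation when the extension is compatible, falling back to counting the ancestors of the smaller parent that the larger parent does not already cover. A pure-Python reference definition of the same quantity over a toy parents() function (illustrative, not the revlog API):

def rank(rev, parents):
    # size of the ancestor set of `rev`, including `rev` itself
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r == -1 or r in seen:  # -1 plays the role of nullrev
            continue
        seen.add(r)
        stack.extend(parents(r))
    return len(seen)
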
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

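The seek-before-write workaround described in the opening comment is independent of revlogs; a minimal reproduction of the pattern for any handle shared between reads and appends:

import os

def append_safely(fh, payload):
    # position explicitly before each write, so a handle last used for
    # reading still appends exactly where we expect
    fh.seek(0, os.SEEK_END)
    fh.write(payload)
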
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with this revlog
        and the revision number that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case. We're only using addgroup() in the context of
                    # changegroup generation so the revision data can always
                    # be handled as raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

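Each element of ``deltas`` is the eight-field tuple unpacked at the top of the loop. A hypothetical producer yielding a single full-replacement delta against the null base (every name here is illustrative; mdiff.trivialdiffheader builds the (0, 0, length) patch header for a full text):

from mercurial import mdiff

def toy_deltas(rl, node, p1, p2, linknode, text):
    # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    delta = mdiff.trivialdiffheader(len(text)) + text
    yield (node, p1, p2, linknode, rl.nullid, delta, 0, {})
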
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

2716 def _peek_iscensored(self, baserev, delta):
2719 def _peek_iscensored(self, baserev, delta):
2717 """Quickly check if a delta produces a censored revision."""
2720 """Quickly check if a delta produces a censored revision."""
2718 if not self._censorable:
2721 if not self._censorable:
2719 return False
2722 return False
2720
2723
2721 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2724 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2722
2725
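    # NOTE (editor's sketch, not part of the upstream file): since censorship
    # is just a flag bit on the index entry, callers can enumerate censored
    # revisions cheaply. `rl` below stands for any censorable revlog instance
    # and is an assumption of this example:
    #
    #     censored = [r for r in rl if rl.iscensored(r)]
    #
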
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it
            # is not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

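    # NOTE (editor's sketch, not part of the upstream file): strip is usually
    # a two-step dance so the caller can save the revisions whose linkrevs
    # would be broken before truncating. `rl`, `minlink`, the open transaction
    # `tr` and the `backup` helper are assumptions of this example:
    #
    #     striprev, brokenrevs = rl.getstrippoint(minlink)
    #     backup(brokenrevs)  # hypothetical: e.g. write them to a bundle
    #     rl.strip(minlink, tr)
    #
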
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

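    # NOTE (editor's sketch, not part of the upstream file): checksize() is
    # the primitive behind verify's truncation checks; a healthy revlog
    # reports (0, 0). Assuming a revlog instance `rl`:
    #
    #     dd, di = rl.checksize()
    #     if (dd, di) != (0, 0):
    #         print('data file off by %d bytes, index by %d' % (dd, di))
    #
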
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
                res.extend(
                    self._docket.old_data_filepaths(include_empty=False)
                )
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
                res.extend(
                    self._docket.old_sidedata_filepaths(include_empty=False)
                )
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

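    # NOTE (editor's sketch, not part of the upstream file): the objects
    # emitted above are `revlogrevisiondelta` instances, which changegroup
    # packers consume roughly like this (`rl`, `nodes` and `process` are
    # assumptions of this example):
    #
    #     for rd in rl.emitrevisions(nodes, revisiondata=True):
    #         process(rd.node, rd.basenode, rd.delta or rd.revision)
    #
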
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If it is None, the destination revlog's current setting
        is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

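    # NOTE (editor's sketch, not part of the upstream file): a format upgrade
    # that wants freshly computed deltas would clone with the slowest reuse
    # policy. `src`, `dst` and the open transaction `tr` are assumptions of
    # this example:
    #
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
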
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

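    # NOTE (editor's sketch, not part of the upstream file): verifyintegrity
    # is a generator; a minimal driver collects problems like this, assuming
    # a revlog `rl` and a verifier-style `state` dict:
    #
    #     state = {b'expectedversion': rl._format_version,
    #              b'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         print(problem.error or problem.warning)
    #
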
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

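    # NOTE (editor's sketch, not part of the upstream file): callers request
    # only the facts they need and read them back out of the returned dict,
    # e.g. (assuming a revlog instance `rl`):
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     print(info[b'revisionscount'], info[b'storedsize'])
    #
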
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)