revlog: remove pycompat.iteritems()...
Gregory Szorc
r49783:ceafb0f8 default
@@ -1,3309 +1,3307 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note: this must be reliably set by normal code, but test,
        debug, or performance measurement code might not set it to an
        accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
-        for flag, processor in pycompat.iteritems(
-            opts.get(b'flagprocessors', {})
-        ):
+        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
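    # Editor's note: an illustrative sketch, not part of revlog.py, showing
    # why this changeset can drop pycompat.iteritems(). On Python 3,
    # dict.items() returns a lazy view, so the two spellings iterate
    # identically (hypothetical toy processor mapping below):
    #
    #     processors = {1 << 15: lambda rl, text: (text, False)}
    #     for flag, proc in processors.items():  # was pycompat.iteritems(processors)
    #         assert callable(proc)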

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
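    # Editor's note: an illustrative sketch, not part of revlog.py. The same
    # read-vs-mmap decision written against the stdlib (hypothetical helper;
    # the method above goes through self.opener and util.mmapread instead):
    #
    #     import mmap, os
    #
    #     def read_maybe_mmap(path, threshold):
    #         with open(path, 'rb') as fp:
    #             size = os.fstat(fp.fileno()).st_size
    #             if threshold is not None and size >= threshold:
    #                 return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
    #             return fp.read()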

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

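    # Editor's note: an illustrative sketch, not part of revlog.py, of the
    # header decoding done in _loadindex(). INDEX_HEADER is a big-endian
    # 32-bit struct; the low 16 bits carry the format version and the high
    # 16 bits carry the format flags:
    #
    #     import struct
    #
    #     header = struct.unpack('>I', b'\x00\x03\x00\x01')[0]
    #     version = header & 0xFFFF  # 1, i.e. REVLOGV1
    #     flags = header & ~0xFFFF   # 0x30000: inline data + general delta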
    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket;
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
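    # Editor's note: an illustrative sketch, not part of revlog.py, of the
    # offset/flags packing described in the comment above (6 bytes of offset,
    # 2 bytes of flags in the first index-entry field):
    #
    #     offset_flags = (1234 << 16) | 0x8000  # offset 1234, one flag bit set
    #     assert offset_flags >> 16 == 1234       # what start() extracts
    #     assert offset_flags & 0xFFFF == 0x8000  # what flags() extracts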

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank
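    # Editor's note: an illustrative sketch, not part of revlog.py. The rank
    # defined in the docstring above, computed the slow way over a toy
    # rev -> parents mapping (fast_rank() only ever reads a stored value):
    #
    #     def slow_rank(parents, rev):
    #         seen, stack = set(), [rev]
    #         while stack:
    #             r = stack.pop()
    #             if r >= 0 and r not in seen:  # -1 stands in for nullrev
    #                 seen.add(r)
    #                 stack.extend(parents[r])
    #         return len(seen)  # |ancestors(rev)|, rev included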

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

926 def chainlen(self, rev):
924 def chainlen(self, rev):
927 return self._chaininfo(rev)[0]
925 return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
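
    # Illustrative sketch, not part of the original source: ``_chaininfo``
    # answers "how many deltas must be applied, and how many compressed bytes
    # must be read, to reconstruct this revision?". Hypothetical usage,
    # assuming ``rl`` is an open revlog and ``tiprev`` a valid revision:
    #
    #   chainlen, compressed = rl._chaininfo(tiprev)
    #   assert chainlen == rl.chainlen(tiprev)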

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
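
    # Hypothetical usage sketch of the contract above, assuming ``rl`` is an
    # open revlog and ``rev`` a valid revision:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   assert chain == sorted(chain)  # ascending order, base first
    #   assert not stopped             # no stoprev given, walked to the base
    #
    #   chain2, stopped2 = rl._deltachain(rev, stoprev=chain[0])
    #   assert stopped2                # the base itself is excluded this time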

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
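
    # Hypothetical usage sketch: both back-ends selected above return a lazy
    # iterable, so ancestors are only computed as the caller consumes them.
    # Assuming ``rl`` is an open revlog and ``tiprev`` a valid revision:
    #
    #   for r in rl.ancestors([tiprev], inclusive=True):
    #       ...  # yields tiprev itself too, since inclusive=True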

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
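
    # Worked example of the revset identity above (hypothetical): on a linear
    # history 0..4 with common=[node(2)] and heads=[node(4)]:
    #
    #   has      ~ {nullrev, 0, 1, 2}   # ::common as revs, wrapped in lazyset
    #   missing == [node(3), node(4)]   # (::heads) - (::common), sorted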

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
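
    # Hypothetical usage sketch: the returned object keeps its own notion of
    # the common set, so callers can feed it heads in batches and (assuming
    # the back-end's ``addbases`` method) grow the bases between calls:
    #
    #   inc = rl.incrementalmissingrevs(common=[2])
    #   missing = inc.missingancestors([4])  # e.g. [3, 4] on a linear history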

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
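
    # Hypothetical summary of the three related queries above: they compute
    # the same set and differ only in their value space.
    #
    #   findcommonmissing -> (ancestor set of common, missing node IDs)
    #   findmissingrevs   -> missing revision numbers (revs in, revs out)
    #   findmissing       -> missing node IDs (nodes in, nodes out)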

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
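
    # Worked example (hypothetical): on a linear history 0..4 with
    # roots=[node(1)] and heads=[node(3)], nodesbetween returns
    #
    #   ([node(1), node(2), node(3)],  # 1::3, topologically sorted
    #    [node(1)],                    # the reachable subset of roots
    #    [node(3)])                    # the reachable subset of heads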

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
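
    # Hypothetical trace of the pure-Python fallback above, for the linear
    # history 0 <- 1 <- 2: every rev first marks itself as a head, then each
    # rev clears the flag on its parents, leaving only childless revs set.
    # The extra trailing slot in ``ishead`` absorbs nullrev (-1) parents so
    # they never shadow a real revision.
    #
    #   after the pass: ishead == [0, 0, 1, 0]  ->  heads == [2]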

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
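
    # Why the reachableroots call above answers the question (hypothetical
    # reading): with roots=[a] and heads=[b], a non-empty result means some
    # parent path from b reaches a, i.e. a is an ancestor of b. The ``a > b``
    # fast path is sound because a parent always has a smaller revision
    # number than any of its descendants, e.g.:
    #
    #   rl.isancestorrev(2, 4)  # True on a linear history 0..4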

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))
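
    # Hypothetical usage of the resolution order above: exact matches via
    # _match (integer rev, b"<rev>", binary node, full hex) win over the
    # prefix search in _partialmatch. Assuming ``rl`` is an open revlog and
    # ``node`` a stored 20-byte node:
    #
    #   rl.lookup(0)                 # rev number -> node(0)
    #   rl.lookup(node)              # binary node -> returned as-is
    #   rl.lookup(hex(node)[:6])     # unambiguous hex prefix -> full node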

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
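
    # Hypothetical example of the contract above: the result is the shortest
    # hex prefix that is unambiguous among stored nodes and cannot be
    # mistaken for the all-'f' wdir pseudo-identifier.
    #
    #   prefix = rl.shortest(node, minlength=1)
    #   assert rl.lookup(prefix) == node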

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)
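
    # Worked arithmetic for the inline case above (hypothetical numbers):
    # with entry_size == 64 and start(r) == 100, revision r's data really
    # lives at 100 + (r + 1) * 64, because an inline revlog interleaves
    # index entries with data and r + 1 entries precede that chunk.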

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
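
    # Hypothetical reading of the pure fallback above: a full text
    # (base == rev) or a delta against nothing (base == nullrev) is a
    # level-0 snapshot; a delta against one of the revision's own parents
    # is a regular delta; and a delta against another snapshot is an
    # intermediate snapshot, hence the recursion into issnapshot(base).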

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text
1846
1844
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

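    # Illustrative sketch of the chain resolution above (hypothetical values,
    # not real revlog chunks): the first chunk of a delta chain is a full
    # text and every later chunk is a binary delta applied on top of it:
    #
    #     chunks = [b'full base text', delta1, delta2]
    #     rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])
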
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

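    # Sketch of the per-entry compression mode routing above, using the
    # constants imported at the top of this module:
    #
    #     COMP_MODE_PLAIN   -> stored as-is, no decompression needed
    #     COMP_MODE_DEFAULT -> decompress with the docket's default engine
    #     COMP_MODE_INLINE  -> the chunk embeds its own header byte;
    #                          decompress() routes on that first byte
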
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

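    # A minimal sketch of what storageutil.hashrevisionsha1 computes: the
    # SHA-1 of the two parent nodes in sorted order followed by the text.
    # `p1`, `p2` and `text` are hypothetical bytes values:
    #
    #     import hashlib
    #     s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    #     s.update(text)
    #     node = s.digest()
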
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = None
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant for revlog-v2, which is never inline and never reaches
            # this code.

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if (
                        trindex is None
                        and troffset
                        <= self.start(r) + r * self.index.entry_size
                    ):
                        trindex = r
                new_dfh.flush()

            if trindex is None:
                trindex = 0

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant for revlog-v2, which is never inline and never
                # reaches this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

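    # Illustrative usage sketch (assuming `rl` is an open revlog): keeping the
    # underlying files open across many reads avoids reopening them once per
    # revision:
    #
    #     with rl.reading():
    #         texts = [rl.revision(r) for r in rl]
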
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

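    # The (header, data) contract of compress(), sketched with hypothetical
    # input (assuming `rl` is an open revlog):
    #
    #     header, packed = rl.compress(b'some revision text')
    #     chunk = header + packed  # what actually lands in the revlog
    #     # header == b''  -> `packed` already embeds its compression header
    #     # header == b'u' -> data is stored uncompressed, marked by b'u'
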
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

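    # First-byte routing used above, sketched:
    #
    #     b'x'  -> zlib stream (zlib deflate output begins with 0x78)
    #     b'\0' -> stored raw, returned as-is
    #     b'u'  -> stored raw with an explicit marker; strip the first byte
    #     other -> look up a registered engine via _get_decompressor()
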
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._format_version == CHANGELOGV2:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case. We're only using addgroup() in the context of
                    # changegroup generation so the revision data can always
                    # be handled as raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

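    # Shape of one item consumed by addgroup(), sketched (node ids are bytes,
    # `delta` is a binary delta against `deltabase`, `flags` is an int, and
    # `sidedata` is a dict):
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
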
2711 def iscensored(self, rev):
2709 def iscensored(self, rev):
2712 """Check if a file revision is censored."""
2710 """Check if a file revision is censored."""
2713 if not self._censorable:
2711 if not self._censorable:
2714 return False
2712 return False
2715
2713
2716 return self.flags(rev) & REVIDX_ISCENSORED
2714 return self.flags(rev) & REVIDX_ISCENSORED
2717
2715
2718 def _peek_iscensored(self, baserev, delta):
2716 def _peek_iscensored(self, baserev, delta):
2719 """Quickly check if a delta produces a censored revision."""
2717 """Quickly check if a delta produces a censored revision."""
2720 if not self._censorable:
2718 if not self._censorable:
2721 return False
2719 return False
2722
2720
2723 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2721 return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )
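
    # Usage sketch (not part of the original file): strip() below is the
    # in-tree consumer of getstrippoint(); given a revlog `rl` and the first
    # stripped changelog revision `minlink`:
    #
    #     rev, brokenrevs = rl.getstrippoint(minlink)
    #
    # `rev` is the first local revision to truncate away, and `brokenrevs`
    # holds revisions below `rev` whose linkrevs point at stripped changesets
    # and therefore must be saved and re-added by the caller.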

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]
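
    # Worked example (illustrative, not part of the original file): assuming
    # the 64-byte v1 index entry size, stripping at rev == 3 truncates a
    # non-inline index at 3 * 64 == 192 bytes and the data file at
    # self.start(3). For an inline revlog, index entries and data chunks
    # share one file, so the cut-off folds both together:
    #
    #     end = self.start(3) + 3 * self.index.entry_size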

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
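
    # Interpretation sketch (not part of the original file): dd > 0 means the
    # data file holds bytes beyond what the last index entry accounts for
    # (e.g. a partially rolled-back write); di > 0 means the index size is not
    # a whole multiple of entry_size or, for inline revlogs, that index plus
    # data do not add up. verifyintegrity() below consumes this directly:
    #
    #     dd, di = self.checksize()
    #     # report non-zero dd/di as revlogproblem instances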

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        a better delta base could be chosen, the delta will be recomputed.
        This means if you are converting a non-generaldelta revlog to a
        generaldelta revlog, deltas will be recomputed if the delta's parent
        isn't a parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. When it is unset, the destination revlog's current
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
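
    # Hedged usage sketch (not part of the original file; `repo`, `src` and
    # `dst` are hypothetical names): a format upgrade typically clones each
    # revlog into a destination opened with the target format, trading delta
    # recomputation cost against fidelity via the reuse policy:
    #
    #     with repo.transaction(b'upgrade') as tr:
    #         src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
    #
    # The real in-tree consumer lives in mercurial/upgrade_utils/.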

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
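
    # Note on the flag merge in _clone() above (not part of the original
    # file): sidedata helpers return new_flags as a pair of
    # (flags_to_add, flags_to_remove) bitmasks. Since Python's `&` binds
    # tighter than `|`, the expression
    #
    #     flags = flags | new_flags[0] & ~new_flags[1]
    #
    # parses as flags | (new_flags[0] & ~new_flags[1]): the removal mask is
    # applied to the flags being added before they are OR-ed into the entry.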

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
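
    # Hypothetical consumer sketch (not part of the original file; `rl`, `ui`
    # and `state` are assumed names): `hg verify`-style reporting drains the
    # generator and inspects the revlogproblem attributes defined earlier in
    # this module:
    #
    #     for problem in rl.verifyintegrity(state):
    #         if problem.error:
    #             ui.warn(problem.error + b'\n')
    #         elif problem.warning:
    #             ui.warn(problem.warning + b'\n')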

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
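
    # Usage sketch (not part of the original file; the numbers are made up):
    # callers request only the figures they need, because some are not free.
    # trackedsize sums rawsize() over every revision and storedsize stats
    # every file returned by files():
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # e.g. {b'revisionscount': 42, b'storedsize': 16384}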

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
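
    # Note on the compression-mode decision above (a sketch, not part of the
    # original file): non-empty sidedata defaults to being stored as-is
    # (COMP_MODE_PLAIN); the compressed form is only kept when compress()
    # actually wins, i.e. it produces a strictly smaller blob that cannot be
    # mistaken for the reserved markers. When the winning blob starts with
    # the docket's default compression header, the per-chunk header can be
    # omitted on disk (COMP_MODE_DEFAULT); otherwise the engine header is
    # kept with the chunk (COMP_MODE_INLINE). Empty sidedata short-circuits
    # to COMP_MODE_INLINE with nothing written.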