##// END OF EJS Templates
revlog: unify checks for supported flag...
marmoute -
r48004:0e9105bf default
parent child Browse files
Show More
@@ -1,3220 +1,3209 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 SUPPORTED_FLAGS,
49 )
50 )
50 from .revlogutils.flagutil import (
51 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
53 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
54 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
57 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
59 )
59 from .thirdparty import attr
60 from .thirdparty import attr
60 from . import (
61 from . import (
61 ancestor,
62 ancestor,
62 dagop,
63 dagop,
63 error,
64 error,
64 mdiff,
65 mdiff,
65 policy,
66 policy,
66 pycompat,
67 pycompat,
67 templatefilters,
68 templatefilters,
68 util,
69 util,
69 )
70 )
70 from .interfaces import (
71 from .interfaces import (
71 repository,
72 repository,
72 util as interfaceutil,
73 util as interfaceutil,
73 )
74 )
74 from .revlogutils import (
75 from .revlogutils import (
75 deltas as deltautil,
76 deltas as deltautil,
76 flagutil,
77 flagutil,
77 nodemap as nodemaputil,
78 nodemap as nodemaputil,
78 revlogv0,
79 revlogv0,
79 sidedata as sidedatautil,
80 sidedata as sidedatautil,
80 )
81 )
81 from .utils import (
82 from .utils import (
82 storageutil,
83 storageutil,
83 stringutil,
84 stringutil,
84 )
85 )
85
86
# Blanked usage of all the names below to silence pyflakes "unused import"
# warnings. Extensions rely on these names being importable from this module.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
106
107
# Accelerated implementations resolved through the policy module
# (C parser, optional Rust helpers).
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')
111
112
112 # Aliased for performance.
113 # Aliased for performance.
113 _zlibdecompress = zlib.decompress
114 _zlibdecompress = zlib.decompress
114
115
115 # max size of revlog with inline data
116 # max size of revlog with inline data
116 _maxinline = 131072
117 _maxinline = 131072
117 _chunksize = 1048576
118 _chunksize = 1048576
118
119
# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for ellipsis revisions.

    Returns the text unchanged; the ``False`` tells the caller not to
    validate the hash of the returned text.
    """
    return text, False
122
123
123
124
def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor for ellipsis revisions.

    Stores the text as-is; ``False`` means the raw text does not need
    hash validation.
    """
    return text, False
126
127
127
128
def ellipsisrawprocessor(rl, text):
    """Raw-access flag processor: ellipsis raw text must not be hash-checked."""
    return False
130
131
131
132
# (read, write, raw) processor triple registered for REVIDX_ELLIPSIS.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
137
138
138
139
def offset_type(offset, type):
    """Pack a data-file offset and index flags into one integer.

    The offset occupies the high bits; the low 16 bits carry ``type``
    (revlog index flags). Raises ValueError if ``type`` contains bits
    outside the known flag set.
    """
    if type & ~flagutil.REVIDX_KNOWN_FLAGS:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
143
144
144
145
145 def _verify_revision(rl, skipflags, state, node):
146 def _verify_revision(rl, skipflags, state, node):
146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 """Verify the integrity of the given revlog ``node`` while providing a hook
147 point for extensions to influence the operation."""
148 point for extensions to influence the operation."""
148 if skipflags:
149 if skipflags:
149 state[b'skipread'].add(node)
150 state[b'skipread'].add(node)
150 else:
151 else:
151 # Side-effect: read content and verify hash.
152 # Side-effect: read content and verify hash.
152 rl.revision(node)
153 rl.revision(node)
153
154
154
155
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = (
    rustrevlog is not None
    or util.safehasattr(parsers, 'BaseIndexObject')
)
163
164
164
165
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    # NOTE: attribute order is significant — attr.s generates a positional
    # __init__ in declaration order.
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
184
185
185
186
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    # Concrete carrier for the irevisiondelta interface; field order is the
    # positional __init__ order generated by attr.s.
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)
200
201
201
202
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    # A single problem reported by revlog verification; all fields optional.
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
208
209
209
210
def parse_index_v1(data, inline):
    """Parse a version-1 revlog index with the C parser.

    Returns the (index, cache) pair produced by ``parsers.parse_index2``.
    """
    return parsers.parse_index2(data, inline)
214
215
215
216
def parse_index_v2(data, inline):
    """Parse a version-2 revlog index with the C parser.

    Returns the (index, cache) pair produced by ``parsers.parse_index2``.
    """
    return parsers.parse_index2(data, inline, revlogv2=True)
220
221
221
222
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        # Development variant of the v1 parser with persistent-nodemap
        # support; only present in recent C parser builds.
        return parsers.parse_index_devel_nodemap(data, inline)


else:
    # Callers must check for None before using this entry point.
    parse_index_v1_nodemap = None
231
232
232
233
def parse_index_v1_mixed(data, inline):
    """Parse a v1 index and wrap it in the Rust/C MixedIndex."""
    idx, chunkcache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(idx), chunkcache
236
237
237
238
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 # signed integer)
240 # signed integer)
240 _maxentrysize = 0x7FFFFFFF
241 _maxentrysize = 0x7FFFFFFF
241
242
242
243
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    # Exception class raised for flag-processing errors; extensions may
    # override this on subclasses.
    _flagserrorclass = error.RevlogError
288 def __init__(
289 def __init__(
289 self,
290 self,
290 opener,
291 opener,
291 target,
292 target,
292 radix,
293 radix,
293 postfix=None,
294 postfix=None,
294 checkambig=False,
295 checkambig=False,
295 mmaplargeindex=False,
296 mmaplargeindex=False,
296 censorable=False,
297 censorable=False,
297 upperboundcomp=None,
298 upperboundcomp=None,
298 persistentnodemap=False,
299 persistentnodemap=False,
299 concurrencychecker=None,
300 concurrencychecker=None,
300 ):
301 ):
301 """
302 """
302 create a revlog object
303 create a revlog object
303
304
304 opener is a function that abstracts the file opening operation
305 opener is a function that abstracts the file opening operation
305 and can be used to implement COW semantics or the like.
306 and can be used to implement COW semantics or the like.
306
307
307 `target`: a (KIND, ID) tuple that identify the content stored in
308 `target`: a (KIND, ID) tuple that identify the content stored in
308 this revlog. It help the rest of the code to understand what the revlog
309 this revlog. It help the rest of the code to understand what the revlog
309 is about without having to resort to heuristic and index filename
310 is about without having to resort to heuristic and index filename
310 analysis. Note: that this must be reliably be set by normal code, but
311 analysis. Note: that this must be reliably be set by normal code, but
311 that test, debug, or performance measurement code might not set this to
312 that test, debug, or performance measurement code might not set this to
312 accurate value.
313 accurate value.
313 """
314 """
314 self.upperboundcomp = upperboundcomp
315 self.upperboundcomp = upperboundcomp
315
316
316 self.radix = radix
317 self.radix = radix
317
318
318 self._indexfile = None
319 self._indexfile = None
319 self._datafile = None
320 self._datafile = None
320 self._nodemap_file = None
321 self._nodemap_file = None
321 self.postfix = postfix
322 self.postfix = postfix
322 self.opener = opener
323 self.opener = opener
323 if persistentnodemap:
324 if persistentnodemap:
324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325
326
326 assert target[0] in ALL_KINDS
327 assert target[0] in ALL_KINDS
327 assert len(target) == 2
328 assert len(target) == 2
328 self.target = target
329 self.target = target
329 # When True, indexfile is opened with checkambig=True at writing, to
330 # When True, indexfile is opened with checkambig=True at writing, to
330 # avoid file stat ambiguity.
331 # avoid file stat ambiguity.
331 self._checkambig = checkambig
332 self._checkambig = checkambig
332 self._mmaplargeindex = mmaplargeindex
333 self._mmaplargeindex = mmaplargeindex
333 self._censorable = censorable
334 self._censorable = censorable
334 # 3-tuple of (node, rev, text) for a raw revision.
335 # 3-tuple of (node, rev, text) for a raw revision.
335 self._revisioncache = None
336 self._revisioncache = None
336 # Maps rev to chain base rev.
337 # Maps rev to chain base rev.
337 self._chainbasecache = util.lrucachedict(100)
338 self._chainbasecache = util.lrucachedict(100)
338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 self._chunkcache = (0, b'')
340 self._chunkcache = (0, b'')
340 # How much data to read and cache into the raw revlog data cache.
341 # How much data to read and cache into the raw revlog data cache.
341 self._chunkcachesize = 65536
342 self._chunkcachesize = 65536
342 self._maxchainlen = None
343 self._maxchainlen = None
343 self._deltabothparents = True
344 self._deltabothparents = True
344 self.index = None
345 self.index = None
345 self._nodemap_docket = None
346 self._nodemap_docket = None
346 # Mapping of partial identifiers to full nodes.
347 # Mapping of partial identifiers to full nodes.
347 self._pcache = {}
348 self._pcache = {}
348 # Mapping of revision integer to full node.
349 # Mapping of revision integer to full node.
349 self._compengine = b'zlib'
350 self._compengine = b'zlib'
350 self._compengineopts = {}
351 self._compengineopts = {}
351 self._maxdeltachainspan = -1
352 self._maxdeltachainspan = -1
352 self._withsparseread = False
353 self._withsparseread = False
353 self._sparserevlog = False
354 self._sparserevlog = False
354 self.hassidedata = False
355 self.hassidedata = False
355 self._srdensitythreshold = 0.50
356 self._srdensitythreshold = 0.50
356 self._srmingapsize = 262144
357 self._srmingapsize = 262144
357
358
358 # Make copy of flag processors so each revlog instance can support
359 # Make copy of flag processors so each revlog instance can support
359 # custom flags.
360 # custom flags.
360 self._flagprocessors = dict(flagutil.flagprocessors)
361 self._flagprocessors = dict(flagutil.flagprocessors)
361
362
362 # 2-tuple of file handles being used for active writing.
363 # 2-tuple of file handles being used for active writing.
363 self._writinghandles = None
364 self._writinghandles = None
364 # prevent nesting of addgroup
365 # prevent nesting of addgroup
365 self._adding_group = None
366 self._adding_group = None
366
367
367 self._loadindex()
368 self._loadindex()
368
369
369 self._concurrencychecker = concurrencychecker
370 self._concurrencychecker = concurrencychecker
370
371
371 def _init_opts(self):
372 def _init_opts(self):
372 """process options (from above/config) to setup associated default revlog mode
373 """process options (from above/config) to setup associated default revlog mode
373
374
374 These values might be affected when actually reading on disk information.
375 These values might be affected when actually reading on disk information.
375
376
376 The relevant values are returned for use in _loadindex().
377 The relevant values are returned for use in _loadindex().
377
378
378 * newversionflags:
379 * newversionflags:
379 version header to use if we need to create a new revlog
380 version header to use if we need to create a new revlog
380
381
381 * mmapindexthreshold:
382 * mmapindexthreshold:
382 minimal index size for start to use mmap
383 minimal index size for start to use mmap
383
384
384 * force_nodemap:
385 * force_nodemap:
385 force the usage of a "development" version of the nodemap code
386 force the usage of a "development" version of the nodemap code
386 """
387 """
387 mmapindexthreshold = None
388 mmapindexthreshold = None
388 opts = self.opener.options
389 opts = self.opener.options
389
390
390 if b'revlogv2' in opts:
391 if b'revlogv2' in opts:
391 new_header = REVLOGV2 | FLAG_INLINE_DATA
392 new_header = REVLOGV2 | FLAG_INLINE_DATA
392 elif b'revlogv1' in opts:
393 elif b'revlogv1' in opts:
393 new_header = REVLOGV1 | FLAG_INLINE_DATA
394 new_header = REVLOGV1 | FLAG_INLINE_DATA
394 if b'generaldelta' in opts:
395 if b'generaldelta' in opts:
395 new_header |= FLAG_GENERALDELTA
396 new_header |= FLAG_GENERALDELTA
396 elif b'revlogv0' in self.opener.options:
397 elif b'revlogv0' in self.opener.options:
397 new_header = REVLOGV0
398 new_header = REVLOGV0
398 else:
399 else:
399 new_header = REVLOG_DEFAULT_VERSION
400 new_header = REVLOG_DEFAULT_VERSION
400
401
401 if b'chunkcachesize' in opts:
402 if b'chunkcachesize' in opts:
402 self._chunkcachesize = opts[b'chunkcachesize']
403 self._chunkcachesize = opts[b'chunkcachesize']
403 if b'maxchainlen' in opts:
404 if b'maxchainlen' in opts:
404 self._maxchainlen = opts[b'maxchainlen']
405 self._maxchainlen = opts[b'maxchainlen']
405 if b'deltabothparents' in opts:
406 if b'deltabothparents' in opts:
406 self._deltabothparents = opts[b'deltabothparents']
407 self._deltabothparents = opts[b'deltabothparents']
407 self._lazydelta = bool(opts.get(b'lazydelta', True))
408 self._lazydelta = bool(opts.get(b'lazydelta', True))
408 self._lazydeltabase = False
409 self._lazydeltabase = False
409 if self._lazydelta:
410 if self._lazydelta:
410 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
411 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
411 if b'compengine' in opts:
412 if b'compengine' in opts:
412 self._compengine = opts[b'compengine']
413 self._compengine = opts[b'compengine']
413 if b'zlib.level' in opts:
414 if b'zlib.level' in opts:
414 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
415 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
415 if b'zstd.level' in opts:
416 if b'zstd.level' in opts:
416 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
417 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
417 if b'maxdeltachainspan' in opts:
418 if b'maxdeltachainspan' in opts:
418 self._maxdeltachainspan = opts[b'maxdeltachainspan']
419 self._maxdeltachainspan = opts[b'maxdeltachainspan']
419 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
420 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
420 mmapindexthreshold = opts[b'mmapindexthreshold']
421 mmapindexthreshold = opts[b'mmapindexthreshold']
421 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
422 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
422 withsparseread = bool(opts.get(b'with-sparse-read', False))
423 withsparseread = bool(opts.get(b'with-sparse-read', False))
423 # sparse-revlog forces sparse-read
424 # sparse-revlog forces sparse-read
424 self._withsparseread = self._sparserevlog or withsparseread
425 self._withsparseread = self._sparserevlog or withsparseread
425 if b'sparse-read-density-threshold' in opts:
426 if b'sparse-read-density-threshold' in opts:
426 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
427 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
427 if b'sparse-read-min-gap-size' in opts:
428 if b'sparse-read-min-gap-size' in opts:
428 self._srmingapsize = opts[b'sparse-read-min-gap-size']
429 self._srmingapsize = opts[b'sparse-read-min-gap-size']
429 if opts.get(b'enableellipsis'):
430 if opts.get(b'enableellipsis'):
430 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
431 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
431
432
432 # revlog v0 doesn't have flag processors
433 # revlog v0 doesn't have flag processors
433 for flag, processor in pycompat.iteritems(
434 for flag, processor in pycompat.iteritems(
434 opts.get(b'flagprocessors', {})
435 opts.get(b'flagprocessors', {})
435 ):
436 ):
436 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
437 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
437
438
438 if self._chunkcachesize <= 0:
439 if self._chunkcachesize <= 0:
439 raise error.RevlogError(
440 raise error.RevlogError(
440 _(b'revlog chunk cache size %r is not greater than 0')
441 _(b'revlog chunk cache size %r is not greater than 0')
441 % self._chunkcachesize
442 % self._chunkcachesize
442 )
443 )
443 elif self._chunkcachesize & (self._chunkcachesize - 1):
444 elif self._chunkcachesize & (self._chunkcachesize - 1):
444 raise error.RevlogError(
445 raise error.RevlogError(
445 _(b'revlog chunk cache size %r is not a power of 2')
446 _(b'revlog chunk cache size %r is not a power of 2')
446 % self._chunkcachesize
447 % self._chunkcachesize
447 )
448 )
448 force_nodemap = opts.get(b'devel-force-nodemap', False)
449 force_nodemap = opts.get(b'devel-force-nodemap', False)
449 return new_header, mmapindexthreshold, force_nodemap
450 return new_header, mmapindexthreshold, force_nodemap
450
451
451 def _get_data(self, filepath, mmap_threshold):
452 def _get_data(self, filepath, mmap_threshold):
452 """return a file content with or without mmap
453 """return a file content with or without mmap
453
454
454 If the file is missing return the empty string"""
455 If the file is missing return the empty string"""
455 try:
456 try:
456 with self.opener(filepath) as fp:
457 with self.opener(filepath) as fp:
457 if mmap_threshold is not None:
458 if mmap_threshold is not None:
458 file_size = self.opener.fstat(fp).st_size
459 file_size = self.opener.fstat(fp).st_size
459 if file_size >= mmap_threshold:
460 if file_size >= mmap_threshold:
460 # TODO: should .close() to release resources without
461 # TODO: should .close() to release resources without
461 # relying on Python GC
462 # relying on Python GC
462 return util.buffer(util.mmapread(fp))
463 return util.buffer(util.mmapread(fp))
463 return fp.read()
464 return fp.read()
464 except IOError as inst:
465 except IOError as inst:
465 if inst.errno != errno.ENOENT:
466 if inst.errno != errno.ENOENT:
466 raise
467 raise
467 return b''
468 return b''
468
469
469 def _loadindex(self):
470 def _loadindex(self):
470
471
471 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
472 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
472
473
473 if self.postfix is None:
474 if self.postfix is None:
474 entry_point = b'%s.i' % self.radix
475 entry_point = b'%s.i' % self.radix
475 else:
476 else:
476 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
477 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
477
478
478 entry_data = b''
479 entry_data = b''
479 self._initempty = True
480 self._initempty = True
480 entry_data = self._get_data(entry_point, mmapindexthreshold)
481 entry_data = self._get_data(entry_point, mmapindexthreshold)
481 if len(entry_data) > 0:
482 if len(entry_data) > 0:
482 header = INDEX_HEADER.unpack(entry_data[:4])[0]
483 header = INDEX_HEADER.unpack(entry_data[:4])[0]
483 self._initempty = False
484 self._initempty = False
484 else:
485 else:
485 header = new_header
486 header = new_header
486
487
487 self._format_flags = header & ~0xFFFF
488 self._format_flags = header & ~0xFFFF
488 self._format_version = header & 0xFFFF
489 self._format_version = header & 0xFFFF
489
490
491 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
492 if supported_flags is None:
493 msg = _(b'unknown version (%d) in revlog %s')
494 msg %= (self._format_version, self.display_id)
495 raise error.RevlogError(msg)
496 elif self._format_flags & ~supported_flags:
497 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
498 display_flag = self._format_flags >> 16
499 msg %= (display_flag, self._format_version, self.display_id)
500 raise error.RevlogError(msg)
501
490 if self._format_version == REVLOGV0:
502 if self._format_version == REVLOGV0:
491 if self._format_flags:
492 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
493 display_flag = self._format_flags >> 16
494 msg %= (display_flag, self._format_version, self.display_id)
495 raise error.RevlogError(msg)
496
497 self._inline = False
503 self._inline = False
498 self._generaldelta = False
504 self._generaldelta = False
499
500 elif self._format_version == REVLOGV1:
505 elif self._format_version == REVLOGV1:
501 if self._format_flags & ~REVLOGV1_FLAGS:
502 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
503 display_flag = self._format_flags >> 16
504 msg %= (display_flag, self._format_version, self.display_id)
505 raise error.RevlogError(msg)
506
507 self._inline = self._format_flags & FLAG_INLINE_DATA
506 self._inline = self._format_flags & FLAG_INLINE_DATA
508 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
507 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
509
510 elif self._format_version == REVLOGV2:
508 elif self._format_version == REVLOGV2:
511 if self._format_flags & ~REVLOGV2_FLAGS:
512 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
513 display_flag = self._format_flags >> 16
514 msg %= (display_flag, self._format_version, self.display_id)
515 raise error.RevlogError(msg)
516
517 # There is a bug in the transaction handling when going from an
509 # There is a bug in the transaction handling when going from an
518 # inline revlog to a separate index and data file. Turn it off until
510 # inline revlog to a separate index and data file. Turn it off until
519 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
511 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
520 # See issue6485
512 # See issue6485
521 self._inline = False
513 self._inline = False
522 # generaldelta implied by version 2 revlogs.
514 # generaldelta implied by version 2 revlogs.
523 self._generaldelta = True
515 self._generaldelta = True
524 # revlog-v2 has built in sidedata support
516 # revlog-v2 has built in sidedata support
525 self.hassidedata = True
517 self.hassidedata = True
526
527 else:
518 else:
528 msg = _(b'unknown version (%d) in revlog %s')
519 assert False, 'unreachable'
529 msg %= (self._format_version, self.display_id)
530 raise error.RevlogError(msg)
531
520
532 index_data = entry_data
521 index_data = entry_data
533 self._indexfile = entry_point
522 self._indexfile = entry_point
534
523
535 if self.postfix is None or self.postfix == b'a':
524 if self.postfix is None or self.postfix == b'a':
536 self._datafile = b'%s.d' % self.radix
525 self._datafile = b'%s.d' % self.radix
537 else:
526 else:
538 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
527 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
539
528
540 self.nodeconstants = sha1nodeconstants
529 self.nodeconstants = sha1nodeconstants
541 self.nullid = self.nodeconstants.nullid
530 self.nullid = self.nodeconstants.nullid
542
531
543 # sparse-revlog can't be on without general-delta (issue6056)
532 # sparse-revlog can't be on without general-delta (issue6056)
544 if not self._generaldelta:
533 if not self._generaldelta:
545 self._sparserevlog = False
534 self._sparserevlog = False
546
535
547 self._storedeltachains = True
536 self._storedeltachains = True
548
537
549 devel_nodemap = (
538 devel_nodemap = (
550 self._nodemap_file
539 self._nodemap_file
551 and force_nodemap
540 and force_nodemap
552 and parse_index_v1_nodemap is not None
541 and parse_index_v1_nodemap is not None
553 )
542 )
554
543
555 use_rust_index = False
544 use_rust_index = False
556 if rustrevlog is not None:
545 if rustrevlog is not None:
557 if self._nodemap_file is not None:
546 if self._nodemap_file is not None:
558 use_rust_index = True
547 use_rust_index = True
559 else:
548 else:
560 use_rust_index = self.opener.options.get(b'rust.index')
549 use_rust_index = self.opener.options.get(b'rust.index')
561
550
562 self._parse_index = parse_index_v1
551 self._parse_index = parse_index_v1
563 if self._format_version == REVLOGV0:
552 if self._format_version == REVLOGV0:
564 self._parse_index = revlogv0.parse_index_v0
553 self._parse_index = revlogv0.parse_index_v0
565 elif self._format_version == REVLOGV2:
554 elif self._format_version == REVLOGV2:
566 self._parse_index = parse_index_v2
555 self._parse_index = parse_index_v2
567 elif devel_nodemap:
556 elif devel_nodemap:
568 self._parse_index = parse_index_v1_nodemap
557 self._parse_index = parse_index_v1_nodemap
569 elif use_rust_index:
558 elif use_rust_index:
570 self._parse_index = parse_index_v1_mixed
559 self._parse_index = parse_index_v1_mixed
571 try:
560 try:
572 d = self._parse_index(index_data, self._inline)
561 d = self._parse_index(index_data, self._inline)
573 index, _chunkcache = d
562 index, _chunkcache = d
574 use_nodemap = (
563 use_nodemap = (
575 not self._inline
564 not self._inline
576 and self._nodemap_file is not None
565 and self._nodemap_file is not None
577 and util.safehasattr(index, 'update_nodemap_data')
566 and util.safehasattr(index, 'update_nodemap_data')
578 )
567 )
579 if use_nodemap:
568 if use_nodemap:
580 nodemap_data = nodemaputil.persisted_data(self)
569 nodemap_data = nodemaputil.persisted_data(self)
581 if nodemap_data is not None:
570 if nodemap_data is not None:
582 docket = nodemap_data[0]
571 docket = nodemap_data[0]
583 if (
572 if (
584 len(d[0]) > docket.tip_rev
573 len(d[0]) > docket.tip_rev
585 and d[0][docket.tip_rev][7] == docket.tip_node
574 and d[0][docket.tip_rev][7] == docket.tip_node
586 ):
575 ):
587 # no changelog tampering
576 # no changelog tampering
588 self._nodemap_docket = docket
577 self._nodemap_docket = docket
589 index.update_nodemap_data(*nodemap_data)
578 index.update_nodemap_data(*nodemap_data)
590 except (ValueError, IndexError):
579 except (ValueError, IndexError):
591 raise error.RevlogError(
580 raise error.RevlogError(
592 _(b"index %s is corrupted") % self.display_id
581 _(b"index %s is corrupted") % self.display_id
593 )
582 )
594 self.index, self._chunkcache = d
583 self.index, self._chunkcache = d
595 if not self._chunkcache:
584 if not self._chunkcache:
596 self._chunkclear()
585 self._chunkclear()
597 # revnum -> (chain-length, sum-delta-length)
586 # revnum -> (chain-length, sum-delta-length)
598 self._chaininfocache = util.lrucachedict(500)
587 self._chaininfocache = util.lrucachedict(500)
599 # revlog header -> revlog compressor
588 # revlog header -> revlog compressor
600 self._decompressors = {}
589 self._decompressors = {}
601
590
602 @util.propertycache
591 @util.propertycache
603 def revlog_kind(self):
592 def revlog_kind(self):
604 return self.target[0]
593 return self.target[0]
605
594
606 @util.propertycache
595 @util.propertycache
607 def display_id(self):
596 def display_id(self):
608 """The public facing "ID" of the revlog that we use in message"""
597 """The public facing "ID" of the revlog that we use in message"""
609 # Maybe we should build a user facing representation of
598 # Maybe we should build a user facing representation of
610 # revlog.target instead of using `self.radix`
599 # revlog.target instead of using `self.radix`
611 return self.radix
600 return self.radix
612
601
613 @util.propertycache
602 @util.propertycache
614 def _compressor(self):
603 def _compressor(self):
615 engine = util.compengines[self._compengine]
604 engine = util.compengines[self._compengine]
616 return engine.revlogcompressor(self._compengineopts)
605 return engine.revlogcompressor(self._compengineopts)
617
606
618 def _indexfp(self):
607 def _indexfp(self):
619 """file object for the revlog's index file"""
608 """file object for the revlog's index file"""
620 return self.opener(self._indexfile, mode=b"r")
609 return self.opener(self._indexfile, mode=b"r")
621
610
622 def __index_write_fp(self):
611 def __index_write_fp(self):
623 # You should not use this directly and use `_writing` instead
612 # You should not use this directly and use `_writing` instead
624 try:
613 try:
625 f = self.opener(
614 f = self.opener(
626 self._indexfile, mode=b"r+", checkambig=self._checkambig
615 self._indexfile, mode=b"r+", checkambig=self._checkambig
627 )
616 )
628 f.seek(0, os.SEEK_END)
617 f.seek(0, os.SEEK_END)
629 return f
618 return f
630 except IOError as inst:
619 except IOError as inst:
631 if inst.errno != errno.ENOENT:
620 if inst.errno != errno.ENOENT:
632 raise
621 raise
633 return self.opener(
622 return self.opener(
634 self._indexfile, mode=b"w+", checkambig=self._checkambig
623 self._indexfile, mode=b"w+", checkambig=self._checkambig
635 )
624 )
636
625
637 def __index_new_fp(self):
626 def __index_new_fp(self):
638 # You should not use this unless you are upgrading from inline revlog
627 # You should not use this unless you are upgrading from inline revlog
639 return self.opener(
628 return self.opener(
640 self._indexfile,
629 self._indexfile,
641 mode=b"w",
630 mode=b"w",
642 checkambig=self._checkambig,
631 checkambig=self._checkambig,
643 atomictemp=True,
632 atomictemp=True,
644 )
633 )
645
634
646 def _datafp(self, mode=b'r'):
635 def _datafp(self, mode=b'r'):
647 """file object for the revlog's data file"""
636 """file object for the revlog's data file"""
648 return self.opener(self._datafile, mode=mode)
637 return self.opener(self._datafile, mode=mode)
649
638
650 @contextlib.contextmanager
639 @contextlib.contextmanager
651 def _datareadfp(self, existingfp=None):
640 def _datareadfp(self, existingfp=None):
652 """file object suitable to read data"""
641 """file object suitable to read data"""
653 # Use explicit file handle, if given.
642 # Use explicit file handle, if given.
654 if existingfp is not None:
643 if existingfp is not None:
655 yield existingfp
644 yield existingfp
656
645
657 # Use a file handle being actively used for writes, if available.
646 # Use a file handle being actively used for writes, if available.
658 # There is some danger to doing this because reads will seek the
647 # There is some danger to doing this because reads will seek the
659 # file. However, _writeentry() performs a SEEK_END before all writes,
648 # file. However, _writeentry() performs a SEEK_END before all writes,
660 # so we should be safe.
649 # so we should be safe.
661 elif self._writinghandles:
650 elif self._writinghandles:
662 if self._inline:
651 if self._inline:
663 yield self._writinghandles[0]
652 yield self._writinghandles[0]
664 else:
653 else:
665 yield self._writinghandles[1]
654 yield self._writinghandles[1]
666
655
667 # Otherwise open a new file handle.
656 # Otherwise open a new file handle.
668 else:
657 else:
669 if self._inline:
658 if self._inline:
670 func = self._indexfp
659 func = self._indexfp
671 else:
660 else:
672 func = self._datafp
661 func = self._datafp
673 with func() as fp:
662 with func() as fp:
674 yield fp
663 yield fp
675
664
676 def tiprev(self):
665 def tiprev(self):
677 return len(self.index) - 1
666 return len(self.index) - 1
678
667
679 def tip(self):
668 def tip(self):
680 return self.node(self.tiprev())
669 return self.node(self.tiprev())
681
670
682 def __contains__(self, rev):
671 def __contains__(self, rev):
683 return 0 <= rev < len(self)
672 return 0 <= rev < len(self)
684
673
685 def __len__(self):
674 def __len__(self):
686 return len(self.index)
675 return len(self.index)
687
676
688 def __iter__(self):
677 def __iter__(self):
689 return iter(pycompat.xrange(len(self)))
678 return iter(pycompat.xrange(len(self)))
690
679
691 def revs(self, start=0, stop=None):
680 def revs(self, start=0, stop=None):
692 """iterate over all rev in this revlog (from start to stop)"""
681 """iterate over all rev in this revlog (from start to stop)"""
693 return storageutil.iterrevs(len(self), start=start, stop=stop)
682 return storageutil.iterrevs(len(self), start=start, stop=stop)
694
683
695 @property
684 @property
696 def nodemap(self):
685 def nodemap(self):
697 msg = (
686 msg = (
698 b"revlog.nodemap is deprecated, "
687 b"revlog.nodemap is deprecated, "
699 b"use revlog.index.[has_node|rev|get_rev]"
688 b"use revlog.index.[has_node|rev|get_rev]"
700 )
689 )
701 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
690 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
702 return self.index.nodemap
691 return self.index.nodemap
703
692
704 @property
693 @property
705 def _nodecache(self):
694 def _nodecache(self):
706 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
695 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
707 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
696 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
708 return self.index.nodemap
697 return self.index.nodemap
709
698
710 def hasnode(self, node):
699 def hasnode(self, node):
711 try:
700 try:
712 self.rev(node)
701 self.rev(node)
713 return True
702 return True
714 except KeyError:
703 except KeyError:
715 return False
704 return False
716
705
717 def candelta(self, baserev, rev):
706 def candelta(self, baserev, rev):
718 """whether two revisions (baserev, rev) can be delta-ed or not"""
707 """whether two revisions (baserev, rev) can be delta-ed or not"""
719 # Disable delta if either rev requires a content-changing flag
708 # Disable delta if either rev requires a content-changing flag
720 # processor (ex. LFS). This is because such flag processor can alter
709 # processor (ex. LFS). This is because such flag processor can alter
721 # the rawtext content that the delta will be based on, and two clients
710 # the rawtext content that the delta will be based on, and two clients
722 # could have a same revlog node with different flags (i.e. different
711 # could have a same revlog node with different flags (i.e. different
723 # rawtext contents) and the delta could be incompatible.
712 # rawtext contents) and the delta could be incompatible.
724 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
713 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
725 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
714 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
726 ):
715 ):
727 return False
716 return False
728 return True
717 return True
729
718
730 def update_caches(self, transaction):
719 def update_caches(self, transaction):
731 if self._nodemap_file is not None:
720 if self._nodemap_file is not None:
732 if transaction is None:
721 if transaction is None:
733 nodemaputil.update_persistent_nodemap(self)
722 nodemaputil.update_persistent_nodemap(self)
734 else:
723 else:
735 nodemaputil.setup_persistent_nodemap(transaction, self)
724 nodemaputil.setup_persistent_nodemap(transaction, self)
736
725
737 def clearcaches(self):
726 def clearcaches(self):
738 self._revisioncache = None
727 self._revisioncache = None
739 self._chainbasecache.clear()
728 self._chainbasecache.clear()
740 self._chunkcache = (0, b'')
729 self._chunkcache = (0, b'')
741 self._pcache = {}
730 self._pcache = {}
742 self._nodemap_docket = None
731 self._nodemap_docket = None
743 self.index.clearcaches()
732 self.index.clearcaches()
744 # The python code is the one responsible for validating the docket, we
733 # The python code is the one responsible for validating the docket, we
745 # end up having to refresh it here.
734 # end up having to refresh it here.
746 use_nodemap = (
735 use_nodemap = (
747 not self._inline
736 not self._inline
748 and self._nodemap_file is not None
737 and self._nodemap_file is not None
749 and util.safehasattr(self.index, 'update_nodemap_data')
738 and util.safehasattr(self.index, 'update_nodemap_data')
750 )
739 )
751 if use_nodemap:
740 if use_nodemap:
752 nodemap_data = nodemaputil.persisted_data(self)
741 nodemap_data = nodemaputil.persisted_data(self)
753 if nodemap_data is not None:
742 if nodemap_data is not None:
754 self._nodemap_docket = nodemap_data[0]
743 self._nodemap_docket = nodemap_data[0]
755 self.index.update_nodemap_data(*nodemap_data)
744 self.index.update_nodemap_data(*nodemap_data)
756
745
757 def rev(self, node):
746 def rev(self, node):
758 try:
747 try:
759 return self.index.rev(node)
748 return self.index.rev(node)
760 except TypeError:
749 except TypeError:
761 raise
750 raise
762 except error.RevlogError:
751 except error.RevlogError:
763 # parsers.c radix tree lookup failed
752 # parsers.c radix tree lookup failed
764 if (
753 if (
765 node == self.nodeconstants.wdirid
754 node == self.nodeconstants.wdirid
766 or node in self.nodeconstants.wdirfilenodeids
755 or node in self.nodeconstants.wdirfilenodeids
767 ):
756 ):
768 raise error.WdirUnsupported
757 raise error.WdirUnsupported
769 raise error.LookupError(node, self.display_id, _(b'no node'))
758 raise error.LookupError(node, self.display_id, _(b'no node'))
770
759
771 # Accessors for index entries.
760 # Accessors for index entries.
772
761
773 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
762 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
774 # are flags.
763 # are flags.
775 def start(self, rev):
764 def start(self, rev):
776 return int(self.index[rev][0] >> 16)
765 return int(self.index[rev][0] >> 16)
777
766
778 def flags(self, rev):
767 def flags(self, rev):
779 return self.index[rev][0] & 0xFFFF
768 return self.index[rev][0] & 0xFFFF
780
769
781 def length(self, rev):
770 def length(self, rev):
782 return self.index[rev][1]
771 return self.index[rev][1]
783
772
784 def sidedata_length(self, rev):
773 def sidedata_length(self, rev):
785 if not self.hassidedata:
774 if not self.hassidedata:
786 return 0
775 return 0
787 return self.index[rev][9]
776 return self.index[rev][9]
788
777
789 def rawsize(self, rev):
778 def rawsize(self, rev):
790 """return the length of the uncompressed text for a given revision"""
779 """return the length of the uncompressed text for a given revision"""
791 l = self.index[rev][2]
780 l = self.index[rev][2]
792 if l >= 0:
781 if l >= 0:
793 return l
782 return l
794
783
795 t = self.rawdata(rev)
784 t = self.rawdata(rev)
796 return len(t)
785 return len(t)
797
786
798 def size(self, rev):
787 def size(self, rev):
799 """length of non-raw text (processed by a "read" flag processor)"""
788 """length of non-raw text (processed by a "read" flag processor)"""
800 # fast path: if no "read" flag processor could change the content,
789 # fast path: if no "read" flag processor could change the content,
801 # size is rawsize. note: ELLIPSIS is known to not change the content.
790 # size is rawsize. note: ELLIPSIS is known to not change the content.
802 flags = self.flags(rev)
791 flags = self.flags(rev)
803 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
792 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
804 return self.rawsize(rev)
793 return self.rawsize(rev)
805
794
806 return len(self.revision(rev, raw=False))
795 return len(self.revision(rev, raw=False))
807
796
808 def chainbase(self, rev):
797 def chainbase(self, rev):
809 base = self._chainbasecache.get(rev)
798 base = self._chainbasecache.get(rev)
810 if base is not None:
799 if base is not None:
811 return base
800 return base
812
801
813 index = self.index
802 index = self.index
814 iterrev = rev
803 iterrev = rev
815 base = index[iterrev][3]
804 base = index[iterrev][3]
816 while base != iterrev:
805 while base != iterrev:
817 iterrev = base
806 iterrev = base
818 base = index[iterrev][3]
807 base = index[iterrev][3]
819
808
820 self._chainbasecache[rev] = base
809 self._chainbasecache[rev] = base
821 return base
810 return base
822
811
823 def linkrev(self, rev):
812 def linkrev(self, rev):
824 return self.index[rev][4]
813 return self.index[rev][4]
825
814
826 def parentrevs(self, rev):
815 def parentrevs(self, rev):
827 try:
816 try:
828 entry = self.index[rev]
817 entry = self.index[rev]
829 except IndexError:
818 except IndexError:
830 if rev == wdirrev:
819 if rev == wdirrev:
831 raise error.WdirUnsupported
820 raise error.WdirUnsupported
832 raise
821 raise
833 if entry[5] == nullrev:
822 if entry[5] == nullrev:
834 return entry[6], entry[5]
823 return entry[6], entry[5]
835 else:
824 else:
836 return entry[5], entry[6]
825 return entry[5], entry[6]
837
826
838 # fast parentrevs(rev) where rev isn't filtered
827 # fast parentrevs(rev) where rev isn't filtered
839 _uncheckedparentrevs = parentrevs
828 _uncheckedparentrevs = parentrevs
840
829
841 def node(self, rev):
830 def node(self, rev):
842 try:
831 try:
843 return self.index[rev][7]
832 return self.index[rev][7]
844 except IndexError:
833 except IndexError:
845 if rev == wdirrev:
834 if rev == wdirrev:
846 raise error.WdirUnsupported
835 raise error.WdirUnsupported
847 raise
836 raise
848
837
849 # Derived from index values.
838 # Derived from index values.
850
839
851 def end(self, rev):
840 def end(self, rev):
852 return self.start(rev) + self.length(rev)
841 return self.start(rev) + self.length(rev)
853
842
854 def parents(self, node):
843 def parents(self, node):
855 i = self.index
844 i = self.index
856 d = i[self.rev(node)]
845 d = i[self.rev(node)]
857 # inline node() to avoid function call overhead
846 # inline node() to avoid function call overhead
858 if d[5] == self.nullid:
847 if d[5] == self.nullid:
859 return i[d[6]][7], i[d[5]][7]
848 return i[d[6]][7], i[d[5]][7]
860 else:
849 else:
861 return i[d[5]][7], i[d[6]][7]
850 return i[d[5]][7], i[d[6]][7]
862
851
863 def chainlen(self, rev):
852 def chainlen(self, rev):
864 return self._chaininfo(rev)[0]
853 return self._chaininfo(rev)[0]
865
854
866 def _chaininfo(self, rev):
855 def _chaininfo(self, rev):
867 chaininfocache = self._chaininfocache
856 chaininfocache = self._chaininfocache
868 if rev in chaininfocache:
857 if rev in chaininfocache:
869 return chaininfocache[rev]
858 return chaininfocache[rev]
870 index = self.index
859 index = self.index
871 generaldelta = self._generaldelta
860 generaldelta = self._generaldelta
872 iterrev = rev
861 iterrev = rev
873 e = index[iterrev]
862 e = index[iterrev]
874 clen = 0
863 clen = 0
875 compresseddeltalen = 0
864 compresseddeltalen = 0
876 while iterrev != e[3]:
865 while iterrev != e[3]:
877 clen += 1
866 clen += 1
878 compresseddeltalen += e[1]
867 compresseddeltalen += e[1]
879 if generaldelta:
868 if generaldelta:
880 iterrev = e[3]
869 iterrev = e[3]
881 else:
870 else:
882 iterrev -= 1
871 iterrev -= 1
883 if iterrev in chaininfocache:
872 if iterrev in chaininfocache:
884 t = chaininfocache[iterrev]
873 t = chaininfocache[iterrev]
885 clen += t[0]
874 clen += t[0]
886 compresseddeltalen += t[1]
875 compresseddeltalen += t[1]
887 break
876 break
888 e = index[iterrev]
877 e = index[iterrev]
889 else:
878 else:
890 # Add text length of base since decompressing that also takes
879 # Add text length of base since decompressing that also takes
891 # work. For cache hits the length is already included.
880 # work. For cache hits the length is already included.
892 compresseddeltalen += e[1]
881 compresseddeltalen += e[1]
893 r = (clen, compresseddeltalen)
882 r = (clen, compresseddeltalen)
894 chaininfocache[rev] = r
883 chaininfocache[rev] = r
895 return r
884 return r
896
885
897 def _deltachain(self, rev, stoprev=None):
886 def _deltachain(self, rev, stoprev=None):
898 """Obtain the delta chain for a revision.
887 """Obtain the delta chain for a revision.
899
888
900 ``stoprev`` specifies a revision to stop at. If not specified, we
889 ``stoprev`` specifies a revision to stop at. If not specified, we
901 stop at the base of the chain.
890 stop at the base of the chain.
902
891
903 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
892 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
904 revs in ascending order and ``stopped`` is a bool indicating whether
893 revs in ascending order and ``stopped`` is a bool indicating whether
905 ``stoprev`` was hit.
894 ``stoprev`` was hit.
906 """
895 """
907 # Try C implementation.
896 # Try C implementation.
908 try:
897 try:
909 return self.index.deltachain(rev, stoprev, self._generaldelta)
898 return self.index.deltachain(rev, stoprev, self._generaldelta)
910 except AttributeError:
899 except AttributeError:
911 pass
900 pass
912
901
913 chain = []
902 chain = []
914
903
915 # Alias to prevent attribute lookup in tight loop.
904 # Alias to prevent attribute lookup in tight loop.
916 index = self.index
905 index = self.index
917 generaldelta = self._generaldelta
906 generaldelta = self._generaldelta
918
907
919 iterrev = rev
908 iterrev = rev
920 e = index[iterrev]
909 e = index[iterrev]
921 while iterrev != e[3] and iterrev != stoprev:
910 while iterrev != e[3] and iterrev != stoprev:
922 chain.append(iterrev)
911 chain.append(iterrev)
923 if generaldelta:
912 if generaldelta:
924 iterrev = e[3]
913 iterrev = e[3]
925 else:
914 else:
926 iterrev -= 1
915 iterrev -= 1
927 e = index[iterrev]
916 e = index[iterrev]
928
917
929 if iterrev == stoprev:
918 if iterrev == stoprev:
930 stopped = True
919 stopped = True
931 else:
920 else:
932 chain.append(iterrev)
921 chain.append(iterrev)
933 stopped = False
922 stopped = False
934
923
935 chain.reverse()
924 chain.reverse()
936 return chain, stopped
925 return chain, stopped
937
926
938 def ancestors(self, revs, stoprev=0, inclusive=False):
927 def ancestors(self, revs, stoprev=0, inclusive=False):
939 """Generate the ancestors of 'revs' in reverse revision order.
928 """Generate the ancestors of 'revs' in reverse revision order.
940 Does not generate revs lower than stoprev.
929 Does not generate revs lower than stoprev.
941
930
942 See the documentation for ancestor.lazyancestors for more details."""
931 See the documentation for ancestor.lazyancestors for more details."""
943
932
944 # first, make sure start revisions aren't filtered
933 # first, make sure start revisions aren't filtered
945 revs = list(revs)
934 revs = list(revs)
946 checkrev = self.node
935 checkrev = self.node
947 for r in revs:
936 for r in revs:
948 checkrev(r)
937 checkrev(r)
949 # and we're sure ancestors aren't filtered as well
938 # and we're sure ancestors aren't filtered as well
950
939
951 if rustancestor is not None:
940 if rustancestor is not None:
952 lazyancestors = rustancestor.LazyAncestors
941 lazyancestors = rustancestor.LazyAncestors
953 arg = self.index
942 arg = self.index
954 else:
943 else:
955 lazyancestors = ancestor.lazyancestors
944 lazyancestors = ancestor.lazyancestors
956 arg = self._uncheckedparentrevs
945 arg = self._uncheckedparentrevs
957 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
946 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
958
947
959 def descendants(self, revs):
948 def descendants(self, revs):
960 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
949 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
961
950
962 def findcommonmissing(self, common=None, heads=None):
951 def findcommonmissing(self, common=None, heads=None):
963 """Return a tuple of the ancestors of common and the ancestors of heads
952 """Return a tuple of the ancestors of common and the ancestors of heads
964 that are not ancestors of common. In revset terminology, we return the
953 that are not ancestors of common. In revset terminology, we return the
965 tuple:
954 tuple:
966
955
967 ::common, (::heads) - (::common)
956 ::common, (::heads) - (::common)
968
957
969 The list is sorted by revision number, meaning it is
958 The list is sorted by revision number, meaning it is
970 topologically sorted.
959 topologically sorted.
971
960
972 'heads' and 'common' are both lists of node IDs. If heads is
961 'heads' and 'common' are both lists of node IDs. If heads is
973 not supplied, uses all of the revlog's heads. If common is not
962 not supplied, uses all of the revlog's heads. If common is not
974 supplied, uses nullid."""
963 supplied, uses nullid."""
975 if common is None:
964 if common is None:
976 common = [self.nullid]
965 common = [self.nullid]
977 if heads is None:
966 if heads is None:
978 heads = self.heads()
967 heads = self.heads()
979
968
980 common = [self.rev(n) for n in common]
969 common = [self.rev(n) for n in common]
981 heads = [self.rev(n) for n in heads]
970 heads = [self.rev(n) for n in heads]
982
971
983 # we want the ancestors, but inclusive
972 # we want the ancestors, but inclusive
984 class lazyset(object):
973 class lazyset(object):
985 def __init__(self, lazyvalues):
974 def __init__(self, lazyvalues):
986 self.addedvalues = set()
975 self.addedvalues = set()
987 self.lazyvalues = lazyvalues
976 self.lazyvalues = lazyvalues
988
977
989 def __contains__(self, value):
978 def __contains__(self, value):
990 return value in self.addedvalues or value in self.lazyvalues
979 return value in self.addedvalues or value in self.lazyvalues
991
980
992 def __iter__(self):
981 def __iter__(self):
993 added = self.addedvalues
982 added = self.addedvalues
994 for r in added:
983 for r in added:
995 yield r
984 yield r
996 for r in self.lazyvalues:
985 for r in self.lazyvalues:
997 if not r in added:
986 if not r in added:
998 yield r
987 yield r
999
988
1000 def add(self, value):
989 def add(self, value):
1001 self.addedvalues.add(value)
990 self.addedvalues.add(value)
1002
991
1003 def update(self, values):
992 def update(self, values):
1004 self.addedvalues.update(values)
993 self.addedvalues.update(values)
1005
994
1006 has = lazyset(self.ancestors(common))
995 has = lazyset(self.ancestors(common))
1007 has.add(nullrev)
996 has.add(nullrev)
1008 has.update(common)
997 has.update(common)
1009
998
1010 # take all ancestors from heads that aren't in has
999 # take all ancestors from heads that aren't in has
1011 missing = set()
1000 missing = set()
1012 visit = collections.deque(r for r in heads if r not in has)
1001 visit = collections.deque(r for r in heads if r not in has)
1013 while visit:
1002 while visit:
1014 r = visit.popleft()
1003 r = visit.popleft()
1015 if r in missing:
1004 if r in missing:
1016 continue
1005 continue
1017 else:
1006 else:
1018 missing.add(r)
1007 missing.add(r)
1019 for p in self.parentrevs(r):
1008 for p in self.parentrevs(r):
1020 if p not in has:
1009 if p not in has:
1021 visit.append(p)
1010 visit.append(p)
1022 missing = list(missing)
1011 missing = list(missing)
1023 missing.sort()
1012 missing.sort()
1024 return has, [self.node(miss) for miss in missing]
1013 return has, [self.node(miss) for miss in missing]
1025
1014
1026 def incrementalmissingrevs(self, common=None):
1015 def incrementalmissingrevs(self, common=None):
1027 """Return an object that can be used to incrementally compute the
1016 """Return an object that can be used to incrementally compute the
1028 revision numbers of the ancestors of arbitrary sets that are not
1017 revision numbers of the ancestors of arbitrary sets that are not
1029 ancestors of common. This is an ancestor.incrementalmissingancestors
1018 ancestors of common. This is an ancestor.incrementalmissingancestors
1030 object.
1019 object.
1031
1020
1032 'common' is a list of revision numbers. If common is not supplied, uses
1021 'common' is a list of revision numbers. If common is not supplied, uses
1033 nullrev.
1022 nullrev.
1034 """
1023 """
1035 if common is None:
1024 if common is None:
1036 common = [nullrev]
1025 common = [nullrev]
1037
1026
1038 if rustancestor is not None:
1027 if rustancestor is not None:
1039 return rustancestor.MissingAncestors(self.index, common)
1028 return rustancestor.MissingAncestors(self.index, common)
1040 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1029 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1041
1030
1042 def findmissingrevs(self, common=None, heads=None):
1031 def findmissingrevs(self, common=None, heads=None):
1043 """Return the revision numbers of the ancestors of heads that
1032 """Return the revision numbers of the ancestors of heads that
1044 are not ancestors of common.
1033 are not ancestors of common.
1045
1034
1046 More specifically, return a list of revision numbers corresponding to
1035 More specifically, return a list of revision numbers corresponding to
1047 nodes N such that every N satisfies the following constraints:
1036 nodes N such that every N satisfies the following constraints:
1048
1037
1049 1. N is an ancestor of some node in 'heads'
1038 1. N is an ancestor of some node in 'heads'
1050 2. N is not an ancestor of any node in 'common'
1039 2. N is not an ancestor of any node in 'common'
1051
1040
1052 The list is sorted by revision number, meaning it is
1041 The list is sorted by revision number, meaning it is
1053 topologically sorted.
1042 topologically sorted.
1054
1043
1055 'heads' and 'common' are both lists of revision numbers. If heads is
1044 'heads' and 'common' are both lists of revision numbers. If heads is
1056 not supplied, uses all of the revlog's heads. If common is not
1045 not supplied, uses all of the revlog's heads. If common is not
1057 supplied, uses nullid."""
1046 supplied, uses nullid."""
1058 if common is None:
1047 if common is None:
1059 common = [nullrev]
1048 common = [nullrev]
1060 if heads is None:
1049 if heads is None:
1061 heads = self.headrevs()
1050 heads = self.headrevs()
1062
1051
1063 inc = self.incrementalmissingrevs(common=common)
1052 inc = self.incrementalmissingrevs(common=common)
1064 return inc.missingancestors(heads)
1053 return inc.missingancestors(heads)
1065
1054
1066 def findmissing(self, common=None, heads=None):
1055 def findmissing(self, common=None, heads=None):
1067 """Return the ancestors of heads that are not ancestors of common.
1056 """Return the ancestors of heads that are not ancestors of common.
1068
1057
1069 More specifically, return a list of nodes N such that every N
1058 More specifically, return a list of nodes N such that every N
1070 satisfies the following constraints:
1059 satisfies the following constraints:
1071
1060
1072 1. N is an ancestor of some node in 'heads'
1061 1. N is an ancestor of some node in 'heads'
1073 2. N is not an ancestor of any node in 'common'
1062 2. N is not an ancestor of any node in 'common'
1074
1063
1075 The list is sorted by revision number, meaning it is
1064 The list is sorted by revision number, meaning it is
1076 topologically sorted.
1065 topologically sorted.
1077
1066
1078 'heads' and 'common' are both lists of node IDs. If heads is
1067 'heads' and 'common' are both lists of node IDs. If heads is
1079 not supplied, uses all of the revlog's heads. If common is not
1068 not supplied, uses all of the revlog's heads. If common is not
1080 supplied, uses nullid."""
1069 supplied, uses nullid."""
1081 if common is None:
1070 if common is None:
1082 common = [self.nullid]
1071 common = [self.nullid]
1083 if heads is None:
1072 if heads is None:
1084 heads = self.heads()
1073 heads = self.heads()
1085
1074
1086 common = [self.rev(n) for n in common]
1075 common = [self.rev(n) for n in common]
1087 heads = [self.rev(n) for n in heads]
1076 heads = [self.rev(n) for n in heads]
1088
1077
1089 inc = self.incrementalmissingrevs(common=common)
1078 inc = self.incrementalmissingrevs(common=common)
1090 return [self.node(r) for r in inc.missingancestors(heads)]
1079 return [self.node(r) for r in inc.missingancestors(heads)]
1091
1080
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        # value returned whenever the requested intersection is empty
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # keep only the heads that were actually reached from the roots
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
1251
1240
1252 def headrevs(self, revs=None):
1241 def headrevs(self, revs=None):
1253 if revs is None:
1242 if revs is None:
1254 try:
1243 try:
1255 return self.index.headrevs()
1244 return self.index.headrevs()
1256 except AttributeError:
1245 except AttributeError:
1257 return self._headrevs()
1246 return self._headrevs()
1258 if rustdagop is not None:
1247 if rustdagop is not None:
1259 return rustdagop.headrevs(self.index, revs)
1248 return rustdagop.headrevs(self.index, revs)
1260 return dagop.headrevs(revs, self._uncheckedparentrevs)
1249 return dagop.headrevs(revs, self._uncheckedparentrevs)
1261
1250
1262 def computephases(self, roots):
1251 def computephases(self, roots):
1263 return self.index.computephasesmapsets(roots)
1252 return self.index.computephasesmapsets(roots)
1264
1253
1265 def _headrevs(self):
1254 def _headrevs(self):
1266 count = len(self)
1255 count = len(self)
1267 if not count:
1256 if not count:
1268 return [nullrev]
1257 return [nullrev]
1269 # we won't iter over filtered rev so nobody is a head at start
1258 # we won't iter over filtered rev so nobody is a head at start
1270 ishead = [0] * (count + 1)
1259 ishead = [0] * (count + 1)
1271 index = self.index
1260 index = self.index
1272 for r in self:
1261 for r in self:
1273 ishead[r] = 1 # I may be an head
1262 ishead[r] = 1 # I may be an head
1274 e = index[r]
1263 e = index[r]
1275 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1264 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1276 return [r for r, val in enumerate(ishead) if val]
1265 return [r for r, val in enumerate(ishead) if val]
1277
1266
1278 def heads(self, start=None, stop=None):
1267 def heads(self, start=None, stop=None):
1279 """return the list of all nodes that have no children
1268 """return the list of all nodes that have no children
1280
1269
1281 if start is specified, only heads that are descendants of
1270 if start is specified, only heads that are descendants of
1282 start will be returned
1271 start will be returned
1283 if stop is specified, it will consider all the revs from stop
1272 if stop is specified, it will consider all the revs from stop
1284 as if they had no children
1273 as if they had no children
1285 """
1274 """
1286 if start is None and stop is None:
1275 if start is None and stop is None:
1287 if not len(self):
1276 if not len(self):
1288 return [self.nullid]
1277 return [self.nullid]
1289 return [self.node(r) for r in self.headrevs()]
1278 return [self.node(r) for r in self.headrevs()]
1290
1279
1291 if start is None:
1280 if start is None:
1292 start = nullrev
1281 start = nullrev
1293 else:
1282 else:
1294 start = self.rev(start)
1283 start = self.rev(start)
1295
1284
1296 stoprevs = {self.rev(n) for n in stop or []}
1285 stoprevs = {self.rev(n) for n in stop or []}
1297
1286
1298 revs = dagop.headrevssubset(
1287 revs = dagop.headrevssubset(
1299 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1288 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1300 )
1289 )
1301
1290
1302 return [self.node(rev) for rev in revs]
1291 return [self.node(rev) for rev in revs]
1303
1292
1304 def children(self, node):
1293 def children(self, node):
1305 """find the children of a given node"""
1294 """find the children of a given node"""
1306 c = []
1295 c = []
1307 p = self.rev(node)
1296 p = self.rev(node)
1308 for r in self.revs(start=p + 1):
1297 for r in self.revs(start=p + 1):
1309 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1298 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1310 if prevs:
1299 if prevs:
1311 for pr in prevs:
1300 for pr in prevs:
1312 if pr == p:
1301 if pr == p:
1313 c.append(self.node(r))
1302 c.append(self.node(r))
1314 elif p == nullrev:
1303 elif p == nullrev:
1315 c.append(self.node(r))
1304 c.append(self.node(r))
1316 return c
1305 return c
1317
1306
1318 def commonancestorsheads(self, a, b):
1307 def commonancestorsheads(self, a, b):
1319 """calculate all the heads of the common ancestors of nodes a and b"""
1308 """calculate all the heads of the common ancestors of nodes a and b"""
1320 a, b = self.rev(a), self.rev(b)
1309 a, b = self.rev(a), self.rev(b)
1321 ancs = self._commonancestorsheads(a, b)
1310 ancs = self._commonancestorsheads(a, b)
1322 return pycompat.maplist(self.node, ancs)
1311 return pycompat.maplist(self.node, ancs)
1323
1312
1324 def _commonancestorsheads(self, *revs):
1313 def _commonancestorsheads(self, *revs):
1325 """calculate all the heads of the common ancestors of revs"""
1314 """calculate all the heads of the common ancestors of revs"""
1326 try:
1315 try:
1327 ancs = self.index.commonancestorsheads(*revs)
1316 ancs = self.index.commonancestorsheads(*revs)
1328 except (AttributeError, OverflowError): # C implementation failed
1317 except (AttributeError, OverflowError): # C implementation failed
1329 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1318 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1330 return ancs
1319 return ancs
1331
1320
1332 def isancestor(self, a, b):
1321 def isancestor(self, a, b):
1333 """return True if node a is an ancestor of node b
1322 """return True if node a is an ancestor of node b
1334
1323
1335 A revision is considered an ancestor of itself."""
1324 A revision is considered an ancestor of itself."""
1336 a, b = self.rev(a), self.rev(b)
1325 a, b = self.rev(a), self.rev(b)
1337 return self.isancestorrev(a, b)
1326 return self.isancestorrev(a, b)
1338
1327
1339 def isancestorrev(self, a, b):
1328 def isancestorrev(self, a, b):
1340 """return True if revision a is an ancestor of revision b
1329 """return True if revision a is an ancestor of revision b
1341
1330
1342 A revision is considered an ancestor of itself.
1331 A revision is considered an ancestor of itself.
1343
1332
1344 The implementation of this is trivial but the use of
1333 The implementation of this is trivial but the use of
1345 reachableroots is not."""
1334 reachableroots is not."""
1346 if a == nullrev:
1335 if a == nullrev:
1347 return True
1336 return True
1348 elif a == b:
1337 elif a == b:
1349 return True
1338 return True
1350 elif a > b:
1339 elif a > b:
1351 return False
1340 return False
1352 return bool(self.reachableroots(a, [b], [a], includepath=False))
1341 return bool(self.reachableroots(a, [b], [a], includepath=False))
1353
1342
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            # native index implementations provide an optimized version
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            # pure-python index: fall back to the dagop implementation
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )
1366
1355
1367 def ancestor(self, a, b):
1356 def ancestor(self, a, b):
1368 """calculate the "best" common ancestor of nodes a and b"""
1357 """calculate the "best" common ancestor of nodes a and b"""
1369
1358
1370 a, b = self.rev(a), self.rev(b)
1359 a, b = self.rev(a), self.rev(b)
1371 try:
1360 try:
1372 ancs = self.index.ancestors(a, b)
1361 ancs = self.index.ancestors(a, b)
1373 except (AttributeError, OverflowError):
1362 except (AttributeError, OverflowError):
1374 ancs = ancestor.ancestors(self.parentrevs, a, b)
1363 ancs = ancestor.ancestors(self.parentrevs, a, b)
1375 if ancs:
1364 if ancs:
1376 # choose a consistent winner when there's a tie
1365 # choose a consistent winner when there's a tie
1377 return min(map(self.node, ancs))
1366 return min(map(self.node, ancs))
1378 return self.nullid
1367 return self.nullid
1379
1368
    def _match(self, id):
        """Try to resolve `id` as an exact revision or node reference.

        Accepts an integer revision, a binary node, a stringified
        (possibly negative) revision number, or a full hex nodeid.
        Implicitly returns None when nothing matches, so callers can
        fall back to prefix matching.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                # reject anything that doesn't round-trip (e.g. b"01")
                raise ValueError
            if rev < 0:
                # negative revs count from the end
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
1413
1402
    def _partialmatch(self, id):
        """Resolve a hex nodeid prefix `id` to a unique binary node.

        Returns None when there is no match; raises
        AmbiguousPrefixLookupError for multiple matches and
        WdirUnsupported when the prefix can only identify the working
        directory pseudo-node.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            # previously resolved prefix
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                # candidates whose binary form starts with the even prefix
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                # narrow to full-hex matches of the (possibly odd) prefix
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1466
1455
1467 def lookup(self, id):
1456 def lookup(self, id):
1468 """locate a node based on:
1457 """locate a node based on:
1469 - revision number or str(revision number)
1458 - revision number or str(revision number)
1470 - nodeid or subset of hex nodeid
1459 - nodeid or subset of hex nodeid
1471 """
1460 """
1472 n = self._match(id)
1461 n = self._match(id)
1473 if n is not None:
1462 if n is not None:
1474 return n
1463 return n
1475 n = self._partialmatch(id)
1464 n = self._partialmatch(id)
1476 if n:
1465 if n:
1477 return n
1466 return n
1478
1467
1479 raise error.LookupError(id, self.display_id, _(b'no match found'))
1468 raise error.LookupError(id, self.display_id, _(b'no match found'))
1480
1469
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            # True if `prefix` unambiguously resolves to a known node
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            # an all-'f' prefix could also denote the wdir pseudo-node
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            # unfiltered: the index may compute the length directly
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            # wdir itself: return the first resolvable prefix as-is
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
1531
1520
1532 def cmp(self, node, text):
1521 def cmp(self, node, text):
1533 """compare text with a given file revision
1522 """compare text with a given file revision
1534
1523
1535 returns True if text is different than what is stored.
1524 returns True if text is different than what is stored.
1536 """
1525 """
1537 p1, p2 = self.parents(node)
1526 p1, p2 = self.parents(node)
1538 return storageutil.hashrevisionsha1(text, p1, p2) != node
1527 return storageutil.hashrevisionsha1(text, p1, p2) != node
1539
1528
1540 def _cachesegment(self, offset, data):
1529 def _cachesegment(self, offset, data):
1541 """Add a segment to the revlog cache.
1530 """Add a segment to the revlog cache.
1542
1531
1543 Accepts an absolute offset and the data that is at that location.
1532 Accepts an absolute offset and the data that is at that location.
1544 """
1533 """
1545 o, d = self._chunkcache
1534 o, d = self._chunkcache
1546 # try to add to existing cache
1535 # try to add to existing cache
1547 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1536 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1548 self._chunkcache = o, d + data
1537 self._chunkcache = o, d + data
1549 else:
1538 else:
1550 self._chunkcache = offset, data
1539 self._chunkcache = offset, data
1551
1540
1552 def _readsegment(self, offset, length, df=None):
1541 def _readsegment(self, offset, length, df=None):
1553 """Load a segment of raw data from the revlog.
1542 """Load a segment of raw data from the revlog.
1554
1543
1555 Accepts an absolute offset, length to read, and an optional existing
1544 Accepts an absolute offset, length to read, and an optional existing
1556 file handle to read from.
1545 file handle to read from.
1557
1546
1558 If an existing file handle is passed, it will be seeked and the
1547 If an existing file handle is passed, it will be seeked and the
1559 original seek position will NOT be restored.
1548 original seek position will NOT be restored.
1560
1549
1561 Returns a str or buffer of raw byte data.
1550 Returns a str or buffer of raw byte data.
1562
1551
1563 Raises if the requested number of bytes could not be read.
1552 Raises if the requested number of bytes could not be read.
1564 """
1553 """
1565 # Cache data both forward and backward around the requested
1554 # Cache data both forward and backward around the requested
1566 # data, in a fixed size window. This helps speed up operations
1555 # data, in a fixed size window. This helps speed up operations
1567 # involving reading the revlog backwards.
1556 # involving reading the revlog backwards.
1568 cachesize = self._chunkcachesize
1557 cachesize = self._chunkcachesize
1569 realoffset = offset & ~(cachesize - 1)
1558 realoffset = offset & ~(cachesize - 1)
1570 reallength = (
1559 reallength = (
1571 (offset + length + cachesize) & ~(cachesize - 1)
1560 (offset + length + cachesize) & ~(cachesize - 1)
1572 ) - realoffset
1561 ) - realoffset
1573 with self._datareadfp(df) as df:
1562 with self._datareadfp(df) as df:
1574 df.seek(realoffset)
1563 df.seek(realoffset)
1575 d = df.read(reallength)
1564 d = df.read(reallength)
1576
1565
1577 self._cachesegment(realoffset, d)
1566 self._cachesegment(realoffset, d)
1578 if offset != realoffset or reallength != length:
1567 if offset != realoffset or reallength != length:
1579 startoffset = offset - realoffset
1568 startoffset = offset - realoffset
1580 if len(d) - startoffset < length:
1569 if len(d) - startoffset < length:
1581 raise error.RevlogError(
1570 raise error.RevlogError(
1582 _(
1571 _(
1583 b'partial read of revlog %s; expected %d bytes from '
1572 b'partial read of revlog %s; expected %d bytes from '
1584 b'offset %d, got %d'
1573 b'offset %d, got %d'
1585 )
1574 )
1586 % (
1575 % (
1587 self._indexfile if self._inline else self._datafile,
1576 self._indexfile if self._inline else self._datafile,
1588 length,
1577 length,
1589 offset,
1578 offset,
1590 len(d) - startoffset,
1579 len(d) - startoffset,
1591 )
1580 )
1592 )
1581 )
1593
1582
1594 return util.buffer(d, startoffset, length)
1583 return util.buffer(d, startoffset, length)
1595
1584
1596 if len(d) < length:
1585 if len(d) < length:
1597 raise error.RevlogError(
1586 raise error.RevlogError(
1598 _(
1587 _(
1599 b'partial read of revlog %s; expected %d bytes from offset '
1588 b'partial read of revlog %s; expected %d bytes from offset '
1600 b'%d, got %d'
1589 b'%d, got %d'
1601 )
1590 )
1602 % (
1591 % (
1603 self._indexfile if self._inline else self._datafile,
1592 self._indexfile if self._inline else self._datafile,
1604 length,
1593 length,
1605 offset,
1594 offset,
1606 len(d),
1595 len(d),
1607 )
1596 )
1608 )
1597 )
1609
1598
1610 return d
1599 return d
1611
1600
1612 def _getsegment(self, offset, length, df=None):
1601 def _getsegment(self, offset, length, df=None):
1613 """Obtain a segment of raw data from the revlog.
1602 """Obtain a segment of raw data from the revlog.
1614
1603
1615 Accepts an absolute offset, length of bytes to obtain, and an
1604 Accepts an absolute offset, length of bytes to obtain, and an
1616 optional file handle to the already-opened revlog. If the file
1605 optional file handle to the already-opened revlog. If the file
1617 handle is used, it's original seek position will not be preserved.
1606 handle is used, it's original seek position will not be preserved.
1618
1607
1619 Requests for data may be returned from a cache.
1608 Requests for data may be returned from a cache.
1620
1609
1621 Returns a str or a buffer instance of raw byte data.
1610 Returns a str or a buffer instance of raw byte data.
1622 """
1611 """
1623 o, d = self._chunkcache
1612 o, d = self._chunkcache
1624 l = len(d)
1613 l = len(d)
1625
1614
1626 # is it in the cache?
1615 # is it in the cache?
1627 cachestart = offset - o
1616 cachestart = offset - o
1628 cacheend = cachestart + length
1617 cacheend = cachestart + length
1629 if cachestart >= 0 and cacheend <= l:
1618 if cachestart >= 0 and cacheend <= l:
1630 if cachestart == 0 and cacheend == l:
1619 if cachestart == 0 and cacheend == l:
1631 return d # avoid a copy
1620 return d # avoid a copy
1632 return util.buffer(d, cachestart, cacheend - cachestart)
1621 return util.buffer(d, cachestart, cacheend - cachestart)
1633
1622
1634 return self._readsegment(offset, length, df=df)
1623 return self._readsegment(offset, length, df=df)
1635
1624
1636 def _getsegmentforrevs(self, startrev, endrev, df=None):
1625 def _getsegmentforrevs(self, startrev, endrev, df=None):
1637 """Obtain a segment of raw data corresponding to a range of revisions.
1626 """Obtain a segment of raw data corresponding to a range of revisions.
1638
1627
1639 Accepts the start and end revisions and an optional already-open
1628 Accepts the start and end revisions and an optional already-open
1640 file handle to be used for reading. If the file handle is read, its
1629 file handle to be used for reading. If the file handle is read, its
1641 seek position will not be preserved.
1630 seek position will not be preserved.
1642
1631
1643 Requests for data may be satisfied by a cache.
1632 Requests for data may be satisfied by a cache.
1644
1633
1645 Returns a 2-tuple of (offset, data) for the requested range of
1634 Returns a 2-tuple of (offset, data) for the requested range of
1646 revisions. Offset is the integer offset from the beginning of the
1635 revisions. Offset is the integer offset from the beginning of the
1647 revlog and data is a str or buffer of the raw byte data.
1636 revlog and data is a str or buffer of the raw byte data.
1648
1637
1649 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1638 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1650 to determine where each revision's data begins and ends.
1639 to determine where each revision's data begins and ends.
1651 """
1640 """
1652 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1641 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1653 # (functions are expensive).
1642 # (functions are expensive).
1654 index = self.index
1643 index = self.index
1655 istart = index[startrev]
1644 istart = index[startrev]
1656 start = int(istart[0] >> 16)
1645 start = int(istart[0] >> 16)
1657 if startrev == endrev:
1646 if startrev == endrev:
1658 end = start + istart[1]
1647 end = start + istart[1]
1659 else:
1648 else:
1660 iend = index[endrev]
1649 iend = index[endrev]
1661 end = int(iend[0] >> 16) + iend[1]
1650 end = int(iend[0] >> 16) + iend[1]
1662
1651
1663 if self._inline:
1652 if self._inline:
1664 start += (startrev + 1) * self.index.entry_size
1653 start += (startrev + 1) * self.index.entry_size
1665 end += (endrev + 1) * self.index.entry_size
1654 end += (endrev + 1) * self.index.entry_size
1666 length = end - start
1655 length = end - start
1667
1656
1668 return start, self._getsegment(start, length, df=df)
1657 return start, self._getsegment(start, length, df=df)
1669
1658
1670 def _chunk(self, rev, df=None):
1659 def _chunk(self, rev, df=None):
1671 """Obtain a single decompressed chunk for a revision.
1660 """Obtain a single decompressed chunk for a revision.
1672
1661
1673 Accepts an integer revision and an optional already-open file handle
1662 Accepts an integer revision and an optional already-open file handle
1674 to be used for reading. If used, the seek position of the file will not
1663 to be used for reading. If used, the seek position of the file will not
1675 be preserved.
1664 be preserved.
1676
1665
1677 Returns a str holding uncompressed data for the requested revision.
1666 Returns a str holding uncompressed data for the requested revision.
1678 """
1667 """
1679 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1668 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1680
1669
1681 def _chunks(self, revs, df=None, targetsize=None):
1670 def _chunks(self, revs, df=None, targetsize=None):
1682 """Obtain decompressed chunks for the specified revisions.
1671 """Obtain decompressed chunks for the specified revisions.
1683
1672
1684 Accepts an iterable of numeric revisions that are assumed to be in
1673 Accepts an iterable of numeric revisions that are assumed to be in
1685 ascending order. Also accepts an optional already-open file handle
1674 ascending order. Also accepts an optional already-open file handle
1686 to be used for reading. If used, the seek position of the file will
1675 to be used for reading. If used, the seek position of the file will
1687 not be preserved.
1676 not be preserved.
1688
1677
1689 This function is similar to calling ``self._chunk()`` multiple times,
1678 This function is similar to calling ``self._chunk()`` multiple times,
1690 but is faster.
1679 but is faster.
1691
1680
1692 Returns a list with decompressed data for each requested revision.
1681 Returns a list with decompressed data for each requested revision.
1693 """
1682 """
1694 if not revs:
1683 if not revs:
1695 return []
1684 return []
1696 start = self.start
1685 start = self.start
1697 length = self.length
1686 length = self.length
1698 inline = self._inline
1687 inline = self._inline
1699 iosize = self.index.entry_size
1688 iosize = self.index.entry_size
1700 buffer = util.buffer
1689 buffer = util.buffer
1701
1690
1702 l = []
1691 l = []
1703 ladd = l.append
1692 ladd = l.append
1704
1693
1705 if not self._withsparseread:
1694 if not self._withsparseread:
1706 slicedchunks = (revs,)
1695 slicedchunks = (revs,)
1707 else:
1696 else:
1708 slicedchunks = deltautil.slicechunk(
1697 slicedchunks = deltautil.slicechunk(
1709 self, revs, targetsize=targetsize
1698 self, revs, targetsize=targetsize
1710 )
1699 )
1711
1700
1712 for revschunk in slicedchunks:
1701 for revschunk in slicedchunks:
1713 firstrev = revschunk[0]
1702 firstrev = revschunk[0]
1714 # Skip trailing revisions with empty diff
1703 # Skip trailing revisions with empty diff
1715 for lastrev in revschunk[::-1]:
1704 for lastrev in revschunk[::-1]:
1716 if length(lastrev) != 0:
1705 if length(lastrev) != 0:
1717 break
1706 break
1718
1707
1719 try:
1708 try:
1720 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1709 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1721 except OverflowError:
1710 except OverflowError:
1722 # issue4215 - we can't cache a run of chunks greater than
1711 # issue4215 - we can't cache a run of chunks greater than
1723 # 2G on Windows
1712 # 2G on Windows
1724 return [self._chunk(rev, df=df) for rev in revschunk]
1713 return [self._chunk(rev, df=df) for rev in revschunk]
1725
1714
1726 decomp = self.decompress
1715 decomp = self.decompress
1727 for rev in revschunk:
1716 for rev in revschunk:
1728 chunkstart = start(rev)
1717 chunkstart = start(rev)
1729 if inline:
1718 if inline:
1730 chunkstart += (rev + 1) * iosize
1719 chunkstart += (rev + 1) * iosize
1731 chunklength = length(rev)
1720 chunklength = length(rev)
1732 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1721 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1733
1722
1734 return l
1723 return l
1735
1724
1736 def _chunkclear(self):
1725 def _chunkclear(self):
1737 """Clear the raw chunk cache."""
1726 """Clear the raw chunk cache."""
1738 self._chunkcache = (0, b'')
1727 self._chunkcache = (0, b'')
1739
1728
1740 def deltaparent(self, rev):
1729 def deltaparent(self, rev):
1741 """return deltaparent of the given revision"""
1730 """return deltaparent of the given revision"""
1742 base = self.index[rev][3]
1731 base = self.index[rev][3]
1743 if base == rev:
1732 if base == rev:
1744 return nullrev
1733 return nullrev
1745 elif self._generaldelta:
1734 elif self._generaldelta:
1746 return base
1735 return base
1747 else:
1736 else:
1748 return rev - 1
1737 return rev - 1
1749
1738
1750 def issnapshot(self, rev):
1739 def issnapshot(self, rev):
1751 """tells whether rev is a snapshot"""
1740 """tells whether rev is a snapshot"""
1752 if not self._sparserevlog:
1741 if not self._sparserevlog:
1753 return self.deltaparent(rev) == nullrev
1742 return self.deltaparent(rev) == nullrev
1754 elif util.safehasattr(self.index, b'issnapshot'):
1743 elif util.safehasattr(self.index, b'issnapshot'):
1755 # directly assign the method to cache the testing and access
1744 # directly assign the method to cache the testing and access
1756 self.issnapshot = self.index.issnapshot
1745 self.issnapshot = self.index.issnapshot
1757 return self.issnapshot(rev)
1746 return self.issnapshot(rev)
1758 if rev == nullrev:
1747 if rev == nullrev:
1759 return True
1748 return True
1760 entry = self.index[rev]
1749 entry = self.index[rev]
1761 base = entry[3]
1750 base = entry[3]
1762 if base == rev:
1751 if base == rev:
1763 return True
1752 return True
1764 if base == nullrev:
1753 if base == nullrev:
1765 return True
1754 return True
1766 p1 = entry[5]
1755 p1 = entry[5]
1767 p2 = entry[6]
1756 p2 = entry[6]
1768 if base == p1 or base == p2:
1757 if base == p1 or base == p2:
1769 return False
1758 return False
1770 return self.issnapshot(base)
1759 return self.issnapshot(base)
1771
1760
1772 def snapshotdepth(self, rev):
1761 def snapshotdepth(self, rev):
1773 """number of snapshot in the chain before this one"""
1762 """number of snapshot in the chain before this one"""
1774 if not self.issnapshot(rev):
1763 if not self.issnapshot(rev):
1775 raise error.ProgrammingError(b'revision %d not a snapshot')
1764 raise error.ProgrammingError(b'revision %d not a snapshot')
1776 return len(self._deltachain(rev)[0]) - 1
1765 return len(self._deltachain(rev)[0]) - 1
1777
1766
1778 def revdiff(self, rev1, rev2):
1767 def revdiff(self, rev1, rev2):
1779 """return or calculate a delta between two revisions
1768 """return or calculate a delta between two revisions
1780
1769
1781 The delta calculated is in binary form and is intended to be written to
1770 The delta calculated is in binary form and is intended to be written to
1782 revlog data directly. So this function needs raw revision data.
1771 revlog data directly. So this function needs raw revision data.
1783 """
1772 """
1784 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1773 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1785 return bytes(self._chunk(rev2))
1774 return bytes(self._chunk(rev2))
1786
1775
1787 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1776 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1788
1777
1789 def _processflags(self, text, flags, operation, raw=False):
1778 def _processflags(self, text, flags, operation, raw=False):
1790 """deprecated entry point to access flag processors"""
1779 """deprecated entry point to access flag processors"""
1791 msg = b'_processflag(...) use the specialized variant'
1780 msg = b'_processflag(...) use the specialized variant'
1792 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1781 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1793 if raw:
1782 if raw:
1794 return text, flagutil.processflagsraw(self, text, flags)
1783 return text, flagutil.processflagsraw(self, text, flags)
1795 elif operation == b'read':
1784 elif operation == b'read':
1796 return flagutil.processflagsread(self, text, flags)
1785 return flagutil.processflagsread(self, text, flags)
1797 else: # write operation
1786 else: # write operation
1798 return flagutil.processflagswrite(self, text, flags)
1787 return flagutil.processflagswrite(self, text, flags)
1799
1788
1800 def revision(self, nodeorrev, _df=None, raw=False):
1789 def revision(self, nodeorrev, _df=None, raw=False):
1801 """return an uncompressed revision of a given node or revision
1790 """return an uncompressed revision of a given node or revision
1802 number.
1791 number.
1803
1792
1804 _df - an existing file handle to read from. (internal-only)
1793 _df - an existing file handle to read from. (internal-only)
1805 raw - an optional argument specifying if the revision data is to be
1794 raw - an optional argument specifying if the revision data is to be
1806 treated as raw data when applying flag transforms. 'raw' should be set
1795 treated as raw data when applying flag transforms. 'raw' should be set
1807 to True when generating changegroups or in debug commands.
1796 to True when generating changegroups or in debug commands.
1808 """
1797 """
1809 if raw:
1798 if raw:
1810 msg = (
1799 msg = (
1811 b'revlog.revision(..., raw=True) is deprecated, '
1800 b'revlog.revision(..., raw=True) is deprecated, '
1812 b'use revlog.rawdata(...)'
1801 b'use revlog.rawdata(...)'
1813 )
1802 )
1814 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1803 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1815 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1804 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1816
1805
1817 def sidedata(self, nodeorrev, _df=None):
1806 def sidedata(self, nodeorrev, _df=None):
1818 """a map of extra data related to the changeset but not part of the hash
1807 """a map of extra data related to the changeset but not part of the hash
1819
1808
1820 This function currently return a dictionary. However, more advanced
1809 This function currently return a dictionary. However, more advanced
1821 mapping object will likely be used in the future for a more
1810 mapping object will likely be used in the future for a more
1822 efficient/lazy code.
1811 efficient/lazy code.
1823 """
1812 """
1824 return self._revisiondata(nodeorrev, _df)[1]
1813 return self._revisiondata(nodeorrev, _df)[1]
1825
1814
1826 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1815 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1827 # deal with <nodeorrev> argument type
1816 # deal with <nodeorrev> argument type
1828 if isinstance(nodeorrev, int):
1817 if isinstance(nodeorrev, int):
1829 rev = nodeorrev
1818 rev = nodeorrev
1830 node = self.node(rev)
1819 node = self.node(rev)
1831 else:
1820 else:
1832 node = nodeorrev
1821 node = nodeorrev
1833 rev = None
1822 rev = None
1834
1823
1835 # fast path the special `nullid` rev
1824 # fast path the special `nullid` rev
1836 if node == self.nullid:
1825 if node == self.nullid:
1837 return b"", {}
1826 return b"", {}
1838
1827
1839 # ``rawtext`` is the text as stored inside the revlog. Might be the
1828 # ``rawtext`` is the text as stored inside the revlog. Might be the
1840 # revision or might need to be processed to retrieve the revision.
1829 # revision or might need to be processed to retrieve the revision.
1841 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1830 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1842
1831
1843 if self.hassidedata:
1832 if self.hassidedata:
1844 if rev is None:
1833 if rev is None:
1845 rev = self.rev(node)
1834 rev = self.rev(node)
1846 sidedata = self._sidedata(rev)
1835 sidedata = self._sidedata(rev)
1847 else:
1836 else:
1848 sidedata = {}
1837 sidedata = {}
1849
1838
1850 if raw and validated:
1839 if raw and validated:
1851 # if we don't want to process the raw text and that raw
1840 # if we don't want to process the raw text and that raw
1852 # text is cached, we can exit early.
1841 # text is cached, we can exit early.
1853 return rawtext, sidedata
1842 return rawtext, sidedata
1854 if rev is None:
1843 if rev is None:
1855 rev = self.rev(node)
1844 rev = self.rev(node)
1856 # the revlog's flag for this revision
1845 # the revlog's flag for this revision
1857 # (usually alter its state or content)
1846 # (usually alter its state or content)
1858 flags = self.flags(rev)
1847 flags = self.flags(rev)
1859
1848
1860 if validated and flags == REVIDX_DEFAULT_FLAGS:
1849 if validated and flags == REVIDX_DEFAULT_FLAGS:
1861 # no extra flags set, no flag processor runs, text = rawtext
1850 # no extra flags set, no flag processor runs, text = rawtext
1862 return rawtext, sidedata
1851 return rawtext, sidedata
1863
1852
1864 if raw:
1853 if raw:
1865 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1854 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1866 text = rawtext
1855 text = rawtext
1867 else:
1856 else:
1868 r = flagutil.processflagsread(self, rawtext, flags)
1857 r = flagutil.processflagsread(self, rawtext, flags)
1869 text, validatehash = r
1858 text, validatehash = r
1870 if validatehash:
1859 if validatehash:
1871 self.checkhash(text, node, rev=rev)
1860 self.checkhash(text, node, rev=rev)
1872 if not validated:
1861 if not validated:
1873 self._revisioncache = (node, rev, rawtext)
1862 self._revisioncache = (node, rev, rawtext)
1874
1863
1875 return text, sidedata
1864 return text, sidedata
1876
1865
1877 def _rawtext(self, node, rev, _df=None):
1866 def _rawtext(self, node, rev, _df=None):
1878 """return the possibly unvalidated rawtext for a revision
1867 """return the possibly unvalidated rawtext for a revision
1879
1868
1880 returns (rev, rawtext, validated)
1869 returns (rev, rawtext, validated)
1881 """
1870 """
1882
1871
1883 # revision in the cache (could be useful to apply delta)
1872 # revision in the cache (could be useful to apply delta)
1884 cachedrev = None
1873 cachedrev = None
1885 # An intermediate text to apply deltas to
1874 # An intermediate text to apply deltas to
1886 basetext = None
1875 basetext = None
1887
1876
1888 # Check if we have the entry in cache
1877 # Check if we have the entry in cache
1889 # The cache entry looks like (node, rev, rawtext)
1878 # The cache entry looks like (node, rev, rawtext)
1890 if self._revisioncache:
1879 if self._revisioncache:
1891 if self._revisioncache[0] == node:
1880 if self._revisioncache[0] == node:
1892 return (rev, self._revisioncache[2], True)
1881 return (rev, self._revisioncache[2], True)
1893 cachedrev = self._revisioncache[1]
1882 cachedrev = self._revisioncache[1]
1894
1883
1895 if rev is None:
1884 if rev is None:
1896 rev = self.rev(node)
1885 rev = self.rev(node)
1897
1886
1898 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1887 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1899 if stopped:
1888 if stopped:
1900 basetext = self._revisioncache[2]
1889 basetext = self._revisioncache[2]
1901
1890
1902 # drop cache to save memory, the caller is expected to
1891 # drop cache to save memory, the caller is expected to
1903 # update self._revisioncache after validating the text
1892 # update self._revisioncache after validating the text
1904 self._revisioncache = None
1893 self._revisioncache = None
1905
1894
1906 targetsize = None
1895 targetsize = None
1907 rawsize = self.index[rev][2]
1896 rawsize = self.index[rev][2]
1908 if 0 <= rawsize:
1897 if 0 <= rawsize:
1909 targetsize = 4 * rawsize
1898 targetsize = 4 * rawsize
1910
1899
1911 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1900 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1912 if basetext is None:
1901 if basetext is None:
1913 basetext = bytes(bins[0])
1902 basetext = bytes(bins[0])
1914 bins = bins[1:]
1903 bins = bins[1:]
1915
1904
1916 rawtext = mdiff.patches(basetext, bins)
1905 rawtext = mdiff.patches(basetext, bins)
1917 del basetext # let us have a chance to free memory early
1906 del basetext # let us have a chance to free memory early
1918 return (rev, rawtext, False)
1907 return (rev, rawtext, False)
1919
1908
1920 def _sidedata(self, rev):
1909 def _sidedata(self, rev):
1921 """Return the sidedata for a given revision number."""
1910 """Return the sidedata for a given revision number."""
1922 index_entry = self.index[rev]
1911 index_entry = self.index[rev]
1923 sidedata_offset = index_entry[8]
1912 sidedata_offset = index_entry[8]
1924 sidedata_size = index_entry[9]
1913 sidedata_size = index_entry[9]
1925
1914
1926 if self._inline:
1915 if self._inline:
1927 sidedata_offset += self.index.entry_size * (1 + rev)
1916 sidedata_offset += self.index.entry_size * (1 + rev)
1928 if sidedata_size == 0:
1917 if sidedata_size == 0:
1929 return {}
1918 return {}
1930
1919
1931 segment = self._getsegment(sidedata_offset, sidedata_size)
1920 segment = self._getsegment(sidedata_offset, sidedata_size)
1932 sidedata = sidedatautil.deserialize_sidedata(segment)
1921 sidedata = sidedatautil.deserialize_sidedata(segment)
1933 return sidedata
1922 return sidedata
1934
1923
1935 def rawdata(self, nodeorrev, _df=None):
1924 def rawdata(self, nodeorrev, _df=None):
1936 """return an uncompressed raw data of a given node or revision number.
1925 """return an uncompressed raw data of a given node or revision number.
1937
1926
1938 _df - an existing file handle to read from. (internal-only)
1927 _df - an existing file handle to read from. (internal-only)
1939 """
1928 """
1940 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1929 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1941
1930
1942 def hash(self, text, p1, p2):
1931 def hash(self, text, p1, p2):
1943 """Compute a node hash.
1932 """Compute a node hash.
1944
1933
1945 Available as a function so that subclasses can replace the hash
1934 Available as a function so that subclasses can replace the hash
1946 as needed.
1935 as needed.
1947 """
1936 """
1948 return storageutil.hashrevisionsha1(text, p1, p2)
1937 return storageutil.hashrevisionsha1(text, p1, p2)
1949
1938
1950 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1939 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1951 """Check node hash integrity.
1940 """Check node hash integrity.
1952
1941
1953 Available as a function so that subclasses can extend hash mismatch
1942 Available as a function so that subclasses can extend hash mismatch
1954 behaviors as needed.
1943 behaviors as needed.
1955 """
1944 """
1956 try:
1945 try:
1957 if p1 is None and p2 is None:
1946 if p1 is None and p2 is None:
1958 p1, p2 = self.parents(node)
1947 p1, p2 = self.parents(node)
1959 if node != self.hash(text, p1, p2):
1948 if node != self.hash(text, p1, p2):
1960 # Clear the revision cache on hash failure. The revision cache
1949 # Clear the revision cache on hash failure. The revision cache
1961 # only stores the raw revision and clearing the cache does have
1950 # only stores the raw revision and clearing the cache does have
1962 # the side-effect that we won't have a cache hit when the raw
1951 # the side-effect that we won't have a cache hit when the raw
1963 # revision data is accessed. But this case should be rare and
1952 # revision data is accessed. But this case should be rare and
1964 # it is extra work to teach the cache about the hash
1953 # it is extra work to teach the cache about the hash
1965 # verification state.
1954 # verification state.
1966 if self._revisioncache and self._revisioncache[0] == node:
1955 if self._revisioncache and self._revisioncache[0] == node:
1967 self._revisioncache = None
1956 self._revisioncache = None
1968
1957
1969 revornode = rev
1958 revornode = rev
1970 if revornode is None:
1959 if revornode is None:
1971 revornode = templatefilters.short(hex(node))
1960 revornode = templatefilters.short(hex(node))
1972 raise error.RevlogError(
1961 raise error.RevlogError(
1973 _(b"integrity check failed on %s:%s")
1962 _(b"integrity check failed on %s:%s")
1974 % (self.display_id, pycompat.bytestr(revornode))
1963 % (self.display_id, pycompat.bytestr(revornode))
1975 )
1964 )
1976 except error.RevlogError:
1965 except error.RevlogError:
1977 if self._censorable and storageutil.iscensoredtext(text):
1966 if self._censorable and storageutil.iscensoredtext(text):
1978 raise error.CensoredNodeError(self.display_id, node, text)
1967 raise error.CensoredNodeError(self.display_id, node, text)
1979 raise
1968 raise
1980
1969
1981 def _enforceinlinesize(self, tr):
1970 def _enforceinlinesize(self, tr):
1982 """Check if the revlog is too big for inline and convert if so.
1971 """Check if the revlog is too big for inline and convert if so.
1983
1972
1984 This should be called after revisions are added to the revlog. If the
1973 This should be called after revisions are added to the revlog. If the
1985 revlog has grown too large to be an inline revlog, it will convert it
1974 revlog has grown too large to be an inline revlog, it will convert it
1986 to use multiple index and data files.
1975 to use multiple index and data files.
1987 """
1976 """
1988 tiprev = len(self) - 1
1977 tiprev = len(self) - 1
1989 total_size = self.start(tiprev) + self.length(tiprev)
1978 total_size = self.start(tiprev) + self.length(tiprev)
1990 if not self._inline or total_size < _maxinline:
1979 if not self._inline or total_size < _maxinline:
1991 return
1980 return
1992
1981
1993 troffset = tr.findoffset(self._indexfile)
1982 troffset = tr.findoffset(self._indexfile)
1994 if troffset is None:
1983 if troffset is None:
1995 raise error.RevlogError(
1984 raise error.RevlogError(
1996 _(b"%s not found in the transaction") % self._indexfile
1985 _(b"%s not found in the transaction") % self._indexfile
1997 )
1986 )
1998 trindex = 0
1987 trindex = 0
1999 tr.add(self._datafile, 0)
1988 tr.add(self._datafile, 0)
2000
1989
2001 existing_handles = False
1990 existing_handles = False
2002 if self._writinghandles is not None:
1991 if self._writinghandles is not None:
2003 existing_handles = True
1992 existing_handles = True
2004 fp = self._writinghandles[0]
1993 fp = self._writinghandles[0]
2005 fp.flush()
1994 fp.flush()
2006 fp.close()
1995 fp.close()
2007 # We can't use the cached file handle after close(). So prevent
1996 # We can't use the cached file handle after close(). So prevent
2008 # its usage.
1997 # its usage.
2009 self._writinghandles = None
1998 self._writinghandles = None
2010
1999
2011 new_dfh = self._datafp(b'w+')
2000 new_dfh = self._datafp(b'w+')
2012 new_dfh.truncate(0) # drop any potentially existing data
2001 new_dfh.truncate(0) # drop any potentially existing data
2013 try:
2002 try:
2014 with self._indexfp() as read_ifh:
2003 with self._indexfp() as read_ifh:
2015 for r in self:
2004 for r in self:
2016 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2005 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2017 if troffset <= self.start(r):
2006 if troffset <= self.start(r):
2018 trindex = r
2007 trindex = r
2019 new_dfh.flush()
2008 new_dfh.flush()
2020
2009
2021 with self.__index_new_fp() as fp:
2010 with self.__index_new_fp() as fp:
2022 self._format_flags &= ~FLAG_INLINE_DATA
2011 self._format_flags &= ~FLAG_INLINE_DATA
2023 self._inline = False
2012 self._inline = False
2024 for i in self:
2013 for i in self:
2025 e = self.index.entry_binary(i)
2014 e = self.index.entry_binary(i)
2026 if i == 0:
2015 if i == 0:
2027 header = self._format_flags | self._format_version
2016 header = self._format_flags | self._format_version
2028 header = self.index.pack_header(header)
2017 header = self.index.pack_header(header)
2029 e = header + e
2018 e = header + e
2030 fp.write(e)
2019 fp.write(e)
2031 # the temp file replace the real index when we exit the context
2020 # the temp file replace the real index when we exit the context
2032 # manager
2021 # manager
2033
2022
2034 tr.replace(self._indexfile, trindex * self.index.entry_size)
2023 tr.replace(self._indexfile, trindex * self.index.entry_size)
2035 nodemaputil.setup_persistent_nodemap(tr, self)
2024 nodemaputil.setup_persistent_nodemap(tr, self)
2036 self._chunkclear()
2025 self._chunkclear()
2037
2026
2038 if existing_handles:
2027 if existing_handles:
2039 # switched from inline to conventional reopen the index
2028 # switched from inline to conventional reopen the index
2040 ifh = self.__index_write_fp()
2029 ifh = self.__index_write_fp()
2041 self._writinghandles = (ifh, new_dfh)
2030 self._writinghandles = (ifh, new_dfh)
2042 new_dfh = None
2031 new_dfh = None
2043 finally:
2032 finally:
2044 if new_dfh is not None:
2033 if new_dfh is not None:
2045 new_dfh.close()
2034 new_dfh.close()
2046
2035
2047 def _nodeduplicatecallback(self, transaction, node):
2036 def _nodeduplicatecallback(self, transaction, node):
2048 """called when trying to add a node already stored."""
2037 """called when trying to add a node already stored."""
2049
2038
2050 @contextlib.contextmanager
2039 @contextlib.contextmanager
2051 def _writing(self, transaction):
2040 def _writing(self, transaction):
2052 if self._writinghandles is not None:
2041 if self._writinghandles is not None:
2053 yield
2042 yield
2054 else:
2043 else:
2055 r = len(self)
2044 r = len(self)
2056 dsize = 0
2045 dsize = 0
2057 if r:
2046 if r:
2058 dsize = self.end(r - 1)
2047 dsize = self.end(r - 1)
2059 dfh = None
2048 dfh = None
2060 if not self._inline:
2049 if not self._inline:
2061 try:
2050 try:
2062 dfh = self._datafp(b"r+")
2051 dfh = self._datafp(b"r+")
2063 dfh.seek(0, os.SEEK_END)
2052 dfh.seek(0, os.SEEK_END)
2064 except IOError as inst:
2053 except IOError as inst:
2065 if inst.errno != errno.ENOENT:
2054 if inst.errno != errno.ENOENT:
2066 raise
2055 raise
2067 dfh = self._datafp(b"w+")
2056 dfh = self._datafp(b"w+")
2068 transaction.add(self._datafile, dsize)
2057 transaction.add(self._datafile, dsize)
2069 try:
2058 try:
2070 isize = r * self.index.entry_size
2059 isize = r * self.index.entry_size
2071 ifh = self.__index_write_fp()
2060 ifh = self.__index_write_fp()
2072 if self._inline:
2061 if self._inline:
2073 transaction.add(self._indexfile, dsize + isize)
2062 transaction.add(self._indexfile, dsize + isize)
2074 else:
2063 else:
2075 transaction.add(self._indexfile, isize)
2064 transaction.add(self._indexfile, isize)
2076 try:
2065 try:
2077 self._writinghandles = (ifh, dfh)
2066 self._writinghandles = (ifh, dfh)
2078 try:
2067 try:
2079 yield
2068 yield
2080 finally:
2069 finally:
2081 self._writinghandles = None
2070 self._writinghandles = None
2082 finally:
2071 finally:
2083 ifh.close()
2072 ifh.close()
2084 finally:
2073 finally:
2085 if dfh is not None:
2074 if dfh is not None:
2086 dfh.close()
2075 dfh.close()
2087
2076
2088 def addrevision(
2077 def addrevision(
2089 self,
2078 self,
2090 text,
2079 text,
2091 transaction,
2080 transaction,
2092 link,
2081 link,
2093 p1,
2082 p1,
2094 p2,
2083 p2,
2095 cachedelta=None,
2084 cachedelta=None,
2096 node=None,
2085 node=None,
2097 flags=REVIDX_DEFAULT_FLAGS,
2086 flags=REVIDX_DEFAULT_FLAGS,
2098 deltacomputer=None,
2087 deltacomputer=None,
2099 sidedata=None,
2088 sidedata=None,
2100 ):
2089 ):
2101 """add a revision to the log
2090 """add a revision to the log
2102
2091
2103 text - the revision data to add
2092 text - the revision data to add
2104 transaction - the transaction object used for rollback
2093 transaction - the transaction object used for rollback
2105 link - the linkrev data to add
2094 link - the linkrev data to add
2106 p1, p2 - the parent nodeids of the revision
2095 p1, p2 - the parent nodeids of the revision
2107 cachedelta - an optional precomputed delta
2096 cachedelta - an optional precomputed delta
2108 node - nodeid of revision; typically node is not specified, and it is
2097 node - nodeid of revision; typically node is not specified, and it is
2109 computed by default as hash(text, p1, p2), however subclasses might
2098 computed by default as hash(text, p1, p2), however subclasses might
2110 use different hashing method (and override checkhash() in such case)
2099 use different hashing method (and override checkhash() in such case)
2111 flags - the known flags to set on the revision
2100 flags - the known flags to set on the revision
2112 deltacomputer - an optional deltacomputer instance shared between
2101 deltacomputer - an optional deltacomputer instance shared between
2113 multiple calls
2102 multiple calls
2114 """
2103 """
2115 if link == nullrev:
2104 if link == nullrev:
2116 raise error.RevlogError(
2105 raise error.RevlogError(
2117 _(b"attempted to add linkrev -1 to %s") % self.display_id
2106 _(b"attempted to add linkrev -1 to %s") % self.display_id
2118 )
2107 )
2119
2108
2120 if sidedata is None:
2109 if sidedata is None:
2121 sidedata = {}
2110 sidedata = {}
2122 elif sidedata and not self.hassidedata:
2111 elif sidedata and not self.hassidedata:
2123 raise error.ProgrammingError(
2112 raise error.ProgrammingError(
2124 _(b"trying to add sidedata to a revlog who don't support them")
2113 _(b"trying to add sidedata to a revlog who don't support them")
2125 )
2114 )
2126
2115
2127 if flags:
2116 if flags:
2128 node = node or self.hash(text, p1, p2)
2117 node = node or self.hash(text, p1, p2)
2129
2118
2130 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2119 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2131
2120
2132 # If the flag processor modifies the revision data, ignore any provided
2121 # If the flag processor modifies the revision data, ignore any provided
2133 # cachedelta.
2122 # cachedelta.
2134 if rawtext != text:
2123 if rawtext != text:
2135 cachedelta = None
2124 cachedelta = None
2136
2125
2137 if len(rawtext) > _maxentrysize:
2126 if len(rawtext) > _maxentrysize:
2138 raise error.RevlogError(
2127 raise error.RevlogError(
2139 _(
2128 _(
2140 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2129 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2141 )
2130 )
2142 % (self.display_id, len(rawtext))
2131 % (self.display_id, len(rawtext))
2143 )
2132 )
2144
2133
2145 node = node or self.hash(rawtext, p1, p2)
2134 node = node or self.hash(rawtext, p1, p2)
2146 rev = self.index.get_rev(node)
2135 rev = self.index.get_rev(node)
2147 if rev is not None:
2136 if rev is not None:
2148 return rev
2137 return rev
2149
2138
2150 if validatehash:
2139 if validatehash:
2151 self.checkhash(rawtext, node, p1=p1, p2=p2)
2140 self.checkhash(rawtext, node, p1=p1, p2=p2)
2152
2141
2153 return self.addrawrevision(
2142 return self.addrawrevision(
2154 rawtext,
2143 rawtext,
2155 transaction,
2144 transaction,
2156 link,
2145 link,
2157 p1,
2146 p1,
2158 p2,
2147 p2,
2159 node,
2148 node,
2160 flags,
2149 flags,
2161 cachedelta=cachedelta,
2150 cachedelta=cachedelta,
2162 deltacomputer=deltacomputer,
2151 deltacomputer=deltacomputer,
2163 sidedata=sidedata,
2152 sidedata=sidedata,
2164 )
2153 )
2165
2154
2166 def addrawrevision(
2155 def addrawrevision(
2167 self,
2156 self,
2168 rawtext,
2157 rawtext,
2169 transaction,
2158 transaction,
2170 link,
2159 link,
2171 p1,
2160 p1,
2172 p2,
2161 p2,
2173 node,
2162 node,
2174 flags,
2163 flags,
2175 cachedelta=None,
2164 cachedelta=None,
2176 deltacomputer=None,
2165 deltacomputer=None,
2177 sidedata=None,
2166 sidedata=None,
2178 ):
2167 ):
2179 """add a raw revision with known flags, node and parents
2168 """add a raw revision with known flags, node and parents
2180 useful when reusing a revision not stored in this revlog (ex: received
2169 useful when reusing a revision not stored in this revlog (ex: received
2181 over wire, or read from an external bundle).
2170 over wire, or read from an external bundle).
2182 """
2171 """
2183 with self._writing(transaction):
2172 with self._writing(transaction):
2184 return self._addrevision(
2173 return self._addrevision(
2185 node,
2174 node,
2186 rawtext,
2175 rawtext,
2187 transaction,
2176 transaction,
2188 link,
2177 link,
2189 p1,
2178 p1,
2190 p2,
2179 p2,
2191 flags,
2180 flags,
2192 cachedelta,
2181 cachedelta,
2193 deltacomputer=deltacomputer,
2182 deltacomputer=deltacomputer,
2194 sidedata=sidedata,
2183 sidedata=sidedata,
2195 )
2184 )
2196
2185
2197 def compress(self, data):
2186 def compress(self, data):
2198 """Generate a possibly-compressed representation of data."""
2187 """Generate a possibly-compressed representation of data."""
2199 if not data:
2188 if not data:
2200 return b'', data
2189 return b'', data
2201
2190
2202 compressed = self._compressor.compress(data)
2191 compressed = self._compressor.compress(data)
2203
2192
2204 if compressed:
2193 if compressed:
2205 # The revlog compressor added the header in the returned data.
2194 # The revlog compressor added the header in the returned data.
2206 return b'', compressed
2195 return b'', compressed
2207
2196
2208 if data[0:1] == b'\0':
2197 if data[0:1] == b'\0':
2209 return b'', data
2198 return b'', data
2210 return b'u', data
2199 return b'u', data
2211
2200
2212 def decompress(self, data):
2201 def decompress(self, data):
2213 """Decompress a revlog chunk.
2202 """Decompress a revlog chunk.
2214
2203
2215 The chunk is expected to begin with a header identifying the
2204 The chunk is expected to begin with a header identifying the
2216 format type so it can be routed to an appropriate decompressor.
2205 format type so it can be routed to an appropriate decompressor.
2217 """
2206 """
2218 if not data:
2207 if not data:
2219 return data
2208 return data
2220
2209
2221 # Revlogs are read much more frequently than they are written and many
2210 # Revlogs are read much more frequently than they are written and many
2222 # chunks only take microseconds to decompress, so performance is
2211 # chunks only take microseconds to decompress, so performance is
2223 # important here.
2212 # important here.
2224 #
2213 #
2225 # We can make a few assumptions about revlogs:
2214 # We can make a few assumptions about revlogs:
2226 #
2215 #
2227 # 1) the majority of chunks will be compressed (as opposed to inline
2216 # 1) the majority of chunks will be compressed (as opposed to inline
2228 # raw data).
2217 # raw data).
2229 # 2) decompressing *any* data will likely by at least 10x slower than
2218 # 2) decompressing *any* data will likely by at least 10x slower than
2230 # returning raw inline data.
2219 # returning raw inline data.
2231 # 3) we want to prioritize common and officially supported compression
2220 # 3) we want to prioritize common and officially supported compression
2232 # engines
2221 # engines
2233 #
2222 #
2234 # It follows that we want to optimize for "decompress compressed data
2223 # It follows that we want to optimize for "decompress compressed data
2235 # when encoded with common and officially supported compression engines"
2224 # when encoded with common and officially supported compression engines"
2236 # case over "raw data" and "data encoded by less common or non-official
2225 # case over "raw data" and "data encoded by less common or non-official
2237 # compression engines." That is why we have the inline lookup first
2226 # compression engines." That is why we have the inline lookup first
2238 # followed by the compengines lookup.
2227 # followed by the compengines lookup.
2239 #
2228 #
2240 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2229 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2241 # compressed chunks. And this matters for changelog and manifest reads.
2230 # compressed chunks. And this matters for changelog and manifest reads.
2242 t = data[0:1]
2231 t = data[0:1]
2243
2232
2244 if t == b'x':
2233 if t == b'x':
2245 try:
2234 try:
2246 return _zlibdecompress(data)
2235 return _zlibdecompress(data)
2247 except zlib.error as e:
2236 except zlib.error as e:
2248 raise error.RevlogError(
2237 raise error.RevlogError(
2249 _(b'revlog decompress error: %s')
2238 _(b'revlog decompress error: %s')
2250 % stringutil.forcebytestr(e)
2239 % stringutil.forcebytestr(e)
2251 )
2240 )
2252 # '\0' is more common than 'u' so it goes first.
2241 # '\0' is more common than 'u' so it goes first.
2253 elif t == b'\0':
2242 elif t == b'\0':
2254 return data
2243 return data
2255 elif t == b'u':
2244 elif t == b'u':
2256 return util.buffer(data, 1)
2245 return util.buffer(data, 1)
2257
2246
2258 try:
2247 try:
2259 compressor = self._decompressors[t]
2248 compressor = self._decompressors[t]
2260 except KeyError:
2249 except KeyError:
2261 try:
2250 try:
2262 engine = util.compengines.forrevlogheader(t)
2251 engine = util.compengines.forrevlogheader(t)
2263 compressor = engine.revlogcompressor(self._compengineopts)
2252 compressor = engine.revlogcompressor(self._compengineopts)
2264 self._decompressors[t] = compressor
2253 self._decompressors[t] = compressor
2265 except KeyError:
2254 except KeyError:
2266 raise error.RevlogError(
2255 raise error.RevlogError(
2267 _(b'unknown compression type %s') % binascii.hexlify(t)
2256 _(b'unknown compression type %s') % binascii.hexlify(t)
2268 )
2257 )
2269
2258
2270 return compressor.decompress(data)
2259 return compressor.decompress(data)
2271
2260
2272 def _addrevision(
2261 def _addrevision(
2273 self,
2262 self,
2274 node,
2263 node,
2275 rawtext,
2264 rawtext,
2276 transaction,
2265 transaction,
2277 link,
2266 link,
2278 p1,
2267 p1,
2279 p2,
2268 p2,
2280 flags,
2269 flags,
2281 cachedelta,
2270 cachedelta,
2282 alwayscache=False,
2271 alwayscache=False,
2283 deltacomputer=None,
2272 deltacomputer=None,
2284 sidedata=None,
2273 sidedata=None,
2285 ):
2274 ):
2286 """internal function to add revisions to the log
2275 """internal function to add revisions to the log
2287
2276
2288 see addrevision for argument descriptions.
2277 see addrevision for argument descriptions.
2289
2278
2290 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2279 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2291
2280
2292 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2281 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2293 be used.
2282 be used.
2294
2283
2295 invariants:
2284 invariants:
2296 - rawtext is optional (can be None); if not set, cachedelta must be set.
2285 - rawtext is optional (can be None); if not set, cachedelta must be set.
2297 if both are set, they must correspond to each other.
2286 if both are set, they must correspond to each other.
2298 """
2287 """
2299 if node == self.nullid:
2288 if node == self.nullid:
2300 raise error.RevlogError(
2289 raise error.RevlogError(
2301 _(b"%s: attempt to add null revision") % self.display_id
2290 _(b"%s: attempt to add null revision") % self.display_id
2302 )
2291 )
2303 if (
2292 if (
2304 node == self.nodeconstants.wdirid
2293 node == self.nodeconstants.wdirid
2305 or node in self.nodeconstants.wdirfilenodeids
2294 or node in self.nodeconstants.wdirfilenodeids
2306 ):
2295 ):
2307 raise error.RevlogError(
2296 raise error.RevlogError(
2308 _(b"%s: attempt to add wdir revision") % self.display_id
2297 _(b"%s: attempt to add wdir revision") % self.display_id
2309 )
2298 )
2310 if self._writinghandles is None:
2299 if self._writinghandles is None:
2311 msg = b'adding revision outside `revlog._writing` context'
2300 msg = b'adding revision outside `revlog._writing` context'
2312 raise error.ProgrammingError(msg)
2301 raise error.ProgrammingError(msg)
2313
2302
2314 if self._inline:
2303 if self._inline:
2315 fh = self._writinghandles[0]
2304 fh = self._writinghandles[0]
2316 else:
2305 else:
2317 fh = self._writinghandles[1]
2306 fh = self._writinghandles[1]
2318
2307
2319 btext = [rawtext]
2308 btext = [rawtext]
2320
2309
2321 curr = len(self)
2310 curr = len(self)
2322 prev = curr - 1
2311 prev = curr - 1
2323
2312
2324 offset = self._get_data_offset(prev)
2313 offset = self._get_data_offset(prev)
2325
2314
2326 if self._concurrencychecker:
2315 if self._concurrencychecker:
2327 ifh, dfh = self._writinghandles
2316 ifh, dfh = self._writinghandles
2328 if self._inline:
2317 if self._inline:
2329 # offset is "as if" it were in the .d file, so we need to add on
2318 # offset is "as if" it were in the .d file, so we need to add on
2330 # the size of the entry metadata.
2319 # the size of the entry metadata.
2331 self._concurrencychecker(
2320 self._concurrencychecker(
2332 ifh, self._indexfile, offset + curr * self.index.entry_size
2321 ifh, self._indexfile, offset + curr * self.index.entry_size
2333 )
2322 )
2334 else:
2323 else:
2335 # Entries in the .i are a consistent size.
2324 # Entries in the .i are a consistent size.
2336 self._concurrencychecker(
2325 self._concurrencychecker(
2337 ifh, self._indexfile, curr * self.index.entry_size
2326 ifh, self._indexfile, curr * self.index.entry_size
2338 )
2327 )
2339 self._concurrencychecker(dfh, self._datafile, offset)
2328 self._concurrencychecker(dfh, self._datafile, offset)
2340
2329
2341 p1r, p2r = self.rev(p1), self.rev(p2)
2330 p1r, p2r = self.rev(p1), self.rev(p2)
2342
2331
2343 # full versions are inserted when the needed deltas
2332 # full versions are inserted when the needed deltas
2344 # become comparable to the uncompressed text
2333 # become comparable to the uncompressed text
2345 if rawtext is None:
2334 if rawtext is None:
2346 # need rawtext size, before changed by flag processors, which is
2335 # need rawtext size, before changed by flag processors, which is
2347 # the non-raw size. use revlog explicitly to avoid filelog's extra
2336 # the non-raw size. use revlog explicitly to avoid filelog's extra
2348 # logic that might remove metadata size.
2337 # logic that might remove metadata size.
2349 textlen = mdiff.patchedsize(
2338 textlen = mdiff.patchedsize(
2350 revlog.size(self, cachedelta[0]), cachedelta[1]
2339 revlog.size(self, cachedelta[0]), cachedelta[1]
2351 )
2340 )
2352 else:
2341 else:
2353 textlen = len(rawtext)
2342 textlen = len(rawtext)
2354
2343
2355 if deltacomputer is None:
2344 if deltacomputer is None:
2356 deltacomputer = deltautil.deltacomputer(self)
2345 deltacomputer = deltautil.deltacomputer(self)
2357
2346
2358 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2347 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2359
2348
2360 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2349 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2361
2350
2362 if sidedata and self.hassidedata:
2351 if sidedata and self.hassidedata:
2363 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2352 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2364 sidedata_offset = offset + deltainfo.deltalen
2353 sidedata_offset = offset + deltainfo.deltalen
2365 else:
2354 else:
2366 serialized_sidedata = b""
2355 serialized_sidedata = b""
2367 # Don't store the offset if the sidedata is empty, that way
2356 # Don't store the offset if the sidedata is empty, that way
2368 # we can easily detect empty sidedata and they will be no different
2357 # we can easily detect empty sidedata and they will be no different
2369 # than ones we manually add.
2358 # than ones we manually add.
2370 sidedata_offset = 0
2359 sidedata_offset = 0
2371
2360
2372 e = (
2361 e = (
2373 offset_type(offset, flags),
2362 offset_type(offset, flags),
2374 deltainfo.deltalen,
2363 deltainfo.deltalen,
2375 textlen,
2364 textlen,
2376 deltainfo.base,
2365 deltainfo.base,
2377 link,
2366 link,
2378 p1r,
2367 p1r,
2379 p2r,
2368 p2r,
2380 node,
2369 node,
2381 sidedata_offset,
2370 sidedata_offset,
2382 len(serialized_sidedata),
2371 len(serialized_sidedata),
2383 )
2372 )
2384
2373
2385 self.index.append(e)
2374 self.index.append(e)
2386 entry = self.index.entry_binary(curr)
2375 entry = self.index.entry_binary(curr)
2387 if curr == 0:
2376 if curr == 0:
2388 header = self._format_flags | self._format_version
2377 header = self._format_flags | self._format_version
2389 header = self.index.pack_header(header)
2378 header = self.index.pack_header(header)
2390 entry = header + entry
2379 entry = header + entry
2391 self._writeentry(
2380 self._writeentry(
2392 transaction,
2381 transaction,
2393 entry,
2382 entry,
2394 deltainfo.data,
2383 deltainfo.data,
2395 link,
2384 link,
2396 offset,
2385 offset,
2397 serialized_sidedata,
2386 serialized_sidedata,
2398 )
2387 )
2399
2388
2400 rawtext = btext[0]
2389 rawtext = btext[0]
2401
2390
2402 if alwayscache and rawtext is None:
2391 if alwayscache and rawtext is None:
2403 rawtext = deltacomputer.buildtext(revinfo, fh)
2392 rawtext = deltacomputer.buildtext(revinfo, fh)
2404
2393
2405 if type(rawtext) == bytes: # only accept immutable objects
2394 if type(rawtext) == bytes: # only accept immutable objects
2406 self._revisioncache = (node, curr, rawtext)
2395 self._revisioncache = (node, curr, rawtext)
2407 self._chainbasecache[curr] = deltainfo.chainbase
2396 self._chainbasecache[curr] = deltainfo.chainbase
2408 return curr
2397 return curr
2409
2398
2410 def _get_data_offset(self, prev):
2399 def _get_data_offset(self, prev):
2411 """Returns the current offset in the (in-transaction) data file.
2400 """Returns the current offset in the (in-transaction) data file.
2412 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2401 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2413 file to store that information: since sidedata can be rewritten to the
2402 file to store that information: since sidedata can be rewritten to the
2414 end of the data file within a transaction, you can have cases where, for
2403 end of the data file within a transaction, you can have cases where, for
2415 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2404 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2416 to `n - 1`'s sidedata being written after `n`'s data.
2405 to `n - 1`'s sidedata being written after `n`'s data.
2417
2406
2418 TODO cache this in a docket file before getting out of experimental."""
2407 TODO cache this in a docket file before getting out of experimental."""
2419 if self._format_version != REVLOGV2:
2408 if self._format_version != REVLOGV2:
2420 return self.end(prev)
2409 return self.end(prev)
2421
2410
2422 offset = 0
2411 offset = 0
2423 for rev, entry in enumerate(self.index):
2412 for rev, entry in enumerate(self.index):
2424 sidedata_end = entry[8] + entry[9]
2413 sidedata_end = entry[8] + entry[9]
2425 # Sidedata for a previous rev has potentially been written after
2414 # Sidedata for a previous rev has potentially been written after
2426 # this rev's end, so take the max.
2415 # this rev's end, so take the max.
2427 offset = max(self.end(rev), offset, sidedata_end)
2416 offset = max(self.end(rev), offset, sidedata_end)
2428 return offset
2417 return offset
2429
2418
2430 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2419 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2431 # Files opened in a+ mode have inconsistent behavior on various
2420 # Files opened in a+ mode have inconsistent behavior on various
2432 # platforms. Windows requires that a file positioning call be made
2421 # platforms. Windows requires that a file positioning call be made
2433 # when the file handle transitions between reads and writes. See
2422 # when the file handle transitions between reads and writes. See
2434 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2423 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2435 # platforms, Python or the platform itself can be buggy. Some versions
2424 # platforms, Python or the platform itself can be buggy. Some versions
2436 # of Solaris have been observed to not append at the end of the file
2425 # of Solaris have been observed to not append at the end of the file
2437 # if the file was seeked to before the end. See issue4943 for more.
2426 # if the file was seeked to before the end. See issue4943 for more.
2438 #
2427 #
2439 # We work around this issue by inserting a seek() before writing.
2428 # We work around this issue by inserting a seek() before writing.
2440 # Note: This is likely not necessary on Python 3. However, because
2429 # Note: This is likely not necessary on Python 3. However, because
2441 # the file handle is reused for reads and may be seeked there, we need
2430 # the file handle is reused for reads and may be seeked there, we need
2442 # to be careful before changing this.
2431 # to be careful before changing this.
2443 if self._writinghandles is None:
2432 if self._writinghandles is None:
2444 msg = b'adding revision outside `revlog._writing` context'
2433 msg = b'adding revision outside `revlog._writing` context'
2445 raise error.ProgrammingError(msg)
2434 raise error.ProgrammingError(msg)
2446 ifh, dfh = self._writinghandles
2435 ifh, dfh = self._writinghandles
2447 ifh.seek(0, os.SEEK_END)
2436 ifh.seek(0, os.SEEK_END)
2448 if dfh:
2437 if dfh:
2449 dfh.seek(0, os.SEEK_END)
2438 dfh.seek(0, os.SEEK_END)
2450
2439
2451 curr = len(self) - 1
2440 curr = len(self) - 1
2452 if not self._inline:
2441 if not self._inline:
2453 transaction.add(self._datafile, offset)
2442 transaction.add(self._datafile, offset)
2454 transaction.add(self._indexfile, curr * len(entry))
2443 transaction.add(self._indexfile, curr * len(entry))
2455 if data[0]:
2444 if data[0]:
2456 dfh.write(data[0])
2445 dfh.write(data[0])
2457 dfh.write(data[1])
2446 dfh.write(data[1])
2458 if sidedata:
2447 if sidedata:
2459 dfh.write(sidedata)
2448 dfh.write(sidedata)
2460 ifh.write(entry)
2449 ifh.write(entry)
2461 else:
2450 else:
2462 offset += curr * self.index.entry_size
2451 offset += curr * self.index.entry_size
2463 transaction.add(self._indexfile, offset)
2452 transaction.add(self._indexfile, offset)
2464 ifh.write(entry)
2453 ifh.write(entry)
2465 ifh.write(data[0])
2454 ifh.write(data[0])
2466 ifh.write(data[1])
2455 ifh.write(data[1])
2467 if sidedata:
2456 if sidedata:
2468 ifh.write(sidedata)
2457 ifh.write(sidedata)
2469 self._enforceinlinesize(transaction)
2458 self._enforceinlinesize(transaction)
2470 nodemaputil.setup_persistent_nodemap(transaction, self)
2459 nodemaputil.setup_persistent_nodemap(transaction, self)
2471
2460
2472 def addgroup(
2461 def addgroup(
2473 self,
2462 self,
2474 deltas,
2463 deltas,
2475 linkmapper,
2464 linkmapper,
2476 transaction,
2465 transaction,
2477 alwayscache=False,
2466 alwayscache=False,
2478 addrevisioncb=None,
2467 addrevisioncb=None,
2479 duplicaterevisioncb=None,
2468 duplicaterevisioncb=None,
2480 ):
2469 ):
2481 """
2470 """
2482 add a delta group
2471 add a delta group
2483
2472
2484 given a set of deltas, add them to the revision log. the
2473 given a set of deltas, add them to the revision log. the
2485 first delta is against its parent, which should be in our
2474 first delta is against its parent, which should be in our
2486 log, the rest are against the previous delta.
2475 log, the rest are against the previous delta.
2487
2476
2488 If ``addrevisioncb`` is defined, it will be called with arguments of
2477 If ``addrevisioncb`` is defined, it will be called with arguments of
2489 this revlog and the node that was added.
2478 this revlog and the node that was added.
2490 """
2479 """
2491
2480
2492 if self._adding_group:
2481 if self._adding_group:
2493 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2482 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2494
2483
2495 self._adding_group = True
2484 self._adding_group = True
2496 empty = True
2485 empty = True
2497 try:
2486 try:
2498 with self._writing(transaction):
2487 with self._writing(transaction):
2499 deltacomputer = deltautil.deltacomputer(self)
2488 deltacomputer = deltautil.deltacomputer(self)
2500 # loop through our set of deltas
2489 # loop through our set of deltas
2501 for data in deltas:
2490 for data in deltas:
2502 (
2491 (
2503 node,
2492 node,
2504 p1,
2493 p1,
2505 p2,
2494 p2,
2506 linknode,
2495 linknode,
2507 deltabase,
2496 deltabase,
2508 delta,
2497 delta,
2509 flags,
2498 flags,
2510 sidedata,
2499 sidedata,
2511 ) = data
2500 ) = data
2512 link = linkmapper(linknode)
2501 link = linkmapper(linknode)
2513 flags = flags or REVIDX_DEFAULT_FLAGS
2502 flags = flags or REVIDX_DEFAULT_FLAGS
2514
2503
2515 rev = self.index.get_rev(node)
2504 rev = self.index.get_rev(node)
2516 if rev is not None:
2505 if rev is not None:
2517 # this can happen if two branches make the same change
2506 # this can happen if two branches make the same change
2518 self._nodeduplicatecallback(transaction, rev)
2507 self._nodeduplicatecallback(transaction, rev)
2519 if duplicaterevisioncb:
2508 if duplicaterevisioncb:
2520 duplicaterevisioncb(self, rev)
2509 duplicaterevisioncb(self, rev)
2521 empty = False
2510 empty = False
2522 continue
2511 continue
2523
2512
2524 for p in (p1, p2):
2513 for p in (p1, p2):
2525 if not self.index.has_node(p):
2514 if not self.index.has_node(p):
2526 raise error.LookupError(
2515 raise error.LookupError(
2527 p, self.radix, _(b'unknown parent')
2516 p, self.radix, _(b'unknown parent')
2528 )
2517 )
2529
2518
2530 if not self.index.has_node(deltabase):
2519 if not self.index.has_node(deltabase):
2531 raise error.LookupError(
2520 raise error.LookupError(
2532 deltabase, self.display_id, _(b'unknown delta base')
2521 deltabase, self.display_id, _(b'unknown delta base')
2533 )
2522 )
2534
2523
2535 baserev = self.rev(deltabase)
2524 baserev = self.rev(deltabase)
2536
2525
2537 if baserev != nullrev and self.iscensored(baserev):
2526 if baserev != nullrev and self.iscensored(baserev):
2538 # if base is censored, delta must be full replacement in a
2527 # if base is censored, delta must be full replacement in a
2539 # single patch operation
2528 # single patch operation
2540 hlen = struct.calcsize(b">lll")
2529 hlen = struct.calcsize(b">lll")
2541 oldlen = self.rawsize(baserev)
2530 oldlen = self.rawsize(baserev)
2542 newlen = len(delta) - hlen
2531 newlen = len(delta) - hlen
2543 if delta[:hlen] != mdiff.replacediffheader(
2532 if delta[:hlen] != mdiff.replacediffheader(
2544 oldlen, newlen
2533 oldlen, newlen
2545 ):
2534 ):
2546 raise error.CensoredBaseError(
2535 raise error.CensoredBaseError(
2547 self.display_id, self.node(baserev)
2536 self.display_id, self.node(baserev)
2548 )
2537 )
2549
2538
2550 if not flags and self._peek_iscensored(baserev, delta):
2539 if not flags and self._peek_iscensored(baserev, delta):
2551 flags |= REVIDX_ISCENSORED
2540 flags |= REVIDX_ISCENSORED
2552
2541
2553 # We assume consumers of addrevisioncb will want to retrieve
2542 # We assume consumers of addrevisioncb will want to retrieve
2554 # the added revision, which will require a call to
2543 # the added revision, which will require a call to
2555 # revision(). revision() will fast path if there is a cache
2544 # revision(). revision() will fast path if there is a cache
2556 # hit. So, we tell _addrevision() to always cache in this case.
2545 # hit. So, we tell _addrevision() to always cache in this case.
2557 # We're only using addgroup() in the context of changegroup
2546 # We're only using addgroup() in the context of changegroup
2558 # generation so the revision data can always be handled as raw
2547 # generation so the revision data can always be handled as raw
2559 # by the flagprocessor.
2548 # by the flagprocessor.
2560 rev = self._addrevision(
2549 rev = self._addrevision(
2561 node,
2550 node,
2562 None,
2551 None,
2563 transaction,
2552 transaction,
2564 link,
2553 link,
2565 p1,
2554 p1,
2566 p2,
2555 p2,
2567 flags,
2556 flags,
2568 (baserev, delta),
2557 (baserev, delta),
2569 alwayscache=alwayscache,
2558 alwayscache=alwayscache,
2570 deltacomputer=deltacomputer,
2559 deltacomputer=deltacomputer,
2571 sidedata=sidedata,
2560 sidedata=sidedata,
2572 )
2561 )
2573
2562
2574 if addrevisioncb:
2563 if addrevisioncb:
2575 addrevisioncb(self, rev)
2564 addrevisioncb(self, rev)
2576 empty = False
2565 empty = False
2577 finally:
2566 finally:
2578 self._adding_group = False
2567 self._adding_group = False
2579 return not empty
2568 return not empty
2580
2569
2581 def iscensored(self, rev):
2570 def iscensored(self, rev):
2582 """Check if a file revision is censored."""
2571 """Check if a file revision is censored."""
2583 if not self._censorable:
2572 if not self._censorable:
2584 return False
2573 return False
2585
2574
2586 return self.flags(rev) & REVIDX_ISCENSORED
2575 return self.flags(rev) & REVIDX_ISCENSORED
2587
2576
2588 def _peek_iscensored(self, baserev, delta):
2577 def _peek_iscensored(self, baserev, delta):
2589 """Quickly check if a delta produces a censored revision."""
2578 """Quickly check if a delta produces a censored revision."""
2590 if not self._censorable:
2579 if not self._censorable:
2591 return False
2580 return False
2592
2581
2593 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2582 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2594
2583
2595 def getstrippoint(self, minlink):
2584 def getstrippoint(self, minlink):
2596 """find the minimum rev that must be stripped to strip the linkrev
2585 """find the minimum rev that must be stripped to strip the linkrev
2597
2586
2598 Returns a tuple containing the minimum rev and a set of all revs that
2587 Returns a tuple containing the minimum rev and a set of all revs that
2599 have linkrevs that will be broken by this strip.
2588 have linkrevs that will be broken by this strip.
2600 """
2589 """
2601 return storageutil.resolvestripinfo(
2590 return storageutil.resolvestripinfo(
2602 minlink,
2591 minlink,
2603 len(self) - 1,
2592 len(self) - 1,
2604 self.headrevs(),
2593 self.headrevs(),
2605 self.linkrev,
2594 self.linkrev,
2606 self.parentrevs,
2595 self.parentrevs,
2607 )
2596 )
2608
2597
2609 def strip(self, minlink, transaction):
2598 def strip(self, minlink, transaction):
2610 """truncate the revlog on the first revision with a linkrev >= minlink
2599 """truncate the revlog on the first revision with a linkrev >= minlink
2611
2600
2612 This function is called when we're stripping revision minlink and
2601 This function is called when we're stripping revision minlink and
2613 its descendants from the repository.
2602 its descendants from the repository.
2614
2603
2615 We have to remove all revisions with linkrev >= minlink, because
2604 We have to remove all revisions with linkrev >= minlink, because
2616 the equivalent changelog revisions will be renumbered after the
2605 the equivalent changelog revisions will be renumbered after the
2617 strip.
2606 strip.
2618
2607
2619 So we truncate the revlog on the first of these revisions, and
2608 So we truncate the revlog on the first of these revisions, and
2620 trust that the caller has saved the revisions that shouldn't be
2609 trust that the caller has saved the revisions that shouldn't be
2621 removed and that it'll re-add them after this truncation.
2610 removed and that it'll re-add them after this truncation.
2622 """
2611 """
2623 if len(self) == 0:
2612 if len(self) == 0:
2624 return
2613 return
2625
2614
2626 rev, _ = self.getstrippoint(minlink)
2615 rev, _ = self.getstrippoint(minlink)
2627 if rev == len(self):
2616 if rev == len(self):
2628 return
2617 return
2629
2618
2630 # first truncate the files on disk
2619 # first truncate the files on disk
2631 end = self.start(rev)
2620 end = self.start(rev)
2632 if not self._inline:
2621 if not self._inline:
2633 transaction.add(self._datafile, end)
2622 transaction.add(self._datafile, end)
2634 end = rev * self.index.entry_size
2623 end = rev * self.index.entry_size
2635 else:
2624 else:
2636 end += rev * self.index.entry_size
2625 end += rev * self.index.entry_size
2637
2626
2638 transaction.add(self._indexfile, end)
2627 transaction.add(self._indexfile, end)
2639
2628
2640 # then reset internal state in memory to forget those revisions
2629 # then reset internal state in memory to forget those revisions
2641 self._revisioncache = None
2630 self._revisioncache = None
2642 self._chaininfocache = util.lrucachedict(500)
2631 self._chaininfocache = util.lrucachedict(500)
2643 self._chunkclear()
2632 self._chunkclear()
2644
2633
2645 del self.index[rev:-1]
2634 del self.index[rev:-1]
2646
2635
2647 def checksize(self):
2636 def checksize(self):
2648 """Check size of index and data files
2637 """Check size of index and data files
2649
2638
2650 return a (dd, di) tuple.
2639 return a (dd, di) tuple.
2651 - dd: extra bytes for the "data" file
2640 - dd: extra bytes for the "data" file
2652 - di: extra bytes for the "index" file
2641 - di: extra bytes for the "index" file
2653
2642
2654 A healthy revlog will return (0, 0).
2643 A healthy revlog will return (0, 0).
2655 """
2644 """
2656 expected = 0
2645 expected = 0
2657 if len(self):
2646 if len(self):
2658 expected = max(0, self.end(len(self) - 1))
2647 expected = max(0, self.end(len(self) - 1))
2659
2648
2660 try:
2649 try:
2661 with self._datafp() as f:
2650 with self._datafp() as f:
2662 f.seek(0, io.SEEK_END)
2651 f.seek(0, io.SEEK_END)
2663 actual = f.tell()
2652 actual = f.tell()
2664 dd = actual - expected
2653 dd = actual - expected
2665 except IOError as inst:
2654 except IOError as inst:
2666 if inst.errno != errno.ENOENT:
2655 if inst.errno != errno.ENOENT:
2667 raise
2656 raise
2668 dd = 0
2657 dd = 0
2669
2658
2670 try:
2659 try:
2671 f = self.opener(self._indexfile)
2660 f = self.opener(self._indexfile)
2672 f.seek(0, io.SEEK_END)
2661 f.seek(0, io.SEEK_END)
2673 actual = f.tell()
2662 actual = f.tell()
2674 f.close()
2663 f.close()
2675 s = self.index.entry_size
2664 s = self.index.entry_size
2676 i = max(0, actual // s)
2665 i = max(0, actual // s)
2677 di = actual - (i * s)
2666 di = actual - (i * s)
2678 if self._inline:
2667 if self._inline:
2679 databytes = 0
2668 databytes = 0
2680 for r in self:
2669 for r in self:
2681 databytes += max(0, self.length(r))
2670 databytes += max(0, self.length(r))
2682 dd = 0
2671 dd = 0
2683 di = actual - len(self) * s - databytes
2672 di = actual - len(self) * s - databytes
2684 except IOError as inst:
2673 except IOError as inst:
2685 if inst.errno != errno.ENOENT:
2674 if inst.errno != errno.ENOENT:
2686 raise
2675 raise
2687 di = 0
2676 di = 0
2688
2677
2689 return (dd, di)
2678 return (dd, di)
2690
2679
2691 def files(self):
2680 def files(self):
2692 res = [self._indexfile]
2681 res = [self._indexfile]
2693 if not self._inline:
2682 if not self._inline:
2694 res.append(self._datafile)
2683 res.append(self._datafile)
2695 return res
2684 return res
2696
2685
2697 def emitrevisions(
2686 def emitrevisions(
2698 self,
2687 self,
2699 nodes,
2688 nodes,
2700 nodesorder=None,
2689 nodesorder=None,
2701 revisiondata=False,
2690 revisiondata=False,
2702 assumehaveparentrevisions=False,
2691 assumehaveparentrevisions=False,
2703 deltamode=repository.CG_DELTAMODE_STD,
2692 deltamode=repository.CG_DELTAMODE_STD,
2704 sidedata_helpers=None,
2693 sidedata_helpers=None,
2705 ):
2694 ):
2706 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2695 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2707 raise error.ProgrammingError(
2696 raise error.ProgrammingError(
2708 b'unhandled value for nodesorder: %s' % nodesorder
2697 b'unhandled value for nodesorder: %s' % nodesorder
2709 )
2698 )
2710
2699
2711 if nodesorder is None and not self._generaldelta:
2700 if nodesorder is None and not self._generaldelta:
2712 nodesorder = b'storage'
2701 nodesorder = b'storage'
2713
2702
2714 if (
2703 if (
2715 not self._storedeltachains
2704 not self._storedeltachains
2716 and deltamode != repository.CG_DELTAMODE_PREV
2705 and deltamode != repository.CG_DELTAMODE_PREV
2717 ):
2706 ):
2718 deltamode = repository.CG_DELTAMODE_FULL
2707 deltamode = repository.CG_DELTAMODE_FULL
2719
2708
2720 return storageutil.emitrevisions(
2709 return storageutil.emitrevisions(
2721 self,
2710 self,
2722 nodes,
2711 nodes,
2723 nodesorder,
2712 nodesorder,
2724 revlogrevisiondelta,
2713 revlogrevisiondelta,
2725 deltaparentfn=self.deltaparent,
2714 deltaparentfn=self.deltaparent,
2726 candeltafn=self.candelta,
2715 candeltafn=self.candelta,
2727 rawsizefn=self.rawsize,
2716 rawsizefn=self.rawsize,
2728 revdifffn=self.revdiff,
2717 revdifffn=self.revdiff,
2729 flagsfn=self.flags,
2718 flagsfn=self.flags,
2730 deltamode=deltamode,
2719 deltamode=deltamode,
2731 revisiondata=revisiondata,
2720 revisiondata=revisiondata,
2732 assumehaveparentrevisions=assumehaveparentrevisions,
2721 assumehaveparentrevisions=assumehaveparentrevisions,
2733 sidedata_helpers=sidedata_helpers,
2722 sidedata_helpers=sidedata_helpers,
2734 )
2723 )
2735
2724
2736 DELTAREUSEALWAYS = b'always'
2725 DELTAREUSEALWAYS = b'always'
2737 DELTAREUSESAMEREVS = b'samerevs'
2726 DELTAREUSESAMEREVS = b'samerevs'
2738 DELTAREUSENEVER = b'never'
2727 DELTAREUSENEVER = b'never'
2739
2728
2740 DELTAREUSEFULLADD = b'fulladd'
2729 DELTAREUSEFULLADD = b'fulladd'
2741
2730
2742 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2731 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2743
2732
2744 def clone(
2733 def clone(
2745 self,
2734 self,
2746 tr,
2735 tr,
2747 destrevlog,
2736 destrevlog,
2748 addrevisioncb=None,
2737 addrevisioncb=None,
2749 deltareuse=DELTAREUSESAMEREVS,
2738 deltareuse=DELTAREUSESAMEREVS,
2750 forcedeltabothparents=None,
2739 forcedeltabothparents=None,
2751 sidedata_helpers=None,
2740 sidedata_helpers=None,
2752 ):
2741 ):
2753 """Copy this revlog to another, possibly with format changes.
2742 """Copy this revlog to another, possibly with format changes.
2754
2743
2755 The destination revlog will contain the same revisions and nodes.
2744 The destination revlog will contain the same revisions and nodes.
2756 However, it may not be bit-for-bit identical due to e.g. delta encoding
2745 However, it may not be bit-for-bit identical due to e.g. delta encoding
2757 differences.
2746 differences.
2758
2747
2759 The ``deltareuse`` argument control how deltas from the existing revlog
2748 The ``deltareuse`` argument control how deltas from the existing revlog
2760 are preserved in the destination revlog. The argument can have the
2749 are preserved in the destination revlog. The argument can have the
2761 following values:
2750 following values:
2762
2751
2763 DELTAREUSEALWAYS
2752 DELTAREUSEALWAYS
2764 Deltas will always be reused (if possible), even if the destination
2753 Deltas will always be reused (if possible), even if the destination
2765 revlog would not select the same revisions for the delta. This is the
2754 revlog would not select the same revisions for the delta. This is the
2766 fastest mode of operation.
2755 fastest mode of operation.
2767 DELTAREUSESAMEREVS
2756 DELTAREUSESAMEREVS
2768 Deltas will be reused if the destination revlog would pick the same
2757 Deltas will be reused if the destination revlog would pick the same
2769 revisions for the delta. This mode strikes a balance between speed
2758 revisions for the delta. This mode strikes a balance between speed
2770 and optimization.
2759 and optimization.
2771 DELTAREUSENEVER
2760 DELTAREUSENEVER
2772 Deltas will never be reused. This is the slowest mode of execution.
2761 Deltas will never be reused. This is the slowest mode of execution.
2773 This mode can be used to recompute deltas (e.g. if the diff/delta
2762 This mode can be used to recompute deltas (e.g. if the diff/delta
2774 algorithm changes).
2763 algorithm changes).
2775 DELTAREUSEFULLADD
2764 DELTAREUSEFULLADD
2776 Revision will be re-added as if their were new content. This is
2765 Revision will be re-added as if their were new content. This is
2777 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2766 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2778 eg: large file detection and handling.
2767 eg: large file detection and handling.
2779
2768
2780 Delta computation can be slow, so the choice of delta reuse policy can
2769 Delta computation can be slow, so the choice of delta reuse policy can
2781 significantly affect run time.
2770 significantly affect run time.
2782
2771
2783 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2772 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2784 two extremes. Deltas will be reused if they are appropriate. But if the
2773 two extremes. Deltas will be reused if they are appropriate. But if the
2785 delta could choose a better revision, it will do so. This means if you
2774 delta could choose a better revision, it will do so. This means if you
2786 are converting a non-generaldelta revlog to a generaldelta revlog,
2775 are converting a non-generaldelta revlog to a generaldelta revlog,
2787 deltas will be recomputed if the delta's parent isn't a parent of the
2776 deltas will be recomputed if the delta's parent isn't a parent of the
2788 revision.
2777 revision.
2789
2778
2790 In addition to the delta policy, the ``forcedeltabothparents``
2779 In addition to the delta policy, the ``forcedeltabothparents``
2791 argument controls whether to force compute deltas against both parents
2780 argument controls whether to force compute deltas against both parents
2792 for merges. By default, the current default is used.
2781 for merges. By default, the current default is used.
2793
2782
2794 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2783 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2795 `sidedata_helpers`.
2784 `sidedata_helpers`.
2796 """
2785 """
2797 if deltareuse not in self.DELTAREUSEALL:
2786 if deltareuse not in self.DELTAREUSEALL:
2798 raise ValueError(
2787 raise ValueError(
2799 _(b'value for deltareuse invalid: %s') % deltareuse
2788 _(b'value for deltareuse invalid: %s') % deltareuse
2800 )
2789 )
2801
2790
2802 if len(destrevlog):
2791 if len(destrevlog):
2803 raise ValueError(_(b'destination revlog is not empty'))
2792 raise ValueError(_(b'destination revlog is not empty'))
2804
2793
2805 if getattr(self, 'filteredrevs', None):
2794 if getattr(self, 'filteredrevs', None):
2806 raise ValueError(_(b'source revlog has filtered revisions'))
2795 raise ValueError(_(b'source revlog has filtered revisions'))
2807 if getattr(destrevlog, 'filteredrevs', None):
2796 if getattr(destrevlog, 'filteredrevs', None):
2808 raise ValueError(_(b'destination revlog has filtered revisions'))
2797 raise ValueError(_(b'destination revlog has filtered revisions'))
2809
2798
2810 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2799 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2811 # if possible.
2800 # if possible.
2812 oldlazydelta = destrevlog._lazydelta
2801 oldlazydelta = destrevlog._lazydelta
2813 oldlazydeltabase = destrevlog._lazydeltabase
2802 oldlazydeltabase = destrevlog._lazydeltabase
2814 oldamd = destrevlog._deltabothparents
2803 oldamd = destrevlog._deltabothparents
2815
2804
2816 try:
2805 try:
2817 if deltareuse == self.DELTAREUSEALWAYS:
2806 if deltareuse == self.DELTAREUSEALWAYS:
2818 destrevlog._lazydeltabase = True
2807 destrevlog._lazydeltabase = True
2819 destrevlog._lazydelta = True
2808 destrevlog._lazydelta = True
2820 elif deltareuse == self.DELTAREUSESAMEREVS:
2809 elif deltareuse == self.DELTAREUSESAMEREVS:
2821 destrevlog._lazydeltabase = False
2810 destrevlog._lazydeltabase = False
2822 destrevlog._lazydelta = True
2811 destrevlog._lazydelta = True
2823 elif deltareuse == self.DELTAREUSENEVER:
2812 elif deltareuse == self.DELTAREUSENEVER:
2824 destrevlog._lazydeltabase = False
2813 destrevlog._lazydeltabase = False
2825 destrevlog._lazydelta = False
2814 destrevlog._lazydelta = False
2826
2815
2827 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2816 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2828
2817
2829 self._clone(
2818 self._clone(
2830 tr,
2819 tr,
2831 destrevlog,
2820 destrevlog,
2832 addrevisioncb,
2821 addrevisioncb,
2833 deltareuse,
2822 deltareuse,
2834 forcedeltabothparents,
2823 forcedeltabothparents,
2835 sidedata_helpers,
2824 sidedata_helpers,
2836 )
2825 )
2837
2826
2838 finally:
2827 finally:
2839 destrevlog._lazydelta = oldlazydelta
2828 destrevlog._lazydelta = oldlazydelta
2840 destrevlog._lazydeltabase = oldlazydeltabase
2829 destrevlog._lazydeltabase = oldlazydeltabase
2841 destrevlog._deltabothparents = oldamd
2830 destrevlog._deltabothparents = oldamd
2842
2831
2843 def _clone(
2832 def _clone(
2844 self,
2833 self,
2845 tr,
2834 tr,
2846 destrevlog,
2835 destrevlog,
2847 addrevisioncb,
2836 addrevisioncb,
2848 deltareuse,
2837 deltareuse,
2849 forcedeltabothparents,
2838 forcedeltabothparents,
2850 sidedata_helpers,
2839 sidedata_helpers,
2851 ):
2840 ):
2852 """perform the core duty of `revlog.clone` after parameter processing"""
2841 """perform the core duty of `revlog.clone` after parameter processing"""
2853 deltacomputer = deltautil.deltacomputer(destrevlog)
2842 deltacomputer = deltautil.deltacomputer(destrevlog)
2854 index = self.index
2843 index = self.index
2855 for rev in self:
2844 for rev in self:
2856 entry = index[rev]
2845 entry = index[rev]
2857
2846
2858 # Some classes override linkrev to take filtered revs into
2847 # Some classes override linkrev to take filtered revs into
2859 # account. Use raw entry from index.
2848 # account. Use raw entry from index.
2860 flags = entry[0] & 0xFFFF
2849 flags = entry[0] & 0xFFFF
2861 linkrev = entry[4]
2850 linkrev = entry[4]
2862 p1 = index[entry[5]][7]
2851 p1 = index[entry[5]][7]
2863 p2 = index[entry[6]][7]
2852 p2 = index[entry[6]][7]
2864 node = entry[7]
2853 node = entry[7]
2865
2854
2866 # (Possibly) reuse the delta from the revlog if allowed and
2855 # (Possibly) reuse the delta from the revlog if allowed and
2867 # the revlog chunk is a delta.
2856 # the revlog chunk is a delta.
2868 cachedelta = None
2857 cachedelta = None
2869 rawtext = None
2858 rawtext = None
2870 if deltareuse == self.DELTAREUSEFULLADD:
2859 if deltareuse == self.DELTAREUSEFULLADD:
2871 text, sidedata = self._revisiondata(rev)
2860 text, sidedata = self._revisiondata(rev)
2872
2861
2873 if sidedata_helpers is not None:
2862 if sidedata_helpers is not None:
2874 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2863 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2875 self, sidedata_helpers, sidedata, rev
2864 self, sidedata_helpers, sidedata, rev
2876 )
2865 )
2877 flags = flags | new_flags[0] & ~new_flags[1]
2866 flags = flags | new_flags[0] & ~new_flags[1]
2878
2867
2879 destrevlog.addrevision(
2868 destrevlog.addrevision(
2880 text,
2869 text,
2881 tr,
2870 tr,
2882 linkrev,
2871 linkrev,
2883 p1,
2872 p1,
2884 p2,
2873 p2,
2885 cachedelta=cachedelta,
2874 cachedelta=cachedelta,
2886 node=node,
2875 node=node,
2887 flags=flags,
2876 flags=flags,
2888 deltacomputer=deltacomputer,
2877 deltacomputer=deltacomputer,
2889 sidedata=sidedata,
2878 sidedata=sidedata,
2890 )
2879 )
2891 else:
2880 else:
2892 if destrevlog._lazydelta:
2881 if destrevlog._lazydelta:
2893 dp = self.deltaparent(rev)
2882 dp = self.deltaparent(rev)
2894 if dp != nullrev:
2883 if dp != nullrev:
2895 cachedelta = (dp, bytes(self._chunk(rev)))
2884 cachedelta = (dp, bytes(self._chunk(rev)))
2896
2885
2897 sidedata = None
2886 sidedata = None
2898 if not cachedelta:
2887 if not cachedelta:
2899 rawtext, sidedata = self._revisiondata(rev)
2888 rawtext, sidedata = self._revisiondata(rev)
2900 if sidedata is None:
2889 if sidedata is None:
2901 sidedata = self.sidedata(rev)
2890 sidedata = self.sidedata(rev)
2902
2891
2903 if sidedata_helpers is not None:
2892 if sidedata_helpers is not None:
2904 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2893 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2905 self, sidedata_helpers, sidedata, rev
2894 self, sidedata_helpers, sidedata, rev
2906 )
2895 )
2907 flags = flags | new_flags[0] & ~new_flags[1]
2896 flags = flags | new_flags[0] & ~new_flags[1]
2908
2897
2909 with destrevlog._writing(tr):
2898 with destrevlog._writing(tr):
2910 destrevlog._addrevision(
2899 destrevlog._addrevision(
2911 node,
2900 node,
2912 rawtext,
2901 rawtext,
2913 tr,
2902 tr,
2914 linkrev,
2903 linkrev,
2915 p1,
2904 p1,
2916 p2,
2905 p2,
2917 flags,
2906 flags,
2918 cachedelta,
2907 cachedelta,
2919 deltacomputer=deltacomputer,
2908 deltacomputer=deltacomputer,
2920 sidedata=sidedata,
2909 sidedata=sidedata,
2921 )
2910 )
2922
2911
2923 if addrevisioncb:
2912 if addrevisioncb:
2924 addrevisioncb(self, rev, node)
2913 addrevisioncb(self, rev, node)
2925
2914
2926 def censorrevision(self, tr, censornode, tombstone=b''):
2915 def censorrevision(self, tr, censornode, tombstone=b''):
2927 if self._format_version == REVLOGV0:
2916 if self._format_version == REVLOGV0:
2928 raise error.RevlogError(
2917 raise error.RevlogError(
2929 _(b'cannot censor with version %d revlogs')
2918 _(b'cannot censor with version %d revlogs')
2930 % self._format_version
2919 % self._format_version
2931 )
2920 )
2932
2921
2933 censorrev = self.rev(censornode)
2922 censorrev = self.rev(censornode)
2934 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2923 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2935
2924
2936 if len(tombstone) > self.rawsize(censorrev):
2925 if len(tombstone) > self.rawsize(censorrev):
2937 raise error.Abort(
2926 raise error.Abort(
2938 _(b'censor tombstone must be no longer than censored data')
2927 _(b'censor tombstone must be no longer than censored data')
2939 )
2928 )
2940
2929
2941 # Rewriting the revlog in place is hard. Our strategy for censoring is
2930 # Rewriting the revlog in place is hard. Our strategy for censoring is
2942 # to create a new revlog, copy all revisions to it, then replace the
2931 # to create a new revlog, copy all revisions to it, then replace the
2943 # revlogs on transaction close.
2932 # revlogs on transaction close.
2944 #
2933 #
2945 # This is a bit dangerous. We could easily have a mismatch of state.
2934 # This is a bit dangerous. We could easily have a mismatch of state.
2946 newrl = revlog(
2935 newrl = revlog(
2947 self.opener,
2936 self.opener,
2948 target=self.target,
2937 target=self.target,
2949 radix=self.radix,
2938 radix=self.radix,
2950 postfix=b'tmpcensored',
2939 postfix=b'tmpcensored',
2951 censorable=True,
2940 censorable=True,
2952 )
2941 )
2953 newrl._format_version = self._format_version
2942 newrl._format_version = self._format_version
2954 newrl._format_flags = self._format_flags
2943 newrl._format_flags = self._format_flags
2955 newrl._generaldelta = self._generaldelta
2944 newrl._generaldelta = self._generaldelta
2956 newrl._parse_index = self._parse_index
2945 newrl._parse_index = self._parse_index
2957
2946
2958 for rev in self.revs():
2947 for rev in self.revs():
2959 node = self.node(rev)
2948 node = self.node(rev)
2960 p1, p2 = self.parents(node)
2949 p1, p2 = self.parents(node)
2961
2950
2962 if rev == censorrev:
2951 if rev == censorrev:
2963 newrl.addrawrevision(
2952 newrl.addrawrevision(
2964 tombstone,
2953 tombstone,
2965 tr,
2954 tr,
2966 self.linkrev(censorrev),
2955 self.linkrev(censorrev),
2967 p1,
2956 p1,
2968 p2,
2957 p2,
2969 censornode,
2958 censornode,
2970 REVIDX_ISCENSORED,
2959 REVIDX_ISCENSORED,
2971 )
2960 )
2972
2961
2973 if newrl.deltaparent(rev) != nullrev:
2962 if newrl.deltaparent(rev) != nullrev:
2974 raise error.Abort(
2963 raise error.Abort(
2975 _(
2964 _(
2976 b'censored revision stored as delta; '
2965 b'censored revision stored as delta; '
2977 b'cannot censor'
2966 b'cannot censor'
2978 ),
2967 ),
2979 hint=_(
2968 hint=_(
2980 b'censoring of revlogs is not '
2969 b'censoring of revlogs is not '
2981 b'fully implemented; please report '
2970 b'fully implemented; please report '
2982 b'this bug'
2971 b'this bug'
2983 ),
2972 ),
2984 )
2973 )
2985 continue
2974 continue
2986
2975
2987 if self.iscensored(rev):
2976 if self.iscensored(rev):
2988 if self.deltaparent(rev) != nullrev:
2977 if self.deltaparent(rev) != nullrev:
2989 raise error.Abort(
2978 raise error.Abort(
2990 _(
2979 _(
2991 b'cannot censor due to censored '
2980 b'cannot censor due to censored '
2992 b'revision having delta stored'
2981 b'revision having delta stored'
2993 )
2982 )
2994 )
2983 )
2995 rawtext = self._chunk(rev)
2984 rawtext = self._chunk(rev)
2996 else:
2985 else:
2997 rawtext = self.rawdata(rev)
2986 rawtext = self.rawdata(rev)
2998
2987
2999 newrl.addrawrevision(
2988 newrl.addrawrevision(
3000 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2989 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3001 )
2990 )
3002
2991
3003 tr.addbackup(self._indexfile, location=b'store')
2992 tr.addbackup(self._indexfile, location=b'store')
3004 if not self._inline:
2993 if not self._inline:
3005 tr.addbackup(self._datafile, location=b'store')
2994 tr.addbackup(self._datafile, location=b'store')
3006
2995
3007 self.opener.rename(newrl._indexfile, self._indexfile)
2996 self.opener.rename(newrl._indexfile, self._indexfile)
3008 if not self._inline:
2997 if not self._inline:
3009 self.opener.rename(newrl._datafile, self._datafile)
2998 self.opener.rename(newrl._datafile, self._datafile)
3010
2999
3011 self.clearcaches()
3000 self.clearcaches()
3012 self._loadindex()
3001 self._loadindex()
3013
3002
3014 def verifyintegrity(self, state):
3003 def verifyintegrity(self, state):
3015 """Verifies the integrity of the revlog.
3004 """Verifies the integrity of the revlog.
3016
3005
3017 Yields ``revlogproblem`` instances describing problems that are
3006 Yields ``revlogproblem`` instances describing problems that are
3018 found.
3007 found.
3019 """
3008 """
3020 dd, di = self.checksize()
3009 dd, di = self.checksize()
3021 if dd:
3010 if dd:
3022 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3011 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3023 if di:
3012 if di:
3024 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3013 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3025
3014
3026 version = self._format_version
3015 version = self._format_version
3027
3016
3028 # The verifier tells us what version revlog we should be.
3017 # The verifier tells us what version revlog we should be.
3029 if version != state[b'expectedversion']:
3018 if version != state[b'expectedversion']:
3030 yield revlogproblem(
3019 yield revlogproblem(
3031 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3020 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3032 % (self.display_id, version, state[b'expectedversion'])
3021 % (self.display_id, version, state[b'expectedversion'])
3033 )
3022 )
3034
3023
3035 state[b'skipread'] = set()
3024 state[b'skipread'] = set()
3036 state[b'safe_renamed'] = set()
3025 state[b'safe_renamed'] = set()
3037
3026
3038 for rev in self:
3027 for rev in self:
3039 node = self.node(rev)
3028 node = self.node(rev)
3040
3029
3041 # Verify contents. 4 cases to care about:
3030 # Verify contents. 4 cases to care about:
3042 #
3031 #
3043 # common: the most common case
3032 # common: the most common case
3044 # rename: with a rename
3033 # rename: with a rename
3045 # meta: file content starts with b'\1\n', the metadata
3034 # meta: file content starts with b'\1\n', the metadata
3046 # header defined in filelog.py, but without a rename
3035 # header defined in filelog.py, but without a rename
3047 # ext: content stored externally
3036 # ext: content stored externally
3048 #
3037 #
3049 # More formally, their differences are shown below:
3038 # More formally, their differences are shown below:
3050 #
3039 #
3051 # | common | rename | meta | ext
3040 # | common | rename | meta | ext
3052 # -------------------------------------------------------
3041 # -------------------------------------------------------
3053 # flags() | 0 | 0 | 0 | not 0
3042 # flags() | 0 | 0 | 0 | not 0
3054 # renamed() | False | True | False | ?
3043 # renamed() | False | True | False | ?
3055 # rawtext[0:2]=='\1\n'| False | True | True | ?
3044 # rawtext[0:2]=='\1\n'| False | True | True | ?
3056 #
3045 #
3057 # "rawtext" means the raw text stored in revlog data, which
3046 # "rawtext" means the raw text stored in revlog data, which
3058 # could be retrieved by "rawdata(rev)". "text"
3047 # could be retrieved by "rawdata(rev)". "text"
3059 # mentioned below is "revision(rev)".
3048 # mentioned below is "revision(rev)".
3060 #
3049 #
3061 # There are 3 different lengths stored physically:
3050 # There are 3 different lengths stored physically:
3062 # 1. L1: rawsize, stored in revlog index
3051 # 1. L1: rawsize, stored in revlog index
3063 # 2. L2: len(rawtext), stored in revlog data
3052 # 2. L2: len(rawtext), stored in revlog data
3064 # 3. L3: len(text), stored in revlog data if flags==0, or
3053 # 3. L3: len(text), stored in revlog data if flags==0, or
3065 # possibly somewhere else if flags!=0
3054 # possibly somewhere else if flags!=0
3066 #
3055 #
3067 # L1 should be equal to L2. L3 could be different from them.
3056 # L1 should be equal to L2. L3 could be different from them.
3068 # "text" may or may not affect commit hash depending on flag
3057 # "text" may or may not affect commit hash depending on flag
3069 # processors (see flagutil.addflagprocessor).
3058 # processors (see flagutil.addflagprocessor).
3070 #
3059 #
3071 # | common | rename | meta | ext
3060 # | common | rename | meta | ext
3072 # -------------------------------------------------
3061 # -------------------------------------------------
3073 # rawsize() | L1 | L1 | L1 | L1
3062 # rawsize() | L1 | L1 | L1 | L1
3074 # size() | L1 | L2-LM | L1(*) | L1 (?)
3063 # size() | L1 | L2-LM | L1(*) | L1 (?)
3075 # len(rawtext) | L2 | L2 | L2 | L2
3064 # len(rawtext) | L2 | L2 | L2 | L2
3076 # len(text) | L2 | L2 | L2 | L3
3065 # len(text) | L2 | L2 | L2 | L3
3077 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3066 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3078 #
3067 #
3079 # LM: length of metadata, depending on rawtext
3068 # LM: length of metadata, depending on rawtext
3080 # (*): not ideal, see comment in filelog.size
3069 # (*): not ideal, see comment in filelog.size
3081 # (?): could be "- len(meta)" if the resolved content has
3070 # (?): could be "- len(meta)" if the resolved content has
3082 # rename metadata
3071 # rename metadata
3083 #
3072 #
3084 # Checks needed to be done:
3073 # Checks needed to be done:
3085 # 1. length check: L1 == L2, in all cases.
3074 # 1. length check: L1 == L2, in all cases.
3086 # 2. hash check: depending on flag processor, we may need to
3075 # 2. hash check: depending on flag processor, we may need to
3087 # use either "text" (external), or "rawtext" (in revlog).
3076 # use either "text" (external), or "rawtext" (in revlog).
3088
3077
3089 try:
3078 try:
3090 skipflags = state.get(b'skipflags', 0)
3079 skipflags = state.get(b'skipflags', 0)
3091 if skipflags:
3080 if skipflags:
3092 skipflags &= self.flags(rev)
3081 skipflags &= self.flags(rev)
3093
3082
3094 _verify_revision(self, skipflags, state, node)
3083 _verify_revision(self, skipflags, state, node)
3095
3084
3096 l1 = self.rawsize(rev)
3085 l1 = self.rawsize(rev)
3097 l2 = len(self.rawdata(node))
3086 l2 = len(self.rawdata(node))
3098
3087
3099 if l1 != l2:
3088 if l1 != l2:
3100 yield revlogproblem(
3089 yield revlogproblem(
3101 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3090 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3102 node=node,
3091 node=node,
3103 )
3092 )
3104
3093
3105 except error.CensoredNodeError:
3094 except error.CensoredNodeError:
3106 if state[b'erroroncensored']:
3095 if state[b'erroroncensored']:
3107 yield revlogproblem(
3096 yield revlogproblem(
3108 error=_(b'censored file data'), node=node
3097 error=_(b'censored file data'), node=node
3109 )
3098 )
3110 state[b'skipread'].add(node)
3099 state[b'skipread'].add(node)
3111 except Exception as e:
3100 except Exception as e:
3112 yield revlogproblem(
3101 yield revlogproblem(
3113 error=_(b'unpacking %s: %s')
3102 error=_(b'unpacking %s: %s')
3114 % (short(node), stringutil.forcebytestr(e)),
3103 % (short(node), stringutil.forcebytestr(e)),
3115 node=node,
3104 node=node,
3116 )
3105 )
3117 state[b'skipread'].add(node)
3106 state[b'skipread'].add(node)
3118
3107
3119 def storageinfo(
3108 def storageinfo(
3120 self,
3109 self,
3121 exclusivefiles=False,
3110 exclusivefiles=False,
3122 sharedfiles=False,
3111 sharedfiles=False,
3123 revisionscount=False,
3112 revisionscount=False,
3124 trackedsize=False,
3113 trackedsize=False,
3125 storedsize=False,
3114 storedsize=False,
3126 ):
3115 ):
3127 d = {}
3116 d = {}
3128
3117
3129 if exclusivefiles:
3118 if exclusivefiles:
3130 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3119 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3131 if not self._inline:
3120 if not self._inline:
3132 d[b'exclusivefiles'].append((self.opener, self._datafile))
3121 d[b'exclusivefiles'].append((self.opener, self._datafile))
3133
3122
3134 if sharedfiles:
3123 if sharedfiles:
3135 d[b'sharedfiles'] = []
3124 d[b'sharedfiles'] = []
3136
3125
3137 if revisionscount:
3126 if revisionscount:
3138 d[b'revisionscount'] = len(self)
3127 d[b'revisionscount'] = len(self)
3139
3128
3140 if trackedsize:
3129 if trackedsize:
3141 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3130 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3142
3131
3143 if storedsize:
3132 if storedsize:
3144 d[b'storedsize'] = sum(
3133 d[b'storedsize'] = sum(
3145 self.opener.stat(path).st_size for path in self.files()
3134 self.opener.stat(path).st_size for path in self.files()
3146 )
3135 )
3147
3136
3148 return d
3137 return d
3149
3138
3150 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3139 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3151 if not self.hassidedata:
3140 if not self.hassidedata:
3152 return
3141 return
3153 # inline are not yet supported because they suffer from an issue when
3142 # inline are not yet supported because they suffer from an issue when
3154 # rewriting them (since it's not an append-only operation).
3143 # rewriting them (since it's not an append-only operation).
3155 # See issue6485.
3144 # See issue6485.
3156 assert not self._inline
3145 assert not self._inline
3157 if not helpers[1] and not helpers[2]:
3146 if not helpers[1] and not helpers[2]:
3158 # Nothing to generate or remove
3147 # Nothing to generate or remove
3159 return
3148 return
3160
3149
3161 # changelog implement some "delayed" writing mechanism that assume that
3150 # changelog implement some "delayed" writing mechanism that assume that
3162 # all index data is writen in append mode and is therefor incompatible
3151 # all index data is writen in append mode and is therefor incompatible
3163 # with the seeked write done in this method. The use of such "delayed"
3152 # with the seeked write done in this method. The use of such "delayed"
3164 # writing will soon be removed for revlog version that support side
3153 # writing will soon be removed for revlog version that support side
3165 # data, so for now, we only keep this simple assert to highlight the
3154 # data, so for now, we only keep this simple assert to highlight the
3166 # situation.
3155 # situation.
3167 delayed = getattr(self, '_delayed', False)
3156 delayed = getattr(self, '_delayed', False)
3168 diverted = getattr(self, '_divert', False)
3157 diverted = getattr(self, '_divert', False)
3169 if delayed and not diverted:
3158 if delayed and not diverted:
3170 msg = "cannot rewrite_sidedata of a delayed revlog"
3159 msg = "cannot rewrite_sidedata of a delayed revlog"
3171 raise error.ProgrammingError(msg)
3160 raise error.ProgrammingError(msg)
3172
3161
3173 new_entries = []
3162 new_entries = []
3174 # append the new sidedata
3163 # append the new sidedata
3175 with self._writing(transaction):
3164 with self._writing(transaction):
3176 ifh, dfh = self._writinghandles
3165 ifh, dfh = self._writinghandles
3177 dfh.seek(0, os.SEEK_END)
3166 dfh.seek(0, os.SEEK_END)
3178 current_offset = dfh.tell()
3167 current_offset = dfh.tell()
3179 for rev in range(startrev, endrev + 1):
3168 for rev in range(startrev, endrev + 1):
3180 entry = self.index[rev]
3169 entry = self.index[rev]
3181 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3170 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3182 store=self,
3171 store=self,
3183 sidedata_helpers=helpers,
3172 sidedata_helpers=helpers,
3184 sidedata={},
3173 sidedata={},
3185 rev=rev,
3174 rev=rev,
3186 )
3175 )
3187
3176
3188 serialized_sidedata = sidedatautil.serialize_sidedata(
3177 serialized_sidedata = sidedatautil.serialize_sidedata(
3189 new_sidedata
3178 new_sidedata
3190 )
3179 )
3191 if entry[8] != 0 or entry[9] != 0:
3180 if entry[8] != 0 or entry[9] != 0:
3192 # rewriting entries that already have sidedata is not
3181 # rewriting entries that already have sidedata is not
3193 # supported yet, because it introduces garbage data in the
3182 # supported yet, because it introduces garbage data in the
3194 # revlog.
3183 # revlog.
3195 msg = b"rewriting existing sidedata is not supported yet"
3184 msg = b"rewriting existing sidedata is not supported yet"
3196 raise error.Abort(msg)
3185 raise error.Abort(msg)
3197
3186
3198 # Apply (potential) flags to add and to remove after running
3187 # Apply (potential) flags to add and to remove after running
3199 # the sidedata helpers
3188 # the sidedata helpers
3200 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3189 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3201 entry = (new_offset_flags,) + entry[1:8]
3190 entry = (new_offset_flags,) + entry[1:8]
3202 entry += (current_offset, len(serialized_sidedata))
3191 entry += (current_offset, len(serialized_sidedata))
3203
3192
3204 # the sidedata computation might have move the file cursors around
3193 # the sidedata computation might have move the file cursors around
3205 dfh.seek(current_offset, os.SEEK_SET)
3194 dfh.seek(current_offset, os.SEEK_SET)
3206 dfh.write(serialized_sidedata)
3195 dfh.write(serialized_sidedata)
3207 new_entries.append(entry)
3196 new_entries.append(entry)
3208 current_offset += len(serialized_sidedata)
3197 current_offset += len(serialized_sidedata)
3209
3198
3210 # rewrite the new index entries
3199 # rewrite the new index entries
3211 ifh.seek(startrev * self.index.entry_size)
3200 ifh.seek(startrev * self.index.entry_size)
3212 for i, e in enumerate(new_entries):
3201 for i, e in enumerate(new_entries):
3213 rev = startrev + i
3202 rev = startrev + i
3214 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3203 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3215 packed = self.index.entry_binary(rev)
3204 packed = self.index.entry_binary(rev)
3216 if rev == 0:
3205 if rev == 0:
3217 header = self._format_flags | self._format_version
3206 header = self._format_flags | self._format_version
3218 header = self.index.pack_header(header)
3207 header = self.index.pack_header(header)
3219 packed = header + packed
3208 packed = header + packed
3220 ifh.write(packed)
3209 ifh.write(packed)
@@ -1,116 +1,123 b''
1 # revlogdeltas.py - constant used for revlog logic
1 # revlogdeltas.py - constant used for revlog logic
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import struct
12 import struct
13
13
14 from ..interfaces import repository
14 from ..interfaces import repository
15
15
16 ### Internal utily constants
16 ### Internal utily constants
17
17
18 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
18 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
19 KIND_MANIFESTLOG = 1002
19 KIND_MANIFESTLOG = 1002
20 KIND_FILELOG = 1003
20 KIND_FILELOG = 1003
21 KIND_OTHER = 1004
21 KIND_OTHER = 1004
22
22
23 ALL_KINDS = {
23 ALL_KINDS = {
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_OTHER,
27 KIND_OTHER,
28 }
28 }
29
29
30 ### main revlog header
30 ### main revlog header
31
31
32 INDEX_HEADER = struct.Struct(b">I")
32 INDEX_HEADER = struct.Struct(b">I")
33
33
34 ## revlog version
34 ## revlog version
35 REVLOGV0 = 0
35 REVLOGV0 = 0
36 REVLOGV1 = 1
36 REVLOGV1 = 1
37 # Dummy value until file format is finalized.
37 # Dummy value until file format is finalized.
38 REVLOGV2 = 0xDEAD
38 REVLOGV2 = 0xDEAD
39
39
40 ## global revlog header flags
40 ## global revlog header flags
41 # Shared across v1 and v2.
41 # Shared across v1 and v2.
42 FLAG_INLINE_DATA = 1 << 16
42 FLAG_INLINE_DATA = 1 << 16
43 # Only used by v1, implied by v2.
43 # Only used by v1, implied by v2.
44 FLAG_GENERALDELTA = 1 << 17
44 FLAG_GENERALDELTA = 1 << 17
45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 REVLOG_DEFAULT_FORMAT = REVLOGV1
46 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 REVLOGV0_FLAGS = 0
48 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
49 REVLOGV2_FLAGS = FLAG_INLINE_DATA
50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
50
51
51 ### individual entry
52 ### individual entry
52
53
53 ## index v0:
54 ## index v0:
54 # 4 bytes: offset
55 # 4 bytes: offset
55 # 4 bytes: compressed length
56 # 4 bytes: compressed length
56 # 4 bytes: base rev
57 # 4 bytes: base rev
57 # 4 bytes: link rev
58 # 4 bytes: link rev
58 # 20 bytes: parent 1 nodeid
59 # 20 bytes: parent 1 nodeid
59 # 20 bytes: parent 2 nodeid
60 # 20 bytes: parent 2 nodeid
60 # 20 bytes: nodeid
61 # 20 bytes: nodeid
61 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
62
63
63 ## index v1
64 ## index v1
64 # 6 bytes: offset
65 # 6 bytes: offset
65 # 2 bytes: flags
66 # 2 bytes: flags
66 # 4 bytes: compressed length
67 # 4 bytes: compressed length
67 # 4 bytes: uncompressed length
68 # 4 bytes: uncompressed length
68 # 4 bytes: base rev
69 # 4 bytes: base rev
69 # 4 bytes: link rev
70 # 4 bytes: link rev
70 # 4 bytes: parent 1 rev
71 # 4 bytes: parent 1 rev
71 # 4 bytes: parent 2 rev
72 # 4 bytes: parent 2 rev
72 # 32 bytes: nodeid
73 # 32 bytes: nodeid
73 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
74 assert INDEX_ENTRY_V1.size == 32 * 2
75 assert INDEX_ENTRY_V1.size == 32 * 2
75
76
76 # 6 bytes: offset
77 # 6 bytes: offset
77 # 2 bytes: flags
78 # 2 bytes: flags
78 # 4 bytes: compressed length
79 # 4 bytes: compressed length
79 # 4 bytes: uncompressed length
80 # 4 bytes: uncompressed length
80 # 4 bytes: base rev
81 # 4 bytes: base rev
81 # 4 bytes: link rev
82 # 4 bytes: link rev
82 # 4 bytes: parent 1 rev
83 # 4 bytes: parent 1 rev
83 # 4 bytes: parent 2 rev
84 # 4 bytes: parent 2 rev
84 # 32 bytes: nodeid
85 # 32 bytes: nodeid
85 # 8 bytes: sidedata offset
86 # 8 bytes: sidedata offset
86 # 4 bytes: sidedata compressed length
87 # 4 bytes: sidedata compressed length
87 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
88 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
88 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
89 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
89 assert INDEX_ENTRY_V2.size == 32 * 3
90 assert INDEX_ENTRY_V2.size == 32 * 3
90
91
91 # revlog index flags
92 # revlog index flags
92
93
93 # For historical reasons, revlog's internal flags were exposed via the
94 # For historical reasons, revlog's internal flags were exposed via the
94 # wire protocol and are even exposed in parts of the storage APIs.
95 # wire protocol and are even exposed in parts of the storage APIs.
95
96
96 # revision has censor metadata, must be verified
97 # revision has censor metadata, must be verified
97 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
98 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
98 # revision hash does not match data (narrowhg)
99 # revision hash does not match data (narrowhg)
99 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
100 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
100 # revision data is stored externally
101 # revision data is stored externally
101 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
102 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
102 # revision changes files in a way that could affect copy tracing.
103 # revision changes files in a way that could affect copy tracing.
103 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
104 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
104 REVIDX_DEFAULT_FLAGS = 0
105 REVIDX_DEFAULT_FLAGS = 0
105 # stable order in which flags need to be processed and their processors applied
106 # stable order in which flags need to be processed and their processors applied
106 REVIDX_FLAGS_ORDER = [
107 REVIDX_FLAGS_ORDER = [
107 REVIDX_ISCENSORED,
108 REVIDX_ISCENSORED,
108 REVIDX_ELLIPSIS,
109 REVIDX_ELLIPSIS,
109 REVIDX_EXTSTORED,
110 REVIDX_EXTSTORED,
110 REVIDX_HASCOPIESINFO,
111 REVIDX_HASCOPIESINFO,
111 ]
112 ]
112
113
113 # bitmark for flags that could cause rawdata content change
114 # bitmark for flags that could cause rawdata content change
114 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
115 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
115
116
117 SUPPORTED_FLAGS = {
118 REVLOGV0: REVLOGV0_FLAGS,
119 REVLOGV1: REVLOGV1_FLAGS,
120 REVLOGV2: REVLOGV2_FLAGS,
121 }
122
116 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
123 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
General Comments 0
You need to be logged in to leave comments. Login now