revlog: unify flag processing when loading index...
marmoute
r48005:4d1c893b default
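This changeset replaces the per-version if/elif chain in _loadindex() with a lookup into a FEATURES_BY_VERSION table imported from revlogutils.constants. The table itself is outside this diff; the following is a minimal sketch of the shape the new code assumes, reconstructed from the removed branches (illustrative only, not the actual upstream definition):

    FEATURES_BY_VERSION = {
        REVLOGV0: {
            b'inline': lambda version_flags: False,
            b'generaldelta': lambda version_flags: False,
            b'sidedata': False,
        },
        REVLOGV1: {
            b'inline': lambda version_flags: bool(version_flags & FLAG_INLINE_DATA),
            b'generaldelta': lambda version_flags: bool(version_flags & FLAG_GENERALDELTA),
            b'sidedata': False,
        },
        REVLOGV2: {
            # inline stays off for v2 until issue6485 is fixed (see the
            # removed comment in the hunk below); generaldelta is implied
            # and sidedata support is built in.
            b'inline': lambda version_flags: False,
            b'generaldelta': lambda version_flags: True,
            b'sidedata': True,
        },
    }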
@@ -1,3209 +1,3196 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
+    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
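# Editorial note (not part of this change): offset_type() packs a byte
# offset and a 16-bit flag field into the single integer stored as the
# first value of an index entry. For example, offset_type(1024,
# REVIDX_ISCENSORED) evaluates to 1024 << 16 | REVIDX_ISCENSORED; the
# accessors further down undo the packing with `entry[0] >> 16` (start)
# and `entry[0] & 0xFFFF` (flags).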


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identify the content stored in
        this revlog. It help the rest of the code to understand what the revlog
        is about without having to resort to heuristic and index filename
        analysis. Note: that this must be reliably be set by normal code, but
        that test, debug, or performance measurement code might not set this to
        accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to setup associated default revlog mode

        These values might be affected when actually reading on disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size for start to use mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
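        # Editorial note (not part of this change): `x & (x - 1)` is zero
        # exactly when x is a power of two, e.g. 65536 & 65535 == 0 while
        # 65537 & 65536 != 0, which is what the check above relies on.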
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        return util.buffer(util.mmapread(fp))
                return fp.read()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is None:
            entry_point = b'%s.i' % self.radix
        else:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

-        if self._format_version == REVLOGV0:
-            self._inline = False
-            self._generaldelta = False
-        elif self._format_version == REVLOGV1:
-            self._inline = self._format_flags & FLAG_INLINE_DATA
-            self._generaldelta = self._format_flags & FLAG_GENERALDELTA
-        elif self._format_version == REVLOGV2:
-            # There is a bug in the transaction handling when going from an
-            # inline revlog to a separate index and data file. Turn it off until
-            # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
-            # See issue6485
-            self._inline = False
-            # generaldelta implied by version 2 revlogs.
-            self._generaldelta = True
-            # revlog-v2 has built in sidedata support
-            self.hassidedata = True
-        else:
-            assert False, 'unreachable'
+        features = FEATURES_BY_VERSION[self._format_version]
+        self._inline = features[b'inline'](self._format_flags)
+        self._generaldelta = features[b'generaldelta'](self._format_flags)
+        self.hassidedata = features[b'sidedata']

        index_data = entry_data
        self._indexfile = entry_point

        if self.postfix is None or self.postfix == b'a':
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in message"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            f.seek(0, os.SEEK_END)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
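        # Editorial note (not part of this change): the mask below is
        # REVIDX_KNOWN_FLAGS with the ELLIPSIS bit cleared via XOR, so a
        # revision whose only flag is REVIDX_ELLIPSIS still takes the fast
        # rawsize() path.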
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
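    # Editorial note (not part of this change): with general delta, e[3]
    # names an arbitrary delta-base revision, so for rev 12 the walk above
    # might return ([3, 7, 12], False); without general delta the base is
    # always rev - 1, so the chain is the contiguous run of revs down to
    # the chain base.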
926
913
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

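    # Hedged illustration of the `lazyset` trick above: membership checks hit
    # the small mutable set first and only then fall back to the lazy
    # ancestor object, so the full ancestor set is never materialized
    # eagerly. The helper name below is hypothetical and exists only for
    # exposition.
    @staticmethod
    def _lazyset_demo(lazyvalues):
        added = {nullrev}
        # equivalent to lazyset.__contains__: cheap set probe, then lazy probe
        return lambda value: value in added or value in lazyvalues
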
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

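    # Illustrative tie-together (assumed helper name) of the three lookups
    # above: `findmissing` is the node-level wrapper, `findmissingrevs` the
    # rev-level one, and both delegate the actual DAG walk to
    # `incrementalmissingrevs`.
    def _findmissing_sketch(self, common, heads):
        inc = self.incrementalmissingrevs(
            common=[self.rev(n) for n in common]
        )
        missing = inc.missingancestors([self.rev(n) for n in heads])
        return [self.node(r) for r in missing]
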
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

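    # A compact restatement (for exposition only; `_headrevs` above is the
    # real fallback) of the head computation: every revision starts out as a
    # candidate head and loses that status once it is seen as a parent.
    def _headrevs_sketch(self):
        candidates = set(self)
        for r in self:
            for p in self.parentrevs(r):
                candidates.discard(p)
        return sorted(candidates) or [nullrev]
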
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

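    # Hedged note on the reduction used by `isancestorrev` above: testing
    # whether rev `a` is an ancestor of rev `b` becomes a reachability query
    # with `a` as the only allowed root, e.g. (values assumed):
    #
    #   self.reachableroots(a, [b], [a], includepath=False)
    #   # non-empty result  <=>  a is an ancestor of b
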
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

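    # Simplified sketch (hypothetical name; ignores wdir, the prefix cache,
    # and hidden-revision filtering) of the pure-Python scan in
    # `_partialmatch`: keep every node whose hex form starts with the prefix
    # and demand at most one survivor.
    def _prefix_scan_sketch(self, id):
        l = len(id) // 2  # whole bytes only; an odd trailing digit is
        prefix = bin(id[: l * 2])  # re-checked on the hex form below
        nl = [e[7] for e in self.index if e[7].startswith(prefix)]
        nl = [n for n in nl if hex(n).startswith(id)]
        if not nl:
            return None
        if len(nl) == 1:
            return nl[0]
        raise error.AmbiguousPrefixLookupError(
            id, self.display_id, _(b'ambiguous identifier')
        )
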
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

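    # Worked example (hypothetical values) for `shortest` above: for a node
    # whose hex form begins b'd4e1...', the probes are b'd', b'd4', b'd4e',
    # ... until `isvalid` stops seeing AmbiguousPrefixLookupError; then
    # `disambiguate` may lengthen the result so an all-'f' prefix cannot be
    # confused with the wdir pseudo-node.
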
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

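    # The cache-window arithmetic used by `_readsegment` above, spelled out
    # as a standalone sketch (assumes `cachesize` is a power of two): masking
    # with ~(cachesize - 1) rounds the start down to a window boundary, and
    # adding cachesize before masking rounds the end up.
    @staticmethod
    def _window_sketch(offset, length, cachesize):
        realoffset = offset & ~(cachesize - 1)
        realend = (offset + length + cachesize) & ~(cachesize - 1)
        # e.g. _window_sketch(100, 20, 65536) == (0, 65536)
        return realoffset, realend - realoffset
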
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

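    # Sketch of the inline-offset correction in `_getsegmentforrevs` above
    # (standalone rendering, assumed name): in an inline revlog, index
    # entries and data chunks are interleaved in one file, so a logical data
    # offset must be shifted by one entry size per preceding revision, plus
    # one for the revision's own entry.
    @staticmethod
    def _inline_offset_sketch(dataoffset, rev, entry_size):
        return dataoffset + (rev + 1) * entry_size
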
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

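    # Hedged restatement of the sparse-revlog fallback in `issnapshot` above:
    #
    #   base == rev or base == nullrev   -> full snapshot
    #   base in (p1, p2)                 -> ordinary delta, not a snapshot
    #   otherwise                        -> snapshot iff issnapshot(base),
    #                                       i.e. intermediate snapshots delta
    #                                       against other snapshots only
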
1761 def snapshotdepth(self, rev):
1748 def snapshotdepth(self, rev):
1762 """number of snapshot in the chain before this one"""
1749 """number of snapshot in the chain before this one"""
1763 if not self.issnapshot(rev):
1750 if not self.issnapshot(rev):
1764 raise error.ProgrammingError(b'revision %d not a snapshot')
1751 raise error.ProgrammingError(b'revision %d not a snapshot')
1765 return len(self._deltachain(rev)[0]) - 1
1752 return len(self._deltachain(rev)[0]) - 1
1766
1753
1767 def revdiff(self, rev1, rev2):
1754 def revdiff(self, rev1, rev2):
1768 """return or calculate a delta between two revisions
1755 """return or calculate a delta between two revisions
1769
1756
1770 The delta calculated is in binary form and is intended to be written to
1757 The delta calculated is in binary form and is intended to be written to
1771 revlog data directly. So this function needs raw revision data.
1758 revlog data directly. So this function needs raw revision data.
1772 """
1759 """
1773 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1760 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1774 return bytes(self._chunk(rev2))
1761 return bytes(self._chunk(rev2))
1775
1762
1776 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1763 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1777
1764
1778 def _processflags(self, text, flags, operation, raw=False):
1765 def _processflags(self, text, flags, operation, raw=False):
1779 """deprecated entry point to access flag processors"""
1766 """deprecated entry point to access flag processors"""
1780 msg = b'_processflag(...) use the specialized variant'
1767 msg = b'_processflag(...) use the specialized variant'
1781 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1768 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1782 if raw:
1769 if raw:
1783 return text, flagutil.processflagsraw(self, text, flags)
1770 return text, flagutil.processflagsraw(self, text, flags)
1784 elif operation == b'read':
1771 elif operation == b'read':
1785 return flagutil.processflagsread(self, text, flags)
1772 return flagutil.processflagsread(self, text, flags)
1786 else: # write operation
1773 else: # write operation
1787 return flagutil.processflagswrite(self, text, flags)
1774 return flagutil.processflagswrite(self, text, flags)
1788
1775
1789 def revision(self, nodeorrev, _df=None, raw=False):
1776 def revision(self, nodeorrev, _df=None, raw=False):
1790 """return an uncompressed revision of a given node or revision
1777 """return an uncompressed revision of a given node or revision
1791 number.
1778 number.
1792
1779
1793 _df - an existing file handle to read from. (internal-only)
1780 _df - an existing file handle to read from. (internal-only)
1794 raw - an optional argument specifying if the revision data is to be
1781 raw - an optional argument specifying if the revision data is to be
1795 treated as raw data when applying flag transforms. 'raw' should be set
1782 treated as raw data when applying flag transforms. 'raw' should be set
1796 to True when generating changegroups or in debug commands.
1783 to True when generating changegroups or in debug commands.
1797 """
1784 """
1798 if raw:
1785 if raw:
1799 msg = (
1786 msg = (
1800 b'revlog.revision(..., raw=True) is deprecated, '
1787 b'revlog.revision(..., raw=True) is deprecated, '
1801 b'use revlog.rawdata(...)'
1788 b'use revlog.rawdata(...)'
1802 )
1789 )
1803 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1790 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1804 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1791 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1805
1792
1806 def sidedata(self, nodeorrev, _df=None):
1793 def sidedata(self, nodeorrev, _df=None):
1807 """a map of extra data related to the changeset but not part of the hash
1794 """a map of extra data related to the changeset but not part of the hash
1808
1795
1809 This function currently return a dictionary. However, more advanced
1796 This function currently return a dictionary. However, more advanced
1810 mapping object will likely be used in the future for a more
1797 mapping object will likely be used in the future for a more
1811 efficient/lazy code.
1798 efficient/lazy code.
1812 """
1799 """
1813 return self._revisiondata(nodeorrev, _df)[1]
1800 return self._revisiondata(nodeorrev, _df)[1]
1814
1801
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

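    # Reconstruction sketch for the non-cached path above (a straight-line
    # reading of the code, assuming no cache hit and no partial chain):
    #
    #   chain, _stopped = self._deltachain(rev)   # [base, ..., rev]
    #   bins = self._chunks(chain)                # one chunk per rev
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # i.e. the base text plus every delta in the chain, applied in order.
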
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

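    # Offset arithmetic note: for inline revlogs the data segments are
    # interleaved with index entries inside the `.i` file, so the stored
    # sidedata offset is shifted by entry_size * (rev + 1) bytes of index
    # entries. E.g. with a (hypothetical) 64-byte entry_size, rev 2 shifts
    # by 64 * 3 = 192 bytes, covering the entries for revs 0, 1 and 2.
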
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

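    # The default helper (storageutil.hashrevisionsha1) derives the node
    # from the parents and the text; conceptually:
    #
    #   sha1(min(p1, p2) + max(p1, p2) + text)
    #
    # (a sketch of the usual SHA-1 node derivation, not a verbatim copy of
    # storageutil; subclasses may substitute another scheme entirely)
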
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

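    # Conversion sketch: once an inline revlog crosses _maxinline, the data
    # segments are copied out into a fresh `.d` file, the index is rewritten
    # without FLAG_INLINE_DATA, and the revlog continues as a conventional
    # two-file (`.i` + `.d`) revlog. The transaction offset is remapped via
    # tr.replace() so a rollback still truncates at the right index entry.
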
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    dfh.seek(0, os.SEEK_END)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

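    # Usage sketch: every write must happen inside this context manager,
    # which opens the index/data handles and registers them with the
    # transaction, e.g.
    #
    #   with self._writing(transaction):
    #       self._addrevision(...)
    #
    # _addrevision() raises a ProgrammingError when invoked outside of it
    # (see the `self._writinghandles is None` checks below).
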
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

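    # Flow sketch for addrevision() above: run the write-side flag
    # processors, return early if the node is already stored, optionally
    # checkhash(), then hand the processed rawtext to addrawrevision()
    # below. Only the rawtext (post flag processing) reaches storage.
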
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

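    # Header convention used by compress()/decompress(): an empty prefix
    # means the payload either starts with a compressor header (e.g. b'x'
    # for zlib) or with b'\0' for stored-as-is data, while a b'u' prefix
    # marks uncompressed data that needed an explicit marker. Round-trip
    # sketch (`rl` and `chunk` are hypothetical):
    #
    #   header, payload = rl.compress(chunk)
    #   assert bytes(rl.decompress(header + payload)) == chunk
    #
    # (bytes() copes with decompress() returning a buffer view for b'u')
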
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

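    # Dispatch order note: the b'x' (zlib), b'\0' (stored) and b'u'
    # (uncompressed-with-marker) headers are tested inline before falling
    # back to the generic compengines table, since those three cover the
    # overwhelming majority of chunks and the inline checks are cheaper
    # than a dict lookup plus engine indirection (per the comment above).
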
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

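    # Index entry layout note: the 10-tuple appended above is, in order,
    # (offset/flags packed by offset_type, compressed length, rawtext
    # length, delta base rev, linkrev, p1 rev, p2 rev, node, sidedata
    # offset, sidedata length) -- matching the entry[8]/entry[9] sidedata
    # accesses in _sidedata() and _get_data_offset().
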
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        nodemaputil.setup_persistent_nodemap(transaction, self)

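    # On-disk layout sketch for _writeentry() (schematic, not to scale):
    #
    #   split revlog:  .i: [entry]          .d: [data0][data1][sidedata?]
    #   inline revlog: .i: [entry][data0][data1][sidedata?]
    #
    # where data0 is the optional compression header and data1 the payload;
    # inline writes may then trigger _enforceinlinesize() to split the file.
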
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

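    # Input shape note: each element of ``deltas`` must unpack as
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # matching the destructuring at the top of the loop above; ``linkmapper``
    # translates linknode into the linkrev recorded for the new revision.
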
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

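    # Truncation arithmetic sketch for strip(): with a split revlog the data
    # file is cut at start(rev) and the index at rev * entry_size; with an
    # inline revlog a single file holds both, so the cut point is
    # start(rev) + rev * entry_size. In-memory caches are reset afterwards.
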
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When unset, the revlog's current default is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
2793 if deltareuse == self.DELTAREUSEALWAYS:
2807 destrevlog._lazydeltabase = True
2794 destrevlog._lazydeltabase = True
2808 destrevlog._lazydelta = True
2795 destrevlog._lazydelta = True
2809 elif deltareuse == self.DELTAREUSESAMEREVS:
2796 elif deltareuse == self.DELTAREUSESAMEREVS:
2810 destrevlog._lazydeltabase = False
2797 destrevlog._lazydeltabase = False
2811 destrevlog._lazydelta = True
2798 destrevlog._lazydelta = True
2812 elif deltareuse == self.DELTAREUSENEVER:
2799 elif deltareuse == self.DELTAREUSENEVER:
2813 destrevlog._lazydeltabase = False
2800 destrevlog._lazydeltabase = False
2814 destrevlog._lazydelta = False
2801 destrevlog._lazydelta = False
2815
2802
2816 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2803 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2817
2804
2818 self._clone(
2805 self._clone(
2819 tr,
2806 tr,
2820 destrevlog,
2807 destrevlog,
2821 addrevisioncb,
2808 addrevisioncb,
2822 deltareuse,
2809 deltareuse,
2823 forcedeltabothparents,
2810 forcedeltabothparents,
2824 sidedata_helpers,
2811 sidedata_helpers,
2825 )
2812 )
2826
2813
2827 finally:
2814 finally:
2828 destrevlog._lazydelta = oldlazydelta
2815 destrevlog._lazydelta = oldlazydelta
2829 destrevlog._lazydeltabase = oldlazydeltabase
2816 destrevlog._lazydeltabase = oldlazydeltabase
2830 destrevlog._deltabothparents = oldamd
2817 destrevlog._deltabothparents = oldamd
2831
2818
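# Editor's sketch (not part of revlog.py): one way a caller might drive
# ``clone`` during a format conversion. ``src``, ``dest`` and ``tr`` are
# hypothetical stand-ins for a source revlog, an empty destination revlog
# and an open transaction obtained elsewhere.
def convert_revlog(src, dest, tr):
    copied = []

    def progress(rl, rev, node):
        # ``addrevisioncb`` receives (source revlog, rev, node) per copy
        copied.append(rev)

    # The default, balanced policy: reuse a delta only when the
    # destination would pick the same delta base anyway.
    src.clone(
        tr,
        dest,
        addrevisioncb=progress,
        deltareuse=src.DELTAREUSESAMEREVS,
    )
    return len(copied)
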
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

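# Editor's sketch (not part of revlog.py): how the (add, remove) flag masks
# returned by the sidedata helpers combine with an entry's current flags in
# ``flags | new_flags[0] & ~new_flags[1]`` above. Since ``&`` binds tighter
# than ``|`` in Python, the expression reads as ``flags | (to_add &
# ~to_remove)``. The flag values below are illustrative only.
FLAG_A = 1 << 1
FLAG_B = 1 << 2

flags = FLAG_A                 # current entry flags
to_add, to_remove = FLAG_B, 0  # as returned by run_sidedata_helpers
assert flags | to_add & ~to_remove == FLAG_A | FLAG_B
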
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

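# Editor's sketch (not part of revlog.py): the tombstone built above is the
# censor metadata wrapped in the filelog metadata envelope. A rough
# plain-Python approximation of ``storageutil.packmeta`` for this one key:
def pack_tombstone(message):
    # approximation of storageutil.packmeta({b'censored': message}, b'')
    return b'\x01\ncensored: %s\n\x01\n' % message

# The tombstone replaces the censored revision's raw text, so it must fit
# within the recorded rawsize of that revision (hypothetical value below),
# otherwise the length metadata in the index would no longer hold.
assert len(pack_tombstone(b'legal: removed')) <= 1024
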
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks that need to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

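# Editor's sketch (not part of revlog.py): a verifier drives the generator
# above with a mutable ``state`` dict. The keys follow the method body;
# ``rl`` is a hypothetical revlog instance supplied by the caller.
def report_problems(rl):
    state = {
        b'expectedversion': 1,
        b'erroroncensored': True,
        b'skipflags': 0,
    }
    problems = list(rl.verifyintegrity(state))
    # each ``revlogproblem`` carries at most one of ``error``/``warning``
    errors = [p.error for p in problems if p.error is not None]
    warnings = [p.warning for p in problems if p.warning is not None]
    return errors, warnings
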
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

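# Editor's sketch (not part of revlog.py): callers opt in to each field, so
# only the requested keys are computed and cheap queries stay cheap.
# ``rl`` is a hypothetical revlog instance.
def quick_stats(rl):
    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    return info  # e.g. {b'revisionscount': 42, b'trackedsize': 123456}
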
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only
        # operation). See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that assumes
        # all index data is written in append mode, and is therefore
        # incompatible with the seeked writes done in this method. The use of
        # such "delayed" writing will soon be removed for revlog versions
        # that support side data, so for now, we only keep this simple assert
        # to highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            dfh.seek(0, os.SEEK_END)
            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
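
# Editor's sketch (not part of revlog.py): the in-place index rewrite above
# relies on fixed-size entries, so the byte offset of revision ``rev`` in
# the index file is simply ``rev * entry_size`` (96 bytes for revlog v2,
# see INDEX_ENTRY_V2 below).
def index_offset(rev, entry_size=96):
    return rev * entry_size

assert index_offset(0) == 0
assert index_offset(10) == 960
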
@@ -1,123 +1,153
# revlogdeltas.py - constants used for revlog logic
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import absolute_import

import struct

from ..interfaces import repository

### Internal utility constants

KIND_CHANGELOG = 1001  # over 256 so they are not comparable with bytes
KIND_MANIFESTLOG = 1002
KIND_FILELOG = 1003
KIND_OTHER = 1004

ALL_KINDS = {
    KIND_CHANGELOG,
    KIND_MANIFESTLOG,
    KIND_FILELOG,
    KIND_OTHER,
}

### main revlog header

INDEX_HEADER = struct.Struct(b">I")

## revlog version
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
REVLOGV2 = 0xDEAD

## global revlog header flags
# Shared across v1 and v2.
FLAG_INLINE_DATA = 1 << 16
# Only used by v1, implied by v2.
FLAG_GENERALDELTA = 1 << 17
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV0_FLAGS = 0
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = FLAG_INLINE_DATA

### individual entry

## index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")

## index v1
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
assert INDEX_ENTRY_V1.size == 32 * 2

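# Editor's sketch (not part of this module): packing and unpacking one v1
# entry with the struct above. The low 16 bits of the first 64-bit field
# hold the flags; the offset lives in the upper 48 bits. Values are
# illustrative only.
_offset, _flags = 0, 0
_packed = INDEX_ENTRY_V1.pack(
    (_offset << 16) | _flags,  # offset and flags share one Q field
    11,  # compressed length
    24,  # uncompressed length
    0,  # base rev
    0,  # link rev
    -1,  # parent 1 rev (nullrev)
    -1,  # parent 2 rev (nullrev)
    b'\x00' * 20,  # nodeid (padded to 32 bytes by the trailing 12x)
)
assert len(_packed) == INDEX_ENTRY_V1.size
_fields = INDEX_ENTRY_V1.unpack(_packed)
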
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
# 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
assert INDEX_ENTRY_V2.size == 32 * 3

# revlog index flags

# For historical reasons, revlog's internal flags were exposed via the
# wire protocol and are even exposed in parts of the storage APIs.

# revision has censor metadata, must be verified
REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
# revision hash does not match data (narrowhg)
REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
# revision data is stored externally
REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
# revision changes files in a way that could affect copy tracing.
REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_HASCOPIESINFO,
]

# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

SUPPORTED_FLAGS = {
    REVLOGV0: REVLOGV0_FLAGS,
    REVLOGV1: REVLOGV1_FLAGS,
    REVLOGV2: REVLOGV2_FLAGS,
}

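# Editor's sketch (not part of this module): how an index loader can use
# the table above to validate a header word, in the spirit of the "unify
# flag processing when loading index" change this file belongs to.
def check_header(header):
    flags = header & 0xFFFF0000
    version = header & 0x0000FFFF
    if version not in SUPPORTED_FLAGS:
        raise ValueError("unknown revlog version %d" % version)
    unknown = flags & ~SUPPORTED_FLAGS[version]
    if unknown:
        raise ValueError("unknown revlog flags %#x" % unknown)
    return version, flags

assert check_header(REVLOG_DEFAULT_VERSION) == (REVLOGV1, FLAG_INLINE_DATA)
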
_no = lambda flags: False
_yes = lambda flags: True


def _from_flag(flag):
    return lambda flags: bool(flags & flag)


FEATURES_BY_VERSION = {
    REVLOGV0: {
        b'inline': _no,
        b'generaldelta': _no,
        b'sidedata': False,
    },
    REVLOGV1: {
        b'inline': _from_flag(FLAG_INLINE_DATA),
        b'generaldelta': _from_flag(FLAG_GENERALDELTA),
        b'sidedata': False,
    },
    REVLOGV2: {
        # There is a bug in the transaction handling when going from an
        # inline revlog to a separate index and data file. Turn it off until
        # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
        # See issue6485
        b'inline': _no,
        b'generaldelta': _yes,
        b'sidedata': True,
    },
}

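# Editor's sketch (not part of this module): querying the table above.
# Boolean entries (``b'sidedata'``) are fixed per version; callable entries
# are predicates over the header flags.
features = FEATURES_BY_VERSION[REVLOGV1]
assert features[b'inline'](FLAG_INLINE_DATA) is True
assert features[b'generaldelta'](FLAG_INLINE_DATA) is False
assert features[b'sidedata'] is False
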
SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000