censor: accept censored revision during upgrade...
marmoute
r52006:2dec2365 stable
@@ -1,3533 +1,3537 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        if len(header_bytes) == 0:
            return True

        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)
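
    # Editorial note: the 4-byte index header packs the format version in the
    # low 16 bits and feature flags in the high bits; a plain inline v1 index
    # therefore starts with 0x00010001 (REVLOGV1 | FLAG_INLINE_DATA), and with
    # 0x00030001 once FLAG_GENERALDELTA is added.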

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance-measurement code might
        not set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally re-sort parents to ensure that the first parent is
        # non-null, if there is a non-null parent at all.
        # filelog abuses the parent order as a flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
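        # Editorial note: `x & (x - 1) == 0` exactly when x is a power of two
        # (e.g. 65536 & 65535 == 0, while 65537 & 65536 != 0), so together
        # with the `<= 0` branch above this rejects every invalid cache size.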
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the content of a file, read with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
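                            # Editorial note: touching a mapped region past
                            # the end of the file can fault (SIGBUS) on some
                            # platforms, hence clamping `size` to the actual
                            # file size above.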
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def get_streams(self, max_linkrev, force_inline=False):
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self._datafp() as fp_d:
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
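                            # (editorial note: the data chunks are folded
                            # into the index stream below, so the header we
                            # send must advertise FLAG_INLINE_DATA for the
                            # receiver to parse the combined stream.)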
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]
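        # Editorial note: every branch above returns a list of
        # (store filename, chunk generator, exact byte size) tuples; each
        # generator is primed with next() so its file is already open, and
        # the sizes let a stream-clone consumer frame the payload.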
612
612
613 def _loadindex(self, docket=None):
613 def _loadindex(self, docket=None):
614
614
615 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
615 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
616
616
617 if self.postfix is not None:
617 if self.postfix is not None:
618 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
618 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
619 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
619 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
620 entry_point = b'%s.i.a' % self.radix
620 entry_point = b'%s.i.a' % self.radix
621 elif self._try_split and self.opener.exists(self._split_index_file):
621 elif self._try_split and self.opener.exists(self._split_index_file):
622 entry_point = self._split_index_file
622 entry_point = self._split_index_file
623 else:
623 else:
624 entry_point = b'%s.i' % self.radix
624 entry_point = b'%s.i' % self.radix
625
625
626 if docket is not None:
626 if docket is not None:
627 self._docket = docket
627 self._docket = docket
628 self._docket_file = entry_point
628 self._docket_file = entry_point
629 else:
629 else:
630 self._initempty = True
630 self._initempty = True
631 entry_data = self._get_data(entry_point, mmapindexthreshold)
631 entry_data = self._get_data(entry_point, mmapindexthreshold)
632 if len(entry_data) > 0:
632 if len(entry_data) > 0:
633 header = INDEX_HEADER.unpack(entry_data[:4])[0]
633 header = INDEX_HEADER.unpack(entry_data[:4])[0]
634 self._initempty = False
634 self._initempty = False
635 else:
635 else:
636 header = new_header
636 header = new_header
637
637
638 self._format_flags = header & ~0xFFFF
638 self._format_flags = header & ~0xFFFF
639 self._format_version = header & 0xFFFF
639 self._format_version = header & 0xFFFF
640
640
641 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
641 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
642 if supported_flags is None:
642 if supported_flags is None:
643 msg = _(b'unknown version (%d) in revlog %s')
643 msg = _(b'unknown version (%d) in revlog %s')
644 msg %= (self._format_version, self.display_id)
644 msg %= (self._format_version, self.display_id)
645 raise error.RevlogError(msg)
645 raise error.RevlogError(msg)
646 elif self._format_flags & ~supported_flags:
646 elif self._format_flags & ~supported_flags:
647 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
647 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
648 display_flag = self._format_flags >> 16
648 display_flag = self._format_flags >> 16
649 msg %= (display_flag, self._format_version, self.display_id)
649 msg %= (display_flag, self._format_version, self.display_id)
650 raise error.RevlogError(msg)
650 raise error.RevlogError(msg)
651
651
652 features = FEATURES_BY_VERSION[self._format_version]
652 features = FEATURES_BY_VERSION[self._format_version]
653 self._inline = features[b'inline'](self._format_flags)
653 self._inline = features[b'inline'](self._format_flags)
654 self._generaldelta = features[b'generaldelta'](self._format_flags)
654 self._generaldelta = features[b'generaldelta'](self._format_flags)
655 self.hassidedata = features[b'sidedata']
655 self.hassidedata = features[b'sidedata']
656
656
657 if not features[b'docket']:
657 if not features[b'docket']:
658 self._indexfile = entry_point
658 self._indexfile = entry_point
659 index_data = entry_data
659 index_data = entry_data
660 else:
660 else:
661 self._docket_file = entry_point
661 self._docket_file = entry_point
662 if self._initempty:
662 if self._initempty:
663 self._docket = docketutil.default_docket(self, header)
663 self._docket = docketutil.default_docket(self, header)
664 else:
664 else:
665 self._docket = docketutil.parse_docket(
665 self._docket = docketutil.parse_docket(
666 self, entry_data, use_pending=self._trypending
666 self, entry_data, use_pending=self._trypending
667 )
667 )
668
668
669 if self._docket is not None:
669 if self._docket is not None:
670 self._indexfile = self._docket.index_filepath()
670 self._indexfile = self._docket.index_filepath()
671 index_data = b''
671 index_data = b''
672 index_size = self._docket.index_end
672 index_size = self._docket.index_end
673 if index_size > 0:
673 if index_size > 0:
674 index_data = self._get_data(
674 index_data = self._get_data(
675 self._indexfile, mmapindexthreshold, size=index_size
675 self._indexfile, mmapindexthreshold, size=index_size
676 )
676 )
677 if len(index_data) < index_size:
677 if len(index_data) < index_size:
678 msg = _(b'too few index data for %s: got %d, expected %d')
678 msg = _(b'too few index data for %s: got %d, expected %d')
679 msg %= (self.display_id, len(index_data), index_size)
679 msg %= (self.display_id, len(index_data), index_size)
680 raise error.RevlogError(msg)
680 raise error.RevlogError(msg)
681
681
682 self._inline = False
682 self._inline = False
683 # generaldelta implied by version 2 revlogs.
683 # generaldelta implied by version 2 revlogs.
684 self._generaldelta = True
684 self._generaldelta = True
685 # the logic for persistent nodemap will be dealt with within the
685 # the logic for persistent nodemap will be dealt with within the
686 # main docket, so disable it for now.
686 # main docket, so disable it for now.
687 self._nodemap_file = None
687 self._nodemap_file = None
688
688
689 if self._docket is not None:
689 if self._docket is not None:
690 self._datafile = self._docket.data_filepath()
690 self._datafile = self._docket.data_filepath()
691 self._sidedatafile = self._docket.sidedata_filepath()
691 self._sidedatafile = self._docket.sidedata_filepath()
692 elif self.postfix is None:
692 elif self.postfix is None:
693 self._datafile = b'%s.d' % self.radix
693 self._datafile = b'%s.d' % self.radix
694 else:
694 else:
695 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
695 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
696
696
697 self.nodeconstants = sha1nodeconstants
697 self.nodeconstants = sha1nodeconstants
698 self.nullid = self.nodeconstants.nullid
698 self.nullid = self.nodeconstants.nullid
699
699
700 # sparse-revlog can't be on without general-delta (issue6056)
700 # sparse-revlog can't be on without general-delta (issue6056)
701 if not self._generaldelta:
701 if not self._generaldelta:
702 self._sparserevlog = False
702 self._sparserevlog = False
703
703
704 self._storedeltachains = True
704 self._storedeltachains = True
705
705
706 devel_nodemap = (
706 devel_nodemap = (
707 self._nodemap_file
707 self._nodemap_file
708 and force_nodemap
708 and force_nodemap
709 and parse_index_v1_nodemap is not None
709 and parse_index_v1_nodemap is not None
710 )
710 )
711
711
712 use_rust_index = False
712 use_rust_index = False
713 if rustrevlog is not None:
713 if rustrevlog is not None:
714 if self._nodemap_file is not None:
714 if self._nodemap_file is not None:
715 use_rust_index = True
715 use_rust_index = True
716 else:
716 else:
717 use_rust_index = self.opener.options.get(b'rust.index')
717 use_rust_index = self.opener.options.get(b'rust.index')
718
718
719 self._parse_index = parse_index_v1
719 self._parse_index = parse_index_v1
720 if self._format_version == REVLOGV0:
720 if self._format_version == REVLOGV0:
721 self._parse_index = revlogv0.parse_index_v0
721 self._parse_index = revlogv0.parse_index_v0
722 elif self._format_version == REVLOGV2:
722 elif self._format_version == REVLOGV2:
723 self._parse_index = parse_index_v2
723 self._parse_index = parse_index_v2
724 elif self._format_version == CHANGELOGV2:
724 elif self._format_version == CHANGELOGV2:
725 self._parse_index = parse_index_cl_v2
725 self._parse_index = parse_index_cl_v2
726 elif devel_nodemap:
726 elif devel_nodemap:
727 self._parse_index = parse_index_v1_nodemap
727 self._parse_index = parse_index_v1_nodemap
728 elif use_rust_index:
728 elif use_rust_index:
729 self._parse_index = parse_index_v1_mixed
729 self._parse_index = parse_index_v1_mixed
730 try:
730 try:
731 d = self._parse_index(index_data, self._inline)
731 d = self._parse_index(index_data, self._inline)
732 index, chunkcache = d
732 index, chunkcache = d
733 use_nodemap = (
733 use_nodemap = (
734 not self._inline
734 not self._inline
735 and self._nodemap_file is not None
735 and self._nodemap_file is not None
736 and util.safehasattr(index, 'update_nodemap_data')
736 and util.safehasattr(index, 'update_nodemap_data')
737 )
737 )
738 if use_nodemap:
738 if use_nodemap:
739 nodemap_data = nodemaputil.persisted_data(self)
739 nodemap_data = nodemaputil.persisted_data(self)
740 if nodemap_data is not None:
740 if nodemap_data is not None:
741 docket = nodemap_data[0]
741 docket = nodemap_data[0]
742 if (
742 if (
743 len(d[0]) > docket.tip_rev
743 len(d[0]) > docket.tip_rev
744 and d[0][docket.tip_rev][7] == docket.tip_node
744 and d[0][docket.tip_rev][7] == docket.tip_node
745 ):
745 ):
746 # no changelog tampering
746 # no changelog tampering
747 self._nodemap_docket = docket
747 self._nodemap_docket = docket
748 index.update_nodemap_data(*nodemap_data)
748 index.update_nodemap_data(*nodemap_data)
749 except (ValueError, IndexError):
749 except (ValueError, IndexError):
750 raise error.RevlogError(
750 raise error.RevlogError(
751 _(b"index %s is corrupted") % self.display_id
751 _(b"index %s is corrupted") % self.display_id
752 )
752 )
753 self.index = index
753 self.index = index
754 self._segmentfile = randomaccessfile.randomaccessfile(
754 self._segmentfile = randomaccessfile.randomaccessfile(
755 self.opener,
755 self.opener,
756 (self._indexfile if self._inline else self._datafile),
756 (self._indexfile if self._inline else self._datafile),
757 self._chunkcachesize,
757 self._chunkcachesize,
758 chunkcache,
758 chunkcache,
759 )
759 )
760 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
760 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
761 self.opener,
761 self.opener,
762 self._sidedatafile,
762 self._sidedatafile,
763 self._chunkcachesize,
763 self._chunkcachesize,
764 )
764 )
765 # revnum -> (chain-length, sum-delta-length)
765 # revnum -> (chain-length, sum-delta-length)
766 self._chaininfocache = util.lrucachedict(500)
766 self._chaininfocache = util.lrucachedict(500)
767 # revlog header -> revlog compressor
767 # revlog header -> revlog compressor
768 self._decompressors = {}
768 self._decompressors = {}
769
769
770 def get_revlog(self):
770 def get_revlog(self):
771 """simple function to mirror API of other not-really-revlog API"""
771 """simple function to mirror API of other not-really-revlog API"""
772 return self
772 return self
773
773
774 @util.propertycache
774 @util.propertycache
775 def revlog_kind(self):
775 def revlog_kind(self):
776 return self.target[0]
776 return self.target[0]
777
777
778 @util.propertycache
778 @util.propertycache
779 def display_id(self):
779 def display_id(self):
780 """The public facing "ID" of the revlog that we use in message"""
780 """The public facing "ID" of the revlog that we use in message"""
781 if self.revlog_kind == KIND_FILELOG:
781 if self.revlog_kind == KIND_FILELOG:
782 # Reference the file without the "data/" prefix, so it is familiar
782 # Reference the file without the "data/" prefix, so it is familiar
783 # to the user.
783 # to the user.
784 return self.target[1]
784 return self.target[1]
785 else:
785 else:
786 return self.radix
786 return self.radix
787
787
788 def _get_decompressor(self, t):
788 def _get_decompressor(self, t):
789 try:
789 try:
790 compressor = self._decompressors[t]
790 compressor = self._decompressors[t]
791 except KeyError:
791 except KeyError:
792 try:
792 try:
793 engine = util.compengines.forrevlogheader(t)
793 engine = util.compengines.forrevlogheader(t)
794 compressor = engine.revlogcompressor(self._compengineopts)
794 compressor = engine.revlogcompressor(self._compengineopts)
795 self._decompressors[t] = compressor
795 self._decompressors[t] = compressor
796 except KeyError:
796 except KeyError:
797 raise error.RevlogError(
797 raise error.RevlogError(
798 _(b'unknown compression type %s') % binascii.hexlify(t)
798 _(b'unknown compression type %s') % binascii.hexlify(t)
799 )
799 )
800 return compressor
800 return compressor
801
801
802 @util.propertycache
802 @util.propertycache
803 def _compressor(self):
803 def _compressor(self):
804 engine = util.compengines[self._compengine]
804 engine = util.compengines[self._compengine]
805 return engine.revlogcompressor(self._compengineopts)
805 return engine.revlogcompressor(self._compengineopts)
806
806
807 @util.propertycache
807 @util.propertycache
808 def _decompressor(self):
808 def _decompressor(self):
809 """the default decompressor"""
809 """the default decompressor"""
810 if self._docket is None:
810 if self._docket is None:
811 return None
811 return None
812 t = self._docket.default_compression_header
812 t = self._docket.default_compression_header
813 c = self._get_decompressor(t)
813 c = self._get_decompressor(t)
814 return c.decompress
814 return c.decompress
815
815
816 def _indexfp(self):
816 def _indexfp(self):
817 """file object for the revlog's index file"""
817 """file object for the revlog's index file"""
818 return self.opener(self._indexfile, mode=b"r")
818 return self.opener(self._indexfile, mode=b"r")
819
819
820 def __index_write_fp(self):
820 def __index_write_fp(self):
821 # You should not use this directly and use `_writing` instead
821 # You should not use this directly and use `_writing` instead
822 try:
822 try:
823 f = self.opener(
823 f = self.opener(
824 self._indexfile, mode=b"r+", checkambig=self._checkambig
824 self._indexfile, mode=b"r+", checkambig=self._checkambig
825 )
825 )
826 if self._docket is None:
826 if self._docket is None:
827 f.seek(0, os.SEEK_END)
827 f.seek(0, os.SEEK_END)
828 else:
828 else:
829 f.seek(self._docket.index_end, os.SEEK_SET)
829 f.seek(self._docket.index_end, os.SEEK_SET)
830 return f
830 return f
831 except FileNotFoundError:
831 except FileNotFoundError:
832 return self.opener(
832 return self.opener(
833 self._indexfile, mode=b"w+", checkambig=self._checkambig
833 self._indexfile, mode=b"w+", checkambig=self._checkambig
834 )
834 )
835
835
836 def __index_new_fp(self):
836 def __index_new_fp(self):
837 # You should not use this unless you are upgrading from inline revlog
837 # You should not use this unless you are upgrading from inline revlog
838 return self.opener(
838 return self.opener(
839 self._indexfile,
839 self._indexfile,
840 mode=b"w",
840 mode=b"w",
841 checkambig=self._checkambig,
841 checkambig=self._checkambig,
842 atomictemp=True,
842 atomictemp=True,
843 )
843 )
844
844
845 def _datafp(self, mode=b'r'):
845 def _datafp(self, mode=b'r'):
846 """file object for the revlog's data file"""
846 """file object for the revlog's data file"""
847 return self.opener(self._datafile, mode=mode)
847 return self.opener(self._datafile, mode=mode)
848
848
849 @contextlib.contextmanager
849 @contextlib.contextmanager
850 def _sidedatareadfp(self):
850 def _sidedatareadfp(self):
851 """file object suitable to read sidedata"""
851 """file object suitable to read sidedata"""
852 if self._writinghandles:
852 if self._writinghandles:
853 yield self._writinghandles[2]
853 yield self._writinghandles[2]
854 else:
854 else:
855 with self.opener(self._sidedatafile) as fp:
855 with self.opener(self._sidedatafile) as fp:
856 yield fp
856 yield fp
857
857
858 def tiprev(self):
858 def tiprev(self):
859 return len(self.index) - 1
859 return len(self.index) - 1
860
860
861 def tip(self):
861 def tip(self):
862 return self.node(self.tiprev())
862 return self.node(self.tiprev())
863
863
864 def __contains__(self, rev):
864 def __contains__(self, rev):
865 return 0 <= rev < len(self)
865 return 0 <= rev < len(self)
866
866
867 def __len__(self):
867 def __len__(self):
868 return len(self.index)
868 return len(self.index)
869
869
870 def __iter__(self):
870 def __iter__(self):
871 return iter(range(len(self)))
871 return iter(range(len(self)))
872
872
873 def revs(self, start=0, stop=None):
873 def revs(self, start=0, stop=None):
874 """iterate over all rev in this revlog (from start to stop)"""
874 """iterate over all rev in this revlog (from start to stop)"""
875 return storageutil.iterrevs(len(self), start=start, stop=stop)
875 return storageutil.iterrevs(len(self), start=start, stop=stop)
876
876
877 def hasnode(self, node):
877 def hasnode(self, node):
878 try:
878 try:
879 self.rev(node)
879 self.rev(node)
880 return True
880 return True
881 except KeyError:
881 except KeyError:
882 return False
882 return False
883
883
884 def candelta(self, baserev, rev):
884 def candelta(self, baserev, rev):
885 """whether two revisions (baserev, rev) can be delta-ed or not"""
885 """whether two revisions (baserev, rev) can be delta-ed or not"""
886 # Disable delta if either rev requires a content-changing flag
886 # Disable delta if either rev requires a content-changing flag
887 # processor (ex. LFS). This is because such flag processor can alter
887 # processor (ex. LFS). This is because such flag processor can alter
888 # the rawtext content that the delta will be based on, and two clients
888 # the rawtext content that the delta will be based on, and two clients
889 # could have a same revlog node with different flags (i.e. different
889 # could have a same revlog node with different flags (i.e. different
890 # rawtext contents) and the delta could be incompatible.
890 # rawtext contents) and the delta could be incompatible.
891 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
891 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
892 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
892 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
893 ):
893 ):
894 return False
894 return False
895 return True
895 return True
896
896
897 def update_caches(self, transaction):
897 def update_caches(self, transaction):
898 if self._nodemap_file is not None:
898 if self._nodemap_file is not None:
899 if transaction is None:
899 if transaction is None:
900 nodemaputil.update_persistent_nodemap(self)
900 nodemaputil.update_persistent_nodemap(self)
901 else:
901 else:
902 nodemaputil.setup_persistent_nodemap(transaction, self)
902 nodemaputil.setup_persistent_nodemap(transaction, self)
903
903
904 def clearcaches(self):
904 def clearcaches(self):
905 self._revisioncache = None
905 self._revisioncache = None
906 self._chainbasecache.clear()
906 self._chainbasecache.clear()
907 self._segmentfile.clear_cache()
907 self._segmentfile.clear_cache()
908 self._segmentfile_sidedata.clear_cache()
908 self._segmentfile_sidedata.clear_cache()
909 self._pcache = {}
909 self._pcache = {}
910 self._nodemap_docket = None
910 self._nodemap_docket = None
911 self.index.clearcaches()
911 self.index.clearcaches()
912 # The python code is the one responsible for validating the docket, we
912 # The python code is the one responsible for validating the docket, we
913 # end up having to refresh it here.
913 # end up having to refresh it here.
914 use_nodemap = (
914 use_nodemap = (
915 not self._inline
915 not self._inline
916 and self._nodemap_file is not None
916 and self._nodemap_file is not None
917 and util.safehasattr(self.index, 'update_nodemap_data')
917 and util.safehasattr(self.index, 'update_nodemap_data')
918 )
918 )
919 if use_nodemap:
919 if use_nodemap:
920 nodemap_data = nodemaputil.persisted_data(self)
920 nodemap_data = nodemaputil.persisted_data(self)
921 if nodemap_data is not None:
921 if nodemap_data is not None:
922 self._nodemap_docket = nodemap_data[0]
922 self._nodemap_docket = nodemap_data[0]
923 self.index.update_nodemap_data(*nodemap_data)
923 self.index.update_nodemap_data(*nodemap_data)
924
924
    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

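    # Illustrative sketch, not part of the original module: for a linear
    # history 0 <- 1 <- 2 in a changelog-v2 revlog ``rl`` (a hypothetical
    # instance), ``ancestors(2)`` is {0, 1, 2}, so:
    #
    #     rl.fast_rank(2)  # -> 3 when the rank was persisted,
    #                      #    None on formats that do not store it
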
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

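    # Illustrative sketch, not part of the original module: entry[3] is a
    # revision's delta base, so chainbase() just follows base pointers to a
    # fixed point. E.g. if index[5][3] == 2 and index[2][3] == 2 (rev 2 is
    # stored as a full snapshot), then for a hypothetical revlog ``rl``:
    #
    #     rl.chainbase(5)  # -> 2
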
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # (compare against nullrev: d[5] is a parent *revision*, matching
        # the check in parentrevs() above)
        if self.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

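    # Illustrative sketch, not part of the original module: with the same
    # bases as in the chainbase() example above (rev 5 deltas against rev 2,
    # which is a full snapshot), a hypothetical revlog ``rl`` would give:
    #
    #     rl._deltachain(5)             # -> ([2, 5], False)
    #     rl._deltachain(5, stoprev=2)  # -> ([5], True)
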
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

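    # Illustrative sketch, not part of the original module: in revset terms
    # findmissing() computes ``(::heads) - (::common)``. For a hypothetical
    # revlog ``rl`` holding two nodes A and B where B descends from A:
    #
    #     rl.findmissing(common=[A], heads=[B])  # -> nodes of (::B) - (::A)
    #     rl.findmissing(common=[B], heads=[A])  # -> [] since A is in ::B
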
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

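    # Illustrative sketch, not part of the original module: for a linear
    # history A <- B <- C <- D in a hypothetical revlog ``rl``:
    #
    #     rl.nodesbetween(roots=[B], heads=[D])
    #     # -> ([B, C, D], [B], [D]): the topological path plus which of
    #     #    the given roots and heads were actually reachable
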
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

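    # Illustrative sketch, not part of the original module: every rev is
    # first marked as a candidate head, then the mark is cleared on every
    # rev that appears as a parent; the extra slot at index ``count``
    # harmlessly absorbs nullrev (-1) parents. E.g. for 0 <- 1 <- 2 plus a
    # branch 0 <- 3:
    #
    #     ishead = [1, 1, 1, 1, 0]   # revs 0..3 plus the nullrev slot
    #     # parents 1->0, 2->1, 3->0 clear the marks on 0 and 1
    #     # result: [2, 3]
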
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

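    # Illustrative sketch, not part of the original module: the call above
    # reduces ancestry testing to "is ``a`` a reachable root below ``b``",
    # so for a hypothetical revlog ``rl``:
    #
    #     rl.isancestorrev(2, 5)  # True iff rev 2 lies in ::5
    #     rl.isancestorrev(5, 2)  # False: with a > b, a cannot be an ancestor
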
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

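    # Illustrative sketch, not part of the original module: _match() accepts
    # an int rev, a binary nodeid, a stringified (possibly negative) rev, or
    # a full hex nodeid. For a hypothetical revlog ``rl`` and node ``n``:
    #
    #     rl._match(0)        # -> node of rev 0
    #     rl._match(b'-1')    # -> node of the last rev (len(rl) - 1)
    #     rl._match(hex(n))   # -> n, for a full-length hex id
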
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

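    # Illustrative sketch, not part of the original module: for a node whose
    # hex form starts with b'1f0e...' in a hypothetical revlog ``rl``:
    #
    #     rl.shortest(node)               # -> b'1' if no other node shares
    #                                     #    that one-character prefix
    #     rl.shortest(node, minlength=4)  # -> at least b'1f0e'
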
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

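    # Illustrative sketch, not part of the original module: the node hash
    # that cmp() recomputes is, for SHA-1 revlogs, a digest over the sorted
    # parents followed by the text, along the lines of:
    #
    #     import hashlib
    #
    #     def hashrevision_sketch(text, p1, p2):  # hypothetical helper
    #         s = hashlib.sha1(min(p1, p2))
    #         s.update(max(p1, p2))
    #         s.update(text)
    #         return s.digest()
    #
    # so an unchanged text hashes back to ``node`` and cmp() returns False.
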
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

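    # Illustrative sketch, not part of the original module: for inline
    # revlogs, data chunks are interleaved with index entries and each
    # revision's data follows its own index entry, so the file offset of a
    # revision's chunk works out to:
    #
    #     file_offset = rl.start(rev) + (rev + 1) * rl.index.entry_size
    #
    # which is exactly the ``(startrev + 1) * entry_size`` adjustment above.
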
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

1811 def _chunks(self, revs, df=None, targetsize=None):
1811 def _chunks(self, revs, df=None, targetsize=None):
1812 """Obtain decompressed chunks for the specified revisions.
1812 """Obtain decompressed chunks for the specified revisions.
1813
1813
1814 Accepts an iterable of numeric revisions that are assumed to be in
1814 Accepts an iterable of numeric revisions that are assumed to be in
1815 ascending order. Also accepts an optional already-open file handle
1815 ascending order. Also accepts an optional already-open file handle
1816 to be used for reading. If used, the seek position of the file will
1816 to be used for reading. If used, the seek position of the file will
1817 not be preserved.
1817 not be preserved.
1818
1818
1819 This function is similar to calling ``self._chunk()`` multiple times,
1819 This function is similar to calling ``self._chunk()`` multiple times,
1820 but is faster.
1820 but is faster.
1821
1821
1822 Returns a list with decompressed data for each requested revision.
1822 Returns a list with decompressed data for each requested revision.
1823 """
1823 """
1824 if not revs:
1824 if not revs:
1825 return []
1825 return []
1826 start = self.start
1826 start = self.start
1827 length = self.length
1827 length = self.length
1828 inline = self._inline
1828 inline = self._inline
1829 iosize = self.index.entry_size
1829 iosize = self.index.entry_size
1830 buffer = util.buffer
1830 buffer = util.buffer
1831
1831
1832 l = []
1832 l = []
1833 ladd = l.append
1833 ladd = l.append
1834
1834
1835 if not self._withsparseread:
1835 if not self._withsparseread:
1836 slicedchunks = (revs,)
1836 slicedchunks = (revs,)
1837 else:
1837 else:
1838 slicedchunks = deltautil.slicechunk(
1838 slicedchunks = deltautil.slicechunk(
1839 self, revs, targetsize=targetsize
1839 self, revs, targetsize=targetsize
1840 )
1840 )
1841
1841
1842 for revschunk in slicedchunks:
1842 for revschunk in slicedchunks:
1843 firstrev = revschunk[0]
1843 firstrev = revschunk[0]
1844 # Skip trailing revisions with empty diff
1844 # Skip trailing revisions with empty diff
1845 for lastrev in revschunk[::-1]:
1845 for lastrev in revschunk[::-1]:
1846 if length(lastrev) != 0:
1846 if length(lastrev) != 0:
1847 break
1847 break
1848
1848
1849 try:
1849 try:
1850 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1850 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1851 except OverflowError:
1851 except OverflowError:
1852 # issue4215 - we can't cache a run of chunks greater than
1852 # issue4215 - we can't cache a run of chunks greater than
1853 # 2G on Windows
1853 # 2G on Windows
1854 return [self._chunk(rev, df=df) for rev in revschunk]
1854 return [self._chunk(rev, df=df) for rev in revschunk]
1855
1855
1856 decomp = self.decompress
1856 decomp = self.decompress
1857 # self._decompressor might be None, but will not be used in that case
1857 # self._decompressor might be None, but will not be used in that case
1858 def_decomp = self._decompressor
1858 def_decomp = self._decompressor
1859 for rev in revschunk:
1859 for rev in revschunk:
1860 chunkstart = start(rev)
1860 chunkstart = start(rev)
1861 if inline:
1861 if inline:
1862 chunkstart += (rev + 1) * iosize
1862 chunkstart += (rev + 1) * iosize
1863 chunklength = length(rev)
1863 chunklength = length(rev)
1864 comp_mode = self.index[rev][10]
1864 comp_mode = self.index[rev][10]
1865 c = buffer(data, chunkstart - offset, chunklength)
1865 c = buffer(data, chunkstart - offset, chunklength)
1866 if comp_mode == COMP_MODE_PLAIN:
1866 if comp_mode == COMP_MODE_PLAIN:
1867 ladd(c)
1867 ladd(c)
1868 elif comp_mode == COMP_MODE_INLINE:
1868 elif comp_mode == COMP_MODE_INLINE:
1869 ladd(decomp(c))
1869 ladd(decomp(c))
1870 elif comp_mode == COMP_MODE_DEFAULT:
1870 elif comp_mode == COMP_MODE_DEFAULT:
1871 ladd(def_decomp(c))
1871 ladd(def_decomp(c))
1872 else:
1872 else:
1873 msg = b'unknown compression mode %d'
1873 msg = b'unknown compression mode %d'
1874 msg %= comp_mode
1874 msg %= comp_mode
1875 raise error.RevlogError(msg)
1875 raise error.RevlogError(msg)
1876
1876
1877 return l
1877 return l
1878
1878
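    # Hedged usage sketch (not in the original source): ``_chunks`` is the
    # batched equivalent of ``_chunk``, so for a hypothetical revlog ``rl``
    # both forms below should yield the same bytes, the first one faster:
    #
    #   revs = [2, 3, 4]
    #   batched = [bytes(c) for c in rl._chunks(revs)]
    #   single = [bytes(rl._chunk(r)) for r in revs]
    #   assert batched == single
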
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

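    # Illustrative summary of the dispatch above (a sketch, restating the
    # code): the index stores a delta base in entry[3], and its meaning
    # depends on the general-delta setting:
    #
    #   base == rev           -> nullrev  (full text, no delta parent)
    #   generaldelta enabled  -> base     (delta against an arbitrary rev)
    #   generaldelta disabled -> rev - 1  (classic linear delta chain)
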
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

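    # Hedged example (hypothetical revlog ``rl``): in a sparse revlog a
    # snapshot is a revision whose delta base is not one of its (non-empty)
    # parents, so intermediate snapshots can stack. ``snapshotdepth`` counts
    # the snapshots below the revision in its delta chain:
    #
    #   if rl.issnapshot(rev):
    #       depth = rl.snapshotdepth(rev)  # 0 for a full-text snapshot
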
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

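    # Sketch of the fast path above (assumed revlog ``rl``): when ``rev2`` is
    # already stored as a delta against ``rev1``, the stored chunk *is* the
    # binary diff, so nothing needs to be reconstructed or re-diffed:
    #
    #   delta = rl.revdiff(6, 7)
    #   assert mdiff.patch(rl.rawdata(6), delta) == rl.rawdata(7)
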
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (these usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

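    # Hedged sketch of the reconstruction done in ``_rawtext`` (hypothetical
    # names): a revision is rebuilt by taking the chain's base text and
    # folding every stored delta on top of it in one pass:
    #
    #   chain, _stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
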
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

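    # A hedged sketch of what ``storageutil.hashrevisionsha1`` computes for
    # the SHA-1 revlog format; treat the helper below as illustrative, not as
    # the canonical implementation:
    #
    #   import hashlib
    #   def node_hash(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))  # parents are hashed sorted
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
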
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

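    # Worked example of the property above (hypothetical radix values):
    #
    #   radix b'data/some/file'  ->  b'data-s/some/file.i'
    #   radix b'00changelog'     ->  b'00changelog.i.s'
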
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
                with self.__index_new_fp() as fp:
                    self._format_flags &= ~FLAG_INLINE_DATA
                    self._inline = False
                    for i in self:
                        e = self.index.entry_binary(i)
                        if i == 0 and self._docket is None:
                            header = self._format_flags | self._format_version
                            header = self.index.pack_header(header)
                            e = header + e
                        fp.write(e)
                    if self._docket is not None:
                        self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

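    # Hedged usage sketch (hypothetical revlog ``rl``): ``reading`` keeps the
    # underlying file handles open across many reads instead of reopening
    # them for every revision:
    #
    #   with rl.reading():
    #       texts = [rl.revision(r) for r in rl]
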
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

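    # Hedged usage sketch (``rl``, ``tr``, the linkrev and the parents are
    # hypothetical): a minimal append of one revision, letting the revlog
    # compute the node and pick a delta strategy itself:
    #
    #   rev = rl.addrevision(b'file content\n', tr, linkrev, p1node, p2node)
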
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

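    # Hedged sketch of the (header, data) convention returned by ``compress``
    # (hypothetical revlog ``rl``): the header is prepended verbatim when the
    # chunk is stored, and ``decompress`` dispatches on that first byte:
    #
    #   h, payload = rl.compress(b'some text')
    #   chunk = h + payload  # starts with b'x' (zlib), b'u' or b'\0'
    #   assert bytes(rl.decompress(chunk)) == b'some text'
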
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

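    # Summary of the type-byte dispatch above (a sketch, not exhaustive for
    # pluggable engines):
    #
    #   b'x'   -> zlib stream, decompress the whole chunk
    #   b'\0'  -> stored verbatim, return as-is
    #   b'u'   -> stored verbatim behind a 1-byte marker, strip the marker
    #   other  -> looked up via self._get_decompressor (e.g. zstd)
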
2519 def _addrevision(
2519 def _addrevision(
2520 self,
2520 self,
2521 node,
2521 node,
2522 rawtext,
2522 rawtext,
2523 transaction,
2523 transaction,
2524 link,
2524 link,
2525 p1,
2525 p1,
2526 p2,
2526 p2,
2527 flags,
2527 flags,
2528 cachedelta,
2528 cachedelta,
2529 alwayscache=False,
2529 alwayscache=False,
2530 deltacomputer=None,
2530 deltacomputer=None,
2531 sidedata=None,
2531 sidedata=None,
2532 ):
2532 ):
2533 """internal function to add revisions to the log
2533 """internal function to add revisions to the log
2534
2534
2535 see addrevision for argument descriptions.
2535 see addrevision for argument descriptions.
2536
2536
2537 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2537 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2538
2538
2539 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2539 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2540 be used.
2540 be used.
2541
2541
2542 invariants:
2542 invariants:
2543 - rawtext is optional (can be None); if not set, cachedelta must be set.
2543 - rawtext is optional (can be None); if not set, cachedelta must be set.
2544 if both are set, they must correspond to each other.
2544 if both are set, they must correspond to each other.
2545 """
2545 """
2546 if node == self.nullid:
2546 if node == self.nullid:
2547 raise error.RevlogError(
2547 raise error.RevlogError(
2548 _(b"%s: attempt to add null revision") % self.display_id
2548 _(b"%s: attempt to add null revision") % self.display_id
2549 )
2549 )
2550 if (
2550 if (
2551 node == self.nodeconstants.wdirid
2551 node == self.nodeconstants.wdirid
2552 or node in self.nodeconstants.wdirfilenodeids
2552 or node in self.nodeconstants.wdirfilenodeids
2553 ):
2553 ):
2554 raise error.RevlogError(
2554 raise error.RevlogError(
2555 _(b"%s: attempt to add wdir revision") % self.display_id
2555 _(b"%s: attempt to add wdir revision") % self.display_id
2556 )
2556 )
2557 if self._writinghandles is None:
2557 if self._writinghandles is None:
2558 msg = b'adding revision outside `revlog._writing` context'
2558 msg = b'adding revision outside `revlog._writing` context'
2559 raise error.ProgrammingError(msg)
2559 raise error.ProgrammingError(msg)
2560
2560
2561 if self._inline:
2561 if self._inline:
2562 fh = self._writinghandles[0]
2562 fh = self._writinghandles[0]
2563 else:
2563 else:
2564 fh = self._writinghandles[1]
2564 fh = self._writinghandles[1]
2565
2565
2566 btext = [rawtext]
2566 btext = [rawtext]
2567
2567
2568 curr = len(self)
2568 curr = len(self)
2569 prev = curr - 1
2569 prev = curr - 1
2570
2570
2571 offset = self._get_data_offset(prev)
2571 offset = self._get_data_offset(prev)
2572
2572
2573 if self._concurrencychecker:
2573 if self._concurrencychecker:
2574 ifh, dfh, sdfh = self._writinghandles
2574 ifh, dfh, sdfh = self._writinghandles
2575 # XXX no checking for the sidedata file
2575 # XXX no checking for the sidedata file
2576 if self._inline:
2576 if self._inline:
2577 # offset is "as if" it were in the .d file, so we need to add on
2577 # offset is "as if" it were in the .d file, so we need to add on
2578 # the size of the entry metadata.
2578 # the size of the entry metadata.
2579 self._concurrencychecker(
2579 self._concurrencychecker(
2580 ifh, self._indexfile, offset + curr * self.index.entry_size
2580 ifh, self._indexfile, offset + curr * self.index.entry_size
2581 )
2581 )
2582 else:
2582 else:
2583 # Entries in the .i are a consistent size.
2583 # Entries in the .i are a consistent size.
2584 self._concurrencychecker(
2584 self._concurrencychecker(
2585 ifh, self._indexfile, curr * self.index.entry_size
2585 ifh, self._indexfile, curr * self.index.entry_size
2586 )
2586 )
2587 self._concurrencychecker(dfh, self._datafile, offset)
2587 self._concurrencychecker(dfh, self._datafile, offset)
2588
2588
2589 p1r, p2r = self.rev(p1), self.rev(p2)
2589 p1r, p2r = self.rev(p1), self.rev(p2)
2590
2590
2591 # full versions are inserted when the needed deltas
2591 # full versions are inserted when the needed deltas
2592 # become comparable to the uncompressed text
2592 # become comparable to the uncompressed text
2593 if rawtext is None:
2593 if rawtext is None:
2594 # need rawtext size, before changed by flag processors, which is
2594 # need rawtext size, before changed by flag processors, which is
2595 # the non-raw size. use revlog explicitly to avoid filelog's extra
2595 # the non-raw size. use revlog explicitly to avoid filelog's extra
2596 # logic that might remove metadata size.
2596 # logic that might remove metadata size.
2597 textlen = mdiff.patchedsize(
2597 textlen = mdiff.patchedsize(
2598 revlog.size(self, cachedelta[0]), cachedelta[1]
2598 revlog.size(self, cachedelta[0]), cachedelta[1]
2599 )
2599 )
2600 else:
2600 else:
2601 textlen = len(rawtext)
2601 textlen = len(rawtext)
2602
2602
2603 if deltacomputer is None:
2603 if deltacomputer is None:
2604 write_debug = None
2604 write_debug = None
2605 if self._debug_delta:
2605 if self._debug_delta:
2606 write_debug = transaction._report
2606 write_debug = transaction._report
2607 deltacomputer = deltautil.deltacomputer(
2607 deltacomputer = deltautil.deltacomputer(
2608 self, write_debug=write_debug
2608 self, write_debug=write_debug
2609 )
2609 )
2610
2610
2611 if cachedelta is not None and len(cachedelta) == 2:
2611 if cachedelta is not None and len(cachedelta) == 2:
2612 # If the cached delta has no information about how it should be
2612 # If the cached delta has no information about how it should be
2613 # reused, add the default reuse instruction according to the
2613 # reused, add the default reuse instruction according to the
2614 # revlog's configuration.
2614 # revlog's configuration.
2615 if self._generaldelta and self._lazydeltabase:
2615 if self._generaldelta and self._lazydeltabase:
2616 delta_base_reuse = DELTA_BASE_REUSE_TRY
2616 delta_base_reuse = DELTA_BASE_REUSE_TRY
2617 else:
2617 else:
2618 delta_base_reuse = DELTA_BASE_REUSE_NO
2618 delta_base_reuse = DELTA_BASE_REUSE_NO
2619 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2619 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2620
2620
2621 revinfo = revlogutils.revisioninfo(
2621 revinfo = revlogutils.revisioninfo(
2622 node,
2622 node,
2623 p1,
2623 p1,
2624 p2,
2624 p2,
2625 btext,
2625 btext,
2626 textlen,
2626 textlen,
2627 cachedelta,
2627 cachedelta,
2628 flags,
2628 flags,
2629 )
2629 )
2630
2630
2631 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2631 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2632
2632
2633 compression_mode = COMP_MODE_INLINE
2633 compression_mode = COMP_MODE_INLINE
2634 if self._docket is not None:
2634 if self._docket is not None:
2635 default_comp = self._docket.default_compression_header
2635 default_comp = self._docket.default_compression_header
2636 r = deltautil.delta_compression(default_comp, deltainfo)
2636 r = deltautil.delta_compression(default_comp, deltainfo)
2637 compression_mode, deltainfo = r
2637 compression_mode, deltainfo = r
2638
2638
2639 sidedata_compression_mode = COMP_MODE_INLINE
2639 sidedata_compression_mode = COMP_MODE_INLINE
2640 if sidedata and self.hassidedata:
2640 if sidedata and self.hassidedata:
2641 sidedata_compression_mode = COMP_MODE_PLAIN
2641 sidedata_compression_mode = COMP_MODE_PLAIN
2642 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2642 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2643 sidedata_offset = self._docket.sidedata_end
2643 sidedata_offset = self._docket.sidedata_end
2644 h, comp_sidedata = self.compress(serialized_sidedata)
2644 h, comp_sidedata = self.compress(serialized_sidedata)
2645 if (
2645 if (
2646 h != b'u'
2646 h != b'u'
2647 and comp_sidedata[0:1] != b'\0'
2647 and comp_sidedata[0:1] != b'\0'
2648 and len(comp_sidedata) < len(serialized_sidedata)
2648 and len(comp_sidedata) < len(serialized_sidedata)
2649 ):
2649 ):
2650 assert not h
2650 assert not h
2651 if (
2651 if (
2652 comp_sidedata[0:1]
2652 comp_sidedata[0:1]
2653 == self._docket.default_compression_header
2653 == self._docket.default_compression_header
2654 ):
2654 ):
2655 sidedata_compression_mode = COMP_MODE_DEFAULT
2655 sidedata_compression_mode = COMP_MODE_DEFAULT
2656 serialized_sidedata = comp_sidedata
2656 serialized_sidedata = comp_sidedata
2657 else:
2657 else:
2658 sidedata_compression_mode = COMP_MODE_INLINE
2658 sidedata_compression_mode = COMP_MODE_INLINE
2659 serialized_sidedata = comp_sidedata
2659 serialized_sidedata = comp_sidedata
2660 else:
2660 else:
2661 serialized_sidedata = b""
2661 serialized_sidedata = b""
2662 # Don't store the offset if the sidedata is empty, that way
2662 # Don't store the offset if the sidedata is empty, that way
2663 # we can easily detect empty sidedata and they will be no different
        # we can easily detect empty sidedata and they will be no different
        # than ones we manually add.
        sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else: # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
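                        # Editorial note: the expected full-replacement header
                        # is what ``mdiff.replacediffheader(oldlen, newlen)``
                        # builds: one hunk replacing the whole previous text,
                        # i.e. roughly struct.pack(b">lll", 0, oldlen, newlen).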
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
                dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When unset, the destination revlog's current
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
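                    # Note (editorial): catching CensoredNodeError here is
                    # what lets an upgrade/clone accept a censored revision;
                    # instead of aborting, the stored tombstone is carried
                    # over as the raw text for the destination revlog.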
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
@@ -1,611 +1,603 @@
#require no-reposimplestore
#testcases revlogv1 revlogv2

#if revlogv2

  $ cat >> $HGRCPATH <<EOF
  > [experimental]
  > revlogv2=enable-unstable-format-and-corrupt-my-data
  > EOF

#endif

  $ cp $HGRCPATH $HGRCPATH.orig

Create repo with unimpeachable content

  $ hg init r
  $ cd r
  $ echo 'Initially untainted file' > target
  $ echo 'Normal file here' > bystander
  $ hg add target bystander
  $ hg ci -m init

Clone repo so we can test pull later

  $ cd ..
  $ hg clone r rpull
  updating to branch default
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cd r

Introduce content which will ultimately require censorship. Name the first
censored node C1, second C2, and so on

  $ echo 'Tainted file' > target
  $ echo 'Passwords: hunter2' >> target
  $ hg ci -m taint target
  $ C1=`hg id --debug -i`

  $ echo 'hunter3' >> target
  $ echo 'Normal file v2' > bystander
  $ hg ci -m moretaint target bystander
  $ C2=`hg id --debug -i`

Add new sanitized versions to correct our mistake. Name the first head H1,
the second head H2, and so on

  $ echo 'Tainted file is now sanitized' > target
  $ hg ci -m sanitized target
  $ H1=`hg id --debug -i`

  $ hg update -r $C2
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ echo 'Tainted file now super sanitized' > target
  $ hg ci -m 'super sanitized' target
  created new head
  $ H2=`hg id --debug -i`

Verify target contents before censorship at each revision

  $ hg cat -r $H1 target | head -n 10
  Tainted file is now sanitized
  $ hg cat -r $H2 target | head -n 10
  Tainted file now super sanitized
  $ hg cat -r $C2 target | head -n 10
  Tainted file
  Passwords: hunter2
  hunter3
  $ hg cat -r $C1 target | head -n 10
  Tainted file
  Passwords: hunter2
  $ hg cat -r 0 target | head -n 10
  Initially untainted file

75 Censor revision with 2 offenses
75 Censor revision with 2 offenses
76
76
77 (this also tests file pattern matching: path relative to cwd case)
77 (this also tests file pattern matching: path relative to cwd case)
78
78
79 $ mkdir -p foo/bar/baz
79 $ mkdir -p foo/bar/baz
80 $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C2 -t "remove password" ../../../target
80 $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C2 -t "remove password" ../../../target
81 $ hg cat -r $H1 target | head -n 10
81 $ hg cat -r $H1 target | head -n 10
82 Tainted file is now sanitized
82 Tainted file is now sanitized
83 $ hg cat -r $H2 target | head -n 10
83 $ hg cat -r $H2 target | head -n 10
84 Tainted file now super sanitized
84 Tainted file now super sanitized
85 $ hg cat -r $C2 target | head -n 10
85 $ hg cat -r $C2 target | head -n 10
86 abort: censored node: 1e0247a9a4b7
86 abort: censored node: 1e0247a9a4b7
87 (set censor.policy to ignore errors)
87 (set censor.policy to ignore errors)
88 $ hg cat -r $C1 target | head -n 10
88 $ hg cat -r $C1 target | head -n 10
89 Tainted file
89 Tainted file
90 Passwords: hunter2
90 Passwords: hunter2
91 $ hg cat -r 0 target | head -n 10
91 $ hg cat -r 0 target | head -n 10
92 Initially untainted file
92 Initially untainted file
93
93
Censor revision with 1 offense

(this also tests file pattern matching: with 'path:' scheme)

  $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C1 path:target
  $ hg cat -r $H1 target | head -n 10
  Tainted file is now sanitized
  $ hg cat -r $H2 target | head -n 10
  Tainted file now super sanitized
  $ hg cat -r $C2 target | head -n 10
  abort: censored node: 1e0247a9a4b7
  (set censor.policy to ignore errors)
  $ hg cat -r $C1 target | head -n 10
  abort: censored node: 613bc869fceb
  (set censor.policy to ignore errors)
  $ hg cat -r 0 target | head -n 10
  Initially untainted file

Can only check out target at uncensored revisions; -X is a workaround for --all

  $ hg revert -r $C2 target | head -n 10
  abort: censored node: 1e0247a9a4b7
  (set censor.policy to ignore errors)
  $ hg revert -r $C1 target | head -n 10
  abort: censored node: 613bc869fceb
  (set censor.policy to ignore errors)
  $ hg revert -r $C1 --all
  reverting bystander
  reverting target
  abort: censored node: 613bc869fceb
  (set censor.policy to ignore errors)
  [255]
  $ hg revert -r $C1 --all -X target
  $ cat target | head -n 10
  Tainted file now super sanitized
  $ hg revert -r 0 --all
  reverting target
  $ cat target | head -n 10
  Initially untainted file
  $ hg revert -r $H2 --all
  reverting bystander
  reverting target
  $ cat target | head -n 10
  Tainted file now super sanitized

Uncensored file can be viewed at any revision

  $ hg cat -r $H1 bystander | head -n 10
  Normal file v2
  $ hg cat -r $C2 bystander | head -n 10
  Normal file v2
  $ hg cat -r $C1 bystander | head -n 10
  Normal file here
  $ hg cat -r 0 bystander | head -n 10
  Normal file here

Can update to children of censored revision

  $ hg update -r $H1
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  Tainted file is now sanitized
  $ hg update -r $H2
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  Tainted file now super sanitized

Set censor policy to abort in trusted $HGRC so hg verify fails

  $ cp $HGRCPATH.orig $HGRCPATH
  $ cat >> $HGRCPATH <<EOF
  > [censor]
  > policy = abort
  > EOF

Repo fails verification due to censorship

  $ hg verify
  checking changesets
  checking manifests
  crosschecking files in changesets and manifests
  checking files
  target@1: censored file data
  target@2: censored file data
  not checking dirstate because of previous errors
  checked 5 changesets with 7 changes to 2 files
  2 integrity errors encountered!
  (first damaged changeset appears to be 1)
  [1]

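Why verify flags exactly these revisions: a revlog node is the SHA-1 of the
two parent nodes in sorted order followed by the revision text, and censoring
swaps the text for a tombstone while keeping the original node. A minimal
sketch mirroring storageutil.hashrevisionsha1:

    import hashlib

    def hash_revision_sha1(text: bytes, p1: bytes, p2: bytes) -> bytes:
        # node = sha1(min(p1, p2) + max(p1, p2) + text); the stored node
        # still matches the original text, so rehashing the tombstone can
        # never reproduce it, and verify reports "censored file data"
        # rather than a generic integrity error.
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()
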
Cannot update to revision with censored data

  $ hg update -r $C2
  abort: censored node: 1e0247a9a4b7
  (set censor.policy to ignore errors)
  [255]
  $ hg update -r $C1
  abort: censored node: 613bc869fceb
  (set censor.policy to ignore errors)
  [255]
  $ hg update -r 0
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ hg update -r $H2
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved

Set censor policy to ignore in trusted $HGRC so hg verify passes

  $ cp $HGRCPATH.orig $HGRCPATH
  $ cat >> $HGRCPATH <<EOF
  > [censor]
  > policy = ignore
  > EOF

Repo passes verification with warnings with explicit config

  $ hg verify -q

May update to revision with censored data with explicit config

  $ hg update -r $C2
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  $ hg update -r $C1
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  $ hg update -r 0
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  Initially untainted file
  $ hg update -r $H2
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  Tainted file now super sanitized

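The empty `hg cat` output above is the "ignore" policy at work: readers that
hit a censored node consult censor.policy and substitute empty content. A
hedged sketch of the pattern (the wrapper is illustrative, not the actual hg
call site; error.CensoredNodeError and ui.config are real APIs):

    from mercurial import error

    def read_possibly_censored(flog, node, ui):
        # illustrative wrapper: map a censored node to empty data when
        # the user opted into censor.policy=ignore
        try:
            return flog.read(node)
        except error.CensoredNodeError:
            if ui.config(b'censor', b'policy') == b'ignore':
                return b''  # pretend the file is empty, as `hg cat` shows
            raise  # default "abort" policy: propagate the error
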
Can merge in revision with censored data. Test requires one branch of history
with the file censored, but we can't censor at a head, so advance H1.

  $ hg update -r $H1
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ C3=$H1
  $ echo 'advanced head H1' > target
  $ hg ci -m 'advance head H1' target
  $ H1=`hg id --debug -i`
  $ hg --config extensions.censor= censor -r $C3 target
  $ hg update -r $H2
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ hg merge -r $C3
  merging target
  0 files updated, 1 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)

Revisions present in repository heads may not be censored

  $ hg update -C -r $H2
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ hg --config extensions.censor= censor -r $H2 target
  abort: cannot censor file in heads (78a8fc215e79)
  (clean/delete and commit first)
  [255]
  $ echo 'twiddling thumbs' > bystander
  $ hg ci -m 'bystander commit'
  $ H2=`hg id --debug -i`
  $ hg --config extensions.censor= censor -r "$H2^" target
  abort: cannot censor file in heads (efbe78065929)
  (clean/delete and commit first)
  [255]

Cannot censor working directory

  $ echo 'seriously no passwords' > target
  $ hg ci -m 'extend second head arbitrarily' target
  $ H2=`hg id --debug -i`
  $ hg update -r "$H2^"
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ hg --config extensions.censor= censor -r . target
  abort: cannot censor working directory
  (clean/delete/update first)
  [255]
  $ hg update -r $H2
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved

Can re-add file after being deleted + censored

  $ C4=$H2
  $ hg rm target
  $ hg ci -m 'delete target so it may be censored'
  $ H2=`hg id --debug -i`
  $ hg --config extensions.censor= censor -r $C4 target
  $ hg cat -r $C4 target | head -n 10
  $ hg cat -r "$H2^^" target | head -n 10
  Tainted file now super sanitized
  $ echo 'fresh start' > target
  $ hg add target
  $ hg ci -m reincarnated target
  $ H2=`hg id --debug -i`
  $ hg cat -r $H2 target | head -n 10
  fresh start
  $ hg cat -r "$H2^" target | head -n 10
  target: no such file in rev 452ec1762369
  $ hg cat -r $C4 target | head -n 10
  $ hg cat -r "$H2^^^" target | head -n 10
  Tainted file now super sanitized

Can censor after revlog has expanded to no longer permit inline storage

  $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000`
  > do
  > echo "Password: hunter$x" >> target
  > done
  $ hg ci -m 'add 100k passwords'
  $ H2=`hg id --debug -i`
  $ C5=$H2
  $ hg revert -r "$H2^" target
  $ hg ci -m 'cleaned 100k passwords'
  $ H2=`hg id --debug -i`
  $ hg --config extensions.censor= censor -r $C5 target
  $ hg cat -r $C5 target | head -n 10
  $ hg cat -r $H2 target | head -n 10
  fresh start

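Background for this step: a small revlog keeps its index and data interleaved
in a single inline .i file and is split into separate .i/.d files once the
data outgrows a fixed threshold (131072 bytes, the _maxinline constant in
revlog.py); the password lines above force that split, so censor must then
rewrite a non-inline revlog. A rough sketch of the check, assuming only that
constant:

    _maxinline = 131072  # threshold used by revlog._enforceinlinesize

    def should_split(inline: bool, data_size: int) -> bool:
        # an inline revlog is rewritten into separate index (.i) and
        # data (.d) files once its data grows past the threshold
        return inline and data_size > _maxinline
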
Repo with censored nodes can be cloned and cloned nodes are censored

  $ cd ..
  $ hg clone r rclone
  updating to branch default
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cd rclone
  $ hg cat -r $H1 target | head -n 10
  advanced head H1
  $ hg cat -r $H2~5 target | head -n 10
  Tainted file now super sanitized
  $ hg cat -r $C2 target | head -n 10
  $ hg cat -r $C1 target | head -n 10
  $ hg cat -r 0 target | head -n 10
  Initially untainted file
  $ hg verify -q

Repo cloned before tainted content introduced can pull censored nodes

  $ cd ../rpull
  $ hg cat -r tip target | head -n 10
  Initially untainted file
  $ hg verify -q
  $ hg pull -r $H1 -r $H2
  pulling from $TESTTMP/r
  searching for changes
  adding changesets
  adding manifests
  adding file changes
  added 11 changesets with 11 changes to 2 files (+1 heads)
  new changesets 186fb27560c3:683e4645fded
  (run 'hg heads' to see heads, 'hg merge' to merge)
  $ hg update 4
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  Tainted file now super sanitized
  $ hg cat -r $H1 target | head -n 10
  advanced head H1
  $ hg cat -r $H2~5 target | head -n 10
  Tainted file now super sanitized
  $ hg cat -r $C2 target | head -n 10
  $ hg cat -r $C1 target | head -n 10
  $ hg cat -r 0 target | head -n 10
  Initially untainted file
  $ hg verify -q

Censored nodes can be pushed if they censor previously unexchanged nodes

  $ echo 'Passwords: hunter2hunter2' > target
  $ hg ci -m 're-add password from clone' target
  created new head
  $ H3=`hg id --debug -i`
  $ REV=$H3
  $ echo 'Re-sanitized; nothing to see here' > target
  $ hg ci -m 're-sanitized' target
  $ H2=`hg id --debug -i`
  $ CLEANREV=$H2
  $ hg cat -r $REV target | head -n 10
  Passwords: hunter2hunter2
  $ hg --config extensions.censor= censor -r $REV target
  $ hg cat -r $REV target | head -n 10
  $ hg cat -r $CLEANREV target | head -n 10
  Re-sanitized; nothing to see here
  $ hg push -f -r $H2
  pushing to $TESTTMP/r
  searching for changes
  adding changesets
  adding manifests
  adding file changes
  added 2 changesets with 2 changes to 1 files (+1 heads)

  $ cd ../r
  $ hg cat -r $REV target | head -n 10
  $ hg cat -r $CLEANREV target | head -n 10
  Re-sanitized; nothing to see here
  $ hg update $CLEANREV
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  Re-sanitized; nothing to see here

Censored nodes can be bundled up and unbundled in another repo

  $ hg bundle --base 0 ../pwbundle
  13 changesets found
  $ cd ../rclone
  $ hg unbundle ../pwbundle
  adding changesets
  adding manifests
  adding file changes
  added 2 changesets with 2 changes to 2 files (+1 heads)
  new changesets 075be80ac777:dcbaf17bf3a1 (2 drafts)
  (run 'hg heads .' to see heads, 'hg merge' to merge)
  $ hg cat -r $REV target | head -n 10
  $ hg cat -r $CLEANREV target | head -n 10
  Re-sanitized; nothing to see here
  $ hg update $CLEANREV
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  Re-sanitized; nothing to see here
  $ hg verify -q

Grepping only warns, doesn't error out

  $ cd ../rpull
  $ hg grep 'Normal file'
  bystander:Normal file v2
  $ hg grep nothing
  target:Re-sanitized; nothing to see here
  $ hg grep --diff 'Normal file'
  cannot search in censored file: target:7
  cannot search in censored file: target:10
  cannot search in censored file: target:12
  bystander:6:-:Normal file v2
  cannot search in censored file: target:1
  cannot search in censored file: target:2
  cannot search in censored file: target:3
  bystander:2:-:Normal file here
  bystander:2:+:Normal file v2
  bystander:0:+:Normal file here
  $ hg grep --diff nothing
  cannot search in censored file: target:7
  cannot search in censored file: target:10
  cannot search in censored file: target:12
  target:13:+:Re-sanitized; nothing to see here
  cannot search in censored file: target:1
  cannot search in censored file: target:2
  cannot search in censored file: target:3

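grep downgrades the censorship error to a per-revision warning instead of
aborting, so the search continues across the rest of history. A minimal
sketch of the pattern (the helper is illustrative; the warning text matches
the transcript above):

    from mercurial import error

    def grep_file(ui, fctx, pattern):
        # illustrative: skip a censored revision with a warning rather
        # than letting the abort terminate the whole search
        try:
            text = fctx.data()
        except error.CensoredNodeError:
            ui.warn(b'cannot search in censored file: %s:%d\n'
                    % (fctx.path(), fctx.rev()))
            return []
        return [l for l in text.splitlines() if pattern in l]
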
Censored nodes can be imported on top of censored nodes, consecutively

  $ hg init ../rimport
  $ hg bundle --base 1 ../rimport/splitbundle
  12 changesets found
  $ cd ../rimport
  $ hg pull -r $H1 -r $H2 ../r
  pulling from ../r
  adding changesets
  adding manifests
  adding file changes
  added 8 changesets with 10 changes to 2 files (+1 heads)
  new changesets e97f55b2665a:dcbaf17bf3a1
  (run 'hg heads' to see heads, 'hg merge' to merge)
  $ hg unbundle splitbundle
  adding changesets
  adding manifests
  adding file changes
  added 6 changesets with 5 changes to 2 files (+1 heads)
  new changesets efbe78065929:683e4645fded (6 drafts)
  (run 'hg heads .' to see heads, 'hg merge' to merge)
  $ hg update $H2
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat target | head -n 10
  Re-sanitized; nothing to see here
  $ hg verify -q
  $ cd ../r

Can import bundle where first revision of a file is censored

  $ hg init ../rinit
  $ hg --config extensions.censor= censor -r 0 target
  $ hg bundle -r 0 --base null ../rinit/initbundle
  1 changesets found
  $ cd ../rinit
  $ hg unbundle initbundle
  adding changesets
  adding manifests
  adding file changes
  added 1 changesets with 2 changes to 2 files
  new changesets e97f55b2665a (1 drafts)
  (run 'hg update' to get a working copy)
  $ hg cat -r 0 target | head -n 10

#if revlogv2

Testing feature that does not work in revlog v1
===============================================

Censoring a revision that is used as delta base
-----------------------------------------------

  $ cd ..
  $ hg init censor-with-delta
  $ cd censor-with-delta
  $ echo root > target
  $ hg add target
  $ hg commit -m root
  $ B0=`hg id --debug -i`
  $ for x in `"$PYTHON" $TESTDIR/seq.py 0 50000`
  > do
  > echo "Password: hunter$x" >> target
  > done
  $ hg ci -m 'write a long file'
  $ B1=`hg id --debug -i`
  $ echo 'small change (should create a delta)' >> target
  $ hg ci -m 'create a delta over the password'
(should show that the last revision is a delta, not a snapshot)
  $ B2=`hg id --debug -i`

Make sure the last revision is a delta against the revision we will censor

  $ hg debugdeltachain target -T '{rev} {chainid} {chainlen} {prevrev}\n'
  0 1 1 -1
  1 2 1 -1
  2 2 2 1

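Reading the debugdeltachain columns: rev 2 sits in chain 2 with chainlen 2
and prevrev 1, i.e. it is stored as a delta whose base is rev 1, the revision
about to be censored. A sketch of how that chain length can be recomputed
from a revlog (deltaparent and nullrev are real Mercurial APIs; rl is any
open revlog):

    from mercurial.node import nullrev

    def delta_chain(rl, rev):
        # walk delta bases back to the full snapshot; len(delta_chain(...))
        # is the "chainlen" column printed by `hg debugdeltachain`
        chain = [rev]
        while rl.deltaparent(chain[-1]) != nullrev:
            chain.append(rl.deltaparent(chain[-1]))
        chain.reverse()  # base snapshot first
        return chain
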
Censor the file

  $ hg cat -r $B1 target | wc -l
  *50002 (re)
  $ hg --config extensions.censor= censor -r $B1 target
  $ hg cat -r $B1 target | wc -l
  *0 (re)

Check that the child revision is fine

  $ hg cat -r $B2 target | wc -l
  *50003 (re)

#endif

Testing repository upgrade with censored revisions
==================================================

  $ cd ../rclone

With the "abort" policy
=======================

  $ hg verify --config censor.policy=ignore
  checking changesets
  checking manifests
  crosschecking files in changesets and manifests
  checking files
  checking dirstate
  checked 14 changesets with 15 changes to 2 files
  $ hg debugupgraderepo --run --quiet \
  > --optimize re-delta-parent \
  > --config censor.policy=abort
  upgrade will perform the following actions:

  requirements
  preserved: * (glob)

  optimisations: re-delta-parent

  processed revlogs:
  - all-filelogs
  - changelog
  - manifest

  $ hg verify --config censor.policy=ignore
  checking changesets
  checking manifests
  crosschecking files in changesets and manifests
  checking files
  checking dirstate
  checked 14 changesets with 15 changes to 2 files

With the "ignore" policy
========================

  $ hg verify --config censor.policy=ignore
  checking changesets
  checking manifests
  crosschecking files in changesets and manifests
  checking files
  checking dirstate
  checked 14 changesets with 15 changes to 2 files
  $ hg debugupgraderepo --run --quiet \
  > --optimize re-delta-parent \
  > --config censor.policy=ignore
  upgrade will perform the following actions:

  requirements
  preserved: * (glob)

  optimisations: re-delta-parent

  processed revlogs:
  - all-filelogs
  - changelog
  - manifest

  $ hg verify --config censor.policy=ignore
  checking changesets
  checking manifests
  crosschecking files in changesets and manifests
  checking files
  checking dirstate
  checked 14 changesets with 15 changes to 2 files
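
This clean run is the behaviour the present revision introduces: previously,
re-deltaing during the upgrade hit the censored revision and died with
"transaction abort! / abort: file censored target:613bc869fceb", rolling the
upgrade back. Now the censored entry is carried over as-is. A hedged sketch
of the idea, with hypothetical helper names (revlog.iscensored is a real
method; the rest is illustrative):

    def clone_revision(dest, src, rev):
        # keep the tombstone verbatim instead of trying to recompute a
        # delta over censored data, which is what used to abort
        if src.iscensored(rev):
            dest.copy_verbatim(src, rev)    # hypothetical helper
        else:
            dest.store_redelta(src, rev)    # hypothetical: re-delta-parent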