revlog: code for `revlogv0` in its own module...
marmoute
r47812:724db234 default
--- /dev/null
+++ b/mercurial/revlogutils/revlogv0.py
@@ -0,0 +1,144 @@
+# revlogv0 - code related to revlog format "V0"
+#
+# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+
+
+from ..node import sha1nodeconstants
+from .constants import (
+    INDEX_ENTRY_V0,
+)
+from ..i18n import _
+
+from .. import (
+    error,
+    node,
+    pycompat,
+    util,
+)
+
+from . import (
+    flagutil,
+    nodemap as nodemaputil,
+)
+
+
+def getoffset(q):
+    return int(q >> 16)
+
+
+def gettype(q):
+    return int(q & 0xFFFF)
+
+
+def offset_type(offset, type):
+    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
+        raise ValueError(b'unknown revlog index flags')
+    return int(int(offset) << 16 | type)
+
+
+class revlogoldindex(list):
+    entry_size = INDEX_ENTRY_V0.size
+
+    @property
+    def nodemap(self):
+        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
+        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
+        return self._nodemap
+
+    @util.propertycache
+    def _nodemap(self):
+        nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: node.nullrev})
+        for r in range(0, len(self)):
+            n = self[r][7]
+            nodemap[n] = r
+        return nodemap
+
+    def has_node(self, node):
+        """return True if the node exist in the index"""
+        return node in self._nodemap
+
+    def rev(self, node):
+        """return a revision for a node
+
+        If the node is unknown, raise a RevlogError"""
+        return self._nodemap[node]
+
+    def get_rev(self, node):
+        """return a revision for a node
+
+        If the node is unknown, return None"""
+        return self._nodemap.get(node)
+
+    def append(self, tup):
+        self._nodemap[tup[7]] = len(self)
+        super(revlogoldindex, self).append(tup)
+
+    def __delitem__(self, i):
+        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
+            raise ValueError(b"deleting slices only supports a:-1 with step 1")
+        for r in pycompat.xrange(i.start, len(self)):
+            del self._nodemap[self[r][7]]
+        super(revlogoldindex, self).__delitem__(i)
+
+    def clearcaches(self):
+        self.__dict__.pop('_nodemap', None)
+
+    def __getitem__(self, i):
+        if i == -1:
+            return (0, 0, 0, -1, -1, -1, -1, node.nullid)
+        return list.__getitem__(self, i)
+
+    def pack_header(self, header):
+        """pack header information in binary"""
+        return b''
+
+    def entry_binary(self, rev):
+        """return the raw binary string representing a revision"""
+        entry = self[rev]
+        if gettype(entry[0]):
+            raise error.RevlogError(
+                _(b'index entry flags need revlog version 1')
+            )
+        e2 = (
+            getoffset(entry[0]),
+            entry[1],
+            entry[3],
+            entry[4],
+            self[entry[5]][7],
+            self[entry[6]][7],
+            entry[7],
+        )
+        return INDEX_ENTRY_V0.pack(*e2)
+
+
+def parse_index_v0(data, inline):
+    s = INDEX_ENTRY_V0.size
+    index = []
+    nodemap = nodemaputil.NodeMap({node.nullid: node.nullrev})
+    n = off = 0
+    l = len(data)
+    while off + s <= l:
+        cur = data[off : off + s]
+        off += s
+        e = INDEX_ENTRY_V0.unpack(cur)
+        # transform to revlogv1 format
+        e2 = (
+            offset_type(e[0], 0),
+            e[1],
+            -1,
+            e[2],
+            e[3],
+            nodemap.get(e[4], node.nullrev),
+            nodemap.get(e[5], node.nullrev),
+            e[6],
+        )
+        index.append(e2)
+        nodemap[e[6]] = n
+        n += 1
+
+    index = revlogoldindex(index)
+    return index, None
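The `offset_type`, `getoffset`, and `gettype` helpers above pack a data-file byte offset and the 16-bit flag field into the single integer stored in the first slot of each index entry. A minimal round-trip sketch of that packing (illustrative values, not part of the commit; it assumes the helpers from the module above are in scope):

    # offset 4096 in the data file, no revision flags set
    packed = offset_type(4096, 0)
    assert packed == (4096 << 16) | 0  # the raw encoding
    assert getoffset(packed) == 4096   # upper bits hold the offset
    assert gettype(packed) == 0        # low 16 bits hold the flags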
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,3233 +1,3121 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 FLAG_GENERALDELTA,
37 FLAG_GENERALDELTA,
38 FLAG_INLINE_DATA,
38 FLAG_INLINE_DATA,
39 INDEX_ENTRY_V0,
40 INDEX_HEADER,
39 INDEX_HEADER,
41 REVLOGV0,
40 REVLOGV0,
42 REVLOGV1,
41 REVLOGV1,
43 REVLOGV1_FLAGS,
42 REVLOGV1_FLAGS,
44 REVLOGV2,
43 REVLOGV2,
45 REVLOGV2_FLAGS,
44 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
45 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
46 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
47 REVLOG_DEFAULT_VERSION,
49 )
48 )
50 from .revlogutils.flagutil import (
49 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
50 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
51 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
52 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
53 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
54 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
55 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
56 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_SIDEDATA,
57 REVIDX_SIDEDATA,
59 )
58 )
60 from .thirdparty import attr
59 from .thirdparty import attr
61 from . import (
60 from . import (
62 ancestor,
61 ancestor,
63 dagop,
62 dagop,
64 error,
63 error,
65 mdiff,
64 mdiff,
66 policy,
65 policy,
67 pycompat,
66 pycompat,
68 templatefilters,
67 templatefilters,
69 util,
68 util,
70 )
69 )
71 from .interfaces import (
70 from .interfaces import (
72 repository,
71 repository,
73 util as interfaceutil,
72 util as interfaceutil,
74 )
73 )
75 from .revlogutils import (
74 from .revlogutils import (
76 deltas as deltautil,
75 deltas as deltautil,
77 flagutil,
76 flagutil,
78 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
87 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
88 REVLOGV0
88 REVLOGV0
89 REVLOGV1
89 REVLOGV1
90 REVLOGV2
90 REVLOGV2
91 FLAG_INLINE_DATA
91 FLAG_INLINE_DATA
92 FLAG_GENERALDELTA
92 FLAG_GENERALDELTA
93 REVLOG_DEFAULT_FLAGS
93 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FORMAT
94 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_VERSION
95 REVLOG_DEFAULT_VERSION
96 REVLOGV1_FLAGS
96 REVLOGV1_FLAGS
97 REVLOGV2_FLAGS
97 REVLOGV2_FLAGS
98 REVIDX_ISCENSORED
98 REVIDX_ISCENSORED
99 REVIDX_ELLIPSIS
99 REVIDX_ELLIPSIS
100 REVIDX_SIDEDATA
100 REVIDX_SIDEDATA
101 REVIDX_HASCOPIESINFO
101 REVIDX_HASCOPIESINFO
102 REVIDX_EXTSTORED
102 REVIDX_EXTSTORED
103 REVIDX_DEFAULT_FLAGS
103 REVIDX_DEFAULT_FLAGS
104 REVIDX_FLAGS_ORDER
104 REVIDX_FLAGS_ORDER
105 REVIDX_RAWTEXT_CHANGING_FLAGS
105 REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
107 parsers = policy.importmod('parsers')
107 parsers = policy.importmod('parsers')
108 rustancestor = policy.importrust('ancestor')
108 rustancestor = policy.importrust('ancestor')
109 rustdagop = policy.importrust('dagop')
109 rustdagop = policy.importrust('dagop')
110 rustrevlog = policy.importrust('revlog')
110 rustrevlog = policy.importrust('revlog')
111
111
112 # Aliased for performance.
112 # Aliased for performance.
113 _zlibdecompress = zlib.decompress
113 _zlibdecompress = zlib.decompress
114
114
115 # max size of revlog with inline data
115 # max size of revlog with inline data
116 _maxinline = 131072
116 _maxinline = 131072
117 _chunksize = 1048576
117 _chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
120 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
121 return text, False
121 return text, False
122
122
123
123
124 def ellipsiswriteprocessor(rl, text):
124 def ellipsiswriteprocessor(rl, text):
125 return text, False
125 return text, False
126
126
127
127
128 def ellipsisrawprocessor(rl, text):
128 def ellipsisrawprocessor(rl, text):
129 return False
129 return False
130
130
131
131
132 ellipsisprocessor = (
132 ellipsisprocessor = (
133 ellipsisreadprocessor,
133 ellipsisreadprocessor,
134 ellipsiswriteprocessor,
134 ellipsiswriteprocessor,
135 ellipsisrawprocessor,
135 ellipsisrawprocessor,
136 )
136 )
137
137
138
138
139 def getoffset(q):
140 return int(q >> 16)
141
142
143 def gettype(q):
144 return int(q & 0xFFFF)
145
146
147 def offset_type(offset, type):
139 def offset_type(offset, type):
148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 raise ValueError(b'unknown revlog index flags')
141 raise ValueError(b'unknown revlog index flags')
150 return int(int(offset) << 16 | type)
142 return int(int(offset) << 16 | type)
151
143
152
144
153 def _verify_revision(rl, skipflags, state, node):
145 def _verify_revision(rl, skipflags, state, node):
154 """Verify the integrity of the given revlog ``node`` while providing a hook
146 """Verify the integrity of the given revlog ``node`` while providing a hook
155 point for extensions to influence the operation."""
147 point for extensions to influence the operation."""
156 if skipflags:
148 if skipflags:
157 state[b'skipread'].add(node)
149 state[b'skipread'].add(node)
158 else:
150 else:
159 # Side-effect: read content and verify hash.
151 # Side-effect: read content and verify hash.
160 rl.revision(node)
152 rl.revision(node)
161
153
162
154
163 # True if a fast implementation for persistent-nodemap is available
155 # True if a fast implementation for persistent-nodemap is available
164 #
156 #
165 # We also consider we have a "fast" implementation in "pure" python because
157 # We also consider we have a "fast" implementation in "pure" python because
166 # people using pure don't really have performance consideration (and a
158 # people using pure don't really have performance consideration (and a
167 # wheelbarrow of other slowness source)
159 # wheelbarrow of other slowness source)
168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 parsers, 'BaseIndexObject'
161 parsers, 'BaseIndexObject'
170 )
162 )
171
163
172
164
173 @attr.s(slots=True, frozen=True)
165 @attr.s(slots=True, frozen=True)
174 class _revisioninfo(object):
166 class _revisioninfo(object):
175 """Information about a revision that allows building its fulltext
167 """Information about a revision that allows building its fulltext
176 node: expected hash of the revision
168 node: expected hash of the revision
177 p1, p2: parent revs of the revision
169 p1, p2: parent revs of the revision
178 btext: built text cache consisting of a one-element list
170 btext: built text cache consisting of a one-element list
179 cachedelta: (baserev, uncompressed_delta) or None
171 cachedelta: (baserev, uncompressed_delta) or None
180 flags: flags associated to the revision storage
172 flags: flags associated to the revision storage
181
173
182 One of btext[0] or cachedelta must be set.
174 One of btext[0] or cachedelta must be set.
183 """
175 """
184
176
185 node = attr.ib()
177 node = attr.ib()
186 p1 = attr.ib()
178 p1 = attr.ib()
187 p2 = attr.ib()
179 p2 = attr.ib()
188 btext = attr.ib()
180 btext = attr.ib()
189 textlen = attr.ib()
181 textlen = attr.ib()
190 cachedelta = attr.ib()
182 cachedelta = attr.ib()
191 flags = attr.ib()
183 flags = attr.ib()
192
184
193
185
194 @interfaceutil.implementer(repository.irevisiondelta)
186 @interfaceutil.implementer(repository.irevisiondelta)
195 @attr.s(slots=True)
187 @attr.s(slots=True)
196 class revlogrevisiondelta(object):
188 class revlogrevisiondelta(object):
197 node = attr.ib()
189 node = attr.ib()
198 p1node = attr.ib()
190 p1node = attr.ib()
199 p2node = attr.ib()
191 p2node = attr.ib()
200 basenode = attr.ib()
192 basenode = attr.ib()
201 flags = attr.ib()
193 flags = attr.ib()
202 baserevisionsize = attr.ib()
194 baserevisionsize = attr.ib()
203 revision = attr.ib()
195 revision = attr.ib()
204 delta = attr.ib()
196 delta = attr.ib()
205 sidedata = attr.ib()
197 sidedata = attr.ib()
206 linknode = attr.ib(default=None)
198 linknode = attr.ib(default=None)
207
199
208
200
209 @interfaceutil.implementer(repository.iverifyproblem)
201 @interfaceutil.implementer(repository.iverifyproblem)
210 @attr.s(frozen=True)
202 @attr.s(frozen=True)
211 class revlogproblem(object):
203 class revlogproblem(object):
212 warning = attr.ib(default=None)
204 warning = attr.ib(default=None)
213 error = attr.ib(default=None)
205 error = attr.ib(default=None)
214 node = attr.ib(default=None)
206 node = attr.ib(default=None)
215
207
216
208
217 class revlogoldindex(list):
218 entry_size = INDEX_ENTRY_V0.size
219
220 @property
221 def nodemap(self):
222 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
223 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
224 return self._nodemap
225
226 @util.propertycache
227 def _nodemap(self):
228 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
229 for r in range(0, len(self)):
230 n = self[r][7]
231 nodemap[n] = r
232 return nodemap
233
234 def has_node(self, node):
235 """return True if the node exist in the index"""
236 return node in self._nodemap
237
238 def rev(self, node):
239 """return a revision for a node
240
241 If the node is unknown, raise a RevlogError"""
242 return self._nodemap[node]
243
244 def get_rev(self, node):
245 """return a revision for a node
246
247 If the node is unknown, return None"""
248 return self._nodemap.get(node)
249
250 def append(self, tup):
251 self._nodemap[tup[7]] = len(self)
252 super(revlogoldindex, self).append(tup)
253
254 def __delitem__(self, i):
255 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
256 raise ValueError(b"deleting slices only supports a:-1 with step 1")
257 for r in pycompat.xrange(i.start, len(self)):
258 del self._nodemap[self[r][7]]
259 super(revlogoldindex, self).__delitem__(i)
260
261 def clearcaches(self):
262 self.__dict__.pop('_nodemap', None)
263
264 def __getitem__(self, i):
265 if i == -1:
266 return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
267 return list.__getitem__(self, i)
268
269 def entry_binary(self, rev):
270 """return the raw binary string representing a revision"""
271 entry = self[rev]
272 if gettype(entry[0]):
273 raise error.RevlogError(
274 _(b'index entry flags need revlog version 1')
275 )
276 e2 = (
277 getoffset(entry[0]),
278 entry[1],
279 entry[3],
280 entry[4],
281 self[entry[5]][7],
282 self[entry[6]][7],
283 entry[7],
284 )
285 return INDEX_ENTRY_V0.pack(*e2)
286
287 def pack_header(self, header):
288 """Pack header information in binary"""
289 return b''
290
291
292 def parse_index_v0(data, inline):
293 s = INDEX_ENTRY_V0.size
294 index = []
295 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
296 n = off = 0
297 l = len(data)
298 while off + s <= l:
299 cur = data[off : off + s]
300 off += s
301 e = INDEX_ENTRY_V0.unpack(cur)
302 # transform to revlogv1 format
303 e2 = (
304 offset_type(e[0], 0),
305 e[1],
306 -1,
307 e[2],
308 e[3],
309 nodemap.get(e[4], nullrev),
310 nodemap.get(e[5], nullrev),
311 e[6],
312 )
313 index.append(e2)
314 nodemap[e[6]] = n
315 n += 1
316
317 index = revlogoldindex(index)
318 return index, None
319
320
321 def parse_index_v1(data, inline):
209 def parse_index_v1(data, inline):
322 # call the C implementation to parse the index data
210 # call the C implementation to parse the index data
323 index, cache = parsers.parse_index2(data, inline)
211 index, cache = parsers.parse_index2(data, inline)
324 return index, cache
212 return index, cache
325
213
326
214
327 def parse_index_v2(data, inline):
215 def parse_index_v2(data, inline):
328 # call the C implementation to parse the index data
216 # call the C implementation to parse the index data
329 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
217 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
330 return index, cache
218 return index, cache
331
219
332
220
333 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
221 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
334
222
335 def parse_index_v1_nodemap(data, inline):
223 def parse_index_v1_nodemap(data, inline):
336 index, cache = parsers.parse_index_devel_nodemap(data, inline)
224 index, cache = parsers.parse_index_devel_nodemap(data, inline)
337 return index, cache
225 return index, cache
338
226
339
227
340 else:
228 else:
341 parse_index_v1_nodemap = None
229 parse_index_v1_nodemap = None
342
230
343
231
344 def parse_index_v1_mixed(data, inline):
232 def parse_index_v1_mixed(data, inline):
345 index, cache = parse_index_v1(data, inline)
233 index, cache = parse_index_v1(data, inline)
346 return rustrevlog.MixedIndex(index), cache
234 return rustrevlog.MixedIndex(index), cache
347
235
348
236
349 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
237 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
350 # signed integer)
238 # signed integer)
351 _maxentrysize = 0x7FFFFFFF
239 _maxentrysize = 0x7FFFFFFF
352
240
353
241
354 class revlog(object):
242 class revlog(object):
355 """
243 """
356 the underlying revision storage object
244 the underlying revision storage object
357
245
358 A revlog consists of two parts, an index and the revision data.
246 A revlog consists of two parts, an index and the revision data.
359
247
360 The index is a file with a fixed record size containing
248 The index is a file with a fixed record size containing
361 information on each revision, including its nodeid (hash), the
249 information on each revision, including its nodeid (hash), the
362 nodeids of its parents, the position and offset of its data within
250 nodeids of its parents, the position and offset of its data within
363 the data file, and the revision it's based on. Finally, each entry
251 the data file, and the revision it's based on. Finally, each entry
364 contains a linkrev entry that can serve as a pointer to external
252 contains a linkrev entry that can serve as a pointer to external
365 data.
253 data.
366
254
367 The revision data itself is a linear collection of data chunks.
255 The revision data itself is a linear collection of data chunks.
368 Each chunk represents a revision and is usually represented as a
256 Each chunk represents a revision and is usually represented as a
369 delta against the previous chunk. To bound lookup time, runs of
257 delta against the previous chunk. To bound lookup time, runs of
370 deltas are limited to about 2 times the length of the original
258 deltas are limited to about 2 times the length of the original
371 version data. This makes retrieval of a version proportional to
259 version data. This makes retrieval of a version proportional to
372 its size, or O(1) relative to the number of revisions.
260 its size, or O(1) relative to the number of revisions.
373
261
374 Both pieces of the revlog are written to in an append-only
262 Both pieces of the revlog are written to in an append-only
375 fashion, which means we never need to rewrite a file to insert or
263 fashion, which means we never need to rewrite a file to insert or
376 remove data, and can use some simple techniques to avoid the need
264 remove data, and can use some simple techniques to avoid the need
377 for locking while reading.
265 for locking while reading.
378
266
379 If checkambig, indexfile is opened with checkambig=True at
267 If checkambig, indexfile is opened with checkambig=True at
380 writing, to avoid file stat ambiguity.
268 writing, to avoid file stat ambiguity.
381
269
382 If mmaplargeindex is True, and an mmapindexthreshold is set, the
270 If mmaplargeindex is True, and an mmapindexthreshold is set, the
383 index will be mmapped rather than read if it is larger than the
271 index will be mmapped rather than read if it is larger than the
384 configured threshold.
272 configured threshold.
385
273
386 If censorable is True, the revlog can have censored revisions.
274 If censorable is True, the revlog can have censored revisions.
387
275
388 If `upperboundcomp` is not None, this is the expected maximal gain from
276 If `upperboundcomp` is not None, this is the expected maximal gain from
389 compression for the data content.
277 compression for the data content.
390
278
391 `concurrencychecker` is an optional function that receives 3 arguments: a
279 `concurrencychecker` is an optional function that receives 3 arguments: a
392 file handle, a filename, and an expected position. It should check whether
280 file handle, a filename, and an expected position. It should check whether
393 the current position in the file handle is valid, and log/warn/fail (by
281 the current position in the file handle is valid, and log/warn/fail (by
394 raising).
282 raising).
395 """
283 """
396
284
397 _flagserrorclass = error.RevlogError
285 _flagserrorclass = error.RevlogError
398
286
399 def __init__(
287 def __init__(
400 self,
288 self,
401 opener,
289 opener,
402 indexfile,
290 indexfile,
403 datafile=None,
291 datafile=None,
404 checkambig=False,
292 checkambig=False,
405 mmaplargeindex=False,
293 mmaplargeindex=False,
406 censorable=False,
294 censorable=False,
407 upperboundcomp=None,
295 upperboundcomp=None,
408 persistentnodemap=False,
296 persistentnodemap=False,
409 concurrencychecker=None,
297 concurrencychecker=None,
410 ):
298 ):
411 """
299 """
412 create a revlog object
300 create a revlog object
413
301
414 opener is a function that abstracts the file opening operation
302 opener is a function that abstracts the file opening operation
415 and can be used to implement COW semantics or the like.
303 and can be used to implement COW semantics or the like.
416
304
417 """
305 """
418 self.upperboundcomp = upperboundcomp
306 self.upperboundcomp = upperboundcomp
419 self.indexfile = indexfile
307 self.indexfile = indexfile
420 self.datafile = datafile or (indexfile[:-2] + b".d")
308 self.datafile = datafile or (indexfile[:-2] + b".d")
421 self.nodemap_file = None
309 self.nodemap_file = None
422 if persistentnodemap:
310 if persistentnodemap:
423 self.nodemap_file = nodemaputil.get_nodemap_file(
311 self.nodemap_file = nodemaputil.get_nodemap_file(
424 opener, self.indexfile
312 opener, self.indexfile
425 )
313 )
426
314
427 self.opener = opener
315 self.opener = opener
428 # When True, indexfile is opened with checkambig=True at writing, to
316 # When True, indexfile is opened with checkambig=True at writing, to
429 # avoid file stat ambiguity.
317 # avoid file stat ambiguity.
430 self._checkambig = checkambig
318 self._checkambig = checkambig
431 self._mmaplargeindex = mmaplargeindex
319 self._mmaplargeindex = mmaplargeindex
432 self._censorable = censorable
320 self._censorable = censorable
433 # 3-tuple of (node, rev, text) for a raw revision.
321 # 3-tuple of (node, rev, text) for a raw revision.
434 self._revisioncache = None
322 self._revisioncache = None
435 # Maps rev to chain base rev.
323 # Maps rev to chain base rev.
436 self._chainbasecache = util.lrucachedict(100)
324 self._chainbasecache = util.lrucachedict(100)
437 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
325 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
438 self._chunkcache = (0, b'')
326 self._chunkcache = (0, b'')
439 # How much data to read and cache into the raw revlog data cache.
327 # How much data to read and cache into the raw revlog data cache.
440 self._chunkcachesize = 65536
328 self._chunkcachesize = 65536
441 self._maxchainlen = None
329 self._maxchainlen = None
442 self._deltabothparents = True
330 self._deltabothparents = True
443 self.index = None
331 self.index = None
444 self._nodemap_docket = None
332 self._nodemap_docket = None
445 # Mapping of partial identifiers to full nodes.
333 # Mapping of partial identifiers to full nodes.
446 self._pcache = {}
334 self._pcache = {}
447 # Mapping of revision integer to full node.
335 # Mapping of revision integer to full node.
448 self._compengine = b'zlib'
336 self._compengine = b'zlib'
449 self._compengineopts = {}
337 self._compengineopts = {}
450 self._maxdeltachainspan = -1
338 self._maxdeltachainspan = -1
451 self._withsparseread = False
339 self._withsparseread = False
452 self._sparserevlog = False
340 self._sparserevlog = False
453 self._srdensitythreshold = 0.50
341 self._srdensitythreshold = 0.50
454 self._srmingapsize = 262144
342 self._srmingapsize = 262144
455
343
456 # Make copy of flag processors so each revlog instance can support
344 # Make copy of flag processors so each revlog instance can support
457 # custom flags.
345 # custom flags.
458 self._flagprocessors = dict(flagutil.flagprocessors)
346 self._flagprocessors = dict(flagutil.flagprocessors)
459
347
460 # 2-tuple of file handles being used for active writing.
348 # 2-tuple of file handles being used for active writing.
461 self._writinghandles = None
349 self._writinghandles = None
462
350
463 self._loadindex()
351 self._loadindex()
464
352
465 self._concurrencychecker = concurrencychecker
353 self._concurrencychecker = concurrencychecker
466
354
467 def _loadindex(self):
355 def _loadindex(self):
468 mmapindexthreshold = None
356 mmapindexthreshold = None
469 opts = self.opener.options
357 opts = self.opener.options
470
358
471 if b'revlogv2' in opts:
359 if b'revlogv2' in opts:
472 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
360 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
473 elif b'revlogv1' in opts:
361 elif b'revlogv1' in opts:
474 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
362 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
475 if b'generaldelta' in opts:
363 if b'generaldelta' in opts:
476 newversionflags |= FLAG_GENERALDELTA
364 newversionflags |= FLAG_GENERALDELTA
477 elif b'revlogv0' in self.opener.options:
365 elif b'revlogv0' in self.opener.options:
478 newversionflags = REVLOGV0
366 newversionflags = REVLOGV0
479 else:
367 else:
480 newversionflags = REVLOG_DEFAULT_VERSION
368 newversionflags = REVLOG_DEFAULT_VERSION
481
369
482 if b'chunkcachesize' in opts:
370 if b'chunkcachesize' in opts:
483 self._chunkcachesize = opts[b'chunkcachesize']
371 self._chunkcachesize = opts[b'chunkcachesize']
484 if b'maxchainlen' in opts:
372 if b'maxchainlen' in opts:
485 self._maxchainlen = opts[b'maxchainlen']
373 self._maxchainlen = opts[b'maxchainlen']
486 if b'deltabothparents' in opts:
374 if b'deltabothparents' in opts:
487 self._deltabothparents = opts[b'deltabothparents']
375 self._deltabothparents = opts[b'deltabothparents']
488 self._lazydelta = bool(opts.get(b'lazydelta', True))
376 self._lazydelta = bool(opts.get(b'lazydelta', True))
489 self._lazydeltabase = False
377 self._lazydeltabase = False
490 if self._lazydelta:
378 if self._lazydelta:
491 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
379 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
492 if b'compengine' in opts:
380 if b'compengine' in opts:
493 self._compengine = opts[b'compengine']
381 self._compengine = opts[b'compengine']
494 if b'zlib.level' in opts:
382 if b'zlib.level' in opts:
495 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
383 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
496 if b'zstd.level' in opts:
384 if b'zstd.level' in opts:
497 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
385 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
498 if b'maxdeltachainspan' in opts:
386 if b'maxdeltachainspan' in opts:
499 self._maxdeltachainspan = opts[b'maxdeltachainspan']
387 self._maxdeltachainspan = opts[b'maxdeltachainspan']
500 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
388 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
501 mmapindexthreshold = opts[b'mmapindexthreshold']
389 mmapindexthreshold = opts[b'mmapindexthreshold']
502 self.hassidedata = bool(opts.get(b'side-data', False))
390 self.hassidedata = bool(opts.get(b'side-data', False))
503 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
391 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
504 withsparseread = bool(opts.get(b'with-sparse-read', False))
392 withsparseread = bool(opts.get(b'with-sparse-read', False))
505 # sparse-revlog forces sparse-read
393 # sparse-revlog forces sparse-read
506 self._withsparseread = self._sparserevlog or withsparseread
394 self._withsparseread = self._sparserevlog or withsparseread
507 if b'sparse-read-density-threshold' in opts:
395 if b'sparse-read-density-threshold' in opts:
508 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
396 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
509 if b'sparse-read-min-gap-size' in opts:
397 if b'sparse-read-min-gap-size' in opts:
510 self._srmingapsize = opts[b'sparse-read-min-gap-size']
398 self._srmingapsize = opts[b'sparse-read-min-gap-size']
511 if opts.get(b'enableellipsis'):
399 if opts.get(b'enableellipsis'):
512 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
400 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
513
401
514 # revlog v0 doesn't have flag processors
402 # revlog v0 doesn't have flag processors
515 for flag, processor in pycompat.iteritems(
403 for flag, processor in pycompat.iteritems(
516 opts.get(b'flagprocessors', {})
404 opts.get(b'flagprocessors', {})
517 ):
405 ):
518 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
406 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
519
407
520 if self._chunkcachesize <= 0:
408 if self._chunkcachesize <= 0:
521 raise error.RevlogError(
409 raise error.RevlogError(
522 _(b'revlog chunk cache size %r is not greater than 0')
410 _(b'revlog chunk cache size %r is not greater than 0')
523 % self._chunkcachesize
411 % self._chunkcachesize
524 )
412 )
525 elif self._chunkcachesize & (self._chunkcachesize - 1):
413 elif self._chunkcachesize & (self._chunkcachesize - 1):
526 raise error.RevlogError(
414 raise error.RevlogError(
527 _(b'revlog chunk cache size %r is not a power of 2')
415 _(b'revlog chunk cache size %r is not a power of 2')
528 % self._chunkcachesize
416 % self._chunkcachesize
529 )
417 )
530
418
531 indexdata = b''
419 indexdata = b''
532 self._initempty = True
420 self._initempty = True
533 try:
421 try:
534 with self._indexfp() as f:
422 with self._indexfp() as f:
535 if (
423 if (
536 mmapindexthreshold is not None
424 mmapindexthreshold is not None
537 and self.opener.fstat(f).st_size >= mmapindexthreshold
425 and self.opener.fstat(f).st_size >= mmapindexthreshold
538 ):
426 ):
539 # TODO: should .close() to release resources without
427 # TODO: should .close() to release resources without
540 # relying on Python GC
428 # relying on Python GC
541 indexdata = util.buffer(util.mmapread(f))
429 indexdata = util.buffer(util.mmapread(f))
542 else:
430 else:
543 indexdata = f.read()
431 indexdata = f.read()
544 if len(indexdata) > 0:
432 if len(indexdata) > 0:
545 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
433 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
546 self._initempty = False
434 self._initempty = False
547 else:
435 else:
548 versionflags = newversionflags
436 versionflags = newversionflags
549 except IOError as inst:
437 except IOError as inst:
550 if inst.errno != errno.ENOENT:
438 if inst.errno != errno.ENOENT:
551 raise
439 raise
552
440
553 versionflags = newversionflags
441 versionflags = newversionflags
554
442
555 self.version = versionflags
443 self.version = versionflags
556
444
557 flags = versionflags & ~0xFFFF
445 flags = versionflags & ~0xFFFF
558 fmt = versionflags & 0xFFFF
446 fmt = versionflags & 0xFFFF
559
447
560 if fmt == REVLOGV0:
448 if fmt == REVLOGV0:
561 if flags:
449 if flags:
562 raise error.RevlogError(
450 raise error.RevlogError(
563 _(b'unknown flags (%#04x) in version %d revlog %s')
451 _(b'unknown flags (%#04x) in version %d revlog %s')
564 % (flags >> 16, fmt, self.indexfile)
452 % (flags >> 16, fmt, self.indexfile)
565 )
453 )
566
454
567 self._inline = False
455 self._inline = False
568 self._generaldelta = False
456 self._generaldelta = False
569
457
570 elif fmt == REVLOGV1:
458 elif fmt == REVLOGV1:
571 if flags & ~REVLOGV1_FLAGS:
459 if flags & ~REVLOGV1_FLAGS:
572 raise error.RevlogError(
460 raise error.RevlogError(
573 _(b'unknown flags (%#04x) in version %d revlog %s')
461 _(b'unknown flags (%#04x) in version %d revlog %s')
574 % (flags >> 16, fmt, self.indexfile)
462 % (flags >> 16, fmt, self.indexfile)
575 )
463 )
576
464
577 self._inline = versionflags & FLAG_INLINE_DATA
465 self._inline = versionflags & FLAG_INLINE_DATA
578 self._generaldelta = versionflags & FLAG_GENERALDELTA
466 self._generaldelta = versionflags & FLAG_GENERALDELTA
579
467
580 elif fmt == REVLOGV2:
468 elif fmt == REVLOGV2:
581 if flags & ~REVLOGV2_FLAGS:
469 if flags & ~REVLOGV2_FLAGS:
582 raise error.RevlogError(
470 raise error.RevlogError(
583 _(b'unknown flags (%#04x) in version %d revlog %s')
471 _(b'unknown flags (%#04x) in version %d revlog %s')
584 % (flags >> 16, fmt, self.indexfile)
472 % (flags >> 16, fmt, self.indexfile)
585 )
473 )
586
474
587 # There is a bug in the transaction handling when going from an
475 # There is a bug in the transaction handling when going from an
588 # inline revlog to a separate index and data file. Turn it off until
476 # inline revlog to a separate index and data file. Turn it off until
589 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
477 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
590 # See issue6485
478 # See issue6485
591 self._inline = False
479 self._inline = False
592 # generaldelta implied by version 2 revlogs.
480 # generaldelta implied by version 2 revlogs.
593 self._generaldelta = True
481 self._generaldelta = True
594
482
595 else:
483 else:
596 raise error.RevlogError(
484 raise error.RevlogError(
597 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
485 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
598 )
486 )
599
487
600 self.nodeconstants = sha1nodeconstants
488 self.nodeconstants = sha1nodeconstants
601 self.nullid = self.nodeconstants.nullid
489 self.nullid = self.nodeconstants.nullid
602
490
603 # sparse-revlog can't be on without general-delta (issue6056)
491 # sparse-revlog can't be on without general-delta (issue6056)
604 if not self._generaldelta:
492 if not self._generaldelta:
605 self._sparserevlog = False
493 self._sparserevlog = False
606
494
607 self._storedeltachains = True
495 self._storedeltachains = True
608
496
609 devel_nodemap = (
497 devel_nodemap = (
610 self.nodemap_file
498 self.nodemap_file
611 and opts.get(b'devel-force-nodemap', False)
499 and opts.get(b'devel-force-nodemap', False)
612 and parse_index_v1_nodemap is not None
500 and parse_index_v1_nodemap is not None
613 )
501 )
614
502
615 use_rust_index = False
503 use_rust_index = False
616 if rustrevlog is not None:
504 if rustrevlog is not None:
617 if self.nodemap_file is not None:
505 if self.nodemap_file is not None:
618 use_rust_index = True
506 use_rust_index = True
619 else:
507 else:
620 use_rust_index = self.opener.options.get(b'rust.index')
508 use_rust_index = self.opener.options.get(b'rust.index')
621
509
622 self._parse_index = parse_index_v1
510 self._parse_index = parse_index_v1
623 if self.version == REVLOGV0:
511 if self.version == REVLOGV0:
624 self._parse_index = parse_index_v0
512 self._parse_index = revlogv0.parse_index_v0
625 elif fmt == REVLOGV2:
513 elif fmt == REVLOGV2:
626 self._parse_index = parse_index_v2
514 self._parse_index = parse_index_v2
627 elif devel_nodemap:
515 elif devel_nodemap:
628 self._parse_index = parse_index_v1_nodemap
516 self._parse_index = parse_index_v1_nodemap
629 elif use_rust_index:
517 elif use_rust_index:
630 self._parse_index = parse_index_v1_mixed
518 self._parse_index = parse_index_v1_mixed
631 try:
519 try:
632 d = self._parse_index(indexdata, self._inline)
520 d = self._parse_index(indexdata, self._inline)
633 index, _chunkcache = d
521 index, _chunkcache = d
634 use_nodemap = (
522 use_nodemap = (
635 not self._inline
523 not self._inline
636 and self.nodemap_file is not None
524 and self.nodemap_file is not None
637 and util.safehasattr(index, 'update_nodemap_data')
525 and util.safehasattr(index, 'update_nodemap_data')
638 )
526 )
639 if use_nodemap:
527 if use_nodemap:
640 nodemap_data = nodemaputil.persisted_data(self)
528 nodemap_data = nodemaputil.persisted_data(self)
641 if nodemap_data is not None:
529 if nodemap_data is not None:
642 docket = nodemap_data[0]
530 docket = nodemap_data[0]
643 if (
531 if (
644 len(d[0]) > docket.tip_rev
532 len(d[0]) > docket.tip_rev
645 and d[0][docket.tip_rev][7] == docket.tip_node
533 and d[0][docket.tip_rev][7] == docket.tip_node
646 ):
534 ):
647 # no changelog tampering
535 # no changelog tampering
648 self._nodemap_docket = docket
536 self._nodemap_docket = docket
649 index.update_nodemap_data(*nodemap_data)
537 index.update_nodemap_data(*nodemap_data)
650 except (ValueError, IndexError):
538 except (ValueError, IndexError):
651 raise error.RevlogError(
539 raise error.RevlogError(
652 _(b"index %s is corrupted") % self.indexfile
540 _(b"index %s is corrupted") % self.indexfile
653 )
541 )
654 self.index, self._chunkcache = d
542 self.index, self._chunkcache = d
655 if not self._chunkcache:
543 if not self._chunkcache:
656 self._chunkclear()
544 self._chunkclear()
657 # revnum -> (chain-length, sum-delta-length)
545 # revnum -> (chain-length, sum-delta-length)
658 self._chaininfocache = util.lrucachedict(500)
546 self._chaininfocache = util.lrucachedict(500)
659 # revlog header -> revlog compressor
547 # revlog header -> revlog compressor
660 self._decompressors = {}
548 self._decompressors = {}
661
549
662 @util.propertycache
550 @util.propertycache
663 def _compressor(self):
551 def _compressor(self):
664 engine = util.compengines[self._compengine]
552 engine = util.compengines[self._compengine]
665 return engine.revlogcompressor(self._compengineopts)
553 return engine.revlogcompressor(self._compengineopts)
666
554
667 def _indexfp(self, mode=b'r'):
555 def _indexfp(self, mode=b'r'):
668 """file object for the revlog's index file"""
556 """file object for the revlog's index file"""
669 args = {'mode': mode}
557 args = {'mode': mode}
670 if mode != b'r':
558 if mode != b'r':
671 args['checkambig'] = self._checkambig
559 args['checkambig'] = self._checkambig
672 if mode == b'w':
560 if mode == b'w':
673 args['atomictemp'] = True
561 args['atomictemp'] = True
674 return self.opener(self.indexfile, **args)
562 return self.opener(self.indexfile, **args)
675
563
676 def _datafp(self, mode=b'r'):
564 def _datafp(self, mode=b'r'):
677 """file object for the revlog's data file"""
565 """file object for the revlog's data file"""
678 return self.opener(self.datafile, mode=mode)
566 return self.opener(self.datafile, mode=mode)
679
567
680 @contextlib.contextmanager
568 @contextlib.contextmanager
681 def _datareadfp(self, existingfp=None):
569 def _datareadfp(self, existingfp=None):
682 """file object suitable to read data"""
570 """file object suitable to read data"""
683 # Use explicit file handle, if given.
571 # Use explicit file handle, if given.
684 if existingfp is not None:
572 if existingfp is not None:
685 yield existingfp
573 yield existingfp
686
574
687 # Use a file handle being actively used for writes, if available.
575 # Use a file handle being actively used for writes, if available.
688 # There is some danger to doing this because reads will seek the
576 # There is some danger to doing this because reads will seek the
689 # file. However, _writeentry() performs a SEEK_END before all writes,
577 # file. However, _writeentry() performs a SEEK_END before all writes,
690 # so we should be safe.
578 # so we should be safe.
691 elif self._writinghandles:
579 elif self._writinghandles:
692 if self._inline:
580 if self._inline:
693 yield self._writinghandles[0]
581 yield self._writinghandles[0]
694 else:
582 else:
695 yield self._writinghandles[1]
583 yield self._writinghandles[1]
696
584
697 # Otherwise open a new file handle.
585 # Otherwise open a new file handle.
698 else:
586 else:
699 if self._inline:
587 if self._inline:
700 func = self._indexfp
588 func = self._indexfp
701 else:
589 else:
702 func = self._datafp
590 func = self._datafp
703 with func() as fp:
591 with func() as fp:
704 yield fp
592 yield fp
705
593
706 def tiprev(self):
594 def tiprev(self):
707 return len(self.index) - 1
595 return len(self.index) - 1
708
596
709 def tip(self):
597 def tip(self):
710 return self.node(self.tiprev())
598 return self.node(self.tiprev())
711
599
712 def __contains__(self, rev):
600 def __contains__(self, rev):
713 return 0 <= rev < len(self)
601 return 0 <= rev < len(self)
714
602
715 def __len__(self):
603 def __len__(self):
716 return len(self.index)
604 return len(self.index)
717
605
718 def __iter__(self):
606 def __iter__(self):
719 return iter(pycompat.xrange(len(self)))
607 return iter(pycompat.xrange(len(self)))
720
608
721 def revs(self, start=0, stop=None):
609 def revs(self, start=0, stop=None):
722 """iterate over all rev in this revlog (from start to stop)"""
610 """iterate over all rev in this revlog (from start to stop)"""
723 return storageutil.iterrevs(len(self), start=start, stop=stop)
611 return storageutil.iterrevs(len(self), start=start, stop=stop)
724
612
725 @property
613 @property
726 def nodemap(self):
614 def nodemap(self):
727 msg = (
615 msg = (
728 b"revlog.nodemap is deprecated, "
616 b"revlog.nodemap is deprecated, "
729 b"use revlog.index.[has_node|rev|get_rev]"
617 b"use revlog.index.[has_node|rev|get_rev]"
730 )
618 )
731 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
619 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
732 return self.index.nodemap
620 return self.index.nodemap
733
621
734 @property
622 @property
735 def _nodecache(self):
623 def _nodecache(self):
736 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
624 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
737 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
625 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
738 return self.index.nodemap
626 return self.index.nodemap
739
627
740 def hasnode(self, node):
628 def hasnode(self, node):
741 try:
629 try:
742 self.rev(node)
630 self.rev(node)
743 return True
631 return True
744 except KeyError:
632 except KeyError:
745 return False
633 return False
746
634
747 def candelta(self, baserev, rev):
635 def candelta(self, baserev, rev):
748 """whether two revisions (baserev, rev) can be delta-ed or not"""
636 """whether two revisions (baserev, rev) can be delta-ed or not"""
749 # Disable delta if either rev requires a content-changing flag
637 # Disable delta if either rev requires a content-changing flag
750 # processor (ex. LFS). This is because such flag processor can alter
638 # processor (ex. LFS). This is because such flag processor can alter
751 # the rawtext content that the delta will be based on, and two clients
639 # the rawtext content that the delta will be based on, and two clients
752 # could have a same revlog node with different flags (i.e. different
640 # could have a same revlog node with different flags (i.e. different
753 # rawtext contents) and the delta could be incompatible.
641 # rawtext contents) and the delta could be incompatible.
754 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
642 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
755 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
643 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
756 ):
644 ):
757 return False
645 return False
758 return True
646 return True
759
647
760 def update_caches(self, transaction):
648 def update_caches(self, transaction):
761 if self.nodemap_file is not None:
649 if self.nodemap_file is not None:
762 if transaction is None:
650 if transaction is None:
763 nodemaputil.update_persistent_nodemap(self)
651 nodemaputil.update_persistent_nodemap(self)
764 else:
652 else:
765 nodemaputil.setup_persistent_nodemap(transaction, self)
653 nodemaputil.setup_persistent_nodemap(transaction, self)
766
654
767 def clearcaches(self):
655 def clearcaches(self):
768 self._revisioncache = None
656 self._revisioncache = None
769 self._chainbasecache.clear()
657 self._chainbasecache.clear()
770 self._chunkcache = (0, b'')
658 self._chunkcache = (0, b'')
771 self._pcache = {}
659 self._pcache = {}
772 self._nodemap_docket = None
660 self._nodemap_docket = None
773 self.index.clearcaches()
661 self.index.clearcaches()
774 # The python code is the one responsible for validating the docket, we
662 # The python code is the one responsible for validating the docket, we
775 # end up having to refresh it here.
663 # end up having to refresh it here.
776 use_nodemap = (
664 use_nodemap = (
777 not self._inline
665 not self._inline
778 and self.nodemap_file is not None
666 and self.nodemap_file is not None
779 and util.safehasattr(self.index, 'update_nodemap_data')
667 and util.safehasattr(self.index, 'update_nodemap_data')
780 )
668 )
781 if use_nodemap:
669 if use_nodemap:
782 nodemap_data = nodemaputil.persisted_data(self)
670 nodemap_data = nodemaputil.persisted_data(self)
783 if nodemap_data is not None:
671 if nodemap_data is not None:
784 self._nodemap_docket = nodemap_data[0]
672 self._nodemap_docket = nodemap_data[0]
785 self.index.update_nodemap_data(*nodemap_data)
673 self.index.update_nodemap_data(*nodemap_data)
786
674
787 def rev(self, node):
675 def rev(self, node):
788 try:
676 try:
789 return self.index.rev(node)
677 return self.index.rev(node)
790 except TypeError:
678 except TypeError:
791 raise
679 raise
792 except error.RevlogError:
680 except error.RevlogError:
793 # parsers.c radix tree lookup failed
681 # parsers.c radix tree lookup failed
794 if (
682 if (
795 node == self.nodeconstants.wdirid
683 node == self.nodeconstants.wdirid
796 or node in self.nodeconstants.wdirfilenodeids
684 or node in self.nodeconstants.wdirfilenodeids
797 ):
685 ):
798 raise error.WdirUnsupported
686 raise error.WdirUnsupported
799 raise error.LookupError(node, self.indexfile, _(b'no node'))
687 raise error.LookupError(node, self.indexfile, _(b'no node'))
800
688
801 # Accessors for index entries.
689 # Accessors for index entries.
802
690
803 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
691 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
804 # are flags.
692 # are flags.
805 def start(self, rev):
693 def start(self, rev):
806 return int(self.index[rev][0] >> 16)
694 return int(self.index[rev][0] >> 16)
807
695
808 def flags(self, rev):
696 def flags(self, rev):
809 return self.index[rev][0] & 0xFFFF
697 return self.index[rev][0] & 0xFFFF
810
698
811 def length(self, rev):
699 def length(self, rev):
812 return self.index[rev][1]
700 return self.index[rev][1]
813
701
814 def sidedata_length(self, rev):
702 def sidedata_length(self, rev):
815 if self.version & 0xFFFF != REVLOGV2:
703 if self.version & 0xFFFF != REVLOGV2:
816 return 0
704 return 0
817 return self.index[rev][9]
705 return self.index[rev][9]
818
706
819 def rawsize(self, rev):
707 def rawsize(self, rev):
820 """return the length of the uncompressed text for a given revision"""
708 """return the length of the uncompressed text for a given revision"""
821 l = self.index[rev][2]
709 l = self.index[rev][2]
822 if l >= 0:
710 if l >= 0:
823 return l
711 return l
824
712
825 t = self.rawdata(rev)
713 t = self.rawdata(rev)
826 return len(t)
714 return len(t)
827
715
828 def size(self, rev):
716 def size(self, rev):
829 """length of non-raw text (processed by a "read" flag processor)"""
717 """length of non-raw text (processed by a "read" flag processor)"""
830 # fast path: if no "read" flag processor could change the content,
718 # fast path: if no "read" flag processor could change the content,
831 # size is rawsize. note: ELLIPSIS is known to not change the content.
719 # size is rawsize. note: ELLIPSIS is known to not change the content.
832 flags = self.flags(rev)
720 flags = self.flags(rev)
833 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
721 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
834 return self.rawsize(rev)
722 return self.rawsize(rev)
835
723
836 return len(self.revision(rev, raw=False))
724 return len(self.revision(rev, raw=False))
837
725
838 def chainbase(self, rev):
726 def chainbase(self, rev):
839 base = self._chainbasecache.get(rev)
727 base = self._chainbasecache.get(rev)
840 if base is not None:
728 if base is not None:
841 return base
729 return base
842
730
843 index = self.index
731 index = self.index
844 iterrev = rev
732 iterrev = rev
845 base = index[iterrev][3]
733 base = index[iterrev][3]
846 while base != iterrev:
734 while base != iterrev:
847 iterrev = base
735 iterrev = base
848 base = index[iterrev][3]
736 base = index[iterrev][3]
849
737
850 self._chainbasecache[rev] = base
738 self._chainbasecache[rev] = base
851 return base
739 return base
852
740
853 def linkrev(self, rev):
741 def linkrev(self, rev):
854 return self.index[rev][4]
742 return self.index[rev][4]
855
743
856 def parentrevs(self, rev):
744 def parentrevs(self, rev):
857 try:
745 try:
858 entry = self.index[rev]
746 entry = self.index[rev]
859 except IndexError:
747 except IndexError:
860 if rev == wdirrev:
748 if rev == wdirrev:
861 raise error.WdirUnsupported
749 raise error.WdirUnsupported
862 raise
750 raise
863 if entry[5] == nullrev:
751 if entry[5] == nullrev:
864 return entry[6], entry[5]
752 return entry[6], entry[5]
865 else:
753 else:
866 return entry[5], entry[6]
754 return entry[5], entry[6]
867
755
868 # fast parentrevs(rev) where rev isn't filtered
756 # fast parentrevs(rev) where rev isn't filtered
869 _uncheckedparentrevs = parentrevs
757 _uncheckedparentrevs = parentrevs
870
758
871 def node(self, rev):
759 def node(self, rev):
872 try:
760 try:
873 return self.index[rev][7]
761 return self.index[rev][7]
874 except IndexError:
762 except IndexError:
875 if rev == wdirrev:
763 if rev == wdirrev:
876 raise error.WdirUnsupported
764 raise error.WdirUnsupported
877 raise
765 raise
878
766
879 # Derived from index values.
767 # Derived from index values.
880
768
881 def end(self, rev):
769 def end(self, rev):
882 return self.start(rev) + self.length(rev)
770 return self.start(rev) + self.length(rev)
883
771
884 def parents(self, node):
772 def parents(self, node):
885 i = self.index
773 i = self.index
886 d = i[self.rev(node)]
774 d = i[self.rev(node)]
887 # inline node() to avoid function call overhead
775 # inline node() to avoid function call overhead
888 if d[5] == self.nullid:
776 if d[5] == self.nullid:
889 return i[d[6]][7], i[d[5]][7]
777 return i[d[6]][7], i[d[5]][7]
890 else:
778 else:
891 return i[d[5]][7], i[d[6]][7]
779 return i[d[5]][7], i[d[6]][7]
892
780
893 def chainlen(self, rev):
781 def chainlen(self, rev):
894 return self._chaininfo(rev)[0]
782 return self._chaininfo(rev)[0]
895
783
896 def _chaininfo(self, rev):
784 def _chaininfo(self, rev):
897 chaininfocache = self._chaininfocache
785 chaininfocache = self._chaininfocache
898 if rev in chaininfocache:
786 if rev in chaininfocache:
899 return chaininfocache[rev]
787 return chaininfocache[rev]
900 index = self.index
788 index = self.index
901 generaldelta = self._generaldelta
789 generaldelta = self._generaldelta
902 iterrev = rev
790 iterrev = rev
903 e = index[iterrev]
791 e = index[iterrev]
904 clen = 0
792 clen = 0
905 compresseddeltalen = 0
793 compresseddeltalen = 0
906 while iterrev != e[3]:
794 while iterrev != e[3]:
907 clen += 1
795 clen += 1
908 compresseddeltalen += e[1]
796 compresseddeltalen += e[1]
909 if generaldelta:
797 if generaldelta:
910 iterrev = e[3]
798 iterrev = e[3]
911 else:
799 else:
912 iterrev -= 1
800 iterrev -= 1
913 if iterrev in chaininfocache:
801 if iterrev in chaininfocache:
914 t = chaininfocache[iterrev]
802 t = chaininfocache[iterrev]
915 clen += t[0]
803 clen += t[0]
916 compresseddeltalen += t[1]
804 compresseddeltalen += t[1]
917 break
805 break
918 e = index[iterrev]
806 e = index[iterrev]
919 else:
807 else:
920 # Add text length of base since decompressing that also takes
808 # Add text length of base since decompressing that also takes
921 # work. For cache hits the length is already included.
809 # work. For cache hits the length is already included.
922 compresseddeltalen += e[1]
810 compresseddeltalen += e[1]
923 r = (clen, compresseddeltalen)
811 r = (clen, compresseddeltalen)
924 chaininfocache[rev] = r
812 chaininfocache[rev] = r
925 return r
813 return r
926
814
927 def _deltachain(self, rev, stoprev=None):
815 def _deltachain(self, rev, stoprev=None):
928 """Obtain the delta chain for a revision.
816 """Obtain the delta chain for a revision.
929
817
930 ``stoprev`` specifies a revision to stop at. If not specified, we
818 ``stoprev`` specifies a revision to stop at. If not specified, we
931 stop at the base of the chain.
819 stop at the base of the chain.
932
820
933 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
821 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
934 revs in ascending order and ``stopped`` is a bool indicating whether
822 revs in ascending order and ``stopped`` is a bool indicating whether
935 ``stoprev`` was hit.
823 ``stoprev`` was hit.
936 """
824 """
937 # Try C implementation.
825 # Try C implementation.
938 try:
826 try:
939 return self.index.deltachain(rev, stoprev, self._generaldelta)
827 return self.index.deltachain(rev, stoprev, self._generaldelta)
940 except AttributeError:
828 except AttributeError:
941 pass
829 pass
942
830
943 chain = []
831 chain = []
944
832
945 # Alias to prevent attribute lookup in tight loop.
833 # Alias to prevent attribute lookup in tight loop.
946 index = self.index
834 index = self.index
947 generaldelta = self._generaldelta
835 generaldelta = self._generaldelta
948
836
949 iterrev = rev
837 iterrev = rev
950 e = index[iterrev]
838 e = index[iterrev]
951 while iterrev != e[3] and iterrev != stoprev:
839 while iterrev != e[3] and iterrev != stoprev:
952 chain.append(iterrev)
840 chain.append(iterrev)
953 if generaldelta:
841 if generaldelta:
954 iterrev = e[3]
842 iterrev = e[3]
955 else:
843 else:
956 iterrev -= 1
844 iterrev -= 1
957 e = index[iterrev]
845 e = index[iterrev]
958
846
959 if iterrev == stoprev:
847 if iterrev == stoprev:
960 stopped = True
848 stopped = True
961 else:
849 else:
962 chain.append(iterrev)
850 chain.append(iterrev)
963 stopped = False
851 stopped = False
964
852
965 chain.reverse()
853 chain.reverse()
966 return chain, stopped
854 return chain, stopped
967
855
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

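    # Usage sketch (assuming ``rl`` is an open revlog): the returned object
    # is lazy, so iteration can stop early without walking the whole graph:
    #
    #   for anc in rl.ancestors([some_rev], inclusive=True):
    #       ...  # revisions are produced in reverse revision order
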
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

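    # Note on the ``lazyset`` helper above: membership checks consult the
    # eagerly added values first, then fall back to the lazy ancestor
    # container. A minimal sketch with hypothetical values:
    #
    #   has = lazyset(rl.ancestors([2, 5]))
    #   has.add(nullrev)
    #   assert nullrev in has  # found in addedvalues, no lazy walk needed
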
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

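    # Discovery-style sketch (hypothetical node lists ``remoteheads`` and
    # ``localheads``): the nodes a peer lacks could be computed as
    #
    #   missing = rl.findmissing(common=remoteheads, heads=localheads)
    #   # each returned node is an ancestor of some node in ``localheads``
    #   # and an ancestor of no node in ``remoteheads``, topologically sorted
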
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

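    # Illustrative call (hypothetical node lists): restricting a walk to the
    # region of the DAG between two frontiers:
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots, heads)
    #   # ``nodes`` is topologically sorted and includes the reachable
    #   # members of ``roots`` and ``heads`` themselves
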
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no rev is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

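    # Sketch of the three calling modes (``rl`` is a hypothetical revlog):
    #
    #   rl.heads()              # every childless node
    #   rl.heads(start=n)       # only heads descending from node n
    #   rl.heads(stop=[n, m])   # treat revs from n and m as childless
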
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

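    # Ancestry-check sketch: revision numbers grow topologically, so a rev
    # can never be an ancestor of a smaller rev. That is why isancestorrev()
    # above can short-circuit on ``a > b`` before paying for the
    # reachableroots() walk (revs below are hypothetical):
    #
    #   rl.isancestorrev(2, 7)  # may require the graph walk
    #   rl.isancestorrev(7, 2)  # False immediately, no walk
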
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

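    # Resolution-order sketch for lookup() (inputs are hypothetical): exact
    # matches via _match() win over prefix matches via _partialmatch():
    #
    #   rl.lookup(b'0')        # revision number given as bytes
    #   rl.lookup(binnode)     # a 20-byte binary nodeid
    #   rl.lookup(b'a1b2c3')   # an unambiguous hex prefix
    #   # ambiguous prefixes raise AmbiguousPrefixLookupError instead
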
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

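    # Worked example of the windowing math above, assuming the default
    # 64KiB chunk cache size (cachesize == 65536):
    #
    #   offset, length = 70000, 1000
    #   realoffset = 70000 & ~65535                               # 65536
    #   reallength = ((70000 + 1000 + 65536) & ~65535) - 65536    # 65536
    #   # bytes 65536..131071 are read and cached; the caller receives a
    #   # zero-copy buffer starting at 70000 - 65536 = 4464 within it
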
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

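    # Sketch of the inline adjustment above: in an inline revlog the index
    # entries and data chunks are interleaved in one file, so revision r's
    # data sits after r + 1 index entries. With a hypothetical entry_size
    # of 64 and startrev == 2, ``start`` shifts by (2 + 1) * 64 = 192 bytes
    # past the offset recorded in the index.
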
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

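    # Sparse-read sketch (values illustrative): with sparse reads enabled,
    # deltautil.slicechunk() may split revs (0, 1, 2, 50, 51) into the
    # slices (0, 1, 2) and (50, 51), so the two dense regions are fetched
    # with two reads instead of one large read spanning the unused gap.
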
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

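    # Relationship sketch (hypothetical rev ``r``): for a snapshot, every
    # member of its delta chain is itself a snapshot, so
    #
    #   rl.snapshotdepth(r) == len(rl._deltachain(r)[0]) - 1
    #
    # counts the snapshots below ``r`` in that chain.
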
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

1845 def sidedata(self, nodeorrev, _df=None):
1733 def sidedata(self, nodeorrev, _df=None):
1846 """a map of extra data related to the changeset but not part of the hash
1734 """a map of extra data related to the changeset but not part of the hash
1847
1735
1848 This function currently return a dictionary. However, more advanced
1736 This function currently return a dictionary. However, more advanced
1849 mapping object will likely be used in the future for a more
1737 mapping object will likely be used in the future for a more
1850 efficient/lazy code.
1738 efficient/lazy code.
1851 """
1739 """
1852 return self._revisiondata(nodeorrev, _df)[1]
1740 return self._revisiondata(nodeorrev, _df)[1]
1853
1741
1854 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1742 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1855 # deal with <nodeorrev> argument type
1743 # deal with <nodeorrev> argument type
1856 if isinstance(nodeorrev, int):
1744 if isinstance(nodeorrev, int):
1857 rev = nodeorrev
1745 rev = nodeorrev
1858 node = self.node(rev)
1746 node = self.node(rev)
1859 else:
1747 else:
1860 node = nodeorrev
1748 node = nodeorrev
1861 rev = None
1749 rev = None
1862
1750
1863 # fast path the special `nullid` rev
1751 # fast path the special `nullid` rev
1864 if node == self.nullid:
1752 if node == self.nullid:
1865 return b"", {}
1753 return b"", {}
1866
1754
1867 # ``rawtext`` is the text as stored inside the revlog. Might be the
1755 # ``rawtext`` is the text as stored inside the revlog. Might be the
1868 # revision or might need to be processed to retrieve the revision.
1756 # revision or might need to be processed to retrieve the revision.
1869 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1757 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1870
1758
1871 if self.version & 0xFFFF == REVLOGV2:
1759 if self.version & 0xFFFF == REVLOGV2:
1872 if rev is None:
1760 if rev is None:
1873 rev = self.rev(node)
1761 rev = self.rev(node)
1874 sidedata = self._sidedata(rev)
1762 sidedata = self._sidedata(rev)
1875 else:
1763 else:
1876 sidedata = {}
1764 sidedata = {}
1877
1765
1878 if raw and validated:
1766 if raw and validated:
1879 # if we don't want to process the raw text and that raw
1767 # if we don't want to process the raw text and that raw
1880 # text is cached, we can exit early.
1768 # text is cached, we can exit early.
1881 return rawtext, sidedata
1769 return rawtext, sidedata
1882 if rev is None:
1770 if rev is None:
1883 rev = self.rev(node)
1771 rev = self.rev(node)
1884 # the revlog's flag for this revision
1772 # the revlog's flag for this revision
1885 # (usually alter its state or content)
1773 # (usually alter its state or content)
1886 flags = self.flags(rev)
1774 flags = self.flags(rev)
1887
1775
1888 if validated and flags == REVIDX_DEFAULT_FLAGS:
1776 if validated and flags == REVIDX_DEFAULT_FLAGS:
1889 # no extra flags set, no flag processor runs, text = rawtext
1777 # no extra flags set, no flag processor runs, text = rawtext
1890 return rawtext, sidedata
1778 return rawtext, sidedata
1891
1779
1892 if raw:
1780 if raw:
1893 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1781 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1894 text = rawtext
1782 text = rawtext
1895 else:
1783 else:
1896 r = flagutil.processflagsread(self, rawtext, flags)
1784 r = flagutil.processflagsread(self, rawtext, flags)
1897 text, validatehash = r
1785 text, validatehash = r
1898 if validatehash:
1786 if validatehash:
1899 self.checkhash(text, node, rev=rev)
1787 self.checkhash(text, node, rev=rev)
1900 if not validated:
1788 if not validated:
1901 self._revisioncache = (node, rev, rawtext)
1789 self._revisioncache = (node, rev, rawtext)
1902
1790
1903 return text, sidedata
1791 return text, sidedata
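
    # Illustration (a sketch, not normative behavior): when no extra flags
    # are set on a revision, no flag processor runs and the processed text is
    # byte-identical to the stored text, so the two entry points agree:
    #
    #     if rlog.flags(rev) == REVIDX_DEFAULT_FLAGS:
    #         assert rlog.revision(rev) == rlog.rawdata(rev)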

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)
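
    # Reconstruction in a nutshell (sketch only): a delta chain is a full
    # text followed by binary patches, and folding them with mdiff.patches()
    # yields the raw text. Ignoring the cache fast path above:
    #
    #     chain, _stopped = rlog._deltachain(rev)
    #     chunks = rlog._chunks(chain)
    #     rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])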

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata
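
    # The inline adjustment above, worked through (illustration only): in an
    # inline revlog the data belonging to revision ``rev`` is preceded by
    # ``rev + 1`` index entries (one per revision 0..rev), so a stored offset
    # must be shifted by that much. Assuming a 64-byte entry size and
    # rev == 2:
    #
    #     sidedata_offset += 64 * (1 + 2)   # skip 192 bytes of index entries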

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
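
    # What hashrevisionsha1() computes (a sketch; see storageutil for the
    # canonical implementation): the SHA-1 of both parent nodes, sorted, then
    # the text. Sorting makes the node independent of parent order:
    #
    #     import hashlib
    #     def sketch_node(text, p1, p2):
    #         a, b = sorted([p1, p2])
    #         return hashlib.sha1(a + b + text).digest()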

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self.index.pack_header(self.version)
                    e = header + e
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()
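
    # The conversion above, in outline (a sketch; ``data_segment`` and
    # ``rewrite_index`` are hypothetical helpers): an inline revlog stores
    # "<entry 0><data 0><entry 1><data 1>..." in the .i file. Splitting it
    # copies every data segment into a fresh .d file, then rewrites the .i
    # file with index entries only and the inline flag cleared:
    #
    #     for r in rlog:                       # copy data out of the .i file
    #         dfh.write(data_segment(r))
    #     rlog.version &= ~FLAG_INLINE_DATA    # flip the format flag
    #     rewrite_index(rlog)                  # write entries only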

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data
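
    # The (header, data) contract of compress(), spelled out (sketch only):
    # an empty header means the payload is self-identifying (e.g. zlib's
    # 'x' byte), b'u' marks an uncompressed chunk, and data that naturally
    # starts with '\0' needs no marker at all. decompress() below inverts
    # this by switching on the first byte:
    #
    #     header, payload = rlog.compress(b'some revision data')
    #     chunk = header + payload                  # what lands on disk
    #     assert bytes(rlog.decompress(chunk)) == b'some revision data'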

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)
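
    # A minimal standalone model of the routing above (a sketch assuming
    # only the three built-in headers, not the pluggable engines):
    #
    #     import zlib
    #     def sketch_decompress(chunk):
    #         t = chunk[0:1]
    #         if t == b'x':     # a zlib stream; its header is part of the data
    #             return zlib.decompress(chunk)
    #         if t == b'\0':    # literal data, stored as-is
    #             return chunk
    #         if t == b'u':     # uncompressed; strip the one-byte marker
    #             return chunk[1:]
    #         raise ValueError('unknown chunk header %r' % t)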

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self.index.pack_header(self.version)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset
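
    # The O(n) scan above, worked through (illustration with made-up
    # numbers): say rev 0 ends at byte 100, rev 1 ends at byte 180, and
    # rev 0's sidedata was rewritten to [180, 220) after rev 1 landed.
    # end(prev) would answer 180, but the next write must start at 220:
    #
    #     offset = max(end(0), end(1), 180 + 40)    # == 220, not end(1)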

    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]
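
    # Truncation offsets, worked through (sketch only): stripping at ``rev``
    # keeps revisions 0..rev-1. For a split revlog the .d file is cut at
    # start(rev) and the .i file at rev * entry_size; for an inline revlog
    # both streams live in the .i file, so the two sizes add up:
    #
    #     # split:  .d -> start(rev);  .i -> rev * entry_size
    #     # inline: .i -> start(rev) + rev * entry_size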

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )
2743
2631
2744 DELTAREUSEALWAYS = b'always'
2632 DELTAREUSEALWAYS = b'always'
2745 DELTAREUSESAMEREVS = b'samerevs'
2633 DELTAREUSESAMEREVS = b'samerevs'
2746 DELTAREUSENEVER = b'never'
2634 DELTAREUSENEVER = b'never'
2747
2635
2748 DELTAREUSEFULLADD = b'fulladd'
2636 DELTAREUSEFULLADD = b'fulladd'
2749
2637
2750 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2638 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2751
2639
    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. If None, the destination revlog's existing setting
        is kept.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a quintuple that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update, new_flags, dropped_flags)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        * `new_flags` is a bitfield of new flags that the revision should get
        * `dropped_flags` is a bitfield of flags that the revision should no
          longer have
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

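    # A minimal usage sketch (illustrative only; ``srcrevlog``, ``destrevlog``
    # and ``tr`` are assumed to already exist). A ``sidedatacompanion`` that
    # drops all sidedata returns the quintuple described in the docstring:
    #
    #   def drop_all_sidedata(srcrevlog, rev):
    #       # (dropall, filterout, update, new_flags, dropped_flags)
    #       return (True, set(), {}, 0, 0)
    #
    #   srcrevlog.clone(
    #       tr,
    #       destrevlog,
    #       deltareuse=srcrevlog.DELTAREUSENEVER,  # recompute every delta
    #       sidedatacompanion=drop_all_sidedata,
    #   )
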
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {}, 0, 0)
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall = sidedataactions[0]
                filterout = sidedataactions[1]
                update = sidedataactions[2]
                new_flags = sidedataactions[3]
                dropped_flags = sidedataactions[4]
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None

                flags |= new_flags
                flags &= ~dropped_flags

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

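    # For reference: ``_clone`` above consumes index entries as 8-tuples (a
    # sketch of how the fields are read here, not a format specification):
    #
    #   entry[0]  ->  data offset (high bits) and flags (low 16 bits)
    #   entry[4]  ->  linkrev
    #   entry[5]  ->  first parent, as a revision number
    #   entry[6]  ->  second parent, as a revision number
    #   entry[7]  ->  node id
    #
    # which is why ``entry[0] & 0xFFFF`` extracts the flags and parents are
    # resolved to node ids via ``index[entry[5]][7]``.
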
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

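    # Hedged usage sketch (``rl``, ``tr`` and ``bad_node`` are assumed to
    # exist; the tombstone text is arbitrary):
    #
    #   rl.censorrevision(tr, bad_node, tombstone=b'removed: sensitive data')
    #
    # Because the rewritten revlog must not grow, the packed tombstone has to
    # fit within the rawsize of the censored revision, as checked above.
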
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                      | common | rename | meta  | ext
            # -------------------------------------------------------
            # flags()              | 0      | 0      | 0     | not 0
            # renamed()            | False  | True   | False | ?
            # rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

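    # Consuming the generator (a sketch; ``rl`` is an open revlog and the
    # ``state`` keys mirror the ones read above):
    #
    #   state = {b'expectedversion': 1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       print(problem.warning or problem.error)
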
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

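    # Example (sketch): requesting only the cheap statistics avoids the
    # stat() calls that ``storedsize`` would trigger:
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # -> {b'revisionscount': ..., b'trackedsize': ...}
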
    def rewrite_sidedata(self, helpers, startrev, endrev):
        if self.version & 0xFFFF != REVLOGV2:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only operation).
        # See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata = storageutil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)
                entry = entry[:8]
                entry += (current_offset, len(serialized_sidedata))

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'w+') as fp:
            fp.seek(startrev * self.index.entry_size)
            for i, entry in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, entry[8], entry[9])
                packed = self.index.entry_binary(rev)
                if rev == 0:
                    header = self.index.pack_header(self.version)
                    packed = header + packed
                fp.write(packed)
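
    # Shape of ``helpers`` assumed by ``rewrite_sidedata`` above (inferred
    # from the checks on ``helpers[1]`` and ``helpers[2]``, not a documented
    # contract): a triple whose second and third items hold the sidedata
    # generators and removers; when both are empty the rewrite is a no-op.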