revlog: split the content verification of a node into a separate method...
Matt Harbison
r44409:7c3118b9 default
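The new module-level _verify_revision() gives verification a single per-node
hook point, so an extension can influence how each node is checked by wrapping
one small function instead of reimplementing the whole verify loop. A minimal
sketch of such a wrapper (assumed extension code, not part of this change; the
EXTSTORED flag policy is purely illustrative):

    from mercurial import extensions, revlog

    def _skipextstored(orig, rl, skipflags, state, node):
        # Hypothetical policy: also skip full reads for revisions whose
        # rawtext is stored externally (LFS-style storage).
        skipflags |= rl.flags(rl.rev(node)) & revlog.REVIDX_EXTSTORED
        return orig(rl, skipflags, state, node)

    def extsetup(ui):
        extensions.wrapfunction(revlog, '_verify_revision', _skipextstored)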
@@ -1,2975 +1,2981 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
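
# For illustration (example values assumed, not part of the original file):
# the first index field packs the data offset into the high bits and the 16
# flag bits into the low bits:
#
#   q = offset_type(4096, REVIDX_ISCENSORED)
#   assert getoffset(q) == 4096
#   assert gettype(q) == REVIDX_ISCENSORED
#   offset_type(4096, 1)  # raises ValueError: bit 0 is not a known flag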


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
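# A v0 index record is thus a fixed 76 bytes: four 4-byte big-endian integers
# plus three 20-byte nodeids (indexformatv0.size == 76).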


class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = revlogutils.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
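        # v0 revlogs are never inline, so there is no chunk cache to return.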
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
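# Each "ng" (v1) record is a fixed 64 bytes: an 8-byte offset/flags word, six
# 4-byte integers, a 20-byte nodeid and 12 bytes of padding
# (indexformatng.size == 64). For rev 0, packentry() below overlays the
# 4-byte version header on the start of the first entry.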

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
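        # The bit trick above: for x > 0, (x & (x - 1)) == 0 exactly when x
        # is a power of two, e.g. 65536 & 65535 == 0 but 65537 & 65536 != 0.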

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self.index.clearcaches()

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
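        # REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS is the known-flag mask with
        # the ELLIPSIS bit cleared, so the test below passes exactly when no
        # flag other than ELLIPSIS is set on the revision.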
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
865
875
866 def ancestors(self, revs, stoprev=0, inclusive=False):
876 def ancestors(self, revs, stoprev=0, inclusive=False):
867 """Generate the ancestors of 'revs' in reverse revision order.
877 """Generate the ancestors of 'revs' in reverse revision order.
868 Does not generate revs lower than stoprev.
878 Does not generate revs lower than stoprev.
869
879
870 See the documentation for ancestor.lazyancestors for more details."""
880 See the documentation for ancestor.lazyancestors for more details."""
871
881
872 # first, make sure start revisions aren't filtered
882 # first, make sure start revisions aren't filtered
873 revs = list(revs)
883 revs = list(revs)
874 checkrev = self.node
884 checkrev = self.node
875 for r in revs:
885 for r in revs:
876 checkrev(r)
886 checkrev(r)
877 # and we're sure ancestors aren't filtered as well
887 # and we're sure ancestors aren't filtered as well
878
888
879 if rustancestor is not None:
889 if rustancestor is not None:
880 lazyancestors = rustancestor.LazyAncestors
890 lazyancestors = rustancestor.LazyAncestors
881 arg = self.index
891 arg = self.index
882 elif util.safehasattr(parsers, b'rustlazyancestors'):
892 elif util.safehasattr(parsers, b'rustlazyancestors'):
883 lazyancestors = ancestor.rustlazyancestors
893 lazyancestors = ancestor.rustlazyancestors
884 arg = self.index
894 arg = self.index
885 else:
895 else:
886 lazyancestors = ancestor.lazyancestors
896 lazyancestors = ancestor.lazyancestors
887 arg = self._uncheckedparentrevs
897 arg = self._uncheckedparentrevs
888 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
898 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
889
899
890 def descendants(self, revs):
900 def descendants(self, revs):
891 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
901 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
892
902
893 def findcommonmissing(self, common=None, heads=None):
903 def findcommonmissing(self, common=None, heads=None):
894 """Return a tuple of the ancestors of common and the ancestors of heads
904 """Return a tuple of the ancestors of common and the ancestors of heads
895 that are not ancestors of common. In revset terminology, we return the
905 that are not ancestors of common. In revset terminology, we return the
896 tuple:
906 tuple:
897
907
898 ::common, (::heads) - (::common)
908 ::common, (::heads) - (::common)
899
909
900 The list is sorted by revision number, meaning it is
910 The list is sorted by revision number, meaning it is
901 topologically sorted.
911 topologically sorted.
902
912
903 'heads' and 'common' are both lists of node IDs. If heads is
913 'heads' and 'common' are both lists of node IDs. If heads is
904 not supplied, uses all of the revlog's heads. If common is not
914 not supplied, uses all of the revlog's heads. If common is not
905 supplied, uses nullid."""
915 supplied, uses nullid."""
906 if common is None:
916 if common is None:
907 common = [nullid]
917 common = [nullid]
908 if heads is None:
918 if heads is None:
909 heads = self.heads()
919 heads = self.heads()
910
920
911 common = [self.rev(n) for n in common]
921 common = [self.rev(n) for n in common]
912 heads = [self.rev(n) for n in heads]
922 heads = [self.rev(n) for n in heads]
913
923
914 # we want the ancestors, but inclusive
924 # we want the ancestors, but inclusive
915 class lazyset(object):
925 class lazyset(object):
916 def __init__(self, lazyvalues):
926 def __init__(self, lazyvalues):
917 self.addedvalues = set()
927 self.addedvalues = set()
918 self.lazyvalues = lazyvalues
928 self.lazyvalues = lazyvalues
919
929
920 def __contains__(self, value):
930 def __contains__(self, value):
921 return value in self.addedvalues or value in self.lazyvalues
931 return value in self.addedvalues or value in self.lazyvalues
922
932
923 def __iter__(self):
933 def __iter__(self):
924 added = self.addedvalues
934 added = self.addedvalues
925 for r in added:
935 for r in added:
926 yield r
936 yield r
927 for r in self.lazyvalues:
937 for r in self.lazyvalues:
928 if not r in added:
938 if not r in added:
929 yield r
939 yield r
930
940
931 def add(self, value):
941 def add(self, value):
932 self.addedvalues.add(value)
942 self.addedvalues.add(value)
933
943
934 def update(self, values):
944 def update(self, values):
935 self.addedvalues.update(values)
945 self.addedvalues.update(values)
936
946
937 has = lazyset(self.ancestors(common))
947 has = lazyset(self.ancestors(common))
938 has.add(nullrev)
948 has.add(nullrev)
939 has.update(common)
949 has.update(common)
940
950
941 # take all ancestors from heads that aren't in has
951 # take all ancestors from heads that aren't in has
942 missing = set()
952 missing = set()
943 visit = collections.deque(r for r in heads if r not in has)
953 visit = collections.deque(r for r in heads if r not in has)
944 while visit:
954 while visit:
945 r = visit.popleft()
955 r = visit.popleft()
946 if r in missing:
956 if r in missing:
947 continue
957 continue
948 else:
958 else:
949 missing.add(r)
959 missing.add(r)
950 for p in self.parentrevs(r):
960 for p in self.parentrevs(r):
951 if p not in has:
961 if p not in has:
952 visit.append(p)
962 visit.append(p)
953 missing = list(missing)
963 missing = list(missing)
954 missing.sort()
964 missing.sort()
955 return has, [self.node(miss) for miss in missing]
965 return has, [self.node(miss) for miss in missing]
956
966
957 def incrementalmissingrevs(self, common=None):
967 def incrementalmissingrevs(self, common=None):
958 """Return an object that can be used to incrementally compute the
968 """Return an object that can be used to incrementally compute the
959 revision numbers of the ancestors of arbitrary sets that are not
969 revision numbers of the ancestors of arbitrary sets that are not
960 ancestors of common. This is an ancestor.incrementalmissingancestors
970 ancestors of common. This is an ancestor.incrementalmissingancestors
961 object.
971 object.
962
972
963 'common' is a list of revision numbers. If common is not supplied, uses
973 'common' is a list of revision numbers. If common is not supplied, uses
964 nullrev.
974 nullrev.
965 """
975 """
966 if common is None:
976 if common is None:
967 common = [nullrev]
977 common = [nullrev]
968
978
969 if rustancestor is not None:
979 if rustancestor is not None:
970 return rustancestor.MissingAncestors(self.index, common)
980 return rustancestor.MissingAncestors(self.index, common)
971 return ancestor.incrementalmissingancestors(self.parentrevs, common)
981 return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
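
    # Illustrative sketch (assumption, not from the original file): on a
    # linear history 0-1-2-3 where node(i) is the node of revision i,
    # findmissing(common=[node(1)], heads=[node(3)]) returns
    # [node(2), node(3)]: both are ancestors of a head (every node is an
    # ancestor of itself) and neither is an ancestor of revision 1.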

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
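
    # Worked sketch (assumption, not from the original file): for a DAG
    #
    #   0 --- 1 --- 2 --- 4
    #          \
    #           3
    #
    # nodesbetween(roots=[node(1)], heads=[node(4)]) would return
    # ([node(1), node(2), node(4)], [node(1)], [node(4)]): revision 3 is
    # a descendant of the root but not an ancestor of any head, so it is
    # excluded from 'nodes'.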

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]
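
    # Hedged usage sketch (names are hypothetical): with no arguments this
    # returns every childless node; constraining the walk looks like:
    #
    #   all_heads = rl.heads()
    #   branch_heads = rl.heads(start=basenode, stop=[fencenode])
    #
    # where `basenode` restricts heads to its descendants and revisions in
    # `stop` are treated as if they had no children.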

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
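
    # Sketch of the short-circuit logic above (not from the original
    # file): nullrev is an ancestor of everything, every revision is its
    # own ancestor, and since revision numbers are topologically ordered
    # a higher-numbered revision can never be an ancestor of a lower one.
    # Only the remaining case pays for a reachableroots() graph walk.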

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
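
    # Note on the tie-break above (assumption-level sketch): when several
    # common-ancestor heads are equally good, min() over the binary node
    # IDs picks one deterministically, so repeated calls always agree:
    #
    #   candidates = [b'\x02' * 20, b'\x01' * 20]
    #   winner = min(candidates)  # b'\x01' * 20, stable across runs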

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))
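
    # Lookup accepts several identifier forms; a hedged sketch of the
    # accepted inputs (the concrete values are illustrative only):
    #
    #   rl.lookup(0)              # integer revision number
    #   rl.lookup(b'0')           # str(revision number)
    #   rl.lookup(fullbinnode)    # 20-byte binary node
    #   rl.lookup(b'1de3a58e9e')  # unambiguous hex prefix
    #
    # Anything that matches nothing raises error.LookupError.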

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
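
    # Sketch of the intent (hypothetical values): given a node whose hex
    # form starts with b'1de3a5...', shortest() returns the fewest leading
    # hex digits that match only this node, never a pure-'f' string that
    # could be confused with the virtual working-directory id:
    #
    #   assert rl.shortest(somenode, minlength=4) == b'1de3'
    #
    # `somenode` and the digits above are made-up names for illustration.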

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
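
    # Worked example of the window arithmetic above (illustrative numbers,
    # assuming a power-of-two cache size): with cachesize=65536,
    # offset=70000 and length=100,
    #
    #   realoffset = 70000 & ~65535             -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536
    #              = 131072 - 65536             -> 65536
    #
    # so one aligned 64KiB window is read and cached, and the caller's 100
    # bytes are served from a buffer at startoffset = 70000 - 65536 = 4464.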

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is used, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
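
    # Sketch of the inline adjustment above (assumption-level): in an
    # inline revlog, index entries and data chunks are interleaved in one
    # file, so the data for revision r sits after (r + 1) index records.
    # With a hypothetical 64-byte index entry size, a chunk that the index
    # says starts at data byte 1000 for revision 2 really starts at
    #
    #   1000 + (2 + 1) * 64 = 1192
    #
    # in the combined file; the same shift is applied to the end offset.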

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
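
    # Interpretation sketch (not from the original file): the index stores
    # a base revision for each entry. A base equal to the revision itself
    # marks a full snapshot, reported as delta parent nullrev. With
    # generaldelta the stored base is the actual delta parent; in the
    # legacy layout deltas always chain against the immediately preceding
    # revision, hence rev - 1.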

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
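
    # Reasoning sketch for the recursive case (assumption-level): under
    # sparse-revlog a revision may delta against something other than its
    # parents. A delta against a parent is an ordinary delta, while a
    # delta against a non-parent counts as an intermediate snapshot only
    # if its own base is itself a snapshot, hence the recursion into
    # issnapshot(base).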

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata
1803
1813
1804 def _rawtext(self, node, rev, _df=None):
1814 def _rawtext(self, node, rev, _df=None):
1805 """return the possibly unvalidated rawtext for a revision
1815 """return the possibly unvalidated rawtext for a revision
1806
1816
1807 returns (rev, rawtext, validated)
1817 returns (rev, rawtext, validated)
1808 """
1818 """
1809
1819
1810 # revision in the cache (could be useful to apply delta)
1820 # revision in the cache (could be useful to apply delta)
1811 cachedrev = None
1821 cachedrev = None
1812 # An intermediate text to apply deltas to
1822 # An intermediate text to apply deltas to
1813 basetext = None
1823 basetext = None
1814
1824
1815 # Check if we have the entry in cache
1825 # Check if we have the entry in cache
1816 # The cache entry looks like (node, rev, rawtext)
1826 # The cache entry looks like (node, rev, rawtext)
1817 if self._revisioncache:
1827 if self._revisioncache:
1818 if self._revisioncache[0] == node:
1828 if self._revisioncache[0] == node:
1819 return (rev, self._revisioncache[2], True)
1829 return (rev, self._revisioncache[2], True)
1820 cachedrev = self._revisioncache[1]
1830 cachedrev = self._revisioncache[1]
1821
1831
1822 if rev is None:
1832 if rev is None:
1823 rev = self.rev(node)
1833 rev = self.rev(node)
1824
1834
1825 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1835 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1826 if stopped:
1836 if stopped:
1827 basetext = self._revisioncache[2]
1837 basetext = self._revisioncache[2]
1828
1838
1829 # drop cache to save memory, the caller is expected to
1839 # drop cache to save memory, the caller is expected to
1830 # update self._revisioncache after validating the text
1840 # update self._revisioncache after validating the text
1831 self._revisioncache = None
1841 self._revisioncache = None
1832
1842
1833 targetsize = None
1843 targetsize = None
1834 rawsize = self.index[rev][2]
1844 rawsize = self.index[rev][2]
1835 if 0 <= rawsize:
1845 if 0 <= rawsize:
1836 targetsize = 4 * rawsize
1846 targetsize = 4 * rawsize
1837
1847
1838 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1848 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1839 if basetext is None:
1849 if basetext is None:
1840 basetext = bytes(bins[0])
1850 basetext = bytes(bins[0])
1841 bins = bins[1:]
1851 bins = bins[1:]
1842
1852
1843 rawtext = mdiff.patches(basetext, bins)
1853 rawtext = mdiff.patches(basetext, bins)
1844 del basetext # let us have a chance to free memory early
1854 del basetext # let us have a chance to free memory early
1845 return (rev, rawtext, False)
1855 return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return an uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]
1853
1863
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

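    # For reference, a minimal sketch of what hashrevisionsha1() computes
    # (illustrative only, assuming plain hashlib semantics; the parents are
    # hashed in a fixed order so the result is independent of argument
    # order):
    #
    #   import hashlib
    #
    #   def sketch_hashrevision(text, p1, p2):
    #       a, b = sorted([p1, p2])   # fixed parent ordering
    #       s = hashlib.sha1(a)
    #       s.update(b)
    #       s.update(text)
    #       return s.digest()
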
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

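    # Typical usage, as a sketch (``rl`` stands in for a revlog instance):
    #
    #   text = rl.revision(somenode)   # validates via checkhash() internally
    #   rl.checkhash(text, somenode)   # explicit re-check; raises
    #                                  # error.RevlogError on mismatch
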
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

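    # For orientation, a sketch of the on-disk layouts involved: an inline
    # revlog interleaves revision data with index entries in the ``.i``
    # file; the conversion above rewrites it as a separate data file:
    #
    #   inline:     foo.i -> [entry0][data0][entry1][data1]...
    #   converted:  foo.i -> [entry0][entry1]...
    #               foo.d -> [data0][data1]...
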
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

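    # Example call, as a sketch (``rl`` and ``tr`` are placeholders for a
    # revlog instance and an open transaction):
    #
    #   node = rl.addrevision(b'file content', tr, linkrev, p1node, p2node)
    #
    # The node defaults to hash(text, p1, p2); adding content that is
    # already stored simply returns the existing node.
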
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

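    # Sketch of the return convention (``rl`` stands in for a revlog
    # instance):
    #
    #   header, payload = rl.compress(b'some revision text')
    #   chunk = header + payload
    #
    # header is b'' when the compressor embeds its own marker (e.g. zlib
    # chunks start with 'x'), and b'u' flags data stored uncompressed.
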
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

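    # Summary of the header dispatch above:
    #
    #   b'x'  -> zlib-compressed chunk
    #   b'\0' -> stored literally (the NUL byte is part of the data)
    #   b'u'  -> uncompressed; the one-byte marker is stripped
    #   other -> resolved through util.compengines.forrevlogheader()
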
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

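    # Layout of the index entry tuple appended above, for reference:
    #
    #   (offset_type(offset, flags),  # packed offset and flag bits
    #    deltainfo.deltalen,          # length of the stored (delta) chunk
    #    textlen,                     # length of the full text
    #    deltainfo.base,              # revision the delta applies against
    #    link, p1r, p2r,              # linkrev and parent revisions
    #    node)                        # node hash
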
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

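    # Each item consumed from ``deltas`` above is a 7-tuple; a sketch of a
    # caller (``cl`` standing in for a changelog used as the linkrev
    # mapper, which is the typical changegroup setup):
    #
    #   deltas = [(node, p1, p2, linknode, deltabase, delta, flags), ...]
    #   added = rl.addgroup(deltas, cl.rev, tr)
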
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()

        del self.index[rev:-1]

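    # Truncation offsets, in short: for a split revlog the data file is cut
    # at start(rev) and the index at rev * entry-size; for an inline revlog
    # both live in the same file, so the two offsets are summed above.
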
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

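    # Sketch of a consistency probe using the return convention above:
    #
    #   dd, di = rl.checksize()
    #   if (dd, di) != (0, 0):
    #       # dd/di excess bytes indicate a truncated or corrupt revlog
    #       report(rl.indexfile, dd, di)
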
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If None, the destination revlog's current setting is kept.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

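    # A sketch of a ``sidedatacompanion`` hook (the key names here are
    # hypothetical placeholders, not defined by this module):
    #
    #   def companion(srcrevlog, rev):
    #       dropall = False
    #       filterout = {OBSOLETE_SIDEDATA_KEY}    # keys to remove
    #       update = {NEW_SIDEDATA_KEY: b'value'}  # keys to add/replace
    #       return dropall, filterout, update
    #
    #   srcrevlog.clone(tr, destrevlog, sidedatacompanion=companion)
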
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

2755 def censorrevision(self, tr, censornode, tombstone=b''):
2765 def censorrevision(self, tr, censornode, tombstone=b''):
2756 if (self.version & 0xFFFF) == REVLOGV0:
2766 if (self.version & 0xFFFF) == REVLOGV0:
2757 raise error.RevlogError(
2767 raise error.RevlogError(
2758 _(b'cannot censor with version %d revlogs') % self.version
2768 _(b'cannot censor with version %d revlogs') % self.version
2759 )
2769 )
2760
2770
2761 censorrev = self.rev(censornode)
2771 censorrev = self.rev(censornode)
2762 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2772 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2763
2773
2764 if len(tombstone) > self.rawsize(censorrev):
2774 if len(tombstone) > self.rawsize(censorrev):
2765 raise error.Abort(
2775 raise error.Abort(
2766 _(b'censor tombstone must be no longer than censored data')
2776 _(b'censor tombstone must be no longer than censored data')
2767 )
2777 )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()
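
    # Usage sketch (an assumption, not shown in this changeset): censoring is
    # expected to run inside an open transaction, so the tr.addbackup() and
    # rename-on-close dance above takes effect safely, roughly:
    #
    #   with repo.lock(), repo.transaction(b'censor') as tr:
    #       repo.file(b'path/to/file').censorrevision(tr, censornode)
    #
    # The names repo and censornode, and the enclosing method name, are
    # illustrative assumptions here.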

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
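
            # A concrete instance of the arithmetic above (illustrative, not
            # original code): for a renamed file whose rawtext is
            #   b'\x01\ncopy: a\ncopyrev: <40 hex digits>\n\x01\nhello\n'
            # the metadata block spans both b'\x01\n' markers, so with LM the
            # length of that block, len(read()) == L2 - LM == len(b'hello\n'),
            # while rawsize() and len(rawtext) both report the full L1 == L2.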

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

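                # (Sketch, an assumption rather than part of this diff.)
                # Because _verify_revision is a module-level function rather
                # than inline code, an extension can intercept the content
                # check, e.g. via
                #   extensions.wrapfunction(revlog, '_verify_revision', w)
                # where w(orig, rl, skipflags, state, node) decides whether
                # to call orig().
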
                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
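
    # Usage sketch (hypothetical values, not original code): callers request
    # only the fields they need, and keys for options left False are simply
    # absent from the result:
    #
    #   rl.storageinfo(revisionscount=True, trackedsize=True)
    #   -> {b'revisionscount': 42, b'trackedsize': 123456}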