revlog: remove the, now unused, `revlog._nodepos` attribute...
marmoute
r43980:f5991fd7 default
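The diff below drops the now-unused `_nodepos` cache from `revlog.__init__`. As the deprecation warnings visible further down in this file suggest, node-to-rev lookups go through the index object's own API rather than cached attributes. A minimal usage sketch (the revlog instance name `rl` is hypothetical, everything else appears in the code below):

    # Hypothetical sketch: resolve nodes through the index, not `_nodepos`.
    rev = rl.index.rev(node)              # node -> rev; raises RevlogError if unknown
    rev_or_none = rl.index.get_rev(node)  # node -> rev, or None if unknown
    known = rl.index.has_node(node)       # membership test
    node = rl.node(rev)                   # rev -> node, read from the index entry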
@@ -1,2982 +1,2975 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 from __future__ import absolute_import

 import collections
 import contextlib
 import errno
 import io
 import os
 import struct
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullhex,
     nullid,
     nullrev,
     short,
     wdirfilenodeids,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .pycompat import getattr
 from .revlogutils.constants import (
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
     REVIDX_SIDEDATA,
 )
 from .thirdparty import attr
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     revlogutils,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     flagutil,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )

 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
 REVLOGV0
 REVLOGV1
 REVLOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_SIDEDATA
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS

 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')

 # Aliased for performance.
 _zlibdecompress = zlib.decompress

 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576

 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False, {}


 def ellipsiswriteprocessor(rl, text, sidedata):
     return text, False


 def ellipsisrawprocessor(rl, text):
     return False


 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )


 def getoffset(q):
     return int(q >> 16)


 def gettype(q):
     return int(q & 0xFFFF)


 def offset_type(offset, type):
     if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError(b'unknown revlog index flags')
     return int(int(offset) << 16 | type)


 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node: expected hash of the revision
     p1, p2: parent revs of the revision
     btext: built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags: flags associated to the revision storage

     One of btext[0] or cachedelta must be set.
     """

     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()


 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta(object):
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     linknode = attr.ib(default=None)


 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)


 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(b">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack


 class revlogoldindex(list):
     @property
     def nodemap(self):
         msg = "index.nodemap is deprecated, " "use index.[has_node|rev|get_rev]"
         util.nouideprecwarn(msg, b'5.3', stacklevel=2)
         return self._nodemap

     @util.propertycache
     def _nodemap(self):
         nodemap = revlogutils.NodeMap({nullid: nullrev})
         for r in range(0, len(self)):
             n = self[r][7]
             nodemap[n] = r
         return nodemap

     def has_node(self, node):
         """return True if the node exist in the index"""
         return node in self._nodemap

     def rev(self, node):
         """return a revision for a node

         If the node is unknown, raise a RevlogError"""
         return self._nodemap[node]

     def get_rev(self, node):
         """return a revision for a node

         If the node is unknown, return None"""
         return self._nodemap.get(node)

     def append(self, tup):
         self._nodemap[tup[7]] = len(self)
         super(revlogoldindex, self).append(tup)

     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
             raise ValueError(b"deleting slices only supports a:-1 with step 1")
         for r in pycompat.xrange(i.start, len(self)):
             del self._nodemap[self[r][7]]
         super(revlogoldindex, self).__delitem__(i)

     def clearcaches(self):
         self.__dict__.pop('_nodemap', None)

     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         return list.__getitem__(self, i)


 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size

     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = revlogutils.NodeMap({nullid: nullrev})
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off : off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (
                 offset_type(e[0], 0),
                 e[1],
                 -1,
                 e[2],
                 e[3],
                 nodemap.get(e[4], nullrev),
                 nodemap.get(e[5], nullrev),
                 e[6],
             )
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         index = revlogoldindex(index)
         return index, None

     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise error.RevlogError(
                 _(b'index entry flags need revlog version 1')
             )
         e2 = (
             getoffset(entry[0]),
             entry[1],
             entry[3],
             entry[4],
             node(entry[5]),
             node(entry[6]),
             entry[7],
         )
         return indexformatv0_pack(*e2)


 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(b">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(b">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack

 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF


 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size

     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, cache

     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p


 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.

     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.

     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.

     If censorable is True, the revlog can have censored revisions.

     If `upperboundcomp` is not None, this is the expected maximal gain from
     compression for the data content.
     """

     _flagserrorclass = error.RevlogError

     def __init__(
         self,
         opener,
         indexfile,
         datafile=None,
         checkambig=False,
         mmaplargeindex=False,
         censorable=False,
         upperboundcomp=None,
     ):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.

         """
         self.upperboundcomp = upperboundcomp
         self.indexfile = indexfile
         self.datafile = datafile or (indexfile[:-2] + b".d")
         self.opener = opener
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         self._mmaplargeindex = mmaplargeindex
         self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._revisioncache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, b'')
         # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
         self.index = None
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
-        self._nodepos = None
         self._compengine = b'zlib'
         self._compengineopts = {}
         self._maxdeltachainspan = -1
         self._withsparseread = False
         self._sparserevlog = False
         self._srdensitythreshold = 0.50
         self._srmingapsize = 262144

         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)

         # 2-tuple of file handles being used for active writing.
         self._writinghandles = None

         self._loadindex()

     def _loadindex(self):
         mmapindexthreshold = None
         opts = self.opener.options

         if b'revlogv2' in opts:
             newversionflags = REVLOGV2 | FLAG_INLINE_DATA
         elif b'revlogv1' in opts:
             newversionflags = REVLOGV1 | FLAG_INLINE_DATA
             if b'generaldelta' in opts:
                 newversionflags |= FLAG_GENERALDELTA
         elif b'revlogv0' in self.opener.options:
             newversionflags = REVLOGV0
         else:
             newversionflags = REVLOG_DEFAULT_VERSION

         if b'chunkcachesize' in opts:
             self._chunkcachesize = opts[b'chunkcachesize']
         if b'maxchainlen' in opts:
             self._maxchainlen = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
             self._deltabothparents = opts[b'deltabothparents']
         self._lazydelta = bool(opts.get(b'lazydelta', True))
         self._lazydeltabase = False
         if self._lazydelta:
             self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
         if b'compengine' in opts:
             self._compengine = opts[b'compengine']
         if b'zlib.level' in opts:
             self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
         if b'zstd.level' in opts:
             self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
         if b'maxdeltachainspan' in opts:
             self._maxdeltachainspan = opts[b'maxdeltachainspan']
         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
             mmapindexthreshold = opts[b'mmapindexthreshold']
         self.hassidedata = bool(opts.get(b'side-data', False))
         if self.hassidedata:
             self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
         withsparseread = bool(opts.get(b'with-sparse-read', False))
         # sparse-revlog forces sparse-read
         self._withsparseread = self._sparserevlog or withsparseread
         if b'sparse-read-density-threshold' in opts:
             self._srdensitythreshold = opts[b'sparse-read-density-threshold']
         if b'sparse-read-min-gap-size' in opts:
             self._srmingapsize = opts[b'sparse-read-min-gap-size']
         if opts.get(b'enableellipsis'):
             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

         # revlog v0 doesn't have flag processors
         for flag, processor in pycompat.iteritems(
             opts.get(b'flagprocessors', {})
         ):
             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

         if self._chunkcachesize <= 0:
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not greater than 0')
                 % self._chunkcachesize
             )
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not a power of 2')
                 % self._chunkcachesize
             )

         indexdata = b''
         self._initempty = True
         try:
             with self._indexfp() as f:
                 if (
                     mmapindexthreshold is not None
                     and self.opener.fstat(f).st_size >= mmapindexthreshold
                 ):
                     # TODO: should .close() to release resources without
                     # relying on Python GC
                     indexdata = util.buffer(util.mmapread(f))
                 else:
                     indexdata = f.read()
             if len(indexdata) > 0:
                 versionflags = versionformat_unpack(indexdata[:4])[0]
                 self._initempty = False
             else:
                 versionflags = newversionflags
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise

             versionflags = newversionflags

         self.version = versionflags

         flags = versionflags & ~0xFFFF
         fmt = versionflags & 0xFFFF

         if fmt == REVLOGV0:
             if flags:
                 raise error.RevlogError(
                     _(b'unknown flags (%#04x) in version %d revlog %s')
                     % (flags >> 16, fmt, self.indexfile)
                 )

             self._inline = False
             self._generaldelta = False

         elif fmt == REVLOGV1:
             if flags & ~REVLOGV1_FLAGS:
                 raise error.RevlogError(
                     _(b'unknown flags (%#04x) in version %d revlog %s')
                     % (flags >> 16, fmt, self.indexfile)
                 )

             self._inline = versionflags & FLAG_INLINE_DATA
             self._generaldelta = versionflags & FLAG_GENERALDELTA

         elif fmt == REVLOGV2:
             if flags & ~REVLOGV2_FLAGS:
                 raise error.RevlogError(
                     _(b'unknown flags (%#04x) in version %d revlog %s')
                     % (flags >> 16, fmt, self.indexfile)
                 )

             self._inline = versionflags & FLAG_INLINE_DATA
             # generaldelta implied by version 2 revlogs.
             self._generaldelta = True

         else:
             raise error.RevlogError(
                 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
             )
         # sparse-revlog can't be on without general-delta (issue6056)
         if not self._generaldelta:
             self._sparserevlog = False

         self._storedeltachains = True

         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         try:
             d = self._io.parseindex(indexdata, self._inline)
         except (ValueError, IndexError):
             raise error.RevlogError(
                 _(b"index %s is corrupted") % self.indexfile
             )
         self.index, self._chunkcache = d
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = {}
         # revlog header -> revlog compressor
         self._decompressors = {}

     @util.propertycache
     def _compressor(self):
         engine = util.compengines[self._compengine]
         return engine.revlogcompressor(self._compengineopts)

     def _indexfp(self, mode=b'r'):
         """file object for the revlog's index file"""
         args = {'mode': mode}
         if mode != b'r':
             args['checkambig'] = self._checkambig
         if mode == b'w':
             args['atomictemp'] = True
         return self.opener(self.indexfile, **args)

     def _datafp(self, mode=b'r'):
         """file object for the revlog's data file"""
         return self.opener(self.datafile, mode=mode)

     @contextlib.contextmanager
     def _datareadfp(self, existingfp=None):
         """file object suitable to read data"""
         # Use explicit file handle, if given.
         if existingfp is not None:
             yield existingfp

         # Use a file handle being actively used for writes, if available.
         # There is some danger to doing this because reads will seek the
         # file. However, _writeentry() performs a SEEK_END before all writes,
         # so we should be safe.
         elif self._writinghandles:
             if self._inline:
                 yield self._writinghandles[0]
             else:
                 yield self._writinghandles[1]

         # Otherwise open a new file handle.
         else:
             if self._inline:
                 func = self._indexfp
             else:
                 func = self._datafp
             with func() as fp:
                 yield fp

     def tiprev(self):
         return len(self.index) - 1

     def tip(self):
         return self.node(self.tiprev())

     def __contains__(self, rev):
         return 0 <= rev < len(self)

     def __len__(self):
         return len(self.index)

     def __iter__(self):
         return iter(pycompat.xrange(len(self)))

     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         return storageutil.iterrevs(len(self), start=start, stop=stop)

     @property
     def nodemap(self):
         msg = (
             "revlog.nodemap is deprecated, "
             "use revlog.index.[has_node|rev|get_rev]"
         )
         util.nouideprecwarn(msg, b'5.3', stacklevel=2)
         return self.index.nodemap

     @property
     def _nodecache(self):
         msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
         util.nouideprecwarn(msg, b'5.3', stacklevel=2)
         return self.index.nodemap

     def hasnode(self, node):
         try:
             self.rev(node)
             return True
         except KeyError:
             return False

     def candelta(self, baserev, rev):
         """whether two revisions (baserev, rev) can be delta-ed or not"""
         # Disable delta if either rev requires a content-changing flag
         # processor (ex. LFS). This is because such flag processor can alter
         # the rawtext content that the delta will be based on, and two clients
         # could have a same revlog node with different flags (i.e. different
         # rawtext contents) and the delta could be incompatible.
         if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
             self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
         ):
             return False
         return True

     def clearcaches(self):
         self._revisioncache = None
         self._chainbasecache.clear()
         self._chunkcache = (0, b'')
         self._pcache = {}
         self.index.clearcaches()

     def rev(self, node):
         try:
             return self.index.rev(node)
         except TypeError:
             raise
         except error.RevlogError:
             # parsers.c radix tree lookup failed
             if node == wdirid or node in wdirfilenodeids:
                 raise error.WdirUnsupported
             raise error.LookupError(node, self.indexfile, _(b'no node'))

     # Accessors for index entries.

     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
     # are flags.
     def start(self, rev):
         return int(self.index[rev][0] >> 16)

     def flags(self, rev):
         return self.index[rev][0] & 0xFFFF

     def length(self, rev):
         return self.index[rev][1]

     def rawsize(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l

         t = self.rawdata(rev)
         return len(t)

     def size(self, rev):
         """length of non-raw text (processed by a "read" flag processor)"""
         # fast path: if no "read" flag processor could change the content,
         # size is rawsize. note: ELLIPSIS is known to not change the content.
         flags = self.flags(rev)
         if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
             return self.rawsize(rev)

         return len(self.revision(rev, raw=False))

     def chainbase(self, rev):
         base = self._chainbasecache.get(rev)
         if base is not None:
             return base

         index = self.index
         iterrev = rev
         base = index[iterrev][3]
         while base != iterrev:
             iterrev = base
             base = index[iterrev][3]

         self._chainbasecache[rev] = base
         return base

     def linkrev(self, rev):
         return self.index[rev][4]

     def parentrevs(self, rev):
         try:
             entry = self.index[rev]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise

         return entry[5], entry[6]

     # fast parentrevs(rev) where rev isn't filtered
     _uncheckedparentrevs = parentrevs

     def node(self, rev):
         try:
             return self.index[rev][7]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise

     # Derived from index values.

     def end(self, rev):
         return self.start(rev) + self.length(rev)

     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
         return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

     def chainlen(self, rev):
         return self._chaininfo(rev)[0]

     def _chaininfo(self, rev):
         chaininfocache = self._chaininfocache
         if rev in chaininfocache:
             return chaininfocache[rev]
         index = self.index
         generaldelta = self._generaldelta
         iterrev = rev
         e = index[iterrev]
         clen = 0
         compresseddeltalen = 0
         while iterrev != e[3]:
             clen += 1
             compresseddeltalen += e[1]
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             if iterrev in chaininfocache:
                 t = chaininfocache[iterrev]
                 clen += t[0]
                 compresseddeltalen += t[1]
                 break
             e = index[iterrev]
         else:
             # Add text length of base since decompressing that also takes
             # work. For cache hits the length is already included.
             compresseddeltalen += e[1]
         r = (clen, compresseddeltalen)
         chaininfocache[rev] = r
         return r

     def _deltachain(self, rev, stoprev=None):
         """Obtain the delta chain for a revision.

         ``stoprev`` specifies a revision to stop at. If not specified, we
         stop at the base of the chain.

         Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
         revs in ascending order and ``stopped`` is a bool indicating whether
         ``stoprev`` was hit.
         """
         # Try C implementation.
         try:
             return self.index.deltachain(rev, stoprev, self._generaldelta)
         except AttributeError:
             pass

         chain = []

         # Alias to prevent attribute lookup in tight loop.
         index = self.index
         generaldelta = self._generaldelta

         iterrev = rev
         e = index[iterrev]
         while iterrev != e[3] and iterrev != stoprev:
             chain.append(iterrev)
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             e = index[iterrev]

         if iterrev == stoprev:
             stopped = True
         else:
             chain.append(iterrev)
             stopped = False

         chain.reverse()
         return chain, stopped

     def ancestors(self, revs, stoprev=0, inclusive=False):
         """Generate the ancestors of 'revs' in reverse revision order.
         Does not generate revs lower than stoprev.

         See the documentation for ancestor.lazyancestors for more details."""

         # first, make sure start revisions aren't filtered
         revs = list(revs)
         checkrev = self.node
         for r in revs:
             checkrev(r)
         # and we're sure ancestors aren't filtered as well

         if rustancestor is not None:
             lazyancestors = rustancestor.LazyAncestors
             arg = self.index
         elif util.safehasattr(parsers, b'rustlazyancestors'):
             lazyancestors = ancestor.rustlazyancestors
             arg = self.index
         else:
             lazyancestors = ancestor.lazyancestors
             arg = self._uncheckedparentrevs
         return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

     def descendants(self, revs):
         return dagop.descendantrevs(revs, self.revs, self.parentrevs)

     def findcommonmissing(self, common=None, heads=None):
         """Return a tuple of the ancestors of common and the ancestors of heads
         that are not ancestors of common. In revset terminology, we return the
         tuple:

           ::common, (::heads) - (::common)

         The list is sorted by revision number, meaning it is
         topologically sorted.

         'heads' and 'common' are both lists of node IDs. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()

         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]

         # we want the ancestors, but inclusive
         class lazyset(object):
             def __init__(self, lazyvalues):
                 self.addedvalues = set()
                 self.lazyvalues = lazyvalues

             def __contains__(self, value):
                 return value in self.addedvalues or value in self.lazyvalues

             def __iter__(self):
                 added = self.addedvalues
                 for r in added:
                     yield r
                 for r in self.lazyvalues:
                     if not r in added:
                         yield r

             def add(self, value):
                 self.addedvalues.add(value)

             def update(self, values):
                 self.addedvalues.update(values)

         has = lazyset(self.ancestors(common))
         has.add(nullrev)
         has.update(common)

         # take all ancestors from heads that aren't in has
         missing = set()
         visit = collections.deque(r for r in heads if r not in has)
         while visit:
             r = visit.popleft()
             if r in missing:
                 continue
             else:
                 missing.add(r)
                 for p in self.parentrevs(r):
                     if p not in has:
                         visit.append(p)
         missing = list(missing)
         missing.sort()
         return has, [self.node(miss) for miss in missing]

958 def incrementalmissingrevs(self, common=None):
957 def incrementalmissingrevs(self, common=None):
959 """Return an object that can be used to incrementally compute the
958 """Return an object that can be used to incrementally compute the
960 revision numbers of the ancestors of arbitrary sets that are not
959 revision numbers of the ancestors of arbitrary sets that are not
961 ancestors of common. This is an ancestor.incrementalmissingancestors
960 ancestors of common. This is an ancestor.incrementalmissingancestors
962 object.
961 object.
963
962
964 'common' is a list of revision numbers. If common is not supplied, uses
963 'common' is a list of revision numbers. If common is not supplied, uses
965 nullrev.
964 nullrev.
966 """
965 """
967 if common is None:
966 if common is None:
968 common = [nullrev]
967 common = [nullrev]
969
968
970 if rustancestor is not None:
969 if rustancestor is not None:
971 return rustancestor.MissingAncestors(self.index, common)
970 return rustancestor.MissingAncestors(self.index, common)
972 return ancestor.incrementalmissingancestors(self.parentrevs, common)
971 return ancestor.incrementalmissingancestors(self.parentrevs, common)
973
972
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

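    # Hedged usage sketch (hypothetical `rl`, `c_node`, `h_node`; not part of
    # the original code): both helpers perform the same incremental walk, one
    # in revision-number space and one in node space:
    #
    #   rl.findmissingrevs(common=[5], heads=[9])        # sorted rev numbers
    #   rl.findmissing(common=[c_node], heads=[h_node])  # same set, as nodes
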
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

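    # Hedged usage sketch (hypothetical `rl`, `root_node`, `head_node`; not
    # part of the original code):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([root_node], [head_node])
    #   # 'nodes' walks root_node..head_node in topological (rev) order and
    #   # includes both endpoints when they are reachable from each other.
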
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

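    # Worked example of the ishead bookkeeping above (illustrative only):
    # with revisions 0 <- 1 <- 2 and 0 <- 3, every rev is first marked 1,
    # then visiting 1, 2 and 3 clears their parents (0, 1, 0), leaving
    # ishead == [0, 0, 1, 1, 0] and a result of [2, 3]. The extra slot at
    # the end harmlessly absorbs writes for nullrev (-1) parents.
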
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

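    # Sketch of the fast-path reasoning above (illustrative only): a > b can
    # never hold for an ancestor because revision numbers form a topological
    # order, and the general case asks reachableroots whether walking b's
    # ancestry down to minroot a ever reaches the root set {a}:
    #
    #   rl.isancestorrev(2, 5)  # True iff rev 2 is reachable from rev 5
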
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

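    # Resolution-order sketch (illustrative; `rl` and `node` are hypothetical
    # and not part of the original code):
    #
    #   rl.lookup(5)        # exact integer rev, handled by _match
    #   rl.lookup(b'5')     # str(rev), also _match
    #   rl.lookup(node)     # 20-byte binary node, also _match
    #   rl.lookup(b'1f0e')  # hex prefix, falls through to _partialmatch
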
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

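    # Illustrative note: shortest() probes hex prefixes of increasing length
    # starting at minlength, so a node that is unique in its first hex digit
    # can come back as a single character; callers that want longer, more
    # stable prefixes pass a larger minlength.
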
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

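    # Worked example of the window rounding above (illustrative only), using
    # a cachesize of 64 KiB (the masks require a power of two):
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                      # -> 65536
    #   reallength = ((70100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so one aligned 64 KiB window is read and cached around the request.
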
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

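    # Illustrative note on the three branches above: an index entry whose
    # base field equals its own rev is a full snapshot (delta parent is
    # nullrev); with generaldelta the base field names the delta parent
    # directly; in the legacy layout deltas always chain against rev - 1,
    # so e.g. rl.deltaparent(7) would be 6 for a non-snapshot revision on a
    # non-generaldelta revlog (hypothetical `rl`, not original code).
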
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

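    # Reconstruction-flow sketch (illustrative only): for a delta chain
    # base -> d1 -> d2 -> rev, _deltachain() returns [base, d1, d2, rev],
    # _chunks() decompresses each entry, and mdiff.patches() folds the
    # deltas onto the base text:
    #
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # When the cached revision sits inside the chain, the walk stops there
    # and the cached rawtext is used as the base instead.
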
1848 def rawdata(self, nodeorrev, _df=None):
1847 def rawdata(self, nodeorrev, _df=None):
1849 """return an uncompressed raw data of a given node or revision number.
1848 """return an uncompressed raw data of a given node or revision number.
1850
1849
1851 _df - an existing file handle to read from. (internal-only)
1850 _df - an existing file handle to read from. (internal-only)
1852 """
1851 """
1853 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1852 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1854
1853
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

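    # For reference, the default hash above is the SHA-1 of the two parent
    # nodes (in sorted order) followed by the revision text; roughly, and
    # assuming hashlib semantics:
    #
    #   s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    #   s.update(text)
    #   node = s.digest()
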
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

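    # After the conversion above, revision data lives in a separate ``.d``
    # file and the rewritten ``.i`` file holds index entries only; clearing
    # the FLAG_INLINE_DATA bit in self.version records that layout on disk.
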
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

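    # Round-trip sketch for the header scheme above, assuming ``rl`` is an
    # open revlog instance:
    #
    #   header, payload = rl.compress(b'some revision text')
    #   chunk = header + payload            # what gets written to disk
    #   assert rl.decompress(chunk) == b'some revision text'
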
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

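    # Header byte dispatch used above (first byte of a stored chunk):
    #
    #   b'x'  -> zlib-compressed payload
    #   b'\0' -> raw data stored as-is
    #   b'u'  -> uncompressed data, with the header byte stripped
    #   other -> resolved through util.compengines.forrevlogheader()
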
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

-        # Reset the pure node cache start lookup offset to account for new
-        # revision.
-        if self._nodepos is not None:
-            self._nodepos = curr
-
        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

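    # Layout of the index entry tuple built in _addrevision, for reference:
    #
    #   e[0] -> offset and flags packed into one integer (offset_type)
    #   e[1] -> length of the stored (possibly delta) chunk
    #   e[2] -> length of the full rawtext
    #   e[3] -> base revision of the delta chain
    #   e[4] -> linkrev
    #   e[5] -> first parent revision number
    #   e[6] -> second parent revision number
    #   e[7] -> node id
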
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

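    # Each item consumed from ``deltas`` above is a 7-tuple:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where ``delta`` is a binary patch against the revision identified by
    # ``deltabase``, and ``linknode`` is mapped to a local linkrev through
    # the supplied ``linkmapper``.
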
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

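    # strip() below uses getstrippoint() to translate a minimum linkrev into
    # a revision cutoff, roughly:
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #
    # after which everything from ``rev`` onward is truncated.
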
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()

        del self.index[rev:-1]
-        self._nodepos = None

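    # Truncation offsets above, for illustration: with the 64-byte v1 index
    # entries (self._io.size), stripping at rev N cuts a non-inline index
    # back to N * 64 bytes and the data file back to start(N); an inline
    # revlog is cut at start(N) + N * 64, since data chunks are interleaved
    # with the index entries.
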
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

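    # Reading the result: (0, 0) is a healthy revlog; e.g. (0, 32) would mean
    # the index file carries 32 trailing bytes that do not form a complete
    # index entry.
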
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. By default, the current default is used.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

        (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

        (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

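    # A minimal sidedatacompanion sketch matching the contract documented in
    # clone() above (SD_HYPOTHETICAL is a made-up sidedata key):
    #
    #   def companion(srcrevlog, rev):
    #       # keep all sidedata except one key, add nothing
    #       return (False, {SD_HYPOTHETICAL}, {})
    #
    #   srcrevlog.clone(tr, destrevlog, sidedatacompanion=companion)
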
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

2762 def censorrevision(self, tr, censornode, tombstone=b''):
2755 def censorrevision(self, tr, censornode, tombstone=b''):
2763 if (self.version & 0xFFFF) == REVLOGV0:
2756 if (self.version & 0xFFFF) == REVLOGV0:
2764 raise error.RevlogError(
2757 raise error.RevlogError(
2765 _(b'cannot censor with version %d revlogs') % self.version
2758 _(b'cannot censor with version %d revlogs') % self.version
2766 )
2759 )
2767
2760
2768 censorrev = self.rev(censornode)
2761 censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

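The copy-and-swap strategy above is normally reached through the filelog
layer rather than by calling the revlog directly. A minimal sketch of
driving it, assuming a standard Mercurial API session; the repository
path, file name, revision choice, and tombstone text are all hypothetical:

    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'/path/to/repo')
    fl = repo.file(b'secret.txt')  # filelog backed by a revlog
    censornode = fl.node(0)  # revision to replace with a tombstone
    with repo.lock(), repo.transaction(b'censor') as tr:
        # copies every revision into a .tmpcensored revlog, substituting
        # the tombstone for censornode, backs up the originals to tr, and
        # renames the new files into place
        fl.censorrevision(tr, censornode, tombstone=b'removed by court order')

Note that the "replace the revlogs on transaction close" comment describes
rollback behavior: the rename happens at the end of the method, and the
transaction backups make it reversible if the transaction aborts.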
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            # common: the most common case
            # rename: with a rename
            # meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            # ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).
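            #
            # As a concrete (hypothetical, not from this file) illustration
            # of LM: a rename stores rawtext of the form
            #     b'\x01\ncopy: a\ncopyrev: <40 hex digits>\n\x01\nDATA'
            # LM covers everything up to and including the second b'\x01\n',
            # so len(read()) == L2 - LM, while rawsize() and len(rawtext)
            # both still report the full L2.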

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state[b'skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

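Callers drive this generator with a mutable ``state`` dict. A minimal
sketch of consuming it, assuming the hypothetical filelog ``fl`` from the
earlier example; the keys shown mirror the ones the method reads above,
and the values are illustrative:

    state = {
        b'expectedversion': 1,  # REVLOGV1
        b'erroroncensored': True,  # report censored data as an error
        b'skipflags': 0,  # hash-check every revision
    }
    for problem in fl._revlog.verifyintegrity(state):
        # each item is a revlogproblem with .warning/.error/.node fields
        msg = problem.error or problem.warning
        print(msg.decode('utf-8', 'replace'))
    # nodes whose content could not be checked end up in state[b'skipread']
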
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
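
Each keyword flag opts into exactly one key of the returned dict, so
callers pay only for the statistics they request. A short sketch, again
using the hypothetical ``fl`` from above:

    info = fl._revlog.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    print(info[b'revisionscount'])  # number of revisions in this revlog
    print(info[b'trackedsize'])  # sum of rawsize() across all revisions
    print(info[b'storedsize'])  # on-disk bytes for the index + data files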